## Connect to EMC and fetch data

In [None]:
import requests

# SOAP endpoint that the request payload below is POSTed to.
url = "https://www.emc.nemsdatasvc.wsi.emcsg.com:9534/nemsdsvc/CorpWebSiteDataReports"

# SOAP envelope for the RealTimePriceDataWebService operation. The single "{}"
# placeholder is filled in with str.format() and becomes the value of the
# "Date" report parameter.
payload_template = """
<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:cor="http://com/emc/nems/wsd/webservices/reports/corpdata" xmlns:java="java:com.emc.nems.wsd.ui.beans.reports">
<soapenv:Header/>
<soapenv:Body>
    <cor:RealTimePriceDataWebService>
        <cor:reportBean>
            <!--Zero or more repetitions:-->
            <java:ReportBean>
                <java:ParamName>Date</java:ParamName>
                <java:ParamValue>{}</java:ParamValue>
            </java:ReportBean>
        </cor:reportBean>
    </cor:RealTimePriceDataWebService>
</soapenv:Body>
</soapenv:Envelope>
"""
# HTTP headers sent with the request.
# NOTE(review): the Authorization and Cookie values are hard-coded in source.
# Consider loading them from the environment / .env file instead, and rotating
# any credential that has already been committed.
headers = {
  'Content-Type': 'text/xml',
  'Accept-Charset': 'UTF-8',
  'Authorization': 'Basic Y2hlZWtlb25nYW5nOlNEQ3NkYzEyMzQ=',
  'Cookie': 'TS01bf107a=01c689c3c12dccd36dbcd5621c75d974db15318a93cb50165f27d8c3929d1a50fe89c682b5611fc77fbff36507d8c92677c9425edc'
}
# NOTE(review): this path is not referenced by the other visible code (the
# request loop uses './nems2024.pem') - confirm whether it is still needed.
cert_file = "./nems2024privatekey.pfx"


## Define runtime

In [None]:
from datetime import datetime, timedelta

# Switch for scheduled execution: when True, run as a regular routine.
repeated = False

# Step used to move from one date to the next.
delta = timedelta(days=1)

# Bounds of the date range to fetch, parsed from "DD-Mon-YYYY" text.
start_date, end_date = (
    datetime.strptime(text, "%d-%b-%Y")
    for text in ("01-Jan-2024", "31-Mar-2024")
)

In [4]:
import pandas as pd
import xml.etree.ElementTree as ET
import html

def xml2Df(data):
    """Convert a RealTimePrice SOAP response into a pandas DataFrame.

    The SOAP body is expected to hold an ``<m:return>`` element whose text is
    an escaped XML document. That embedded document is parsed and every
    ``<RealTimePrice>`` element in it becomes one row.

    Args:
        data: The SOAP response as an XML string.

    Returns:
        A DataFrame with one row per ``<RealTimePrice>`` element and a fixed
        set of columns. Values are the raw element texts (strings); a child
        element that is absent or has no text yields ``None``. If there are
        no ``<RealTimePrice>`` elements the frame is empty but still has all
        columns.

    Raises:
        ValueError: If the response has no ``<m:return>`` element or that
            element is empty (for example when the service answered with a
            fault instead of data).
        xml.etree.ElementTree.ParseError: If the response or the embedded
            document is not well-formed XML.
    """
    # Namespace prefixes used by the XPath lookup below.
    namespaces = {
        'env': 'http://schemas.xmlsoap.org/soap/envelope/',
        'm': 'http://com/emc/nems/wsd/webservices/reports/corpdata'
    }

    # Child elements of <RealTimePrice> that become DataFrame columns.
    columns = [
        "period", "reportType", "tradingDate", "demand", "tcl", "USEP", "lcp",
        "regulation", "primaryReserve", "secondaryReserve", "contingencyReserve",
        "eheur", "solar"
    ]

    # Parse the SOAP response.
    root = ET.fromstring(data)

    # Fail with a clear message instead of an AttributeError/TypeError when
    # the payload element is missing or empty.
    return_node = root.find('.//m:return', namespaces)
    if return_node is None or not (return_node.text or "").strip():
        raise ValueError(
            "SOAP response does not contain a non-empty <m:return> element"
        )

    # ElementTree has already decoded one level of entity escaping; the extra
    # html.unescape() is kept from the original implementation.
    # NOTE(review): presumably the service double-escapes the payload - confirm.
    embedded_root = ET.fromstring(html.unescape(return_node.text))

    # Build one dict per <RealTimePrice>, looking each child up only once.
    data_rows = []
    for rt_price in embedded_root.iter('RealTimePrice'):
        row_data = {}
        for col in columns:
            node = rt_price.find(col)
            row_data[col] = node.text if node is not None else None
        data_rows.append(row_data)

    return pd.DataFrame(data_rows, columns=columns)

In [None]:
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

# Percent-encoding helper for the credentials embedded in the database URL.
from urllib.parse import quote

# Load the environment variables from the .env file.
# NOTE(review): `ROOT` and `crontab` are not defined in the visible code -
# confirm they are set before this cell runs (`crontab` may be meant to be the
# `repeated` flag defined earlier).
env_file = f'{ROOT}/.env' if crontab else '.env'
load_dotenv(env_file)

# Get the values of host, user, pswd, db, and schema from the environment variables
host = os.getenv('host')
user = os.getenv('user')
pswd = os.getenv('pswd')
db = os.getenv('db')
schema = os.getenv('schema')

# Fail early with a clear message instead of silently building a URL that
# contains the literal text "None".
missing_settings = [
    name
    for name, value in (
        ('host', host), ('user', user), ('pswd', pswd),
        ('db', db), ('schema', schema),
    )
    if value is None
]
if missing_settings:
    raise RuntimeError(
        "Missing database settings in environment: " + ", ".join(missing_settings)
    )

# Percent-encode the credentials so characters such as '@', ':' or '/' in the
# user name or password cannot corrupt the URL. The search_path option sets the
# schema for the connection ('%3D' is the already-encoded '=').
engine = create_engine(
    f"postgresql://{quote(user, safe='')}:{quote(pswd, safe='')}@{host}/{db}"
    f"?options=-csearch_path%3D{schema}",
    echo=False,
)
# NOTE(review): this connection is opened at import/cell-run time and is not
# closed anywhere in the visible code.
conn = engine.connect()

def df2Db(df):
    """Placeholder that accepts a DataFrame and currently does nothing.

    NOTE(review): presumably meant to write ``df`` to the database - not yet
    implemented.

    Args:
        df: The DataFrame to handle; ignored for now.

    Returns:
        None.
    """
    return None

## Main Process

In [None]:
# requests expects `cert` to be a file path (or a (cert, key) tuple of paths),
# not an open file object, so the PEM file is passed by path.
pem_cert_path = './nems2024.pem'

current_date = start_date

# Fetch one report per day. end_date is treated as inclusive so that the last
# configured day is fetched as well.
while current_date <= end_date:
    payload = payload_template.format(current_date.strftime("%d-%b-%Y"))

    # NOTE(review): verify=False disables TLS certificate verification; prefer
    # pointing `verify` at the CA bundle that signs the server certificate.
    response = requests.post(
        url,
        headers=headers,
        data=payload,
        cert=pem_cert_path,
        verify=False,
        timeout=60,  # avoid hanging forever on an unresponsive server
    )
    # Surface HTTP errors here instead of failing later while parsing the body.
    response.raise_for_status()
    xml_data = response.text

    # NOTE(review): this frame is overwritten on every iteration and is not
    # stored anywhere; only the last day's data remains after the loop.
    RealTimePrice_df = xml2Df(xml_data)

    current_date += delta
