In [5]:
import os
import time
import glob
import requests
import pandas as pd
import zipfile as zp
from functools import reduce
from selenium import webdriver


In [9]:
# Columns retained in the aggregated output table.
usecols = (
    'INTERVALSTARTTIME_GMT INTERVALENDTIME_GMT OPR_DT OPR_HR NODE_ID '
    'MARKET_RUN_ID LMP_TYPE XML_DATA_ITEM MW'
).split()

# Month abbreviation -> ['MMDD' of the month's first day, 'MMDD' of the next
# month's first day]; used to format the start/end dates of request URLs.
# December wraps around to '0101'.
month_dict = {
    name: [f'{number:02d}01', f'{number % 12 + 1:02d}01']
    for number, name in enumerate(
        ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
        start=1,
    )
}

# TH (Trading Hub) nodes.
th_node_list = [f'TH_{zone}_GEN-APND' for zone in ('NP15', 'SP15', 'ZP26')]

# Default LAP (Load Aggregation Point) nodes corresponding to utility territories.
lap_node_list = [f'DLAP_{area}-APND' for area in ('PGAE', 'SCE', 'SDGE', 'VEA')]

# Sub-LAP nodes, grouped by the prefix of their code.
sub_lap_node_list = [
    f'SLAP_{code}-APND'
    for code in (
        # PG* codes.
        'PGCC', 'PGEB', 'PGF1', 'PGFG', 'PGHB', 'PGKN', 'PGLP', 'PGNB', 'PGNC', 'PGNP',
        'PGNV', 'PGP2', 'PGSA', 'PGSB', 'PGSF', 'PGSI', 'PGSN', 'PGST', 'PGZP',
        # SC* codes.
        'SCEC', 'SCEN', 'SCEW', 'SCHD', 'SCLD', 'SCNW',
        # SDG* code.
        'SDG1',
        # VEA code.
        'VEA',
    )
]

In [20]:
def _pacific_midnight_utc_tag(month_day):
    '''Return the UTC hour tag ('T07' or 'T08') of Pacific local midnight on a first-of-month 'MMDD' date.'''

    # US daylight saving time (rules in force since 2007) starts on the second
    # Sunday of March and ends on the first Sunday of November, switching at
    # 02:00 local time. Midnight on the 1st of a month is therefore on
    # daylight time (UTC-7) for April through November and on standard time
    # (UTC-8) for December through March.
    return 'T07' if 4 <= int(month_day[:2]) <= 11 else 'T08'


def extract_hourly_data(
    year,
    nodes,
    download_root='/Users/parkerwild/GitHub/ca_nem/data/CAISO_LMPs',
    driver_path='/Users/parkerwild/GitHub/ca_nem/chromedriver_mac64/chromedriver.exe',
    wait_seconds=15,
):
    '''Downloads LMPs from Oasis Portal by month for a year of choice.

    One request is issued per entry of `month_dict` by pointing a Chrome
    browser at the OASIS `SingleZip` URL; Chrome is configured to save the
    downloads to a directory named after the year and the requested nodes.
    This function only downloads: unzipping and aggregation are done by
    `combine_hourly_data`.

    Parameters
    ----------
    year : int
        Year to download. December's request ends on January 1st of `year + 1`.
    nodes : list of str (or a single str)
        Node IDs to request; they are sent as one comma-separated `node` parameter.
    download_root : str, optional
        Directory under which the `<year>/<node names>` download folder is placed.
    driver_path : str, optional
        Path to the chromedriver executable.
    wait_seconds : int or float, optional
        Pause after each request. Presumably this gives the download time to
        finish before the next request is made — confirm against actual download times.

    Returns
    -------
    str
        The directory Chrome was configured to download into.

    Raises
    ------
    ValueError
        If `nodes` is empty.
    '''

    # A bare string would otherwise be split into single characters.
    if isinstance(nodes, str):
        nodes = [nodes]
    nodes = list(nodes)

    if not nodes:
        raise ValueError('nodes must contain at least one node ID.')

    node_entry = ','.join(nodes)
    name_entry = ', '.join(nodes)

    download_dir = f'{download_root}/{year}/{name_entry}'

    # Make sure the target exists before Chrome is told to download into it.
    os.makedirs(download_dir, exist_ok=True)

    chrome_options = webdriver.ChromeOptions()
    prefs = {'download.default_directory' : download_dir}
    chrome_options.add_experimental_option('prefs', prefs)

    print('Accessing Chrome driver...')

    # NOTE(review): `executable_path` is kept as in the original call; newer
    # Selenium releases expect a `Service` object instead — confirm against
    # the installed Selenium version.
    driver = webdriver.Chrome(options=chrome_options, executable_path=driver_path)

    try:
        for month, (start_md, end_md) in month_dict.items():

            # Handle last day of year: December's window ends on Jan 1st of the next year.
            end_year = year + 1 if month == 'Dec' else year

            # Handle daylight savings time: each boundary gets the UTC hour of
            # local midnight on *its own* date, so consecutive monthly windows
            # neither overlap nor leave a gap (Mar->Apr and Nov->Dec change offset).
            start_stamp = f'{year}{start_md}{_pacific_midnight_utc_tag(start_md)}'
            end_stamp = f'{end_year}{end_md}{_pacific_midnight_utc_tag(end_md)}'

            # URL.
            api_call = (
                'http://oasis.caiso.com/oasisapi/SingleZip?queryname=PRC_LMP&resultformat=6&'
                f'startdatetime={start_stamp}:00-0000&'
                f'enddatetime={end_stamp}:00-0000&'
                f'version=1&market_run_id=DAM&node={node_entry}'
            )

            print(f'Downloading data for {month}...')

            # Request.
            driver.get(api_call)

            time.sleep(wait_seconds)
    finally:
        print('Closing Chrome driver...')

        # quit() ends the whole WebDriver session (and its chromedriver
        # process), even if a request above raised; close() would only close
        # the current window.
        driver.quit()

    return download_dir

def combine_hourly_data(download_dir, columns=None):
    '''Unzip files and concatenate CSVs.

    Every `*.zip` directly inside `download_dir` is extracted into a sibling
    folder named after the archive, all `*.csv` files one level below
    `download_dir` are concatenated, duplicate rows are dropped, and the
    result is sorted, reduced to `columns` and written to
    `<download_dir>/Aggregated_LMPs.csv`.

    Parameters
    ----------
    download_dir : str
        Directory containing the downloaded zip archives.
    columns : list of str, optional
        Columns to keep in the output. Defaults to the module-level `usecols`.

    Returns
    -------
    pandas.DataFrame
        The cleaned, sorted table that was written to disk.

    Raises
    ------
    ValueError
        If no CSV files are found under `download_dir`.
    '''

    if columns is None:
        columns = usecols

    # Escape glob metacharacters (e.g. '[') so the directory is matched literally.
    search_root = glob.escape(download_dir)

    print('Unzipping files...')

    for zip_filename in sorted(glob.glob(os.path.join(search_root, '*.zip'))):

        dir_name = os.path.splitext(zip_filename)[0]
        os.makedirs(dir_name, exist_ok=True)

        # The context manager closes the archive handle after extraction.
        with zp.ZipFile(zip_filename, 'r') as zip_handler:
            zip_handler.extractall(dir_name)

    print('Concatenating CSVs...')

    # Sorted so that the row kept by drop_duplicates does not depend on
    # filesystem listing order.
    csv_files = sorted(glob.glob(os.path.join(search_root, '*', '*.csv')))

    if not csv_files:
        raise ValueError(f'No CSV files found under {download_dir!r}; nothing to aggregate.')

    entries = [pd.read_csv(csv_path) for csv_path in csv_files]

    print('Cleaning data...')

    lmps = (
        pd.concat(entries, ignore_index=True)
        # Drop rows repeated across files (e.g. overlapping download windows
        # around Daylight Savings Time boundaries).
        .drop_duplicates(subset=['OPR_DT', 'OPR_HR', 'NODE', 'XML_DATA_ITEM'], ignore_index=True)
        # Sort by interval start time.
        .sort_values(by=['INTERVALSTARTTIME_GMT', 'NODE_ID', 'LMP_TYPE'], ignore_index=True)
        # Keep subset of columns.
        .loc[:, list(columns)]
    )

    # Export to CSV.
    lmps.to_csv(os.path.join(download_dir, 'Aggregated_LMPs.csv'), index=False)

    print('Process complete!')

    return lmps

In [8]:
# Request the 2022 monthly LMP archives for three of the default LAP nodes
# and remember the folder the downloads were directed to.
download_dir = extract_hourly_data(
    year=2022,
    nodes=['DLAP_PGAE-APND', 'DLAP_SCE-APND', 'DLAP_SDGE-APND'],
)

Accessing Chrome driver...
Downloading data for May...
Downloading data for Jul...
Downloading data for Aug...
Downloading data for Oct...
Closing Chrome driver...
Unzipping files...
Concatenating CSVs...
Process complete!


In [21]:
# Unzip the downloaded archives and build the aggregated LMP table.
lmps = combine_hourly_data(download_dir=download_dir)

Unzipping files...
Concatenating CSVs...
Cleaning data...
Process complete!
