# CAISO Energy Storage Bid Data Scraping


<b>Author:</b> Neal Ma<br/>
<b>Creation Date:</b> September 11, 2024

<b>Description:</b> CAISO has published daily energy storage reports (https://www.caiso.com/library/daily-energy-storage-reports) since August 4, 2022. These reports contain useful data, but they are published as HTML pages, so the underlying data is not readily accessible. This notebook extracts that data and saves it to a local Parquet file, which can then be read into a pandas DataFrame for easy querying and access.

In [26]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [33]:
# Report URL building blocks. A report URL is assembled as
# <base URL> + <formatted date> + EXTENSION; several base/date-format
# combinations are defined because more than one naming scheme is probed.
EXTENSION = ".html"

# Primary scheme, e.g. ".../dailyenergystoragereportJul31-2022.html"
BASE_URL = "https://www.caiso.com/documents/dailyenergystoragereport"
DATE_FORMAT = "%b%d-%Y"

# Alternate scheme, e.g. ".../daily-energy-storage-report-Jul-31-2022.html"
ALT_BASE_URL = "https://www.caiso.com/documents/daily-energy-storage-report-"
ALT_DATE_FORMAT = "%b-%d-%Y"

# Second alternate date format (no dash before the year), e.g. "Jul-312022"
ALT_2_DATE_FORMAT = "%b-%d%Y"

# Inclusive date window to search.
# NOTE: START_DATE is the earliest date with available data.
START_DATE = "2022-07-31"
# END_DATE may be changed; just verify that all dates are actually in the data.
# TODO: End date verification
END_DATE = "2024-09-11"

In [34]:
# One calendar day per entry, START_DATE through END_DATE inclusive.
date_range = pd.date_range(START_DATE, END_DATE)

# Render every day once per URL date format; the three results stay aligned
# element-for-element with date_range.
date_strings, alt_date_strings, alt_2_date_strings = map(
    date_range.strftime,
    (DATE_FORMAT, ALT_DATE_FORMAT, ALT_2_DATE_FORMAT),
)

In [None]:
def _build_key_dict():
    """Return the data names mapped to their time granularity in minutes.

    Names are generated rather than listed by hand. Each name is built from a
    quantity, an optional ``hybrid_`` segment and a market suffix; insertion
    order is: all non-hybrid names first, then the hybrid ones.
    """
    table = {}
    for segment in ('', 'hybrid_'):
        # energy / charge totals: 5-minute granularity for every market
        for quantity in ('tot_energy', 'tot_charge'):
            for market in ('ifm', 'ruc', 'rtpd', 'rtd'):
                table[f'{quantity}_{segment}{market}'] = 5
        # "as_*" series: 60 minutes for ifm, 15 minutes for rtpd
        for market, minutes in (('ifm', 60), ('rtpd', 15)):
            for service in ('ru', 'rd', 'sr', 'nr'):
                table[f'as_{service}_{segment}{market}'] = minutes
    return table


# data names and their time granularity in minutes
key_dict = _build_key_dict()

# some larger labels to add
prefixes = ['bid_ifm', 'bid_rtpd']

In [47]:
# Probe every date in the range against each known report URL pattern and
# collect the dates for which none of the patterns returns HTTP 200.
#
# Results left in scope after the cell runs:
#   failed_requests - date strings (DATE_FORMAT) with no reachable report
#   html_str        - parsed page of the last date processed, or None if that
#                     date failed (or the date range was empty)

# Seconds to wait on a single request. requests applies no timeout unless one
# is passed, so without this a single stalled connection blocks the whole scan.
REQUEST_TIMEOUT = 30

failed_requests = []
html_str = None

# A single Session reuses the underlying connection across the many requests
# to the same host instead of opening a new one per probe.
with requests.Session() as session:
    for date_str, alt_date_str, alt_2_date_str in zip(date_strings, alt_date_strings, alt_2_date_strings):
        # Candidate URLs in priority order; later ones are only requested if
        # the earlier ones did not succeed.
        candidate_urls = (
            BASE_URL + date_str + EXTENSION,
            ALT_BASE_URL + alt_date_str + EXTENSION,
            ALT_BASE_URL + alt_2_date_str + EXTENSION,
        )

        html_str = None
        for url in candidate_urls:
            try:
                page = session.get(url, timeout=REQUEST_TIMEOUT)
            except requests.RequestException:
                # Network error or timeout: treat this candidate as
                # unavailable rather than aborting the scan and losing the
                # results gathered so far.
                continue
            if page.status_code == 200:
                html_str = BeautifulSoup(page.content, "html.parser")
                break

        if html_str is None:
            failed_requests.append(date_str)


print(failed_requests)

Jul31-2022
Aug01-2022
Aug02-2022
Aug03-2022
Aug04-2022
Aug05-2022
Aug06-2022
Aug07-2022
Aug08-2022
Aug09-2022
Aug10-2022
Aug11-2022
Aug12-2022
Aug13-2022
Aug14-2022
Aug15-2022
Aug16-2022
Aug17-2022
Aug18-2022
Aug19-2022
Aug20-2022
Aug21-2022
Aug22-2022
Aug23-2022
Aug24-2022
Aug25-2022
Aug26-2022
Aug27-2022
Aug28-2022
Aug29-2022
Aug30-2022
Aug31-2022
Sep01-2022
Sep02-2022
Sep03-2022
Sep04-2022
Sep05-2022
Sep06-2022
Sep07-2022
Sep08-2022
Sep09-2022
Sep10-2022
Sep11-2022
Sep12-2022
Sep13-2022
Sep14-2022
Sep15-2022
Sep16-2022
Sep17-2022
Sep18-2022
Sep19-2022
Sep20-2022
Sep21-2022
Sep22-2022
Sep23-2022
Sep24-2022
Sep25-2022
Sep26-2022
Sep27-2022
Sep28-2022
Sep29-2022
Sep30-2022
Oct01-2022
Oct02-2022
Oct03-2022
Oct04-2022
Oct05-2022
Oct06-2022
Oct07-2022
Oct08-2022
Oct09-2022
Oct10-2022
Oct11-2022
Oct12-2022
Oct13-2022
Oct14-2022
Oct15-2022
Oct16-2022
Oct17-2022
Oct18-2022
Oct19-2022
Oct20-2022
Oct21-2022
Oct22-2022
Oct23-2022
Oct24-2022
Oct25-2022
Oct26-2022
Oct27-2022
Oct28-2022
Oct29-2022

288
