# Loading DA data (from previously downloaded ZIP archives)

In [2]:
# -------------------------------------------------------------------
# concat_zip_csvs.py   –  Run from a folder that sits next to the "data" folder holding the ZIP files (ZIP_DIR = <cwd>/../data)
# -------------------------------------------------------------------
import zipfile
from pathlib import Path
import pandas as pd

# -------------------------------------------------------------------
# 1.  Where are the ZIPs?
# -------------------------------------------------------------------
ZIP_DIR   = Path.cwd()  / "../data"         # change if zips live elsewhere
print(f"Loading ZIPs from {ZIP_DIR}")


def load_zip_frames(zip_dir, name_tag="_DA_"):
    """Read the single CSV held by each matching ZIP archive in ``zip_dir``.

    Parameters
    ----------
    zip_dir : str or pathlib.Path
        Folder that is scanned (non-recursively) for ``*.zip`` files.
    name_tag : str, default "_DA_"
        Only archives whose file name contains this tag (compared
        case-insensitively) are loaded. The same folder also holds the
        ``_RT_`` archives, so without this filter they would be merged into
        the DA data. An empty string loads every archive.

    Returns
    -------
    list of pandas.DataFrame
        One frame per loaded archive, in sorted file-name order, each with an
        extra ``source_zip`` column holding the archive's file name.

    Raises
    ------
    ValueError
        If a matching archive does not contain exactly one ``.csv`` member.
    """
    wanted = name_tag.upper()
    loaded = []
    for zpath in sorted(Path(zip_dir).glob("*.zip")):   # sort for deterministic order
        if wanted not in zpath.name.upper():            # skip archives of other markets
            continue

        print(f"Loading {zpath.name}...")
        with zipfile.ZipFile(zpath) as zf:
            # assume exactly ONE csv inside
            csv_members = [n for n in zf.namelist() if n.lower().endswith(".csv")]
            if len(csv_members) != 1:
                raise ValueError(f"{zpath} should contain exactly one CSV")
            csv_name = csv_members[0]

            # read the CSV directly into pandas from the archive member
            # NOTE(review): the saved output shows a pandas warning on this
            # call for several archives — presumably a mixed-dtype warning;
            # confirm and consider passing an explicit dtype.
            with zf.open(csv_name) as f:
                df = pd.read_csv(f)              # pass file-like object

        df["source_zip"] = zpath.name            # provenance column
        loaded.append(df)
        print(f"✔ loaded {csv_name} from {zpath.name}")
    return loaded


# -------------------------------------------------------------------
# 2.  Load every DA *.zip in that folder
# -------------------------------------------------------------------
frames = load_zip_frames(ZIP_DIR, name_tag="_DA_")   # one DataFrame per DA archive




Loading ZIPs from c:\Users\benja\OneDrive\Bureau\UchicagoMSFM\Financing_Grid_final_project\code\..\data
Loading 2022_Oct-Dec_DA_LMPs.zip...
✔ loaded 2022_Oct-Dec_DA_LMPs.csv from 2022_Oct-Dec_DA_LMPs.zip
Loading 2023_Apr-Jun_DA_LMPs.zip...
✔ loaded 2023_Apr-Jun_DA_LMPs.csv from 2023_Apr-Jun_DA_LMPs.zip
Loading 2023_Jan-Mar_DA_LMPs.zip...
✔ loaded DA.csv from 2023_Jan-Mar_DA_LMPs.zip
Loading 2023_Jul-Sep_DA_LMPs.zip...


  df = pd.read_csv(f)              # pass file-like object


✔ loaded DA.csv from 2023_Jul-Sep_DA_LMPs.zip
Loading 2023_Oct-Dec_DA_LMPs.zip...
✔ loaded DA.csv from 2023_Oct-Dec_DA_LMPs.zip
Loading 2024-Jul-Sep_DA_LMPs.zip...
✔ loaded DA.csv from 2024-Jul-Sep_DA_LMPs.zip
Loading 2024-Oct-Dec_DA_LMPs.zip...
✔ loaded DA.csv from 2024-Oct-Dec_DA_LMPs.zip
Loading 2024_Apr-Jun_DA_LMPs.zip...


  df = pd.read_csv(f)              # pass file-like object


✔ loaded DA.csv from 2024_Apr-Jun_DA_LMPs.zip
Loading 2024_Jan-Mar_DA_LMPs.zip...


  df = pd.read_csv(f)              # pass file-like object


✔ loaded DA.csv from 2024_Jan-Mar_DA_LMPs.zip
Loading 2025_Jan-Mar_DA_LMP.zip...


  df = pd.read_csv(f)              # pass file-like object


✔ loaded DA.csv from 2025_Jan-Mar_DA_LMP.zip


In [3]:
# Stack the per-archive DA frames into one frame with a fresh 0..n-1 index.
# NOTE(review): `frames` is reassigned by the RT loading cell further down,
# so run this cell right after the DA loading cell.
all_df = pd.concat(objs=frames, axis=0, ignore_index=True)

In [4]:
# Display the merged DA frame (the rendered output is truncated by pandas).
all_df

Unnamed: 0,MARKET_DAY,NODE,TYPE,VALUE,HE1,HE2,HE3,HE4,HE5,HE6,...,HE16,HE17,HE18,HE19,HE20,HE21,HE22,HE23,HE24,source_zip
0,10/1/2022,AECI,Interface,LMP,25.34,24.61,25.60,24.66,27.42,32.57,...,45.05,47.91,52.38,52.04,41.39,34.38,32.99,26.14,25.97,2022_Oct-Dec_DA_LMPs.zip
1,10/1/2022,AECI,Interface,MCC,-9.42,-8.52,-6.95,-8.35,-8.25,-5.83,...,-3.33,-3.32,-2.5,-2.45,-3.7,-4.15,-0.54,-3.69,-2.91,2022_Oct-Dec_DA_LMPs.zip
2,10/1/2022,AECI,Interface,MLC,-2.32,-2.84,-2.79,-2.83,-2.90,-3.14,...,-2.89,-2.99,-3.18,-3.91,-3.76,-3.30,-2.64,-2.63,-1.94,2022_Oct-Dec_DA_LMPs.zip
3,10/1/2022,AECI.ALTW,Loadzone,LMP,28.0,26.00,26.00,25.95,30.55,35.35,...,42.69,46.07,51.21,52.11,41.99,33.55,24.64,24.95,21.97,2022_Oct-Dec_DA_LMPs.zip
4,10/1/2022,AECI.ALTW,Loadzone,MCC,-6.99,-9.12,-8.51,-9.04,-7.23,-5.33,...,-6.13,-6.09,-4.66,-4.59,-5.0,-6.99,-10.60,-6.16,-7.10,2022_Oct-Dec_DA_LMPs.zip
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6612391,3/31/2025,WR.THAYERLD,Loadzone,MCC,-1.04,-0.92,-0.47,-0.13,0.20,0.38,...,-7.24,-8.45,-9.25,-5.74,-4.71,-1.76,-1.07,-0.25,-1.17,2025_Jan-Mar_DA_LMP.zip
6612392,3/31/2025,WR.THAYERLD,Loadzone,MLC,0.24,0.24,0.24,0.25,-0.17,0.13,...,0.24,0.25,0.31,-0.59,-1.35,-0.97,0.34,0.01,-0.25,2025_Jan-Mar_DA_LMP.zip
6612393,3/31/2025,YAD,Interface,LMP,25.02,24.26,24.05,25.38,28.07,35.41,...,32.35,33.67,39.92,59.8,72.4,53.90,44.67,40.72,34.82,2025_Jan-Mar_DA_LMP.zip
6612394,3/31/2025,YAD,Interface,MCC,0.5,0.32,0.61,0.93,1.77,1.78,...,-3.63,-4.2,-4.46,-2.06,-0.63,0.34,-0.31,0.14,2.06,2025_Jan-Mar_DA_LMP.zip


In [30]:
# Pricing node to extract; the DA and RT filter cells both read `node`.
node = 'NSP.PRISL1'

# Collect the node names present in the merged DA frame and fail fast when
# the requested node is not among them.
set_nodes = {*all_df['NODE']}
if not (node in set_nodes):
    raise ValueError(f"Node {node} not found in the data.")

In [31]:
# Keep only the LMP rows of the selected node, applying both conditions
# as a single boolean mask over the merged DA frame.
is_lmp = all_df["VALUE"] == 'LMP'
is_node = all_df['NODE'] == node
filtered_df = all_df[is_lmp & is_node]

In [13]:
# Preview the first rows of the node-filtered DA frame.
# NOTE(review): the saved output below shows NODE == MEC.PPWIND although `node`
# is set to 'NSP.PRISL1'; this cell's execution count (13) predates the node
# and filter cells (30, 31), so the output looks stale — re-run to refresh.
filtered_df.head()

Unnamed: 0,MARKET_DAY,NODE,TYPE,VALUE,HE1,HE2,HE3,HE4,HE5,HE6,...,HE16,HE17,HE18,HE19,HE20,HE21,HE22,HE23,HE24,source_zip
4899,10/1/2022,MEC.PPWIND,Gennode,LMP,9.71,13.26,15.14,14.94,19.21,24.82,...,42.31,44.91,49.48,45.4,33.91,27.3,14.53,6.43,8.16,2022_Oct-Dec_DA_LMPs.zip
11967,10/2/2022,MEC.PPWIND,Gennode,LMP,12.02,14.21,16.73,19.69,22.44,24.7,...,42.2,45.1,55.93,53.41,34.35,22.22,8.85,3.57,2.29,2022_Oct-Dec_DA_LMPs.zip
19035,10/3/2022,MEC.PPWIND,Gennode,LMP,-4.28,-4.08,-1.03,-0.58,4.48,14.14,...,54.87,56.83,62.94,63.04,48.32,30.69,16.35,7.53,12.59,2022_Oct-Dec_DA_LMPs.zip
26103,10/4/2022,MEC.PPWIND,Gennode,LMP,11.56,12.13,14.47,17.08,23.48,31.06,...,64.98,64.1,63.7,66.79,58.16,51.04,44.61,42.33,40.88,2022_Oct-Dec_DA_LMPs.zip
33171,10/5/2022,MEC.PPWIND,Gennode,LMP,31.74,31.09,31.89,32.15,37.7,51.89,...,66.28,66.46,70.17,75.25,63.96,50.44,43.01,33.77,29.77,2022_Oct-Dec_DA_LMPs.zip


In [32]:
# Destination CSV for the node-filtered DA rows, written into ZIP_DIR.
OUT_FILE = ZIP_DIR.joinpath("Node_DA_nuclear.csv")


In [33]:
# -------------------------------------------------------------------
# 3.  Save the node-filtered DA rows
# -------------------------------------------------------------------
# The row index is not written; the message reports how many frames are
# currently held in `frames`.
filtered_df.to_csv(path_or_buf=OUT_FILE, index=False)
da_file_count = len(frames)
print(f"\nMerged {da_file_count} files ⇒ {OUT_FILE}")


Merged 10 files ⇒ c:\Users\benja\OneDrive\Bureau\UchicagoMSFM\Financing_Grid_final_project\code\..\data\Node_DA_nuclear.csv


## Loading RT data (from previously downloaded ZIP archives)

In [19]:
import csv

ZIP_DIR = Path.cwd() / "../data"            # change if zips live elsewhere
print(f"Loading ZIPs from {ZIP_DIR}")
frames = []                                 # one DataFrame per RT archive

# Parser options shared by every archive. The python engine is the more
# forgiving parser; with on_bad_lines="warn" pandas emits a warning for each
# malformed line and skips it, so such rows are absent from the result.
rt_read_options = dict(
    engine="python",
    on_bad_lines="warn",
    quoting=csv.QUOTE_MINIMAL,
)

# ------------------------------------------------------------------
# 1. load only the archives whose name contains "_RT_" (case-insensitive),
#    e.g. 2022_Oct-Dec_RT_LMPs.zip, 2023_Jan-Mar_RT_LMPs.zip, ...
# ------------------------------------------------------------------
for zpath in sorted(ZIP_DIR.glob("*.zip")):
    is_rt_archive = "_RT_" in zpath.name.upper()
    if not is_rt_archive:                   # skip DA archives
        continue

    print(f"Loading {zpath.name} …")

    with zipfile.ZipFile(zpath) as zf:
        # exactly ONE CSV member is expected inside each zip
        csv_members = [m for m in zf.namelist() if m.lower().endswith(".csv")]
        if len(csv_members) != 1:
            raise ValueError(f"{zpath} should contain exactly one CSV")
        csv_name = csv_members[0]
        print(csv_name)
        with zf.open(csv_name) as handle:
            df = pd.read_csv(handle, **rt_read_options)
            frames.append(df)

        print(f"  ✔ loaded {csv_name}")

# ------------------------------------------------------------------
# 2. stack all RT frames into one DataFrame
# ------------------------------------------------------------------
if len(frames) == 0:
    raise RuntimeError("No RT zip files found!")
RT_long = pd.concat(frames, ignore_index=True)
print(f"\nMerged {len(frames)} RT files → {len(RT_long):,} rows")


Loading ZIPs from c:\Users\benja\OneDrive\Bureau\UchicagoMSFM\Financing_Grid_final_project\code\..\data
Loading 2022_Oct-Dec_RT_LMPs.zip …
2022_Oct-Dec_RT_LMPs.csv
  ✔ loaded 2022_Oct-Dec_RT_LMPs.csv
Loading 2023_Apr-Jun_RT_LMPs.zip …
2023_Apr-Jun_RT_LMPs.csv
  ✔ loaded 2023_Apr-Jun_RT_LMPs.csv
Loading 2023_Jan-Mar_RT_LMPs.zip …
RT.csv
  ✔ loaded RT.csv
Loading 2023_Jul-Sep_RT_LMPs.zip …
RT.csv



  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.read_csv(

  df = pd.re

  ✔ loaded RT.csv
Loading 2023_Oct-Dec_RT_LMPs.zip …
RT.csv
  ✔ loaded RT.csv
Loading 2024-Oct-Dec_RT_LMPs.zip …
RT.csv
  ✔ loaded RT.csv
Loading 2024_Apr-Jun_RT_LMPs.zip …
RT.csv
  ✔ loaded RT.csv
Loading 2024_Jan-Mar_RT_LMPs.zip …
RT.csv
  ✔ loaded RT.csv
Loading 2024_Jul-Sep_RT_LMPs.zip …
RT.csv
  ✔ loaded RT.csv
Loading 2025_Jan-Mar_RT_LMP.zip …
RT.csv
  ✔ loaded RT.csv

Merged 10 RT files → 6,612,333 rows


In [20]:
# The RT loading cell already concatenated the RT frames into `RT_long`.
# Reuse that result instead of concatenating a second time, which would hold
# two full copies in memory and would pick up whatever `frames` currently
# contains if another loading cell has been re-run in between.
RT_df = RT_long

In [21]:
# Preview the first rows of the merged RT frame.
RT_df.head()

Unnamed: 0,MARKET_DAY,NODE,TYPE,VALUE,HE1,HE2,HE3,HE4,HE5,HE6,...,HE15,HE16,HE17,HE18,HE19,HE20,HE21,HE22,HE23,HE24
0,10/1/2022,AECI,Interface,LMP,36.73,27.97,30.64,35.98,39.39,42.04,...,47.82,52.34,47.25,49.76,50.74,44.74,34.09,34.1,22.08,25.39
1,10/1/2022,AECI,Interface,MCC,-7.41,-3.69,-2.16,-0.34,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,-0.01,-1.8,-0.97,-5.16,-2.78
2,10/1/2022,AECI,Interface,MLC,-4.18,-2.82,-2.86,-3.01,-2.98,-3.02,...,-2.89,-3.01,-2.62,-2.71,-2.91,-3.15,-2.67,-2.77,-2.15,-2.04
3,10/1/2022,AECI.ALTW,Loadzone,LMP,39.92,29.02,32.2,38.28,41.91,44.77,...,50.03,54.39,48.75,51.47,52.74,46.73,35.74,35.6,20.68,25.27
4,10/1/2022,AECI.ALTW,Loadzone,MCC,-7.0,-4.5,-2.6,-0.32,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,-0.01,-1.86,-1.53,-8.08,-4.32


In [34]:
# Keep only the LMP rows of the selected node in the merged RT frame.
rt_mask = (RT_df["VALUE"] == 'LMP') & (RT_df['NODE'] == node)
filtered_df_RT = RT_df[rt_mask]

# `node` was only checked against the DA data earlier; verify it here too so
# a missing node raises instead of silently producing an empty RT export.
if filtered_df_RT.empty:
    raise ValueError(f"Node {node} has no LMP rows in the RT data.")

filtered_df_RT.head()

Unnamed: 0,MARKET_DAY,NODE,TYPE,VALUE,HE1,HE2,HE3,HE4,HE5,HE6,...,HE15,HE16,HE17,HE18,HE19,HE20,HE21,HE22,HE23,HE24
5946,10/1/2022,NSP.PRISL1,Gennode,LMP,14.05,11.51,19.84,31.18,37.66,40.5,...,45.85,50.02,45.07,47.8,49.04,43.31,36.52,32.67,20.43,23.12
13014,10/2/2022,NSP.PRISL1,Gennode,LMP,12.88,22.69,22.58,22.49,22.95,24.96,...,38.73,36.86,44.37,53.78,48.5,43.38,25.08,1.02,-6.6,-8.26
20082,10/3/2022,NSP.PRISL1,Gennode,LMP,-10.55,-11.88,-10.52,-6.69,-6.16,16.02,...,43.23,47.22,50.56,59.81,119.06,49.62,35.0,44.96,35.63,17.86
27150,10/4/2022,NSP.PRISL1,Gennode,LMP,29.64,22.18,30.77,25.79,25.39,25.98,...,52.08,51.58,57.09,68.03,60.75,112.68,46.41,43.09,44.59,44.35
34218,10/5/2022,NSP.PRISL1,Gennode,LMP,39.64,35.79,31.64,32.71,34.75,65.46,...,57.6,56.62,57.35,60.56,58.9,78.59,107.94,45.69,40.63,37.0


In [35]:
# Write the node-filtered RT rows to a CSV inside ZIP_DIR (row index omitted)
# and report how many frames are currently held in `frames`.
OUT_FILE_RT = ZIP_DIR.joinpath("Node_RT_nuclear.csv")
filtered_df_RT.to_csv(path_or_buf=OUT_FILE_RT, index=False)
rt_file_count = len(frames)
print(f"\nMerged {rt_file_count} files ⇒ {OUT_FILE_RT}")


Merged 10 files ⇒ c:\Users\benja\OneDrive\Bureau\UchicagoMSFM\Financing_Grid_final_project\code\..\data\Node_RT_nuclear.csv
