# Downloading DA data

In [None]:
# -------------------------------------------------------------------
# concat_zip_csvs.py   –  Run from the folder that holds the ZIP files
# -------------------------------------------------------------------
import zipfile
from pathlib import Path
import pandas as pd

# -------------------------------------------------------------------
# 1.  Where are the ZIPs?
# -------------------------------------------------------------------
ZIP_DIR   = Path.cwd()  / "../data"         # change if zips live elsewhere
print(f"Loading ZIPs from {ZIP_DIR}")

frames = []                     # accumulate individual DataFrames
# -------------------------------------------------------------------
# 2.  Loop over every *.zip in that folder
# -------------------------------------------------------------------
for zpath in sorted(ZIP_DIR.glob("*.zip")):   # sort for deterministic order
    print(f"Loading {zpath.name}...")
    with zipfile.ZipFile(zpath) as zf:
        # assume exactly ONE csv inside
        csv_name = [n for n in zf.namelist() if n.lower().endswith(".csv")]
        if len(csv_name) != 1:
            raise ValueError(f"{zpath} should contain exactly one CSV")
        csv_name = csv_name[0]

        # read the CSV directly into pandas
        with zf.open(csv_name) as f:
            df = pd.read_csv(f)              # pass file-like object
            print(df.head())
            df["source_zip"] = zpath.name    # optional provenance column
            frames.append(df)

        print(f"✔ loaded {csv_name} from {zpath.name}")




Loading ZIPs from c:\Users\benja\OneDrive\Bureau\UchicagoMSFM\Financing_Grid_final_project\code\..\data
Loading 2022_Oct-Dec_DA_LMPs.zip...
  MARKET_DAY       NODE       TYPE VALUE    HE1    HE2    HE3    HE4    HE5  \
0  10/1/2022       AECI  Interface   LMP  25.34  24.61  25.60  24.66  27.42   
1  10/1/2022       AECI  Interface   MCC  -9.42  -8.52  -6.95  -8.35  -8.25   
2  10/1/2022       AECI  Interface   MLC  -2.32  -2.84  -2.79  -2.83  -2.90   
3  10/1/2022  AECI.ALTW   Loadzone   LMP  28.00  26.00  26.00  25.95  30.55   
4  10/1/2022  AECI.ALTW   Loadzone   MCC  -6.99  -9.12  -8.51  -9.04  -7.23   

     HE6  ...   HE15   HE16   HE17   HE18   HE19   HE20   HE21   HE22   HE23  \
0  32.57  ...  42.68  45.05  47.91  52.38  52.04  41.39  34.38  32.99  26.14   
1  -5.83  ...  -2.78  -3.33  -3.32  -2.50  -2.45  -3.70  -4.15  -0.54  -3.69   
2  -3.14  ...  -2.96  -2.89  -2.99  -3.18  -3.91  -3.76  -3.30  -2.64  -2.63   
3  35.35  ...  40.83  42.69  46.07  51.21  52.11  41.99  33.55  2

  df = pd.read_csv(f)              # pass file-like object


  MARKET_DAY       NODE       TYPE VALUE    HE1    HE2    HE3    HE4    HE5  \
0   7/1/2023       AECI  Interface   LMP  23.46  20.72  19.18  18.06  17.87   
1   7/1/2023       AECI  Interface   MCC  -4.28  -2.65  -2.27  -2.32  -2.32   
2   7/1/2023       AECI  Interface   MLC  -0.94  -0.64  -0.58  -0.55  -0.69   
3   7/1/2023  AECI.ALTW   Loadzone   LMP  29.22  24.33  22.24  21.16  21.00   
4   7/1/2023  AECI.ALTW   Loadzone   MCC   1.25   0.65   0.44   0.45   0.45   

     HE6  ...   HE15   HE16   HE17   HE18   HE19   HE20   HE21   HE22   HE23  \
0  17.53  ...  43.44  47.78  54.12  53.37  46.95  38.47  34.39  29.94  25.68   
1  -2.84  ...  -3.55  -4.25  -3.82  -3.26  -2.90  -3.74  -4.05  -3.64  -3.13   
2  -0.69  ...  -0.46  -0.53  -1.96  -0.61  -0.18  -1.06  -0.32  -0.40  -0.50   
3  21.43  ...  44.43  55.46  57.02  59.75  49.70  45.27  37.34  36.09  30.40   
4   0.70  ...  -4.93   0.78  -3.01   0.26  -2.57   0.16  -2.27   1.35   0.65   

    HE24  
0  24.76  
1  -1.74  
2  -0.90  


  df = pd.read_csv(f)              # pass file-like object


  MARKET_DAY       NODE       TYPE VALUE    HE1    HE2    HE3    HE4    HE5  \
0   4/1/2024       AECI  Interface   LMP   8.19   8.29   8.16   7.72  10.79   
1   4/1/2024       AECI  Interface   MCC  -6.34  -5.08  -5.77  -5.15  -6.81   
2   4/1/2024       AECI  Interface   MLC  -0.77  -0.80  -0.94  -0.74  -1.10   
3   4/1/2024  AECI.ALTW   Loadzone   LMP  20.87  21.04  19.52  20.61  25.56   
4   4/1/2024  AECI.ALTW   Loadzone   MCC   6.37   7.48   5.32   7.64   7.62   

     HE6  ...   HE15   HE16   HE17   HE18   HE19   HE20   HE21   HE22   HE23  \
0  15.17  ...   19.5  18.46  20.99  22.32  24.34  25.46  20.04  16.76  14.56   
1  -8.44  ...  -0.71  -2.23   -1.2  -1.71  -2.21  -2.64  -3.33  -2.98  -3.68   
2  -1.42  ...  -1.25  -1.28  -1.38  -1.49  -1.64  -1.74  -1.44  -1.23  -1.17   
3  32.53  ...  29.59  33.33  36.36  38.06   38.2  43.46  38.28  32.64  30.46   
4   8.44  ...   9.11  12.36  13.86   13.7  11.29  14.98  14.59  12.53  11.83   

    HE24  
0  11.79  
1  -4.91  
2  -0.62  


  df = pd.read_csv(f)              # pass file-like object


  MARKET_DAY       NODE       TYPE VALUE    HE1    HE2    HE3    HE4    HE5  \
0   1/1/2024       AECI  Interface   LMP  19.57  18.89  20.36  20.11  20.68   
1   1/1/2024       AECI  Interface   MCC  -2.44  -2.25  -1.37  -1.54  -1.35   
2   1/1/2024       AECI  Interface   MLC  -0.95  -1.31  -1.03  -0.72  -0.73   
3   1/1/2024  AECI.ALTW   Loadzone   LMP  25.26  22.46  22.12  22.05  22.31   
4   1/1/2024  AECI.ALTW   Loadzone   MCC   1.43   1.41   0.86   0.97   0.86   

     HE6  ...   HE15   HE16   HE17   HE18   HE19   HE20   HE21   HE22   HE23  \
0  20.60  ...  23.09  22.79  23.29  28.01  27.91  22.09  13.27  11.24   8.70   
1  -1.53  ...   1.23   0.61  -1.10  -2.32  -1.90  -3.21  -9.36  -9.29  -9.63   
2  -0.81  ...  -0.75  -0.86  -0.98  -0.85  -2.06  -0.91  -1.58  -1.13  -0.96   
3  22.97  ...  21.72  21.43  21.41  25.19  25.63  20.44  19.93  22.72  21.97   
4   0.94  ...   0.42  -0.29  -2.65  -5.81  -6.40  -5.24  -2.39   3.04   3.44   

    HE24  
0   7.17  
1  -9.75  
2  -1.04  


  df = pd.read_csv(f)              # pass file-like object


  MARKET_DAY       NODE       TYPE VALUE    HE1    HE2    HE3    HE4    HE5  \
0   1/1/2025       AECI  Interface   LMP  21.43  19.51  20.19  19.33  20.26   
1   1/1/2025       AECI  Interface   MCC  -0.30  -1.77  -0.19  -0.84  -0.23   
2   1/1/2025       AECI  Interface   MLC  -1.04  -1.03  -0.97  -0.76  -0.98   
3   1/1/2025  AECI.ALTW   Loadzone   LMP  17.44  17.08  16.46  15.61  16.67   
4   1/1/2025  AECI.ALTW   Loadzone   MCC  -4.59  -4.49  -4.19  -4.43  -4.10   

     HE6  ...   HE15   HE16   HE17   HE18   HE19   HE20   HE21   HE22   HE23  \
0  20.87  ...  22.06  23.08  27.24  32.86  42.47  30.47  30.32  30.63  32.38   
1  -0.51  ...  -0.52  -0.23  -0.29  -3.17  -2.94  -6.81  -6.21  -5.54  -3.24   
2  -1.01  ...  -1.12  -1.16  -1.28  -1.39  -1.66  -1.98  -1.94  -1.84  -0.33   
3  17.57  ...  22.12  23.21  27.56  36.84  46.41  38.06  37.27  36.07  36.13   
4  -4.10  ...  -0.66  -0.30  -0.43  -0.04  -0.33  -0.53  -0.54  -0.63   0.26   

    HE24  
0  27.78  
1  -5.85  
2  -1.48  


In [21]:
all_df = pd.concat(frames, ignore_index=True)

In [23]:
filtered_df = all_df[all_df["VALUE"] == 'LMP']  
filtered_df = filtered_df[filtered_df['NODE']=='NSP.NWELOAD']

In [26]:
OUT_FILE  = ZIP_DIR / "Node_DA.csv"


In [27]:
# -------------------------------------------------------------------
# 3.  Concatenate and save
# -------------------------------------------------------------------
filtered_df.to_csv(OUT_FILE, index=False)
print(f"\nMerged {len(frames)} files ⇒ {OUT_FILE}")


Merged 10 files ⇒ c:\Users\benja\OneDrive\Bureau\UchicagoMSFM\Financing_Grid_final_project\code\..\data\Node_DA.csv
