In [1]:
import os
import glob
import pandas as pd
import random

from IPython.display import clear_output

# Base directory for all file access in this notebook: the dataset folders
# ('datasets/...') and the pickle output folder ('pkl') are joined onto it.
# It is a relative path, so it resolves against the kernel's working directory.
ROOT = ".." # Adjust to repository root

## Global Day-Ahead Electricity Price Dataset

This dataset presents a unified, cross-continental time series of day-ahead electricity prices compiled from major wholesale markets across Asia, Europe, North America, South America, and Oceania. The dataset offers a standardized format that supports time-series forecasting and enables robust comparative analysis across diverse global electricity markets.

Ullah, Md Habib; Reza, Sayed Mohsin; Gundapaneni, Lasya Madhuri; Balachander, Pranav; Babaiahgari, Bhanu; Khan, Abdullah Al Ahad (2025), “Global Day-Ahead Electricity Price Dataset”, Mendeley Data, V3, doi: 10.17632/s54n4tyyz4.3
https://data.mendeley.com/datasets/s54n4tyyz4/3

Creating the file index (each CSV is converted to a pickle and its record count is recorded):

In [2]:
# Build the DAEP file index: convert every CSV under datasets/daep/ (searched
# recursively) to a pickle in ROOT/pkl and record each file's name, record
# count and pickle name. The index itself is saved as a pickle as well.
#
# NOTE(review): pickles are written flat into ROOT/pkl under the CSV's base
# name. The WELD conversion cell in this notebook writes to the same directory
# and both datasets contain identically named files (e.g.
# Australia_AEMO_2006.csv), so whichever cell runs last overwrites the other's
# pickles. Consider per-dataset subdirectories; the paths are left unchanged
# here because other cells read from ROOT/pkl directly.

# Sort the matches: glob returns them in an OS-dependent order, and the row
# numbers of the index should be reproducible across runs and machines.
csv_files = sorted(glob.glob(os.path.join(ROOT, 'datasets', 'daep', '**', '*.csv'), recursive=True))

# Create the output directory once, up front, so that saving the index below
# also works when no CSV file was found.
pkl_dir = os.path.join(ROOT, 'pkl')
os.makedirs(pkl_dir, exist_ok=True)

# One dict per CSV file; becomes one row of the index.
results = []

for file_path in csv_files:
    file_name = os.path.basename(file_path)
    # Swap only the trailing extension. A plain replace('.csv', '.pkl') would
    # also rewrite a '.csv' occurring earlier in the name and would leave an
    # upper-case '.CSV' extension untouched.
    pkl_name = os.path.splitext(file_name)[0] + '.pkl'

    # Read the CSV file to get the record count
    df = pd.read_csv(file_path)
    record_count = len(df)
    results.append({'csv_name': file_name, 'record_count': record_count, 'pkl_name': pkl_name})
    print(f"Processed {file_name} with {record_count} records.")
    clear_output(wait=True)

    # Convert Timestamp column to datetime if it exists
    if 'Timestamp' in df.columns:
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])

    # Save as pickle file with same name but .pkl extension
    pickle_file = os.path.join(pkl_dir, pkl_name)
    df.to_pickle(pickle_file)

# Explicit columns keep the index schema intact even when `results` is empty.
csvs_df = pd.DataFrame(results, columns=['csv_name', 'record_count', 'pkl_name'])

display(csvs_df)

list_file_name = '0_daep_files_list.pkl'
csvs_df.to_pickle(os.path.join(pkl_dir, list_file_name))
print(f"Saved file list to {os.path.join(pkl_dir, list_file_name)}")

Unnamed: 0,csv_name,record_count,pkl_name
0,Additional_Information_by_Country.csv,49,Additional_Information_by_Country.pkl
1,Australia_AEMO_2006.csv,17519,Australia_AEMO_2006.pkl
2,Australia_AEMO_2007.csv,17520,Australia_AEMO_2007.pkl
3,Australia_AEMO_2008.csv,17568,Australia_AEMO_2008.pkl
4,Australia_AEMO_2009.csv,17520,Australia_AEMO_2009.pkl
...,...,...,...
501,USA_SPP_2021.csv,8759,USA_SPP_2021.pkl
502,USA_SPP_2022.csv,8759,USA_SPP_2022.pkl
503,USA_SPP_2023.csv,8759,USA_SPP_2023.pkl
504,USA_SPP_2024.csv,8778,USA_SPP_2024.pkl


Saved file list to ..\pkl\0_daep_files_list.pkl


Loading the previously saved file index (so the conversion above does not have to be re-run):

In [3]:
# Reload the DAEP file index that the conversion cell pickled into ROOT/pkl.
list_file_name = '0_daep_files_list.pkl'
index_path = os.path.join(ROOT, 'pkl', list_file_name)
csvs_df = pd.read_pickle(index_path)

### Files list exploration

Exploring the index: display a random contiguous segment of `sample_size` rows.

In [10]:
# Show a random contiguous slice of the file index.
sample_size = 10
# Clamp the upper bound at 0 so that an index with fewer than `sample_size`
# rows is shown in full instead of random.randint raising ValueError on an
# empty range.
start_idx = random.randint(0, max(0, len(csvs_df) - sample_size))
display(csvs_df.iloc[start_idx:start_idx + sample_size])

Unnamed: 0,csv_name,record_count,pkl_name
20,Australia_AEMO_2025.csv,43489,Australia_AEMO_2025.pkl
21,Austria_ENTSO-E_2015.csv,8664,Austria_ENTSO-E_2015.pkl
22,Austria_ENTSO-E_2016.csv,8784,Austria_ENTSO-E_2016.pkl
23,Austria_ENTSO-E_2017.csv,8760,Austria_ENTSO-E_2017.pkl
24,Austria_ENTSO-E_2018.csv,8760,Austria_ENTSO-E_2018.pkl
25,Austria_ENTSO-E_2019.csv,35040,Austria_ENTSO-E_2019.pkl
26,Austria_ENTSO-E_2020.csv,35136,Austria_ENTSO-E_2020.pkl
27,Austria_ENTSO-E_2021.csv,35040,Austria_ENTSO-E_2021.pkl
28,Austria_ENTSO-E_2022.csv,35040,Austria_ENTSO-E_2022.pkl
29,Austria_ENTSO-E_2023.csv,35040,Austria_ENTSO-E_2023.pkl


Files with 2025 data:

In [11]:
# Keep the index rows whose CSV file name contains '2025', then report how
# many there are and show them.
has_2025 = csvs_df['csv_name'].str.contains('2025')
csvs_2025 = csvs_df.loc[has_2025]
print(len(csvs_2025), " files with 2025 data:")
display(csvs_2025)

14  files with 2025 data:


Unnamed: 0,csv_name,record_count,pkl_name
20,Australia_AEMO_2025.csv,43489,Australia_AEMO_2025.pkl
48,Brazil_ONS-CCEE_2025.csv,3696,Brazil_ONS-CCEE_2025.pkl
81,Canada_IESO_2025.csv,2880,Canada_IESO_2025.pkl
89,Chile_CEN_2025.csv,4381,Chile_CEN_2025.pkl
189,India_IEX_2025.csv,14496,India_IEX_2025.pkl
226,Japan_JEPX_2025.csv,8832,Japan_JEPX_2025.pkl
325,Singapore_NEMS_2025.csv,6768,Singapore_NEMS_2025.pkl
370,South Korea_KPX_2025.csv,4416,South Korea_KPX_2025.pkl
416,USA_CAISO_2025.csv,4199,USA_CAISO_2025.pkl
432,USA_ERCOT_2025.csv,3623,USA_ERCOT_2025.pkl


In [12]:
# Peek at one converted file: the first and last six rows of the Japan JEPX
# 2025 pickle.
# NOTE(review): this reads from the shared ROOT/pkl directory; if the WELD
# conversion cell has been run and that dataset contains a file with the same
# name, this pickle holds the WELD data instead - confirm before relying on it.
jepx_2025_path = os.path.join(ROOT, 'pkl', 'Japan_JEPX_2025.pkl')
japan_jepx_2025 = pd.read_pickle(jepx_2025_path)

display(japan_jepx_2025.head(6))
print("...")
display(japan_jepx_2025.tail(6))

Unnamed: 0,Timestamp,System Price,Hokkaido,Tohoku,Tokyo,Chubu,Hokuriko,Kansai,Chugoku,Shikoku,Kyushu
0,2025-01-01 00:00:00,12.8,13.51,13.51,13.51,13.51,10.45,10.45,10.45,10.45,10.45
1,2025-01-01 00:30:00,13.04,13.0,13.0,13.0,13.0,12.09,12.09,12.09,12.09,11.12
2,2025-01-01 01:00:00,12.76,12.42,12.42,12.42,12.42,12.42,12.42,12.42,12.42,10.82
3,2025-01-01 01:30:00,13.08,13.03,13.03,13.03,13.03,13.03,13.03,13.03,13.03,10.45
4,2025-01-01 02:00:00,12.57,12.42,12.42,12.42,12.42,12.09,12.09,12.09,12.09,10.6
5,2025-01-01 02:30:00,11.88,11.88,11.88,11.88,11.88,10.83,10.83,10.83,10.83,10.83


...


Unnamed: 0,Timestamp,System Price,Hokkaido,Tohoku,Tokyo,Chubu,Hokuriko,Kansai,Chugoku,Shikoku,Kyushu
8826,2025-07-03 21:00:00,16.59,16.59,16.59,16.59,16.59,16.59,16.59,16.59,16.59,16.59
8827,2025-07-03 21:30:00,16.21,16.21,16.21,16.21,16.21,16.21,16.21,16.21,16.21,16.21
8828,2025-07-03 22:00:00,15.76,15.76,15.76,15.76,15.76,15.76,15.76,15.76,15.76,15.76
8829,2025-07-03 22:30:00,14.22,14.22,14.22,14.22,14.22,14.22,14.22,14.22,14.22,14.22
8830,2025-07-03 23:00:00,13.95,13.95,13.95,13.95,13.95,13.95,13.95,13.95,13.95,13.95
8831,2025-07-03 23:30:00,13.37,13.92,13.92,13.92,11.5,11.5,11.5,11.5,11.5,11.5


## Worldwide Electricity Load Dataset

This dataset provides a comprehensive, global-scale collection of time-series electricity load data aggregated from major power markets across Asia, Europe, North America, and Oceania. It captures system-level demand profiles at consistent temporal resolution, enabling detailed analysis of consumption patterns, peak-demand behavior, and seasonal variability across regions. The dataset is structured in a standardized and harmonized format to support load forecasting, cross-market comparative studies, and data-driven research in power system planning, operation, and energy market analytics.

Ullah, Md Habib; Reza, Sayed Mohsin; Khan, Abdullah Al Ahad; Salekin, Siraj Us; Al-Bayati, Ali M. S.; Babaiahgari, Bhanu; Romero, David González; Molina, Jesús Rodríguez (2026), “Worldwide Electricity Load Dataset”, Mendeley Data, V1, doi: 10.17632/ybggkc58fz.1

https://data.mendeley.com/datasets/ybggkc58fz/1

In [13]:
# Build the WELD file index: convert every CSV under datasets/weld/ (searched
# recursively) to a pickle in ROOT/pkl and record each file's name, record
# count and pickle name. The index itself is saved as a pickle as well.
#
# NOTE(review): pickles are written flat into ROOT/pkl under the CSV's base
# name. The DAEP conversion cell earlier in this notebook writes to the same
# directory and both datasets contain identically named files (e.g.
# Australia_AEMO_2006.csv), so running this cell overwrites the DAEP pickles
# of the same name. Consider per-dataset subdirectories; the paths are left
# unchanged here because other cells read from ROOT/pkl directly.

# Sort the matches: glob returns them in an OS-dependent order, and the row
# numbers of the index should be reproducible across runs and machines.
csv_files = sorted(glob.glob(os.path.join(ROOT, 'datasets', 'weld', '**', '*.csv'), recursive=True))

# Create the output directory once, up front, so that saving the index below
# also works when no CSV file was found.
pkl_dir = os.path.join(ROOT, 'pkl')
os.makedirs(pkl_dir, exist_ok=True)

# One dict per CSV file; becomes one row of the index.
results = []

for file_path in csv_files:
    file_name = os.path.basename(file_path)
    # Swap only the trailing extension. A plain replace('.csv', '.pkl') would
    # also rewrite a '.csv' occurring earlier in the name and would leave an
    # upper-case '.CSV' extension untouched.
    pkl_name = os.path.splitext(file_name)[0] + '.pkl'

    # Read the CSV file to get the record count
    df = pd.read_csv(file_path)
    record_count = len(df)
    results.append({'csv_name': file_name, 'record_count': record_count, 'pkl_name': pkl_name})
    print(f"Processed {file_name} with {record_count} records.")
    clear_output(wait=True)

    # Convert Timestamp column to datetime if it exists
    if 'Timestamp' in df.columns:
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])

    # Save as pickle file with same name but .pkl extension
    pickle_file = os.path.join(pkl_dir, pkl_name)
    df.to_pickle(pickle_file)

# Explicit columns keep the index schema intact even when `results` is empty.
csvs_df = pd.DataFrame(results, columns=['csv_name', 'record_count', 'pkl_name'])

display(csvs_df)

list_file_name = '0_weld_files_list.pkl'
csvs_df.to_pickle(os.path.join(pkl_dir, list_file_name))
print(f"Saved file list to {os.path.join(pkl_dir, list_file_name)}")

Unnamed: 0,csv_name,record_count,pkl_name
0,Additional_Information.csv,42,Additional_Information.pkl
1,Australia_AEMO_2006.csv,17519,Australia_AEMO_2006.pkl
2,Australia_AEMO_2007.csv,17520,Australia_AEMO_2007.pkl
3,Australia_AEMO_2008.csv,17568,Australia_AEMO_2008.pkl
4,Australia_AEMO_2009.csv,17520,Australia_AEMO_2009.pkl
...,...,...,...
484,USA_SPP_2021.csv,8758,USA_SPP_2021.pkl
485,USA_SPP_2022.csv,8749,USA_SPP_2022.pkl
486,USA_SPP_2023.csv,8757,USA_SPP_2023.pkl
487,USA_SPP_2024.csv,8782,USA_SPP_2024.pkl


Saved file list to ..\pkl\0_weld_files_list.pkl
