In [1]:
import os
import glob
import pandas as pd
import random

from IPython.display import clear_output

# Base directory that the 'datasets' and 'pkl' paths in the cells below are joined onto.
# A relative value such as ".." is resolved against the kernel's working directory.
ROOT = ".." # Adjust to repository root

## Global Day-Ahead Electricity Price Dataset

This dataset presents a unified, cross-continental time series of day-ahead electricity prices compiled from major wholesale markets across Asia, Europe, North America, South America, and Oceania. The dataset offers a standardized format that supports time-series forecasting and enables robust comparative analysis across diverse global electricity markets.

Ullah, Md Habib; Reza, Sayed Mohsin; Gundapaneni, Lasya Madhuri; Balachander, Pranav; Babaiahgari, Bhanu; Khan, Abdullah Al Ahad (2025), “Global Day-Ahead Electricity Price Dataset”, Mendeley Data, V3, doi: 10.17632/s54n4tyyz4.3
https://data.mendeley.com/datasets/s54n4tyyz4/3

Creating a files index:

In [50]:
# Build the DAEP files index: convert every CSV under datasets/daep/ to a pickle under
# pkl/daep/ and record each file's name, row count and pickle name in csvs_df.

# Find all CSV files in the datasets/daep/ directory and subdirectories.
# glob returns matches in a filesystem-dependent order, so sort them to keep the
# index (and the row positions used by later cells) reproducible across machines.
csv_files = sorted(glob.glob(os.path.join(ROOT, 'datasets', 'daep', '**', '*.csv'), recursive=True))

# All pickles, including the index itself, are written to this directory
pkl_dir = os.path.join(ROOT, 'pkl', 'daep')
os.makedirs(pkl_dir, exist_ok=True)

# One record per CSV file
results = []

for file_path in csv_files:
    file_name = os.path.basename(file_path)
    # Swap only the trailing extension; a plain substring replace would also
    # rewrite a '.csv' that appears in the middle of a file name.
    pkl_name = os.path.splitext(file_name)[0] + '.pkl'

    # Read the CSV file to get the record count
    df = pd.read_csv(file_path)
    record_count = len(df)
    # The pickle name is stored in the record so the index always agrees with the
    # file actually written below.
    results.append({'csv_name': file_name, 'record_count': record_count, 'pkl_name': pkl_name})
    print(f"Processed {file_name} with {record_count} records.")
    # wait=True defers the clear until the next output, so only the latest progress line shows
    clear_output(wait=True)

    # Convert Timestamp column to datetime if it exists
    if 'Timestamp' in df.columns:
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])

    # NOTE(review): pickles are keyed by base name only, so two CSVs with the same
    # name in different subdirectories would overwrite each other - confirm names are unique.
    pickle_file = os.path.join(pkl_dir, pkl_name)
    df.to_pickle(pickle_file)

# Explicit columns keep the frame well-formed even when no CSV files were found
csvs_df = pd.DataFrame(results, columns=['csv_name', 'record_count', 'pkl_name'])

display(csvs_df)

list_file_name = '0_daep_files_list.pkl'
csvs_df.to_pickle(os.path.join(pkl_dir, list_file_name))
print(f"Saved file list to {os.path.join(ROOT, 'pkl', 'daep', list_file_name)}")

Unnamed: 0,csv_name,record_count,pkl_name
0,Additional_Information_by_Country.csv,49,Additional_Information_by_Country.pkl
1,Australia_AEMO_2006.csv,17519,Australia_AEMO_2006.pkl
2,Australia_AEMO_2007.csv,17520,Australia_AEMO_2007.pkl
3,Australia_AEMO_2008.csv,17568,Australia_AEMO_2008.pkl
4,Australia_AEMO_2009.csv,17520,Australia_AEMO_2009.pkl
...,...,...,...
501,USA_SPP_2021.csv,8759,USA_SPP_2021.pkl
502,USA_SPP_2022.csv,8759,USA_SPP_2022.pkl
503,USA_SPP_2023.csv,8759,USA_SPP_2023.pkl
504,USA_SPP_2024.csv,8778,USA_SPP_2024.pkl


Saved file list to ..\pkl\daep\0_daep_files_list.pkl


Loading a saved files index:

In [51]:
# Reload the DAEP files index from its pickle under pkl/daep/ so the exploration
# cells can run without re-scanning the CSV files.
list_file_name = '0_daep_files_list.pkl'
daep_index_path = os.path.join(ROOT, 'pkl', 'daep', list_file_name)
csvs_df = pd.read_pickle(daep_index_path)

### Files list exploration

Exploring random segments.

In [53]:
# Show a random window of up to sample_size consecutive rows from the files index.
sample_size = 10
# Clamp the upper bound at 0: random.randint raises ValueError when the index has
# fewer than sample_size rows and the bound would go negative. In that case the
# window starts at row 0 and the slice below simply returns every row.
start_idx = random.randint(0, max(0, len(csvs_df) - sample_size))
display(csvs_df.iloc[start_idx:start_idx + sample_size])

Unnamed: 0,csv_name,record_count,pkl_name
264,Netherlands_ENTSO-E_2020.csv,8784,Netherlands_ENTSO-E_2020.pkl
265,Netherlands_ENTSO-E_2021.csv,8760,Netherlands_ENTSO-E_2021.pkl
266,Netherlands_ENTSO-E_2022.csv,8760,Netherlands_ENTSO-E_2022.pkl
267,Netherlands_ENTSO-E_2023.csv,8760,Netherlands_ENTSO-E_2023.pkl
268,Netherlands_ENTSO-E_2024.csv,6672,Netherlands_ENTSO-E_2024.pkl
269,North Macedonia_ENTSO-E_2023.csv,5640,North Macedonia_ENTSO-E_2023.pkl
270,North Macedonia_ENTSO-E_2024.csv,6672,North Macedonia_ENTSO-E_2024.pkl
271,Norway_ENTSO-E_2015.csv,8759,Norway_ENTSO-E_2015.pkl
272,Norway_ENTSO-E_2016.csv,8783,Norway_ENTSO-E_2016.pkl
273,Norway_ENTSO-E_2017.csv,8759,Norway_ENTSO-E_2017.pkl


Files with 2025 data:

In [54]:
# Keep the index rows whose CSV name contains '2025', then report the count and show them.
is_2025_file = csvs_df['csv_name'].str.contains('2025')
csvs_2025 = csvs_df[is_2025_file]
print(len(csvs_2025), " files with 2025 price data:")
display(csvs_2025)

14  files with 2025 price data:


Unnamed: 0,csv_name,record_count,pkl_name
20,Australia_AEMO_2025.csv,43489,Australia_AEMO_2025.pkl
48,Brazil_ONS-CCEE_2025.csv,3696,Brazil_ONS-CCEE_2025.pkl
81,Canada_IESO_2025.csv,2880,Canada_IESO_2025.pkl
89,Chile_CEN_2025.csv,4381,Chile_CEN_2025.pkl
189,India_IEX_2025.csv,14496,India_IEX_2025.pkl
226,Japan_JEPX_2025.csv,8832,Japan_JEPX_2025.pkl
325,Singapore_NEMS_2025.csv,6768,Singapore_NEMS_2025.pkl
370,South Korea_KPX_2025.csv,4416,South Korea_KPX_2025.pkl
416,USA_CAISO_2025.csv,4199,USA_CAISO_2025.pkl
432,USA_ERCOT_2025.csv,3623,USA_ERCOT_2025.pkl


File content example:

In [65]:
# Preview one converted price file: its first six and last six rows.
# NOTE(review): the variable name says 2025 but the pickle loaded is the 2018 file -
# confirm which one is intended before relying on the name.
italy_price_pkl = os.path.join(ROOT, 'pkl', 'daep', 'Italy_ENTSO-E_2018.pkl')
italy_price_data_2025 = pd.read_pickle(italy_price_pkl)

display(italy_price_data_2025.head(6))
print("...")
display(italy_price_data_2025.tail(6))

Unnamed: 0,Timestamp,Centre-North,Centre-South,North,SACOAC,SACODC,Sardinia,Sicily,South
0,2018-01-01 00:00:00,45.73,45.73,45.73,45.73,45.73,45.73,46.99,45.73
1,2018-01-01 01:00:00,44.16,44.16,44.16,44.16,44.16,44.16,44.16,44.16
2,2018-01-01 02:00:00,42.24,42.24,42.24,42.24,42.24,42.24,42.24,42.24
3,2018-01-01 03:00:00,39.29,39.29,39.29,39.29,39.29,39.29,39.29,39.29
4,2018-01-01 04:00:00,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0
5,2018-01-01 05:00:00,41.99,41.99,41.99,41.99,41.99,41.99,41.99,41.99


...


Unnamed: 0,Timestamp,Centre-North,Centre-South,North,SACOAC,SACODC,Sardinia,Sicily,South
8753,2018-12-31 18:00:00,69.9,69.9,69.9,69.9,69.9,69.9,87.0,69.9
8754,2018-12-31 19:00:00,70.06,70.06,70.06,70.06,70.06,70.06,87.27,70.06
8755,2018-12-31 20:00:00,69.27,69.27,69.27,69.27,69.27,69.27,87.0,69.27
8756,2018-12-31 21:00:00,59.84,59.84,59.84,59.84,59.84,59.84,59.84,59.84
8757,2018-12-31 22:00:00,55.7,55.7,55.7,55.7,55.7,55.7,55.7,55.7
8758,2018-12-31 23:00:00,49.8,49.8,49.8,49.8,49.8,49.8,49.8,49.8


## Worldwide Electricity Load Dataset

This dataset provides a comprehensive, global-scale collection of time-series electricity load data aggregated from major power markets across Asia, Europe, North America, and Oceania. It captures system-level demand profiles at consistent temporal resolution, enabling detailed analysis of consumption patterns, peak-demand behavior, and seasonal variability across regions. The dataset is structured in a standardized and harmonized format to support load forecasting, cross-market comparative studies, and data-driven research in power system planning, operation, and energy market analytics.

Ullah, Md Habib; Reza, Sayed Mohsin; Khan, Abdullah Al Ahad; Salekin, Siraj Us; Al-Bayati, Ali M. S.; Babaiahgari, Bhanu; Romero, David González; Molina, Jesús Rodríguez (2026), “Worldwide Electricity Load Dataset”, Mendeley Data, V1, doi: 10.17632/ybggkc58fz.1

https://data.mendeley.com/datasets/ybggkc58fz/1

In [56]:
# Build the WELD files index: convert every CSV under datasets/weld/ to a pickle under
# pkl/weld/ and record each file's name, row count and pickle name in csvs_df.

# Find all CSV files in the datasets/weld/ directory and subdirectories.
# glob returns matches in a filesystem-dependent order, so sort them to keep the
# index (and the row positions used by later cells) reproducible across machines.
csv_files = sorted(glob.glob(os.path.join(ROOT, 'datasets', 'weld', '**', '*.csv'), recursive=True))

# All pickles, including the index itself, are written to this directory
pkl_dir = os.path.join(ROOT, 'pkl', 'weld')
os.makedirs(pkl_dir, exist_ok=True)

# One record per CSV file
results = []

for file_path in csv_files:
    file_name = os.path.basename(file_path)
    # Swap only the trailing extension; a plain substring replace would also
    # rewrite a '.csv' that appears in the middle of a file name.
    pkl_name = os.path.splitext(file_name)[0] + '.pkl'

    # Read the CSV file to get the record count
    df = pd.read_csv(file_path)
    record_count = len(df)
    # The pickle name is stored in the record so the index always agrees with the
    # file actually written below.
    results.append({'csv_name': file_name, 'record_count': record_count, 'pkl_name': pkl_name})
    print(f"Processed {file_name} with {record_count} records.")
    # wait=True defers the clear until the next output, so only the latest progress line shows
    clear_output(wait=True)

    # Convert Timestamp column to datetime if it exists
    if 'Timestamp' in df.columns:
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])

    # Save as pickle file with same base name but .pkl extension.
    # NOTE(review): pickles are keyed by base name only, so two CSVs with the same
    # name in different subdirectories would overwrite each other - confirm names are unique.
    pickle_file = os.path.join(pkl_dir, pkl_name)
    df.to_pickle(pickle_file)

# Explicit columns keep the frame well-formed even when no CSV files were found
csvs_df = pd.DataFrame(results, columns=['csv_name', 'record_count', 'pkl_name'])

display(csvs_df)

list_file_name = '0_weld_files_list.pkl'
csvs_df.to_pickle(os.path.join(pkl_dir, list_file_name))
print(f"Saved file list to {os.path.join(ROOT, 'pkl', 'weld', list_file_name)}")

Unnamed: 0,csv_name,record_count,pkl_name
0,Additional_Information.csv,42,Additional_Information.pkl
1,Australia_AEMO_2006.csv,17519,Australia_AEMO_2006.pkl
2,Australia_AEMO_2007.csv,17520,Australia_AEMO_2007.pkl
3,Australia_AEMO_2008.csv,17568,Australia_AEMO_2008.pkl
4,Australia_AEMO_2009.csv,17520,Australia_AEMO_2009.pkl
...,...,...,...
484,USA_SPP_2021.csv,8758,USA_SPP_2021.pkl
485,USA_SPP_2022.csv,8749,USA_SPP_2022.pkl
486,USA_SPP_2023.csv,8757,USA_SPP_2023.pkl
487,USA_SPP_2024.csv,8782,USA_SPP_2024.pkl


Saved file list to ..\pkl\weld\0_weld_files_list.pkl


Loading a saved files index:

In [57]:
# Reload the WELD files index from its pickle under pkl/weld/ so the exploration
# cells can run without re-scanning the CSV files.
list_file_name = '0_weld_files_list.pkl'
weld_index_path = os.path.join(ROOT, 'pkl', 'weld', list_file_name)
csvs_df = pd.read_pickle(weld_index_path)

### Files list exploration

Exploring random segments.

In [None]:
# Show a random window of up to sample_size consecutive rows from the files index.
sample_size = 10
# Clamp the upper bound at 0: random.randint raises ValueError when the index has
# fewer than sample_size rows and the bound would go negative. In that case the
# window starts at row 0 and the slice below simply returns every row.
start_idx = random.randint(0, max(0, len(csvs_df) - sample_size))
# Uncomment to pin the window to a fixed position instead of a random one:
# start_idx = 259
display(csvs_df.iloc[start_idx:start_idx + sample_size])

Unnamed: 0,csv_name,record_count,pkl_name
259,Netherlands_ENTSO-E_2024.csv,35140,Netherlands_ENTSO-E_2024.pkl
260,Netherlands_ENTSO-E_2025.csv,15354,Netherlands_ENTSO-E_2025.pkl
261,North Macedonia_ENTSO-E_2015.csv,8761,North Macedonia_ENTSO-E_2015.pkl
262,North Macedonia_ENTSO-E_2016.csv,8785,North Macedonia_ENTSO-E_2016.pkl
263,North Macedonia_ENTSO-E_2017.csv,8761,North Macedonia_ENTSO-E_2017.pkl
264,North Macedonia_ENTSO-E_2018.csv,8761,North Macedonia_ENTSO-E_2018.pkl
265,North Macedonia_ENTSO-E_2019.csv,8761,North Macedonia_ENTSO-E_2019.pkl
266,North Macedonia_ENTSO-E_2020.csv,8785,North Macedonia_ENTSO-E_2020.pkl
267,North Macedonia_ENTSO-E_2021.csv,8761,North Macedonia_ENTSO-E_2021.pkl
268,North Macedonia_ENTSO-E_2022.csv,8761,North Macedonia_ENTSO-E_2022.pkl


Files with 2025 data:

In [61]:
# Keep the index rows whose CSV name contains '2025', then report the count and show them.
is_2025_file = csvs_df['csv_name'].str.contains('2025')
csvs_2025 = csvs_df[is_2025_file]
print(len(csvs_2025), " files with 2025 load data:")
display(csvs_2025)

41  files with 2025 load data:


Unnamed: 0,csv_name,record_count,pkl_name
20,Australia_AEMO_2025.csv,43489,Australia_AEMO_2025.pkl
31,Austria_ENTSO-E_2025.csv,15346,Austria_ENTSO-E_2025.pkl
42,Belgium_ENTSO-E_2025.csv,15347,Belgium_ENTSO-E_2025.pkl
53,Bulgaria_ENTSO-E_2025.csv,3836,Bulgaria_ENTSO-E_2025.pkl
76,CANADA_IESO_2025.csv,3815,CANADA_IESO_2025.pkl
87,Croatia_ENTSO-E_2025.csv,15347,Croatia_ENTSO-E_2025.pkl
98,Czech Republic_ENTSO-E_2025.csv,15348,Czech Republic_ENTSO-E_2025.pkl
109,Denmark_ENTSO-E_2025.csv,3837,Denmark_ENTSO-E_2025.pkl
120,Estonia_ENTSO-E_2025.csv,3837,Estonia_ENTSO-E_2025.pkl
131,Finland_ENTSO-E_2025.csv,15348,Finland_ENTSO-E_2025.pkl


File content example:

In [66]:
# Preview one converted load file: its first six and last six rows.
# NOTE(review): the variable name says 2025 but the pickle loaded is the 2018 file -
# confirm which one is intended before relying on the name.
italy_load_pkl = os.path.join(ROOT, 'pkl', 'weld', 'Italy_ENTSO-E_2018.pkl')
italy_load_data_2025 = pd.read_pickle(italy_load_pkl)

display(italy_load_data_2025.head(6))
print("...")
display(italy_load_data_2025.tail(6))

Unnamed: 0,Timestamp,Italy
0,2018-01-01 00:00:00,23071.0
1,2018-01-01 01:00:00,22305.0
2,2018-01-01 02:00:00,20971.0
3,2018-01-01 03:00:00,19875.0
4,2018-01-01 04:00:00,19074.0
5,2018-01-01 05:00:00,19346.0


...


Unnamed: 0,Timestamp,Italy
8755,2018-12-31 18:00:00,34316.0
8756,2018-12-31 19:00:00,33331.0
8757,2018-12-31 20:00:00,30839.0
8758,2018-12-31 21:00:00,28301.0
8759,2018-12-31 22:00:00,26516.0
8760,2018-12-31 23:00:00,24941.0
