#### Library

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Set pandas options to display all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

import os

#### Data Loading

In [2]:
# Folder containing CSV files
folder_path = "/home/kevin/Downloads/BESS/data/raw/2023"

df_list = []
# Loop through each file in the folder
for file in os.listdir(folder_path):
    if file.endswith(".csv"):
        file_path = os.path.join(folder_path, file)
        df_name = os.path.splitext(file)[0].split('20')[0]  # Remove the .csv extension for naming
        globals()[df_name] = pd.read_csv(file_path)  # Create a variable dynamically
        print(f"Loaded DataFrame: {df_name}, Shape: {globals()[df_name].shape}")
        df_list.append(df_name)

# Sort the list of DataFrames
df_list.sort()


Loaded DataFrame: AIL_, Shape: (8760, 4)
Loaded DataFrame: temperature_edmonton_, Shape: (8720, 3)
Loaded DataFrame: windspeed_fortmc_, Shape: (8752, 3)
Loaded DataFrame: solar_generation_, Shape: (8759, 3)
Loaded DataFrame: temperature_fortmc_, Shape: (8752, 3)
Loaded DataFrame: price_, Shape: (8760, 5)
Loaded DataFrame: windspeed_edmonton_, Shape: (8720, 3)
Loaded DataFrame: wind_generation_, Shape: (8759, 3)
Loaded DataFrame: temperature_calgary_, Shape: (8758, 3)
Loaded DataFrame: windspeed_calgary_, Shape: (8758, 3)


In [3]:
for df_name in df_list:
    print(f"\nDataFrame: {df_name}")
    display(globals()[df_name].head())


DataFrame: AIL_


Unnamed: 0.1,Unnamed: 0,begin_datetime_mpt,alberta_internal_load,forecast_alberta_internal_load
0,0,2023-01-01 00:00,9824,9832
1,1,2023-01-01 01:00,9712,9723
2,2,2023-01-01 02:00,9623,9634
3,3,2023-01-01 03:00,9578,9578
4,4,2023-01-01 04:00,9599,9541



DataFrame: price_


Unnamed: 0.1,Unnamed: 0,begin_datetime_mpt,pool_price,forecast_pool_price,rolling_30day_avg
0,0,2023-01-01 00:00,80.55,88.39,304.67
1,1,2023-01-01 01:00,80.84,102.36,304.57
2,2,2023-01-01 02:00,80.63,80.82,304.54
3,3,2023-01-01 03:00,79.76,80.61,304.39
4,4,2023-01-01 04:00,79.53,77.88,303.67



DataFrame: solar_generation_


Unnamed: 0.1,Unnamed: 0,Date (MPT),Volume
0,17518,2023-01-01 00:00:00,0.0
1,17519,2023-01-01 01:00:00,0.0
2,17520,2023-01-01 02:00:00,0.0
3,17521,2023-01-01 03:00:00,0.0
4,17522,2023-01-01 04:00:00,0.0



DataFrame: temperature_calgary_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,64,2023-01-01 00:00:00,-4.5
1,65,2023-01-01 01:00:00,-2.5
2,0,2023-01-01 02:00:00,-3.6
3,1,2023-01-01 03:00:00,-3.9
4,2,2023-01-01 04:00:00,-5.8



DataFrame: temperature_edmonton_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,7,2023-01-01 00:00:00,-11.9
1,8,2023-01-01 01:00:00,-12.8
2,9,2023-01-01 02:00:00,-10.7
3,10,2023-01-01 03:00:00,-7.8
4,11,2023-01-01 04:00:00,-7.9



DataFrame: temperature_fortmc_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,1,2023-01-01 00:00:00,-13.4
1,2,2023-01-01 01:00:00,-11.0
2,3,2023-01-01 02:00:00,-9.4
3,4,2023-01-01 03:00:00,-8.7
4,5,2023-01-01 04:00:00,-9.0



DataFrame: wind_generation_


Unnamed: 0.1,Unnamed: 0,Date (MPT),Volume
0,17518,2023-01-01 00:00:00,780.206753
1,17519,2023-01-01 01:00:00,732.207446
2,17520,2023-01-01 02:00:00,743.88406
3,17521,2023-01-01 03:00:00,759.293766
4,17522,2023-01-01 04:00:00,746.124896



DataFrame: windspeed_calgary_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,WIND_SPEED
0,64,2023-01-01 00:00:00,3.0
1,65,2023-01-01 01:00:00,4.0
2,0,2023-01-01 02:00:00,9.0
3,1,2023-01-01 03:00:00,8.0
4,2,2023-01-01 04:00:00,1.0



DataFrame: windspeed_edmonton_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,WIND_SPEED
0,7,2023-01-01 00:00:00,6
1,8,2023-01-01 01:00:00,5
2,9,2023-01-01 02:00:00,6
3,10,2023-01-01 03:00:00,6
4,11,2023-01-01 04:00:00,5



DataFrame: windspeed_fortmc_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,WIND_SPEED
0,1,2023-01-01 00:00:00,4
1,2,2023-01-01 01:00:00,6
2,3,2023-01-01 02:00:00,8
3,4,2023-01-01 03:00:00,11
4,5,2023-01-01 04:00:00,13


#### Column Name Adjustment

In [4]:
for df_name in df_list:
    print(f"\nDataFrame: {df_name}")
    globals()[df_name].drop(['Unnamed: 0'], axis=1, inplace=True)
    display(globals()[df_name].head())


DataFrame: AIL_


Unnamed: 0,begin_datetime_mpt,alberta_internal_load,forecast_alberta_internal_load
0,2023-01-01 00:00,9824,9832
1,2023-01-01 01:00,9712,9723
2,2023-01-01 02:00,9623,9634
3,2023-01-01 03:00,9578,9578
4,2023-01-01 04:00,9599,9541



DataFrame: price_


Unnamed: 0,begin_datetime_mpt,pool_price,forecast_pool_price,rolling_30day_avg
0,2023-01-01 00:00,80.55,88.39,304.67
1,2023-01-01 01:00,80.84,102.36,304.57
2,2023-01-01 02:00,80.63,80.82,304.54
3,2023-01-01 03:00,79.76,80.61,304.39
4,2023-01-01 04:00,79.53,77.88,303.67



DataFrame: solar_generation_


Unnamed: 0,Date (MPT),Volume
0,2023-01-01 00:00:00,0.0
1,2023-01-01 01:00:00,0.0
2,2023-01-01 02:00:00,0.0
3,2023-01-01 03:00:00,0.0
4,2023-01-01 04:00:00,0.0



DataFrame: temperature_calgary_


Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,2023-01-01 00:00:00,-4.5
1,2023-01-01 01:00:00,-2.5
2,2023-01-01 02:00:00,-3.6
3,2023-01-01 03:00:00,-3.9
4,2023-01-01 04:00:00,-5.8



DataFrame: temperature_edmonton_


Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,2023-01-01 00:00:00,-11.9
1,2023-01-01 01:00:00,-12.8
2,2023-01-01 02:00:00,-10.7
3,2023-01-01 03:00:00,-7.8
4,2023-01-01 04:00:00,-7.9



DataFrame: temperature_fortmc_


Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,2023-01-01 00:00:00,-13.4
1,2023-01-01 01:00:00,-11.0
2,2023-01-01 02:00:00,-9.4
3,2023-01-01 03:00:00,-8.7
4,2023-01-01 04:00:00,-9.0



DataFrame: wind_generation_


Unnamed: 0,Date (MPT),Volume
0,2023-01-01 00:00:00,780.206753
1,2023-01-01 01:00:00,732.207446
2,2023-01-01 02:00:00,743.88406
3,2023-01-01 03:00:00,759.293766
4,2023-01-01 04:00:00,746.124896



DataFrame: windspeed_calgary_


Unnamed: 0,Timestamp_mst,WIND_SPEED
0,2023-01-01 00:00:00,3.0
1,2023-01-01 01:00:00,4.0
2,2023-01-01 02:00:00,9.0
3,2023-01-01 03:00:00,8.0
4,2023-01-01 04:00:00,1.0



DataFrame: windspeed_edmonton_


Unnamed: 0,Timestamp_mst,WIND_SPEED
0,2023-01-01 00:00:00,6
1,2023-01-01 01:00:00,5
2,2023-01-01 02:00:00,6
3,2023-01-01 03:00:00,6
4,2023-01-01 04:00:00,5



DataFrame: windspeed_fortmc_


Unnamed: 0,Timestamp_mst,WIND_SPEED
0,2023-01-01 00:00:00,4
1,2023-01-01 01:00:00,6
2,2023-01-01 02:00:00,8
3,2023-01-01 03:00:00,11
4,2023-01-01 04:00:00,13


In [5]:
# Rename columns
price_.rename(columns={'begin_datetime_mpt': 'datetime_', 'rolling_30day_avg': 'rolling_30day_avg_price'}, inplace=True)

AIL_.rename(columns={'begin_datetime_mpt': 'datetime_'}, inplace=True)

solar_generation_.rename(columns={'Date (MPT)': 'datetime_', 'Volume':'solar_generation'}, inplace=True)

wind_generation_.rename(columns={'Date (MPT)': 'datetime_', 'Volume':'wind_generation'}, inplace=True)

temperature_calgary_.rename(columns={'Timestamp_mst': 'datetime_', 'Temperature (degree C)':'temp_calgary'}, inplace=True)

temperature_edmonton_.rename(columns={'Timestamp_mst': 'datetime_', 'Temperature (degree C)':'temp_edmonton'}, inplace=True)

temperature_fortmc_.rename(columns={'Timestamp_mst': 'datetime_', 'Temperature (degree C)':'temp_fortmc'}, inplace=True)

windspeed_calgary_.rename(columns={'Timestamp_mst': 'datetime_', 'WIND_SPEED':'ws_calgary'}, inplace=True)

windspeed_edmonton_.rename(columns={'Timestamp_mst': 'datetime_', 'WIND_SPEED':'ws_edmonton'}, inplace=True)

windspeed_fortmc_.rename(columns={'Timestamp_mst': 'datetime_', 'WIND_SPEED':'ws_fortmc'}, inplace=True)

In [6]:
for df_name in df_list:
    print(f"\nDataFrame: {df_name}")
    display(globals()[df_name].head())


DataFrame: AIL_


Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load
0,2023-01-01 00:00,9824,9832
1,2023-01-01 01:00,9712,9723
2,2023-01-01 02:00,9623,9634
3,2023-01-01 03:00,9578,9578
4,2023-01-01 04:00,9599,9541



DataFrame: price_


Unnamed: 0,datetime_,pool_price,forecast_pool_price,rolling_30day_avg_price
0,2023-01-01 00:00,80.55,88.39,304.67
1,2023-01-01 01:00,80.84,102.36,304.57
2,2023-01-01 02:00,80.63,80.82,304.54
3,2023-01-01 03:00,79.76,80.61,304.39
4,2023-01-01 04:00,79.53,77.88,303.67



DataFrame: solar_generation_


Unnamed: 0,datetime_,solar_generation
0,2023-01-01 00:00:00,0.0
1,2023-01-01 01:00:00,0.0
2,2023-01-01 02:00:00,0.0
3,2023-01-01 03:00:00,0.0
4,2023-01-01 04:00:00,0.0



DataFrame: temperature_calgary_


Unnamed: 0,datetime_,temp_calgary
0,2023-01-01 00:00:00,-4.5
1,2023-01-01 01:00:00,-2.5
2,2023-01-01 02:00:00,-3.6
3,2023-01-01 03:00:00,-3.9
4,2023-01-01 04:00:00,-5.8



DataFrame: temperature_edmonton_


Unnamed: 0,datetime_,temp_edmonton
0,2023-01-01 00:00:00,-11.9
1,2023-01-01 01:00:00,-12.8
2,2023-01-01 02:00:00,-10.7
3,2023-01-01 03:00:00,-7.8
4,2023-01-01 04:00:00,-7.9



DataFrame: temperature_fortmc_


Unnamed: 0,datetime_,temp_fortmc
0,2023-01-01 00:00:00,-13.4
1,2023-01-01 01:00:00,-11.0
2,2023-01-01 02:00:00,-9.4
3,2023-01-01 03:00:00,-8.7
4,2023-01-01 04:00:00,-9.0



DataFrame: wind_generation_


Unnamed: 0,datetime_,wind_generation
0,2023-01-01 00:00:00,780.206753
1,2023-01-01 01:00:00,732.207446
2,2023-01-01 02:00:00,743.88406
3,2023-01-01 03:00:00,759.293766
4,2023-01-01 04:00:00,746.124896



DataFrame: windspeed_calgary_


Unnamed: 0,datetime_,ws_calgary
0,2023-01-01 00:00:00,3.0
1,2023-01-01 01:00:00,4.0
2,2023-01-01 02:00:00,9.0
3,2023-01-01 03:00:00,8.0
4,2023-01-01 04:00:00,1.0



DataFrame: windspeed_edmonton_


Unnamed: 0,datetime_,ws_edmonton
0,2023-01-01 00:00:00,6
1,2023-01-01 01:00:00,5
2,2023-01-01 02:00:00,6
3,2023-01-01 03:00:00,6
4,2023-01-01 04:00:00,5



DataFrame: windspeed_fortmc_


Unnamed: 0,datetime_,ws_fortmc
0,2023-01-01 00:00:00,4
1,2023-01-01 01:00:00,6
2,2023-01-01 02:00:00,8
3,2023-01-01 03:00:00,11
4,2023-01-01 04:00:00,13


In [7]:
# convert columns to datetime
for df_name in df_list:
    print(f"\nDataFrame: {df_name}")
    globals()[df_name]['datetime_'] = pd.to_datetime(globals()[df_name]['datetime_'])
    display(globals()[df_name].head())


DataFrame: AIL_


Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load
0,2023-01-01 00:00:00,9824,9832
1,2023-01-01 01:00:00,9712,9723
2,2023-01-01 02:00:00,9623,9634
3,2023-01-01 03:00:00,9578,9578
4,2023-01-01 04:00:00,9599,9541



DataFrame: price_


Unnamed: 0,datetime_,pool_price,forecast_pool_price,rolling_30day_avg_price
0,2023-01-01 00:00:00,80.55,88.39,304.67
1,2023-01-01 01:00:00,80.84,102.36,304.57
2,2023-01-01 02:00:00,80.63,80.82,304.54
3,2023-01-01 03:00:00,79.76,80.61,304.39
4,2023-01-01 04:00:00,79.53,77.88,303.67



DataFrame: solar_generation_


Unnamed: 0,datetime_,solar_generation
0,2023-01-01 00:00:00,0.0
1,2023-01-01 01:00:00,0.0
2,2023-01-01 02:00:00,0.0
3,2023-01-01 03:00:00,0.0
4,2023-01-01 04:00:00,0.0



DataFrame: temperature_calgary_


Unnamed: 0,datetime_,temp_calgary
0,2023-01-01 00:00:00,-4.5
1,2023-01-01 01:00:00,-2.5
2,2023-01-01 02:00:00,-3.6
3,2023-01-01 03:00:00,-3.9
4,2023-01-01 04:00:00,-5.8



DataFrame: temperature_edmonton_


Unnamed: 0,datetime_,temp_edmonton
0,2023-01-01 00:00:00,-11.9
1,2023-01-01 01:00:00,-12.8
2,2023-01-01 02:00:00,-10.7
3,2023-01-01 03:00:00,-7.8
4,2023-01-01 04:00:00,-7.9



DataFrame: temperature_fortmc_


Unnamed: 0,datetime_,temp_fortmc
0,2023-01-01 00:00:00,-13.4
1,2023-01-01 01:00:00,-11.0
2,2023-01-01 02:00:00,-9.4
3,2023-01-01 03:00:00,-8.7
4,2023-01-01 04:00:00,-9.0



DataFrame: wind_generation_


Unnamed: 0,datetime_,wind_generation
0,2023-01-01 00:00:00,780.206753
1,2023-01-01 01:00:00,732.207446
2,2023-01-01 02:00:00,743.88406
3,2023-01-01 03:00:00,759.293766
4,2023-01-01 04:00:00,746.124896



DataFrame: windspeed_calgary_


Unnamed: 0,datetime_,ws_calgary
0,2023-01-01 00:00:00,3.0
1,2023-01-01 01:00:00,4.0
2,2023-01-01 02:00:00,9.0
3,2023-01-01 03:00:00,8.0
4,2023-01-01 04:00:00,1.0



DataFrame: windspeed_edmonton_


Unnamed: 0,datetime_,ws_edmonton
0,2023-01-01 00:00:00,6
1,2023-01-01 01:00:00,5
2,2023-01-01 02:00:00,6
3,2023-01-01 03:00:00,6
4,2023-01-01 04:00:00,5



DataFrame: windspeed_fortmc_


Unnamed: 0,datetime_,ws_fortmc
0,2023-01-01 00:00:00,4
1,2023-01-01 01:00:00,6
2,2023-01-01 02:00:00,8
3,2023-01-01 03:00:00,11
4,2023-01-01 04:00:00,13


In [8]:
# Initialize the merged dataframe with the first dataframe in the list
merged_df = globals()[df_list[0]]

# Loop through the remaining dataframes and merge them one by one
for df_name in df_list[1:]:
    merged_df = merged_df.merge(globals()[df_name], on='datetime_', how='outer')

# Display the merged dataframe
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc
0,2023-01-01 00:00:00,9824.0,9832.0,80.55,88.39,304.67,0.0,-4.5,-11.9,-13.4,780.206753,3.0,6.0,4.0
1,2023-01-01 01:00:00,9712.0,9723.0,80.84,102.36,304.57,0.0,-2.5,-12.8,-11.0,732.207446,4.0,5.0,6.0
2,2023-01-01 02:00:00,9623.0,9634.0,80.63,80.82,304.54,0.0,-3.6,-10.7,-9.4,743.88406,9.0,6.0,8.0
3,2023-01-01 03:00:00,9578.0,9578.0,79.76,80.61,304.39,0.0,-3.9,-7.8,-8.7,759.293766,8.0,6.0,11.0
4,2023-01-01 04:00:00,9599.0,9541.0,79.53,77.88,303.67,0.0,-5.8,-7.9,-9.0,746.124896,1.0,5.0,13.0


In [9]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8763 entries, 0 to 8762
Data columns (total 14 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   datetime_                       8763 non-null   datetime64[ns]
 1   alberta_internal_load           8762 non-null   float64       
 2   forecast_alberta_internal_load  8762 non-null   float64       
 3   pool_price                      8762 non-null   float64       
 4   forecast_pool_price             8762 non-null   float64       
 5   rolling_30day_avg_price         8762 non-null   float64       
 6   solar_generation                8762 non-null   float64       
 7   temp_calgary                    8760 non-null   float64       
 8   temp_edmonton                   8723 non-null   float64       
 9   temp_fortmc                     8755 non-null   float64       
 10  wind_generation                 8762 non-null   float64       
 11  ws_c

In [10]:
merged_df.shape

(8763, 14)

In [11]:
merged_df.isnull().sum()

datetime_                          0
alberta_internal_load              1
forecast_alberta_internal_load     1
pool_price                         1
forecast_pool_price                1
rolling_30day_avg_price            1
solar_generation                   1
temp_calgary                       3
temp_edmonton                     40
temp_fortmc                        8
wind_generation                    1
ws_calgary                         3
ws_edmonton                       40
ws_fortmc                          8
dtype: int64

#### Feature Engineering

##### time based variables

In [12]:
merged_df['hour_of_day'] = merged_df['datetime_'].dt.hour
merged_df['day_of_week'] = merged_df['datetime_'].dt.dayofweek
merged_df['week_of_month'] = merged_df['datetime_'].dt.isocalendar().week
merged_df['month'] = merged_df['datetime_'].dt.month

# Display the updated dataframe
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc,hour_of_day,day_of_week,week_of_month,month
0,2023-01-01 00:00:00,9824.0,9832.0,80.55,88.39,304.67,0.0,-4.5,-11.9,-13.4,780.206753,3.0,6.0,4.0,0,6,52,1
1,2023-01-01 01:00:00,9712.0,9723.0,80.84,102.36,304.57,0.0,-2.5,-12.8,-11.0,732.207446,4.0,5.0,6.0,1,6,52,1
2,2023-01-01 02:00:00,9623.0,9634.0,80.63,80.82,304.54,0.0,-3.6,-10.7,-9.4,743.88406,9.0,6.0,8.0,2,6,52,1
3,2023-01-01 03:00:00,9578.0,9578.0,79.76,80.61,304.39,0.0,-3.9,-7.8,-8.7,759.293766,8.0,6.0,11.0,3,6,52,1
4,2023-01-01 04:00:00,9599.0,9541.0,79.53,77.88,303.67,0.0,-5.8,-7.9,-9.0,746.124896,1.0,5.0,13.0,4,6,52,1


##### seasonality

In [13]:
# Define a function to determine the season
def get_season(month):
    if month in [5, 6, 7, 8, 9]:
        return 0 # Summer
    else:
        return 1 # Winter

# Apply the function to create a new column 'season'
merged_df['is_winter'] = merged_df['month'].apply(get_season)

# Display the updated dataframe
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc,hour_of_day,day_of_week,week_of_month,month,is_winter
0,2023-01-01 00:00:00,9824.0,9832.0,80.55,88.39,304.67,0.0,-4.5,-11.9,-13.4,780.206753,3.0,6.0,4.0,0,6,52,1,1
1,2023-01-01 01:00:00,9712.0,9723.0,80.84,102.36,304.57,0.0,-2.5,-12.8,-11.0,732.207446,4.0,5.0,6.0,1,6,52,1,1
2,2023-01-01 02:00:00,9623.0,9634.0,80.63,80.82,304.54,0.0,-3.6,-10.7,-9.4,743.88406,9.0,6.0,8.0,2,6,52,1,1
3,2023-01-01 03:00:00,9578.0,9578.0,79.76,80.61,304.39,0.0,-3.9,-7.8,-8.7,759.293766,8.0,6.0,11.0,3,6,52,1,1
4,2023-01-01 04:00:00,9599.0,9541.0,79.53,77.88,303.67,0.0,-5.8,-7.9,-9.0,746.124896,1.0,5.0,13.0,4,6,52,1,1


##### pool_price_lagged_values_till_24_hours

In [14]:
def generate_lagged_features(df, column, num_lags):
    for lag in range(1, num_lags + 1):
        df[f'{column}_lag_{lag}'] = df[column].shift(lag)
    return df

# Generate 24-hour lagged values for the pool_price column
merged_df = generate_lagged_features(merged_df, 'pool_price', 24)

# Display the updated dataframe
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc,hour_of_day,day_of_week,week_of_month,month,is_winter,pool_price_lag_1,pool_price_lag_2,pool_price_lag_3,pool_price_lag_4,pool_price_lag_5,pool_price_lag_6,pool_price_lag_7,pool_price_lag_8,pool_price_lag_9,pool_price_lag_10,pool_price_lag_11,pool_price_lag_12,pool_price_lag_13,pool_price_lag_14,pool_price_lag_15,pool_price_lag_16,pool_price_lag_17,pool_price_lag_18,pool_price_lag_19,pool_price_lag_20,pool_price_lag_21,pool_price_lag_22,pool_price_lag_23,pool_price_lag_24
0,2023-01-01 00:00:00,9824.0,9832.0,80.55,88.39,304.67,0.0,-4.5,-11.9,-13.4,780.206753,3.0,6.0,4.0,0,6,52,1,1,,,,,,,,,,,,,,,,,,,,,,,,
1,2023-01-01 01:00:00,9712.0,9723.0,80.84,102.36,304.57,0.0,-2.5,-12.8,-11.0,732.207446,4.0,5.0,6.0,1,6,52,1,1,80.55,,,,,,,,,,,,,,,,,,,,,,,
2,2023-01-01 02:00:00,9623.0,9634.0,80.63,80.82,304.54,0.0,-3.6,-10.7,-9.4,743.88406,9.0,6.0,8.0,2,6,52,1,1,80.84,80.55,,,,,,,,,,,,,,,,,,,,,,
3,2023-01-01 03:00:00,9578.0,9578.0,79.76,80.61,304.39,0.0,-3.9,-7.8,-8.7,759.293766,8.0,6.0,11.0,3,6,52,1,1,80.63,80.84,80.55,,,,,,,,,,,,,,,,,,,,,
4,2023-01-01 04:00:00,9599.0,9541.0,79.53,77.88,303.67,0.0,-5.8,-7.9,-9.0,746.124896,1.0,5.0,13.0,4,6,52,1,1,79.76,80.63,80.84,80.55,,,,,,,,,,,,,,,,,,,,


##### AIL_lagged_values_till_24_hours

In [15]:
# Generate 24-hour lagged values for the AIL column
merged_df = generate_lagged_features(merged_df, 'alberta_internal_load', 24)

# Display the updated dataframe
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc,hour_of_day,day_of_week,week_of_month,month,is_winter,pool_price_lag_1,pool_price_lag_2,pool_price_lag_3,pool_price_lag_4,pool_price_lag_5,pool_price_lag_6,pool_price_lag_7,pool_price_lag_8,pool_price_lag_9,pool_price_lag_10,pool_price_lag_11,pool_price_lag_12,pool_price_lag_13,pool_price_lag_14,pool_price_lag_15,pool_price_lag_16,pool_price_lag_17,pool_price_lag_18,pool_price_lag_19,pool_price_lag_20,pool_price_lag_21,pool_price_lag_22,pool_price_lag_23,pool_price_lag_24,alberta_internal_load_lag_1,alberta_internal_load_lag_2,alberta_internal_load_lag_3,alberta_internal_load_lag_4,alberta_internal_load_lag_5,alberta_internal_load_lag_6,alberta_internal_load_lag_7,alberta_internal_load_lag_8,alberta_internal_load_lag_9,alberta_internal_load_lag_10,alberta_internal_load_lag_11,alberta_internal_load_lag_12,alberta_internal_load_lag_13,alberta_internal_load_lag_14,alberta_internal_load_lag_15,alberta_internal_load_lag_16,alberta_internal_load_lag_17,alberta_internal_load_lag_18,alberta_internal_load_lag_19,alberta_internal_load_lag_20,alberta_internal_load_lag_21,alberta_internal_load_lag_22,alberta_internal_load_lag_23,alberta_internal_load_lag_24
0,2023-01-01 00:00:00,9824.0,9832.0,80.55,88.39,304.67,0.0,-4.5,-11.9,-13.4,780.206753,3.0,6.0,4.0,0,6,52,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2023-01-01 01:00:00,9712.0,9723.0,80.84,102.36,304.57,0.0,-2.5,-12.8,-11.0,732.207446,4.0,5.0,6.0,1,6,52,1,1,80.55,,,,,,,,,,,,,,,,,,,,,,,,9824.0,,,,,,,,,,,,,,,,,,,,,,,
2,2023-01-01 02:00:00,9623.0,9634.0,80.63,80.82,304.54,0.0,-3.6,-10.7,-9.4,743.88406,9.0,6.0,8.0,2,6,52,1,1,80.84,80.55,,,,,,,,,,,,,,,,,,,,,,,9712.0,9824.0,,,,,,,,,,,,,,,,,,,,,,
3,2023-01-01 03:00:00,9578.0,9578.0,79.76,80.61,304.39,0.0,-3.9,-7.8,-8.7,759.293766,8.0,6.0,11.0,3,6,52,1,1,80.63,80.84,80.55,,,,,,,,,,,,,,,,,,,,,,9623.0,9712.0,9824.0,,,,,,,,,,,,,,,,,,,,,
4,2023-01-01 04:00:00,9599.0,9541.0,79.53,77.88,303.67,0.0,-5.8,-7.9,-9.0,746.124896,1.0,5.0,13.0,4,6,52,1,1,79.76,80.63,80.84,80.55,,,,,,,,,,,,,,,,,,,,,9578.0,9623.0,9712.0,9824.0,,,,,,,,,,,,,,,,,,,,


#### Save Data

In [16]:
merged_df.to_csv('/home/kevin/Downloads/BESS/data/raw/2023/merged_df_2023.csv', index=False)

In [19]:
# done:
   # weather: wind_speed_per_city 
   # time based variable: hour_of_day, day_of_week, week_of_month ,month (derive from the datatime column, no need to fetch seperately)
   # seasonality: May to September is summer, October to April is winter (derive from the datatime column, no need to fetch seperately)
   # time series data: pool_price_lagged_values_till_24_hours, AIL_lagged_values_till_24_hours (derive from the dataframe, no need to fetch seperately)