In [1]:
# This notebook creates features and saves them to a file

#### Library

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Set pandas options to display all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

import os

#### Data Loading

In [3]:
# Folder containing CSV files
folder_path = "/home/kevin/Downloads/BESS/data/raw/2024"

df_list = []
# Loop through each file in the folder
for file in os.listdir(folder_path):
    if file.endswith(".csv"):
        file_path = os.path.join(folder_path, file)
        df_name = os.path.splitext(file)[0].split('20')[0]  # Remove the .csv extension for naming
        globals()[df_name] = pd.read_csv(file_path)  # Create a variable dynamically
        print(f"Loaded DataFrame: {df_name}, Shape: {globals()[df_name].shape}")
        df_list.append(df_name)

# Sort the list of DataFrames
df_list.sort()


Loaded DataFrame: price_, Shape: (8784, 5)
Loaded DataFrame: temperature_calgary_, Shape: (8775, 3)
Loaded DataFrame: temperature_edmonton_, Shape: (8769, 3)
Loaded DataFrame: windspeed_edmonton_, Shape: (8769, 3)
Loaded DataFrame: wind_generation_, Shape: (8783, 3)
Loaded DataFrame: solar_generation_, Shape: (8783, 3)
Loaded DataFrame: windspeed_fortmc_, Shape: (8776, 3)
Loaded DataFrame: AIL_, Shape: (8784, 4)
Loaded DataFrame: windspeed_calgary_, Shape: (8775, 3)
Loaded DataFrame: temperature_fortmc_, Shape: (8776, 3)


In [4]:
for df_name in df_list:
    print(f"\nDataFrame: {df_name}")
    display(globals()[df_name].head())


DataFrame: AIL_


Unnamed: 0.1,Unnamed: 0,begin_datetime_mpt,alberta_internal_load,forecast_alberta_internal_load
0,0,2024-01-01 00:00,9809,9779
1,1,2024-01-01 01:00,9702,9709
2,2,2024-01-01 02:00,9560,9615
3,3,2024-01-01 03:00,9547,9510
4,4,2024-01-01 04:00,9511,9545



DataFrame: price_


Unnamed: 0.1,Unnamed: 0,begin_datetime_mpt,pool_price,forecast_pool_price,rolling_30day_avg
0,0,2024-01-01 00:00,23.05,20.57,51.98
1,1,2024-01-01 01:00,22.67,19.69,51.97
2,2,2024-01-01 02:00,24.07,21.46,51.96
3,3,2024-01-01 03:00,24.33,24.21,51.93
4,4,2024-01-01 04:00,24.7,24.48,51.93



DataFrame: solar_generation_


Unnamed: 0.1,Unnamed: 0,Date (MPT),Volume
0,43819,2024-01-01 00:00:00,0.0
1,43820,2024-01-01 01:00:00,0.0
2,43821,2024-01-01 02:00:00,0.0
3,43822,2024-01-01 03:00:00,0.0
4,43823,2024-01-01 04:00:00,0.0



DataFrame: temperature_calgary_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,5276,2024-01-01 00:00:00,3.0
1,5277,2024-01-01 01:00:00,3.0
2,5278,2024-01-01 02:00:00,3.2
3,5279,2024-01-01 03:00:00,2.9
4,5280,2024-01-01 04:00:00,2.4



DataFrame: temperature_edmonton_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,5178,2024-01-01 00:00:00,-0.5
1,5179,2024-01-01 01:00:00,-2.5
2,5180,2024-01-01 02:00:00,-4.3
3,5181,2024-01-01 03:00:00,-3.5
4,5182,2024-01-01 04:00:00,-4.7



DataFrame: temperature_fortmc_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,4943,2024-01-01 00:00:00,2.5
1,4944,2024-01-01 01:00:00,2.0
2,4945,2024-01-01 02:00:00,0.7
3,4946,2024-01-01 03:00:00,-0.5
4,4947,2024-01-01 04:00:00,0.1



DataFrame: wind_generation_


Unnamed: 0.1,Unnamed: 0,Date (MPT),Volume
0,43819,2024-01-01 00:00:00,2905.627906
1,43820,2024-01-01 01:00:00,2857.032152
2,43821,2024-01-01 02:00:00,2519.803339
3,43822,2024-01-01 03:00:00,2386.924303
4,43823,2024-01-01 04:00:00,2222.338043



DataFrame: windspeed_calgary_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,WIND_SPEED
0,5276,2024-01-01 00:00:00,8.0
1,5277,2024-01-01 01:00:00,5.0
2,5278,2024-01-01 02:00:00,5.0
3,5279,2024-01-01 03:00:00,16.0
4,5280,2024-01-01 04:00:00,19.0



DataFrame: windspeed_edmonton_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,WIND_SPEED
0,5178,2024-01-01 00:00:00,10
1,5179,2024-01-01 01:00:00,7
2,5180,2024-01-01 02:00:00,9
3,5181,2024-01-01 03:00:00,12
4,5182,2024-01-01 04:00:00,5



DataFrame: windspeed_fortmc_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,WIND_SPEED
0,4943,2024-01-01 00:00:00,16
1,4944,2024-01-01 01:00:00,16
2,4945,2024-01-01 02:00:00,16
3,4946,2024-01-01 03:00:00,14
4,4947,2024-01-01 04:00:00,16


#### Column Name Adjustment

In [5]:
for df_name in df_list:
    print(f"\nDataFrame: {df_name}")
    globals()[df_name].drop(['Unnamed: 0'], axis=1, inplace=True)
    display(globals()[df_name].head())


DataFrame: AIL_


Unnamed: 0,begin_datetime_mpt,alberta_internal_load,forecast_alberta_internal_load
0,2024-01-01 00:00,9809,9779
1,2024-01-01 01:00,9702,9709
2,2024-01-01 02:00,9560,9615
3,2024-01-01 03:00,9547,9510
4,2024-01-01 04:00,9511,9545



DataFrame: price_


Unnamed: 0,begin_datetime_mpt,pool_price,forecast_pool_price,rolling_30day_avg
0,2024-01-01 00:00,23.05,20.57,51.98
1,2024-01-01 01:00,22.67,19.69,51.97
2,2024-01-01 02:00,24.07,21.46,51.96
3,2024-01-01 03:00,24.33,24.21,51.93
4,2024-01-01 04:00,24.7,24.48,51.93



DataFrame: solar_generation_


Unnamed: 0,Date (MPT),Volume
0,2024-01-01 00:00:00,0.0
1,2024-01-01 01:00:00,0.0
2,2024-01-01 02:00:00,0.0
3,2024-01-01 03:00:00,0.0
4,2024-01-01 04:00:00,0.0



DataFrame: temperature_calgary_


Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,2024-01-01 00:00:00,3.0
1,2024-01-01 01:00:00,3.0
2,2024-01-01 02:00:00,3.2
3,2024-01-01 03:00:00,2.9
4,2024-01-01 04:00:00,2.4



DataFrame: temperature_edmonton_


Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,2024-01-01 00:00:00,-0.5
1,2024-01-01 01:00:00,-2.5
2,2024-01-01 02:00:00,-4.3
3,2024-01-01 03:00:00,-3.5
4,2024-01-01 04:00:00,-4.7



DataFrame: temperature_fortmc_


Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,2024-01-01 00:00:00,2.5
1,2024-01-01 01:00:00,2.0
2,2024-01-01 02:00:00,0.7
3,2024-01-01 03:00:00,-0.5
4,2024-01-01 04:00:00,0.1



DataFrame: wind_generation_


Unnamed: 0,Date (MPT),Volume
0,2024-01-01 00:00:00,2905.627906
1,2024-01-01 01:00:00,2857.032152
2,2024-01-01 02:00:00,2519.803339
3,2024-01-01 03:00:00,2386.924303
4,2024-01-01 04:00:00,2222.338043



DataFrame: windspeed_calgary_


Unnamed: 0,Timestamp_mst,WIND_SPEED
0,2024-01-01 00:00:00,8.0
1,2024-01-01 01:00:00,5.0
2,2024-01-01 02:00:00,5.0
3,2024-01-01 03:00:00,16.0
4,2024-01-01 04:00:00,19.0



DataFrame: windspeed_edmonton_


Unnamed: 0,Timestamp_mst,WIND_SPEED
0,2024-01-01 00:00:00,10
1,2024-01-01 01:00:00,7
2,2024-01-01 02:00:00,9
3,2024-01-01 03:00:00,12
4,2024-01-01 04:00:00,5



DataFrame: windspeed_fortmc_


Unnamed: 0,Timestamp_mst,WIND_SPEED
0,2024-01-01 00:00:00,16
1,2024-01-01 01:00:00,16
2,2024-01-01 02:00:00,16
3,2024-01-01 03:00:00,14
4,2024-01-01 04:00:00,16


In [6]:
# Rename columns
price_.rename(columns={'begin_datetime_mpt': 'datetime_', 'rolling_30day_avg': 'rolling_30day_avg_price'}, inplace=True)

AIL_.rename(columns={'begin_datetime_mpt': 'datetime_'}, inplace=True)

solar_generation_.rename(columns={'Date (MPT)': 'datetime_', 'Volume':'solar_generation'}, inplace=True)

wind_generation_.rename(columns={'Date (MPT)': 'datetime_', 'Volume':'wind_generation'}, inplace=True)

temperature_calgary_.rename(columns={'Timestamp_mst': 'datetime_', 'Temperature (degree C)':'temp_calgary'}, inplace=True)

temperature_edmonton_.rename(columns={'Timestamp_mst': 'datetime_', 'Temperature (degree C)':'temp_edmonton'}, inplace=True)

temperature_fortmc_.rename(columns={'Timestamp_mst': 'datetime_', 'Temperature (degree C)':'temp_fortmc'}, inplace=True)

windspeed_calgary_.rename(columns={'Timestamp_mst': 'datetime_', 'WIND_SPEED':'ws_calgary'}, inplace=True)

windspeed_edmonton_.rename(columns={'Timestamp_mst': 'datetime_', 'WIND_SPEED':'ws_edmonton'}, inplace=True)

windspeed_fortmc_.rename(columns={'Timestamp_mst': 'datetime_', 'WIND_SPEED':'ws_fortmc'}, inplace=True)

In [7]:
for df_name in df_list:
    print(f"\nDataFrame: {df_name}")
    display(globals()[df_name].head())


DataFrame: AIL_


Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load
0,2024-01-01 00:00,9809,9779
1,2024-01-01 01:00,9702,9709
2,2024-01-01 02:00,9560,9615
3,2024-01-01 03:00,9547,9510
4,2024-01-01 04:00,9511,9545



DataFrame: price_


Unnamed: 0,datetime_,pool_price,forecast_pool_price,rolling_30day_avg_price
0,2024-01-01 00:00,23.05,20.57,51.98
1,2024-01-01 01:00,22.67,19.69,51.97
2,2024-01-01 02:00,24.07,21.46,51.96
3,2024-01-01 03:00,24.33,24.21,51.93
4,2024-01-01 04:00,24.7,24.48,51.93



DataFrame: solar_generation_


Unnamed: 0,datetime_,solar_generation
0,2024-01-01 00:00:00,0.0
1,2024-01-01 01:00:00,0.0
2,2024-01-01 02:00:00,0.0
3,2024-01-01 03:00:00,0.0
4,2024-01-01 04:00:00,0.0



DataFrame: temperature_calgary_


Unnamed: 0,datetime_,temp_calgary
0,2024-01-01 00:00:00,3.0
1,2024-01-01 01:00:00,3.0
2,2024-01-01 02:00:00,3.2
3,2024-01-01 03:00:00,2.9
4,2024-01-01 04:00:00,2.4



DataFrame: temperature_edmonton_


Unnamed: 0,datetime_,temp_edmonton
0,2024-01-01 00:00:00,-0.5
1,2024-01-01 01:00:00,-2.5
2,2024-01-01 02:00:00,-4.3
3,2024-01-01 03:00:00,-3.5
4,2024-01-01 04:00:00,-4.7



DataFrame: temperature_fortmc_


Unnamed: 0,datetime_,temp_fortmc
0,2024-01-01 00:00:00,2.5
1,2024-01-01 01:00:00,2.0
2,2024-01-01 02:00:00,0.7
3,2024-01-01 03:00:00,-0.5
4,2024-01-01 04:00:00,0.1



DataFrame: wind_generation_


Unnamed: 0,datetime_,wind_generation
0,2024-01-01 00:00:00,2905.627906
1,2024-01-01 01:00:00,2857.032152
2,2024-01-01 02:00:00,2519.803339
3,2024-01-01 03:00:00,2386.924303
4,2024-01-01 04:00:00,2222.338043



DataFrame: windspeed_calgary_


Unnamed: 0,datetime_,ws_calgary
0,2024-01-01 00:00:00,8.0
1,2024-01-01 01:00:00,5.0
2,2024-01-01 02:00:00,5.0
3,2024-01-01 03:00:00,16.0
4,2024-01-01 04:00:00,19.0



DataFrame: windspeed_edmonton_


Unnamed: 0,datetime_,ws_edmonton
0,2024-01-01 00:00:00,10
1,2024-01-01 01:00:00,7
2,2024-01-01 02:00:00,9
3,2024-01-01 03:00:00,12
4,2024-01-01 04:00:00,5



DataFrame: windspeed_fortmc_


Unnamed: 0,datetime_,ws_fortmc
0,2024-01-01 00:00:00,16
1,2024-01-01 01:00:00,16
2,2024-01-01 02:00:00,16
3,2024-01-01 03:00:00,14
4,2024-01-01 04:00:00,16


In [8]:
# convert columns to datetime
for df_name in df_list:
    print(f"\nDataFrame: {df_name}")
    globals()[df_name]['datetime_'] = pd.to_datetime(globals()[df_name]['datetime_'])
    display(globals()[df_name].head())


DataFrame: AIL_


Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load
0,2024-01-01 00:00:00,9809,9779
1,2024-01-01 01:00:00,9702,9709
2,2024-01-01 02:00:00,9560,9615
3,2024-01-01 03:00:00,9547,9510
4,2024-01-01 04:00:00,9511,9545



DataFrame: price_


Unnamed: 0,datetime_,pool_price,forecast_pool_price,rolling_30day_avg_price
0,2024-01-01 00:00:00,23.05,20.57,51.98
1,2024-01-01 01:00:00,22.67,19.69,51.97
2,2024-01-01 02:00:00,24.07,21.46,51.96
3,2024-01-01 03:00:00,24.33,24.21,51.93
4,2024-01-01 04:00:00,24.7,24.48,51.93



DataFrame: solar_generation_


Unnamed: 0,datetime_,solar_generation
0,2024-01-01 00:00:00,0.0
1,2024-01-01 01:00:00,0.0
2,2024-01-01 02:00:00,0.0
3,2024-01-01 03:00:00,0.0
4,2024-01-01 04:00:00,0.0



DataFrame: temperature_calgary_


Unnamed: 0,datetime_,temp_calgary
0,2024-01-01 00:00:00,3.0
1,2024-01-01 01:00:00,3.0
2,2024-01-01 02:00:00,3.2
3,2024-01-01 03:00:00,2.9
4,2024-01-01 04:00:00,2.4



DataFrame: temperature_edmonton_


Unnamed: 0,datetime_,temp_edmonton
0,2024-01-01 00:00:00,-0.5
1,2024-01-01 01:00:00,-2.5
2,2024-01-01 02:00:00,-4.3
3,2024-01-01 03:00:00,-3.5
4,2024-01-01 04:00:00,-4.7



DataFrame: temperature_fortmc_


Unnamed: 0,datetime_,temp_fortmc
0,2024-01-01 00:00:00,2.5
1,2024-01-01 01:00:00,2.0
2,2024-01-01 02:00:00,0.7
3,2024-01-01 03:00:00,-0.5
4,2024-01-01 04:00:00,0.1



DataFrame: wind_generation_


Unnamed: 0,datetime_,wind_generation
0,2024-01-01 00:00:00,2905.627906
1,2024-01-01 01:00:00,2857.032152
2,2024-01-01 02:00:00,2519.803339
3,2024-01-01 03:00:00,2386.924303
4,2024-01-01 04:00:00,2222.338043



DataFrame: windspeed_calgary_


Unnamed: 0,datetime_,ws_calgary
0,2024-01-01 00:00:00,8.0
1,2024-01-01 01:00:00,5.0
2,2024-01-01 02:00:00,5.0
3,2024-01-01 03:00:00,16.0
4,2024-01-01 04:00:00,19.0



DataFrame: windspeed_edmonton_


Unnamed: 0,datetime_,ws_edmonton
0,2024-01-01 00:00:00,10
1,2024-01-01 01:00:00,7
2,2024-01-01 02:00:00,9
3,2024-01-01 03:00:00,12
4,2024-01-01 04:00:00,5



DataFrame: windspeed_fortmc_


Unnamed: 0,datetime_,ws_fortmc
0,2024-01-01 00:00:00,16
1,2024-01-01 01:00:00,16
2,2024-01-01 02:00:00,16
3,2024-01-01 03:00:00,14
4,2024-01-01 04:00:00,16


In [9]:
# Initialize the merged dataframe with the first dataframe in the list
merged_df = globals()[df_list[0]]

# Loop through the remaining dataframes and merge them one by one
for df_name in df_list[1:]:
    merged_df = merged_df.merge(globals()[df_name], on='datetime_', how='outer')

# Display the merged dataframe
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc
0,2024-01-01 00:00:00,9809.0,9779.0,23.05,20.57,51.98,0.0,3.0,-0.5,2.5,2905.627906,8.0,10.0,16.0
1,2024-01-01 01:00:00,9702.0,9709.0,22.67,19.69,51.97,0.0,3.0,-2.5,2.0,2857.032152,5.0,7.0,16.0
2,2024-01-01 02:00:00,9560.0,9615.0,24.07,21.46,51.96,0.0,3.2,-4.3,0.7,2519.803339,5.0,9.0,16.0
3,2024-01-01 03:00:00,9547.0,9510.0,24.33,24.21,51.93,0.0,2.9,-3.5,-0.5,2386.924303,16.0,12.0,14.0
4,2024-01-01 04:00:00,9511.0,9545.0,24.7,24.48,51.93,0.0,2.4,-4.7,0.1,2222.338043,19.0,5.0,16.0


In [10]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8787 entries, 0 to 8786
Data columns (total 14 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   datetime_                       8787 non-null   datetime64[ns]
 1   alberta_internal_load           8786 non-null   float64       
 2   forecast_alberta_internal_load  8786 non-null   float64       
 3   pool_price                      8786 non-null   float64       
 4   forecast_pool_price             8786 non-null   float64       
 5   rolling_30day_avg_price         8786 non-null   float64       
 6   solar_generation                8786 non-null   float64       
 7   temp_calgary                    8778 non-null   float64       
 8   temp_edmonton                   8772 non-null   float64       
 9   temp_fortmc                     8779 non-null   float64       
 10  wind_generation                 8786 non-null   float64       
 11  ws_c

In [11]:
merged_df.shape

(8787, 14)

In [12]:
merged_df.isnull().sum()

datetime_                          0
alberta_internal_load              1
forecast_alberta_internal_load     1
pool_price                         1
forecast_pool_price                1
rolling_30day_avg_price            1
solar_generation                   1
temp_calgary                       9
temp_edmonton                     15
temp_fortmc                        8
wind_generation                    1
ws_calgary                        11
ws_edmonton                       15
ws_fortmc                          8
dtype: int64

#### Feature Engineering

##### time based variables

In [13]:
merged_df['hour_of_day'] = merged_df['datetime_'].dt.hour
merged_df['day_of_week'] = merged_df['datetime_'].dt.dayofweek
merged_df['day_of_month'] = merged_df['datetime_'].dt.day
# Calculate the week of the month (from 1 to 5)
merged_df['week_of_month'] = merged_df['datetime_'].apply(lambda x: (x.day - 1) // 7 + 1)
merged_df['month'] = merged_df['datetime_'].dt.month
merged_df['year'] = merged_df['datetime_'].dt.year

# Display the updated dataframe
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc,hour_of_day,day_of_week,day_of_month,week_of_month,month,year
0,2024-01-01 00:00:00,9809.0,9779.0,23.05,20.57,51.98,0.0,3.0,-0.5,2.5,2905.627906,8.0,10.0,16.0,0,0,1,1,1,2024
1,2024-01-01 01:00:00,9702.0,9709.0,22.67,19.69,51.97,0.0,3.0,-2.5,2.0,2857.032152,5.0,7.0,16.0,1,0,1,1,1,2024
2,2024-01-01 02:00:00,9560.0,9615.0,24.07,21.46,51.96,0.0,3.2,-4.3,0.7,2519.803339,5.0,9.0,16.0,2,0,1,1,1,2024
3,2024-01-01 03:00:00,9547.0,9510.0,24.33,24.21,51.93,0.0,2.9,-3.5,-0.5,2386.924303,16.0,12.0,14.0,3,0,1,1,1,2024
4,2024-01-01 04:00:00,9511.0,9545.0,24.7,24.48,51.93,0.0,2.4,-4.7,0.1,2222.338043,19.0,5.0,16.0,4,0,1,1,1,2024


##### seasonality

In [14]:
# Define a function to determine the season
def get_season(month):
    if month in [5, 6, 7, 8, 9]:
        return 0 # Summer
    else:
        return 1 # Winter

# Apply the function to create a new column 'season'
merged_df['is_winter'] = merged_df['month'].apply(get_season)

# Display the updated dataframe
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc,hour_of_day,day_of_week,day_of_month,week_of_month,month,year,is_winter
0,2024-01-01 00:00:00,9809.0,9779.0,23.05,20.57,51.98,0.0,3.0,-0.5,2.5,2905.627906,8.0,10.0,16.0,0,0,1,1,1,2024,1
1,2024-01-01 01:00:00,9702.0,9709.0,22.67,19.69,51.97,0.0,3.0,-2.5,2.0,2857.032152,5.0,7.0,16.0,1,0,1,1,1,2024,1
2,2024-01-01 02:00:00,9560.0,9615.0,24.07,21.46,51.96,0.0,3.2,-4.3,0.7,2519.803339,5.0,9.0,16.0,2,0,1,1,1,2024,1
3,2024-01-01 03:00:00,9547.0,9510.0,24.33,24.21,51.93,0.0,2.9,-3.5,-0.5,2386.924303,16.0,12.0,14.0,3,0,1,1,1,2024,1
4,2024-01-01 04:00:00,9511.0,9545.0,24.7,24.48,51.93,0.0,2.4,-4.7,0.1,2222.338043,19.0,5.0,16.0,4,0,1,1,1,2024,1


##### pool_price_lagged_values_till_24_hours

In [15]:
def generate_lagged_features(df, column, num_lags):
    for lag in range(1, num_lags + 1):
        df[f'{column}_lag_{lag}'] = df[column].shift(lag)
    return df

# Generate 24-hour lagged values for the pool_price column
merged_df = generate_lagged_features(merged_df, 'pool_price', 24)

# Display the updated dataframe
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc,hour_of_day,day_of_week,day_of_month,week_of_month,month,year,is_winter,pool_price_lag_1,pool_price_lag_2,pool_price_lag_3,pool_price_lag_4,pool_price_lag_5,pool_price_lag_6,pool_price_lag_7,pool_price_lag_8,pool_price_lag_9,pool_price_lag_10,pool_price_lag_11,pool_price_lag_12,pool_price_lag_13,pool_price_lag_14,pool_price_lag_15,pool_price_lag_16,pool_price_lag_17,pool_price_lag_18,pool_price_lag_19,pool_price_lag_20,pool_price_lag_21,pool_price_lag_22,pool_price_lag_23,pool_price_lag_24
0,2024-01-01 00:00:00,9809.0,9779.0,23.05,20.57,51.98,0.0,3.0,-0.5,2.5,2905.627906,8.0,10.0,16.0,0,0,1,1,1,2024,1,,,,,,,,,,,,,,,,,,,,,,,,
1,2024-01-01 01:00:00,9702.0,9709.0,22.67,19.69,51.97,0.0,3.0,-2.5,2.0,2857.032152,5.0,7.0,16.0,1,0,1,1,1,2024,1,23.05,,,,,,,,,,,,,,,,,,,,,,,
2,2024-01-01 02:00:00,9560.0,9615.0,24.07,21.46,51.96,0.0,3.2,-4.3,0.7,2519.803339,5.0,9.0,16.0,2,0,1,1,1,2024,1,22.67,23.05,,,,,,,,,,,,,,,,,,,,,,
3,2024-01-01 03:00:00,9547.0,9510.0,24.33,24.21,51.93,0.0,2.9,-3.5,-0.5,2386.924303,16.0,12.0,14.0,3,0,1,1,1,2024,1,24.07,22.67,23.05,,,,,,,,,,,,,,,,,,,,,
4,2024-01-01 04:00:00,9511.0,9545.0,24.7,24.48,51.93,0.0,2.4,-4.7,0.1,2222.338043,19.0,5.0,16.0,4,0,1,1,1,2024,1,24.33,24.07,22.67,23.05,,,,,,,,,,,,,,,,,,,,


##### AIL_lagged_values_till_24_hours

In [16]:
# Generate 24-hour lagged values for the AIL column
merged_df = generate_lagged_features(merged_df, 'alberta_internal_load', 24)

# Display the updated dataframe
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc,hour_of_day,day_of_week,day_of_month,week_of_month,month,year,is_winter,pool_price_lag_1,pool_price_lag_2,pool_price_lag_3,pool_price_lag_4,pool_price_lag_5,pool_price_lag_6,pool_price_lag_7,pool_price_lag_8,pool_price_lag_9,pool_price_lag_10,pool_price_lag_11,pool_price_lag_12,pool_price_lag_13,pool_price_lag_14,pool_price_lag_15,pool_price_lag_16,pool_price_lag_17,pool_price_lag_18,pool_price_lag_19,pool_price_lag_20,pool_price_lag_21,pool_price_lag_22,pool_price_lag_23,pool_price_lag_24,alberta_internal_load_lag_1,alberta_internal_load_lag_2,alberta_internal_load_lag_3,alberta_internal_load_lag_4,alberta_internal_load_lag_5,alberta_internal_load_lag_6,alberta_internal_load_lag_7,alberta_internal_load_lag_8,alberta_internal_load_lag_9,alberta_internal_load_lag_10,alberta_internal_load_lag_11,alberta_internal_load_lag_12,alberta_internal_load_lag_13,alberta_internal_load_lag_14,alberta_internal_load_lag_15,alberta_internal_load_lag_16,alberta_internal_load_lag_17,alberta_internal_load_lag_18,alberta_internal_load_lag_19,alberta_internal_load_lag_20,alberta_internal_load_lag_21,alberta_internal_load_lag_22,alberta_internal_load_lag_23,alberta_internal_load_lag_24
0,2024-01-01 00:00:00,9809.0,9779.0,23.05,20.57,51.98,0.0,3.0,-0.5,2.5,2905.627906,8.0,10.0,16.0,0,0,1,1,1,2024,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2024-01-01 01:00:00,9702.0,9709.0,22.67,19.69,51.97,0.0,3.0,-2.5,2.0,2857.032152,5.0,7.0,16.0,1,0,1,1,1,2024,1,23.05,,,,,,,,,,,,,,,,,,,,,,,,9809.0,,,,,,,,,,,,,,,,,,,,,,,
2,2024-01-01 02:00:00,9560.0,9615.0,24.07,21.46,51.96,0.0,3.2,-4.3,0.7,2519.803339,5.0,9.0,16.0,2,0,1,1,1,2024,1,22.67,23.05,,,,,,,,,,,,,,,,,,,,,,,9702.0,9809.0,,,,,,,,,,,,,,,,,,,,,,
3,2024-01-01 03:00:00,9547.0,9510.0,24.33,24.21,51.93,0.0,2.9,-3.5,-0.5,2386.924303,16.0,12.0,14.0,3,0,1,1,1,2024,1,24.07,22.67,23.05,,,,,,,,,,,,,,,,,,,,,,9560.0,9702.0,9809.0,,,,,,,,,,,,,,,,,,,,,
4,2024-01-01 04:00:00,9511.0,9545.0,24.7,24.48,51.93,0.0,2.4,-4.7,0.1,2222.338043,19.0,5.0,16.0,4,0,1,1,1,2024,1,24.33,24.07,22.67,23.05,,,,,,,,,,,,,,,,,,,,,9547.0,9560.0,9702.0,9809.0,,,,,,,,,,,,,,,,,,,,


#### Save Data

In [17]:
merged_df.to_csv('/home/kevin/Downloads/BESS/data/raw/2024/merged_df_2024.csv', index=False)