In [1]:
# This notebook creates features and saves them to a file

#### Library

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Lift pandas' display truncation so every row and column is rendered
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

import os

#### Data Loading

In [2]:
# Folder containing the raw CSV files
folder_path = "/home/kevin/Downloads/BESS/Jobs/Retraining/data/raw"

# CSV files inside folder_path that are produced by this notebook, not raw inputs.
# The save step writes merged_df.csv into this same folder; without this filter a
# second run would load that output as an input and merge it with the raw frames.
generated_files = {"merged_df.csv"}

df_list = []
# os.listdir returns entries in arbitrary order; sort for a reproducible load log
for file in sorted(os.listdir(folder_path)):
    if not file.endswith(".csv") or file in generated_files:
        continue
    file_path = os.path.join(folder_path, file)
    # Strip the .csv extension, then keep only the text before the first '20'
    # (presumably a trailing year/date stamp in the file name — verify against the raw files)
    df_name = os.path.splitext(file)[0].split('20')[0]
    # Later cells refer to the frames by these names (price_, AIL_, ...), so each
    # frame is published as a notebook-level variable.
    globals()[df_name] = pd.read_csv(file_path)
    print(f"Loaded DataFrame: {df_name}, Shape: {globals()[df_name].shape}")
    df_list.append(df_name)

# Sort the list of DataFrame names
df_list.sort()


Loaded DataFrame: price_, Shape: (26328, 4)
Loaded DataFrame: solar_generation, Shape: (52602, 3)
Loaded DataFrame: temperature_fortmc_, Shape: (26023, 3)
Loaded DataFrame: windspeed_edmonton_, Shape: (25956, 3)
Loaded DataFrame: windspeed_fortmc_, Shape: (26023, 3)
Loaded DataFrame: temperature_edmonton_, Shape: (25956, 3)
Loaded DataFrame: wind_generation, Shape: (52602, 3)
Loaded DataFrame: temperature_calgary_, Shape: (26308, 3)
Loaded DataFrame: AIL_, Shape: (26328, 3)
Loaded DataFrame: windspeed_calgary_, Shape: (26308, 3)


In [3]:
# Show the first rows of every loaded frame, labelled with its variable name
for df_name in df_list:
    print(f"\nDataFrame: {df_name}")
    preview = globals()[df_name].head()
    display(preview)


DataFrame: AIL_


Unnamed: 0,begin_datetime_mpt,alberta_internal_load,forecast_alberta_internal_load
0,2022-03-23 00:00,9146,9170
1,2022-03-23 01:00,9083,9075
2,2022-03-23 02:00,9020,9029
3,2022-03-23 03:00,9007,9012
4,2022-03-23 04:00,9043,9023



DataFrame: price_


Unnamed: 0,begin_datetime_mpt,pool_price,forecast_pool_price,rolling_30day_avg
0,2022-03-23 00:00,44.21,41.26,88.14
1,2022-03-23 01:00,47.55,47.55,88.09
2,2022-03-23 02:00,47.55,47.55,88.05
3,2022-03-23 03:00,47.11,47.86,88.01
4,2022-03-23 04:00,46.95,47.03,87.98



DataFrame: solar_generation


Unnamed: 0.1,Unnamed: 0,Date (MPT),Volume
0,0,2019-01-01 00:00:00,0.0
1,1,2019-01-01 01:00:00,0.0
2,2,2019-01-01 02:00:00,0.0
3,3,2019-01-01 03:00:00,0.0
4,4,2019-01-01 04:00:00,0.0



DataFrame: temperature_calgary_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,0,2022-03-23 00:00:00,6.4
1,1,2022-03-23 01:00:00,8.9
2,81,2022-03-23 02:00:00,6.7
3,2,2022-03-23 03:00:00,5.5
4,3,2022-03-23 04:00:00,4.6



DataFrame: temperature_edmonton_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,4,2022-03-23 00:00:00,2.9
1,5,2022-03-23 01:00:00,3.5
2,6,2022-03-23 02:00:00,4.2
3,7,2022-03-23 03:00:00,3.8
4,8,2022-03-23 04:00:00,2.0



DataFrame: temperature_fortmc_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,1,2022-03-23 00:00:00,5.4
1,2,2022-03-23 01:00:00,5.6
2,3,2022-03-23 02:00:00,6.0
3,4,2022-03-23 03:00:00,4.9
4,5,2022-03-23 04:00:00,4.5



DataFrame: wind_generation


Unnamed: 0.1,Unnamed: 0,Date (MPT),Volume
0,0,2019-01-01 00:00:00,1126.509756
1,1,2019-01-01 01:00:00,1148.336475
2,2,2019-01-01 02:00:00,1091.46996
3,3,2019-01-01 03:00:00,1122.613398
4,4,2019-01-01 04:00:00,1101.101023



DataFrame: windspeed_calgary_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,WIND_SPEED
0,0,2022-03-23 00:00:00,10.0
1,1,2022-03-23 01:00:00,7.0
2,81,2022-03-23 02:00:00,4.0
3,2,2022-03-23 03:00:00,2.0
4,3,2022-03-23 04:00:00,2.0



DataFrame: windspeed_edmonton_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,WIND_SPEED
0,4,2022-03-23 00:00:00,13
1,5,2022-03-23 01:00:00,13
2,6,2022-03-23 02:00:00,12
3,7,2022-03-23 03:00:00,6
4,8,2022-03-23 04:00:00,6



DataFrame: windspeed_fortmc_


Unnamed: 0.1,Unnamed: 0,Timestamp_mst,WIND_SPEED
0,1,2022-03-23 00:00:00,5
1,2,2022-03-23 01:00:00,3
2,3,2022-03-23 02:00:00,6
3,4,2022-03-23 03:00:00,9
4,5,2022-03-23 04:00:00,10


#### Column Name Adjustment

In [4]:
# Remove the stray 'Unnamed: 0' column from every frame that has one
for df_name in df_list:
    print(f"\nDataFrame: {df_name}")
    frame = globals()[df_name]
    # errors='ignore' skips frames that lack the column, without hiding
    # unrelated failures the way a bare `except: pass` would.
    frame.drop(columns=['Unnamed: 0'], errors='ignore', inplace=True)
    display(frame.head())


DataFrame: AIL_


Unnamed: 0,begin_datetime_mpt,alberta_internal_load,forecast_alberta_internal_load
0,2022-03-23 00:00,9146,9170
1,2022-03-23 01:00,9083,9075
2,2022-03-23 02:00,9020,9029
3,2022-03-23 03:00,9007,9012
4,2022-03-23 04:00,9043,9023



DataFrame: price_


Unnamed: 0,begin_datetime_mpt,pool_price,forecast_pool_price,rolling_30day_avg
0,2022-03-23 00:00,44.21,41.26,88.14
1,2022-03-23 01:00,47.55,47.55,88.09
2,2022-03-23 02:00,47.55,47.55,88.05
3,2022-03-23 03:00,47.11,47.86,88.01
4,2022-03-23 04:00,46.95,47.03,87.98



DataFrame: solar_generation


Unnamed: 0,Date (MPT),Volume
0,2019-01-01 00:00:00,0.0
1,2019-01-01 01:00:00,0.0
2,2019-01-01 02:00:00,0.0
3,2019-01-01 03:00:00,0.0
4,2019-01-01 04:00:00,0.0



DataFrame: temperature_calgary_


Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,2022-03-23 00:00:00,6.4
1,2022-03-23 01:00:00,8.9
2,2022-03-23 02:00:00,6.7
3,2022-03-23 03:00:00,5.5
4,2022-03-23 04:00:00,4.6



DataFrame: temperature_edmonton_


Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,2022-03-23 00:00:00,2.9
1,2022-03-23 01:00:00,3.5
2,2022-03-23 02:00:00,4.2
3,2022-03-23 03:00:00,3.8
4,2022-03-23 04:00:00,2.0



DataFrame: temperature_fortmc_


Unnamed: 0,Timestamp_mst,Temperature (degree C)
0,2022-03-23 00:00:00,5.4
1,2022-03-23 01:00:00,5.6
2,2022-03-23 02:00:00,6.0
3,2022-03-23 03:00:00,4.9
4,2022-03-23 04:00:00,4.5



DataFrame: wind_generation


Unnamed: 0,Date (MPT),Volume
0,2019-01-01 00:00:00,1126.509756
1,2019-01-01 01:00:00,1148.336475
2,2019-01-01 02:00:00,1091.46996
3,2019-01-01 03:00:00,1122.613398
4,2019-01-01 04:00:00,1101.101023



DataFrame: windspeed_calgary_


Unnamed: 0,Timestamp_mst,WIND_SPEED
0,2022-03-23 00:00:00,10.0
1,2022-03-23 01:00:00,7.0
2,2022-03-23 02:00:00,4.0
3,2022-03-23 03:00:00,2.0
4,2022-03-23 04:00:00,2.0



DataFrame: windspeed_edmonton_


Unnamed: 0,Timestamp_mst,WIND_SPEED
0,2022-03-23 00:00:00,13
1,2022-03-23 01:00:00,13
2,2022-03-23 02:00:00,12
3,2022-03-23 03:00:00,6
4,2022-03-23 04:00:00,6



DataFrame: windspeed_fortmc_


Unnamed: 0,Timestamp_mst,WIND_SPEED
0,2022-03-23 00:00:00,5
1,2022-03-23 01:00:00,3
2,2022-03-23 02:00:00,6
3,2022-03-23 03:00:00,9
4,2022-03-23 04:00:00,10


In [5]:
# Rename columns: every frame gets the shared 'datetime_' key, and each value
# column gets a name that stays unique after the frames are merged.
rename_plan = [
    (price_, {'begin_datetime_mpt': 'datetime_', 'rolling_30day_avg': 'rolling_30day_avg_price'}),
    (AIL_, {'begin_datetime_mpt': 'datetime_'}),
    (solar_generation, {'Date (MPT)': 'datetime_', 'Volume': 'solar_generation'}),
    (wind_generation, {'Date (MPT)': 'datetime_', 'Volume': 'wind_generation'}),
    (temperature_calgary_, {'Timestamp_mst': 'datetime_', 'Temperature (degree C)': 'temp_calgary'}),
    (temperature_edmonton_, {'Timestamp_mst': 'datetime_', 'Temperature (degree C)': 'temp_edmonton'}),
    (temperature_fortmc_, {'Timestamp_mst': 'datetime_', 'Temperature (degree C)': 'temp_fortmc'}),
    (windspeed_calgary_, {'Timestamp_mst': 'datetime_', 'WIND_SPEED': 'ws_calgary'}),
    (windspeed_edmonton_, {'Timestamp_mst': 'datetime_', 'WIND_SPEED': 'ws_edmonton'}),
    (windspeed_fortmc_, {'Timestamp_mst': 'datetime_', 'WIND_SPEED': 'ws_fortmc'}),
]

for frame, column_map in rename_plan:
    frame.rename(columns=column_map, inplace=True)

In [6]:
# Re-display each frame to check the renamed columns
for df_name in df_list:
    print(f"\nDataFrame: {df_name}")
    preview = globals()[df_name].head()
    display(preview)


DataFrame: AIL_


Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load
0,2022-03-23 00:00,9146,9170
1,2022-03-23 01:00,9083,9075
2,2022-03-23 02:00,9020,9029
3,2022-03-23 03:00,9007,9012
4,2022-03-23 04:00,9043,9023



DataFrame: price_


Unnamed: 0,datetime_,pool_price,forecast_pool_price,rolling_30day_avg_price
0,2022-03-23 00:00,44.21,41.26,88.14
1,2022-03-23 01:00,47.55,47.55,88.09
2,2022-03-23 02:00,47.55,47.55,88.05
3,2022-03-23 03:00,47.11,47.86,88.01
4,2022-03-23 04:00,46.95,47.03,87.98



DataFrame: solar_generation


Unnamed: 0,datetime_,solar_generation
0,2019-01-01 00:00:00,0.0
1,2019-01-01 01:00:00,0.0
2,2019-01-01 02:00:00,0.0
3,2019-01-01 03:00:00,0.0
4,2019-01-01 04:00:00,0.0



DataFrame: temperature_calgary_


Unnamed: 0,datetime_,temp_calgary
0,2022-03-23 00:00:00,6.4
1,2022-03-23 01:00:00,8.9
2,2022-03-23 02:00:00,6.7
3,2022-03-23 03:00:00,5.5
4,2022-03-23 04:00:00,4.6



DataFrame: temperature_edmonton_


Unnamed: 0,datetime_,temp_edmonton
0,2022-03-23 00:00:00,2.9
1,2022-03-23 01:00:00,3.5
2,2022-03-23 02:00:00,4.2
3,2022-03-23 03:00:00,3.8
4,2022-03-23 04:00:00,2.0



DataFrame: temperature_fortmc_


Unnamed: 0,datetime_,temp_fortmc
0,2022-03-23 00:00:00,5.4
1,2022-03-23 01:00:00,5.6
2,2022-03-23 02:00:00,6.0
3,2022-03-23 03:00:00,4.9
4,2022-03-23 04:00:00,4.5



DataFrame: wind_generation


Unnamed: 0,datetime_,wind_generation
0,2019-01-01 00:00:00,1126.509756
1,2019-01-01 01:00:00,1148.336475
2,2019-01-01 02:00:00,1091.46996
3,2019-01-01 03:00:00,1122.613398
4,2019-01-01 04:00:00,1101.101023



DataFrame: windspeed_calgary_


Unnamed: 0,datetime_,ws_calgary
0,2022-03-23 00:00:00,10.0
1,2022-03-23 01:00:00,7.0
2,2022-03-23 02:00:00,4.0
3,2022-03-23 03:00:00,2.0
4,2022-03-23 04:00:00,2.0



DataFrame: windspeed_edmonton_


Unnamed: 0,datetime_,ws_edmonton
0,2022-03-23 00:00:00,13
1,2022-03-23 01:00:00,13
2,2022-03-23 02:00:00,12
3,2022-03-23 03:00:00,6
4,2022-03-23 04:00:00,6



DataFrame: windspeed_fortmc_


Unnamed: 0,datetime_,ws_fortmc
0,2022-03-23 00:00:00,5
1,2022-03-23 01:00:00,3
2,2022-03-23 02:00:00,6
3,2022-03-23 03:00:00,9
4,2022-03-23 04:00:00,10


In [7]:
# Parse the 'datetime_' strings of every frame into pandas timestamps
for df_name in df_list:
    print(f"\nDataFrame: {df_name}")
    frame = globals()[df_name]
    frame['datetime_'] = pd.to_datetime(frame['datetime_'])
    display(frame.head())


DataFrame: AIL_


Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load
0,2022-03-23 00:00:00,9146,9170
1,2022-03-23 01:00:00,9083,9075
2,2022-03-23 02:00:00,9020,9029
3,2022-03-23 03:00:00,9007,9012
4,2022-03-23 04:00:00,9043,9023



DataFrame: price_


Unnamed: 0,datetime_,pool_price,forecast_pool_price,rolling_30day_avg_price
0,2022-03-23 00:00:00,44.21,41.26,88.14
1,2022-03-23 01:00:00,47.55,47.55,88.09
2,2022-03-23 02:00:00,47.55,47.55,88.05
3,2022-03-23 03:00:00,47.11,47.86,88.01
4,2022-03-23 04:00:00,46.95,47.03,87.98



DataFrame: solar_generation


Unnamed: 0,datetime_,solar_generation
0,2019-01-01 00:00:00,0.0
1,2019-01-01 01:00:00,0.0
2,2019-01-01 02:00:00,0.0
3,2019-01-01 03:00:00,0.0
4,2019-01-01 04:00:00,0.0



DataFrame: temperature_calgary_


Unnamed: 0,datetime_,temp_calgary
0,2022-03-23 00:00:00,6.4
1,2022-03-23 01:00:00,8.9
2,2022-03-23 02:00:00,6.7
3,2022-03-23 03:00:00,5.5
4,2022-03-23 04:00:00,4.6



DataFrame: temperature_edmonton_


Unnamed: 0,datetime_,temp_edmonton
0,2022-03-23 00:00:00,2.9
1,2022-03-23 01:00:00,3.5
2,2022-03-23 02:00:00,4.2
3,2022-03-23 03:00:00,3.8
4,2022-03-23 04:00:00,2.0



DataFrame: temperature_fortmc_


Unnamed: 0,datetime_,temp_fortmc
0,2022-03-23 00:00:00,5.4
1,2022-03-23 01:00:00,5.6
2,2022-03-23 02:00:00,6.0
3,2022-03-23 03:00:00,4.9
4,2022-03-23 04:00:00,4.5



DataFrame: wind_generation


Unnamed: 0,datetime_,wind_generation
0,2019-01-01 00:00:00,1126.509756
1,2019-01-01 01:00:00,1148.336475
2,2019-01-01 02:00:00,1091.46996
3,2019-01-01 03:00:00,1122.613398
4,2019-01-01 04:00:00,1101.101023



DataFrame: windspeed_calgary_


Unnamed: 0,datetime_,ws_calgary
0,2022-03-23 00:00:00,10.0
1,2022-03-23 01:00:00,7.0
2,2022-03-23 02:00:00,4.0
3,2022-03-23 03:00:00,2.0
4,2022-03-23 04:00:00,2.0



DataFrame: windspeed_edmonton_


Unnamed: 0,datetime_,ws_edmonton
0,2022-03-23 00:00:00,13
1,2022-03-23 01:00:00,13
2,2022-03-23 02:00:00,12
3,2022-03-23 03:00:00,6
4,2022-03-23 04:00:00,6



DataFrame: windspeed_fortmc_


Unnamed: 0,datetime_,ws_fortmc
0,2022-03-23 00:00:00,5
1,2022-03-23 01:00:00,3
2,2022-03-23 02:00:00,6
3,2022-03-23 03:00:00,9
4,2022-03-23 04:00:00,10


In [12]:
# Collect the frames in df_list order and fold them into a single table.
# The inner join on 'datetime_' keeps only timestamps present in every frame.
frames_to_merge = [globals()[df_name] for df_name in df_list]

merged_df = frames_to_merge[0]
for next_frame in frames_to_merge[1:]:
    merged_df = pd.merge(merged_df, next_frame, on='datetime_', how='inner')

# Preview the merged result
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc
0,2022-03-23 00:00:00,9146,9170,44.21,41.26,88.14,0.0,6.4,2.9,5.4,1543.927681,10.0,13,5
1,2022-03-23 01:00:00,9083,9075,47.55,47.55,88.09,0.0,8.9,3.5,5.6,1548.392596,7.0,13,3
2,2022-03-23 02:00:00,9020,9029,47.55,47.55,88.05,0.0,6.7,4.2,6.0,1523.83849,4.0,12,6
3,2022-03-23 03:00:00,9007,9012,47.11,47.86,88.01,0.0,5.5,3.8,4.9,1498.064082,2.0,6,9
4,2022-03-23 04:00:00,9043,9023,46.95,47.03,87.98,0.0,4.6,2.0,4.5,1507.651054,2.0,6,10


In [13]:
# Print column dtypes and non-null counts of the merged frame
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 24244 entries, 0 to 24243
Data columns (total 14 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   datetime_                       24244 non-null  datetime64[ns]
 1   alberta_internal_load           24244 non-null  int64         
 2   forecast_alberta_internal_load  24244 non-null  int64         
 3   pool_price                      24244 non-null  float64       
 4   forecast_pool_price             24244 non-null  float64       
 5   rolling_30day_avg_price         24244 non-null  float64       
 6   solar_generation                24244 non-null  float64       
 7   temp_calgary                    24244 non-null  float64       
 8   temp_edmonton                   24244 non-null  float64       
 9   temp_fortmc                     24244 non-null  float64       
 10  wind_generation                 24244 non-null  float64       
 11  ws

In [14]:
# (rows, columns) of the merged frame
merged_df.shape

(24244, 14)

In [15]:
# Missing values per column. The recorded output shows 2 missing ws_calgary
# readings; nothing in this notebook fills or drops them before saving.
merged_df.isna().sum()

datetime_                         0
alberta_internal_load             0
forecast_alberta_internal_load    0
pool_price                        0
forecast_pool_price               0
rolling_30day_avg_price           0
solar_generation                  0
temp_calgary                      0
temp_edmonton                     0
temp_fortmc                       0
wind_generation                   0
ws_calgary                        2
ws_edmonton                       0
ws_fortmc                         0
dtype: int64

#### Feature Engineering

##### time based variables

In [16]:
# Calendar features, all derived from the timestamp with the vectorised .dt accessor
timestamps = merged_df['datetime_']

merged_df['hour_of_day'] = timestamps.dt.hour
merged_df['day_of_week'] = timestamps.dt.dayofweek  # Monday=0 ... Sunday=6
merged_df['day_of_month'] = timestamps.dt.day
# Week of the month (1 to 5): days 1-7 -> 1, 8-14 -> 2, ..., 29-31 -> 5.
# Computed column-wise instead of a per-row .apply(lambda ...).
merged_df['week_of_month'] = (timestamps.dt.day - 1) // 7 + 1
merged_df['month'] = timestamps.dt.month
merged_df['year'] = timestamps.dt.year

# Display the updated dataframe
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc,hour_of_day,day_of_week,day_of_month,week_of_month,month,year
0,2022-03-23 00:00:00,9146,9170,44.21,41.26,88.14,0.0,6.4,2.9,5.4,1543.927681,10.0,13,5,0,2,23,4,3,2022
1,2022-03-23 01:00:00,9083,9075,47.55,47.55,88.09,0.0,8.9,3.5,5.6,1548.392596,7.0,13,3,1,2,23,4,3,2022
2,2022-03-23 02:00:00,9020,9029,47.55,47.55,88.05,0.0,6.7,4.2,6.0,1523.83849,4.0,12,6,2,2,23,4,3,2022
3,2022-03-23 03:00:00,9007,9012,47.11,47.86,88.01,0.0,5.5,3.8,4.9,1498.064082,2.0,6,9,3,2,23,4,3,2022
4,2022-03-23 04:00:00,9043,9023,46.95,47.03,87.98,0.0,4.6,2.0,4.5,1507.651054,2.0,6,10,4,2,23,4,3,2022


##### seasonality

In [17]:
# Define a function to determine the season
def get_season(month):
    """Return the winter flag for a month number.

    Months 5 through 9 are treated as summer and yield 0; any other
    value yields 1 (winter).
    """
    summer_months = (5, 6, 7, 8, 9)
    return 0 if month in summer_months else 1

# Flag each row as winter (1) or summer (0) from its month, stored as 'is_winter'
merged_df['is_winter'] = merged_df['month'].map(get_season)

# Preview the frame with the new flag
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc,hour_of_day,day_of_week,day_of_month,week_of_month,month,year,is_winter
0,2022-03-23 00:00:00,9146,9170,44.21,41.26,88.14,0.0,6.4,2.9,5.4,1543.927681,10.0,13,5,0,2,23,4,3,2022,1
1,2022-03-23 01:00:00,9083,9075,47.55,47.55,88.09,0.0,8.9,3.5,5.6,1548.392596,7.0,13,3,1,2,23,4,3,2022,1
2,2022-03-23 02:00:00,9020,9029,47.55,47.55,88.05,0.0,6.7,4.2,6.0,1523.83849,4.0,12,6,2,2,23,4,3,2022,1
3,2022-03-23 03:00:00,9007,9012,47.11,47.86,88.01,0.0,5.5,3.8,4.9,1498.064082,2.0,6,9,3,2,23,4,3,2022,1
4,2022-03-23 04:00:00,9043,9023,46.95,47.03,87.98,0.0,4.6,2.0,4.5,1507.651054,2.0,6,10,4,2,23,4,3,2022,1


##### pool_price_lagged_values_till_24_hours

In [18]:
def generate_lagged_features(df, column, num_lags):
    """Add lagged copies of ``column`` to ``df`` and return ``df``.

    For every k from 1 to ``num_lags`` a column named ``{column}_lag_{k}``
    is added, holding the values of ``column`` shifted down by k rows.
    The shift is positional (row-based), so the first k rows of each lag
    column are NaN. ``df`` is modified in place; the same object is returned.
    """
    lagged_columns = {
        f'{column}_lag_{k}': df[column].shift(k)
        for k in range(1, num_lags + 1)
    }
    for lag_name, lag_values in lagged_columns.items():
        df[lag_name] = lag_values
    return df

# Add pool_price_lag_1 ... pool_price_lag_24 (the previous 24 rows of pool_price)
merged_df = generate_lagged_features(df=merged_df, column='pool_price', num_lags=24)

# Preview: the earliest rows carry NaN where no earlier row exists
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc,hour_of_day,day_of_week,day_of_month,week_of_month,month,year,is_winter,pool_price_lag_1,pool_price_lag_2,pool_price_lag_3,pool_price_lag_4,pool_price_lag_5,pool_price_lag_6,pool_price_lag_7,pool_price_lag_8,pool_price_lag_9,pool_price_lag_10,pool_price_lag_11,pool_price_lag_12,pool_price_lag_13,pool_price_lag_14,pool_price_lag_15,pool_price_lag_16,pool_price_lag_17,pool_price_lag_18,pool_price_lag_19,pool_price_lag_20,pool_price_lag_21,pool_price_lag_22,pool_price_lag_23,pool_price_lag_24
0,2022-03-23 00:00:00,9146,9170,44.21,41.26,88.14,0.0,6.4,2.9,5.4,1543.927681,10.0,13,5,0,2,23,4,3,2022,1,,,,,,,,,,,,,,,,,,,,,,,,
1,2022-03-23 01:00:00,9083,9075,47.55,47.55,88.09,0.0,8.9,3.5,5.6,1548.392596,7.0,13,3,1,2,23,4,3,2022,1,44.21,,,,,,,,,,,,,,,,,,,,,,,
2,2022-03-23 02:00:00,9020,9029,47.55,47.55,88.05,0.0,6.7,4.2,6.0,1523.83849,4.0,12,6,2,2,23,4,3,2022,1,47.55,44.21,,,,,,,,,,,,,,,,,,,,,,
3,2022-03-23 03:00:00,9007,9012,47.11,47.86,88.01,0.0,5.5,3.8,4.9,1498.064082,2.0,6,9,3,2,23,4,3,2022,1,47.55,47.55,44.21,,,,,,,,,,,,,,,,,,,,,
4,2022-03-23 04:00:00,9043,9023,46.95,47.03,87.98,0.0,4.6,2.0,4.5,1507.651054,2.0,6,10,4,2,23,4,3,2022,1,47.11,47.55,47.55,44.21,,,,,,,,,,,,,,,,,,,,


##### AIL_lagged_values_till_24_hours

In [19]:
# Add alberta_internal_load_lag_1 ... alberta_internal_load_lag_24 (the previous 24 rows of load)
merged_df = generate_lagged_features(df=merged_df, column='alberta_internal_load', num_lags=24)

# Preview: the earliest rows carry NaN where no earlier row exists
display(merged_df.head())

Unnamed: 0,datetime_,alberta_internal_load,forecast_alberta_internal_load,pool_price,forecast_pool_price,rolling_30day_avg_price,solar_generation,temp_calgary,temp_edmonton,temp_fortmc,wind_generation,ws_calgary,ws_edmonton,ws_fortmc,hour_of_day,day_of_week,day_of_month,week_of_month,month,year,is_winter,pool_price_lag_1,pool_price_lag_2,pool_price_lag_3,pool_price_lag_4,pool_price_lag_5,pool_price_lag_6,pool_price_lag_7,pool_price_lag_8,pool_price_lag_9,pool_price_lag_10,pool_price_lag_11,pool_price_lag_12,pool_price_lag_13,pool_price_lag_14,pool_price_lag_15,pool_price_lag_16,pool_price_lag_17,pool_price_lag_18,pool_price_lag_19,pool_price_lag_20,pool_price_lag_21,pool_price_lag_22,pool_price_lag_23,pool_price_lag_24,alberta_internal_load_lag_1,alberta_internal_load_lag_2,alberta_internal_load_lag_3,alberta_internal_load_lag_4,alberta_internal_load_lag_5,alberta_internal_load_lag_6,alberta_internal_load_lag_7,alberta_internal_load_lag_8,alberta_internal_load_lag_9,alberta_internal_load_lag_10,alberta_internal_load_lag_11,alberta_internal_load_lag_12,alberta_internal_load_lag_13,alberta_internal_load_lag_14,alberta_internal_load_lag_15,alberta_internal_load_lag_16,alberta_internal_load_lag_17,alberta_internal_load_lag_18,alberta_internal_load_lag_19,alberta_internal_load_lag_20,alberta_internal_load_lag_21,alberta_internal_load_lag_22,alberta_internal_load_lag_23,alberta_internal_load_lag_24
0,2022-03-23 00:00:00,9146,9170,44.21,41.26,88.14,0.0,6.4,2.9,5.4,1543.927681,10.0,13,5,0,2,23,4,3,2022,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2022-03-23 01:00:00,9083,9075,47.55,47.55,88.09,0.0,8.9,3.5,5.6,1548.392596,7.0,13,3,1,2,23,4,3,2022,1,44.21,,,,,,,,,,,,,,,,,,,,,,,,9146.0,,,,,,,,,,,,,,,,,,,,,,,
2,2022-03-23 02:00:00,9020,9029,47.55,47.55,88.05,0.0,6.7,4.2,6.0,1523.83849,4.0,12,6,2,2,23,4,3,2022,1,47.55,44.21,,,,,,,,,,,,,,,,,,,,,,,9083.0,9146.0,,,,,,,,,,,,,,,,,,,,,,
3,2022-03-23 03:00:00,9007,9012,47.11,47.86,88.01,0.0,5.5,3.8,4.9,1498.064082,2.0,6,9,3,2,23,4,3,2022,1,47.55,47.55,44.21,,,,,,,,,,,,,,,,,,,,,,9020.0,9083.0,9146.0,,,,,,,,,,,,,,,,,,,,,
4,2022-03-23 04:00:00,9043,9023,46.95,47.03,87.98,0.0,4.6,2.0,4.5,1507.651054,2.0,6,10,4,2,23,4,3,2022,1,47.11,47.55,47.55,44.21,,,,,,,,,,,,,,,,,,,,,9007.0,9020.0,9083.0,9146.0,,,,,,,,,,,,,,,,,,,,


#### Save Data

In [20]:
# Write the engineered feature table to CSV, omitting the index column.
# NOTE(review): the target is the same raw/ folder that the data-loading cell
# scans for *.csv files — confirm a re-run does not ingest this output as an input.
merged_df.to_csv('/home/kevin/Downloads/BESS/Jobs/Retraining/data/raw/merged_df.csv', index=False)