In [60]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime, timedelta, timezone

# Lift the pandas display limits so frames are printed with every row and column
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, None)

In [11]:
# Directory holding the June-December 2024 CSV extracts.
# Forward slashes are used on purpose: in a normal (non-raw) string literal a
# backslash starts an escape sequence, so Windows-style paths such as
# 'D:\Python Projects\...' contain invalid escapes ('\P', '\g', '\d', ...) that
# Python warns about and will eventually reject. Forward slashes are accepted
# on Windows as well.
DATA_DIR = 'D:/Python Projects/git_projects/BESS/data/data_June2024_Dec2024'

# target variable
price = pd.read_csv(f'{DATA_DIR}/price_01062024_31122024.csv')

# independent variables
ail = pd.read_csv(f'{DATA_DIR}/AIL_01062024_31122024.csv')
solar = pd.read_csv(f'{DATA_DIR}/solar_generation_june_dec_2024_2.csv')
wind = pd.read_csv(f'{DATA_DIR}/wind_generation_june_dec_2024_2.csv')
temp_calgary = pd.read_csv(f'{DATA_DIR}/temperature_calgary_01062024_31122024.csv')
temp_edmonton = pd.read_csv(f'{DATA_DIR}/temperature_edmonton_01062024_31122024.csv')
temp_fortmc = pd.read_csv(f'{DATA_DIR}/temperature_fortmc_01062024_31122024.csv')
ws_calgary = pd.read_csv(f'{DATA_DIR}/windspeed_calgary_01062024_31122024.csv')
ws_edmonton = pd.read_csv(f'{DATA_DIR}/windspeed_edmonton_01062024_31122024.csv')
ws_fortmc = pd.read_csv(f'{DATA_DIR}/windspeed_fortmc_01062024_31122024.csv')

In [49]:
# Harmonise column names: every frame gets the shared timestamp column name
# 'datetime_', and measurement columns get a source-specific name.
# The renames are applied in place, in the order listed.
_column_renames = (
    (price, {'begin_datetime_mpt': 'datetime_', 'rolling_30day_avg': 'rolling_30day_avg_price'}),
    (ail, {'begin_datetime_mpt': 'datetime_'}),
    (solar, {'Date (MPT)': 'datetime_', 'Volume': 'solar_generation'}),
    (wind, {'Date (MPT)': 'datetime_', 'Volume': 'wind_generation'}),
    (temp_calgary, {'Timestamp_mst': 'datetime_', 'Temperature (degree C)': 'temp_calgary'}),
    (temp_edmonton, {'Timestamp_mst': 'datetime_', 'Temperature (degree C)': 'temp_edmonton'}),
    (temp_fortmc, {'Timestamp_mst': 'datetime_', 'Temperature (degree C)': 'temp_fortmc'}),
    (ws_calgary, {'Timestamp_mst': 'datetime_', 'WIND_SPEED': 'ws_calgary'}),
    (ws_edmonton, {'Timestamp_mst': 'datetime_', 'WIND_SPEED': 'ws_edmonton'}),
    (ws_fortmc, {'Timestamp_mst': 'datetime_', 'WIND_SPEED': 'ws_fortmc'}),
)

for _frame, _mapping in _column_renames:
    _frame.rename(columns=_mapping, inplace=True)

In [50]:
def drop_unnamed_columns(df):
    """
    Drops all columns whose name starts with 'Unnamed' from a DataFrame.

    Column labels are compared through their string form, so frames that
    contain non-string labels (e.g. integers) are handled as well; such
    labels are kept unless their string form starts with 'Unnamed'.

    Parameters:
        df (pd.DataFrame): Input DataFrame. It is not modified.

    Returns:
        pd.DataFrame: A new DataFrame (an independent copy) with the
        'Unnamed...' columns removed and the remaining columns in their
        original order.
    """
    # Positional boolean mask rather than label selection, so duplicate
    # column names are handled correctly.
    keep = [not str(label).startswith('Unnamed') for label in df.columns]
    # Explicit copy: callers add columns to the result afterwards, which
    # should never touch (or warn about) the original frame.
    return df.loc[:, keep].copy()

In [51]:
# Strip the 'Unnamed...' columns from every loaded frame; each result is bound
# to the matching *_cleaned name, in the same order as the inputs below.
(
    price_cleaned,
    ail_cleaned,
    solar_cleaned,
    wind_cleaned,
    temp_calgary_cleaned,
    temp_edmonton_cleaned,
    temp_fortmc_cleaned,
    ws_calgary_cleaned,
    ws_edmonton_cleaned,
    ws_fortmc_cleaned,
) = (
    drop_unnamed_columns(_raw_frame)
    for _raw_frame in (
        price,
        ail,
        solar,
        wind,
        temp_calgary,
        temp_edmonton,
        temp_fortmc,
        ws_calgary,
        ws_edmonton,
        ws_fortmc,
    )
)

In [52]:
# Preview the first rows of the cleaned Calgary wind-speed frame
ws_calgary_cleaned.head()

Unnamed: 0,datetime_,ws_calgary
0,2024-06-01 00:00:00,6.0
1,2024-06-01 01:00:00,10.0
2,2024-06-01 02:00:00,13.0
3,2024-06-01 03:00:00,7.0
4,2024-06-01 04:00:00,3.0


In [None]:
def create_time_lagged_data(df, time_column, value_column, num_lags=24):
    """
    Returns a time-sorted copy of a DataFrame with lagged copies of one column.

    For every lag k in 1..num_lags a column named '<value_column>_lag_<k>' is
    added, holding the value of `value_column` from k rows earlier in the
    sorted order. The first k rows of that column are NaN. Lags are counted
    in rows, not in elapsed time, so gaps in `time_column` are not
    compensated for.

    Parameters:
        df (pd.DataFrame): Input DataFrame. It is not modified.
        time_column (str): Column used to order the rows.
        value_column (str): Column to create lagged copies of.
        num_lags (int): Number of lag columns to add (default 24). Values
            below 1 add no columns.

    Returns:
        pd.DataFrame: New DataFrame sorted by `time_column`, with the lag
        columns appended after the existing columns.
    """
    # Sort data by time. A stable algorithm is requested explicitly: the
    # default (quicksort) may reorder rows that share the same timestamp,
    # which would change every lag column.
    ordered = df.sort_values(by=time_column, kind='mergesort')

    # Create lagged columns
    for lag in range(1, num_lags + 1):
        ordered[f'{value_column}_lag_{lag}'] = ordered[value_column].shift(lag)

    return ordered


In [53]:
def _lag_on_datetime(frame, value_column):
    """Adds lag columns for `value_column`, ordering rows by 'datetime_' (default number of lags)."""
    return create_time_lagged_data(frame, 'datetime_', value_column)


df_price_lagged = _lag_on_datetime(price_cleaned, 'pool_price')

# The load frame carries two series (actual and forecast), so it is lagged in
# two passes: the second pass runs on the output of the first.
df_ail_lagged_1 = _lag_on_datetime(ail_cleaned, 'alberta_internal_load')
df_ail_lagged = _lag_on_datetime(df_ail_lagged_1, 'forecast_alberta_internal_load')

df_solar_lagged = _lag_on_datetime(solar_cleaned, 'solar_generation')
df_wind_lagged = _lag_on_datetime(wind_cleaned, 'wind_generation')

df_temp_calgary_lagged = _lag_on_datetime(temp_calgary_cleaned, 'temp_calgary')
df_temp_edmonton_lagged = _lag_on_datetime(temp_edmonton_cleaned, 'temp_edmonton')
df_temp_fortmc_lagged = _lag_on_datetime(temp_fortmc_cleaned, 'temp_fortmc')

df_ws_calgary_lagged = _lag_on_datetime(ws_calgary_cleaned, 'ws_calgary')
df_ws_edmonton_lagged = _lag_on_datetime(ws_edmonton_cleaned, 'ws_edmonton')
df_ws_fortmc_lagged = _lag_on_datetime(ws_fortmc_cleaned, 'ws_fortmc')

In [59]:
# Preview the first rows of the lagged Calgary wind-speed frame (ws_calgary plus its lag columns)
df_ws_calgary_lagged.head()

Unnamed: 0,datetime_,ws_calgary,ws_calgary_lag_1,ws_calgary_lag_2,ws_calgary_lag_3,ws_calgary_lag_4,ws_calgary_lag_5,ws_calgary_lag_6,ws_calgary_lag_7,ws_calgary_lag_8,ws_calgary_lag_9,ws_calgary_lag_10,ws_calgary_lag_11,ws_calgary_lag_12,ws_calgary_lag_13,ws_calgary_lag_14,ws_calgary_lag_15,ws_calgary_lag_16,ws_calgary_lag_17,ws_calgary_lag_18,ws_calgary_lag_19,ws_calgary_lag_20,ws_calgary_lag_21,ws_calgary_lag_22,ws_calgary_lag_23,ws_calgary_lag_24
0,2024-06-01 00:00:00,6.0,,,,,,,,,,,,,,,,,,,,,,,,
1,2024-06-01 01:00:00,10.0,6.0,,,,,,,,,,,,,,,,,,,,,,,
2,2024-06-01 02:00:00,13.0,10.0,6.0,,,,,,,,,,,,,,,,,,,,,,
3,2024-06-01 03:00:00,7.0,13.0,10.0,6.0,,,,,,,,,,,,,,,,,,,,,
4,2024-06-01 04:00:00,3.0,7.0,13.0,10.0,6.0,,,,,,,,,,,,,,,,,,,,
