In [1]:
import pandas as pd
import numpy as np
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error

import warnings

warnings.filterwarnings("ignore")

In [2]:
import sys
sys.path.append("../../utils/")

In [3]:
from sktime_custom_reduce import make_reduction
from lightgbm import LGBMRegressor
from sktime_custom_pipeline import ForecastingPipeline, TransformedTargetForecaster

### Notebook to run hyperparameter optimization for the model

As the ForecastingGridSearchCV and ForecastingRandomizedSearchCV of sktime are not capable of utilizing the warm initialization feature of LightGBM, we have to implement our own hyperparameter optimization. 

We're relying on an expanding window approach here. We consider the initial training window length as Jan 1st 2021 to Jan 31st 2021. We then expand the training window by 12 hours and retrain the model. We repeat this process until we reach the end of the training data. We'll try out different hyperparameter combinations for each training window and evaluate the performance on the validation set. The best performing hyperparameter combination will be used for the final model.

In [4]:
X_train = pd.read_csv(
    "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/train/X_train.csv",
    parse_dates=["date"],
    index_col="date",
)

y_train = pd.read_csv(
    "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/train/y_train.csv",
    parse_dates=["date"],
    index_col="date",
)

X_train = X_train.sort_values(by="date")
X_train = X_train.asfreq("H")
y_train = y_train.sort_values(by="date")
y_train = y_train.asfreq("H")

In [5]:
X_test = X_train["2023-01-01":]
y_test = y_train["2023-01-01":]

In [6]:
X_train = X_train[:"2022-12-31"]
y_train = y_train[:"2022-12-31"]

In [7]:
def initialize_lgbm_forecaster(boosting_type, learning_rate, max_depth, num_leaves=None, reg_alpha=None, reg_lambda=None, min_data_in_leaf=None):
    regressor = LGBMRegressor(
        device="gpu",
        n_jobs=-1,
        n_estimators=1000,
        boosting_type=boosting_type,
        learning_rate=learning_rate,
        max_depth=max_depth,
    )

    if num_leaves is not None:
        regressor.num_leaves = num_leaves
    if reg_alpha is not None:
        regressor.reg_alpha = reg_alpha
    if reg_lambda is not None:
        regressor.reg_lambda = reg_lambda
    if min_data_in_leaf is not None:
        regressor.min_data_in_leaf = min_data_in_leaf

    pipe = ForecastingPipeline(
        steps=[
            (
                "forecaster",
                TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                regressor,
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                ),
            ),
        ]
    )

    return pipe

# initialize pipelines with hyperparameters
pipelines = [
    initialize_lgbm_forecaster("gbdt", 0.01, 20),
    initialize_lgbm_forecaster("dart", 0.01, 20),
    initialize_lgbm_forecaster("gbdt", 0.08, 50, num_leaves=31, reg_alpha=10, reg_lambda=30, min_data_in_leaf=20),
    initialize_lgbm_forecaster("dart", 0.01, 100, num_leaves=70, reg_alpha=30, reg_lambda=10, min_data_in_leaf=30),
    initialize_lgbm_forecaster("dart", 0.15, 150, num_leaves=80, reg_alpha=10, reg_lambda=20, min_data_in_leaf=40),
    initialize_lgbm_forecaster("gbdt", 0.08, -1, num_leaves=31, reg_alpha=20, reg_lambda=10, min_data_in_leaf=30),
    initialize_lgbm_forecaster("dart", 0.01, 15, num_leaves=70, reg_alpha=30, reg_lambda=20, min_data_in_leaf=20),
    initialize_lgbm_forecaster("gbdt", 0.15, 25, num_leaves=80, reg_alpha=10, reg_lambda=30, min_data_in_leaf=40),
    initialize_lgbm_forecaster("dart", 0.08, 40, num_leaves=31, reg_alpha=20, reg_lambda=10, min_data_in_leaf=20),
    initialize_lgbm_forecaster("dart", 0.01, -1, num_leaves=70, reg_alpha=10, reg_lambda=20, min_data_in_leaf=30),
    initialize_lgbm_forecaster("dart", 0.15, 50, num_leaves=80, reg_alpha=30, reg_lambda=30, min_data_in_leaf=40),
    initialize_lgbm_forecaster("gbdt", 0.08, 100, num_leaves=31, reg_alpha=20, reg_lambda=10, min_data_in_leaf=20),
]

fh = ForecastingHorizon(np.arange(1, 12 + 1))

In [8]:
pipeline_assets = []
for i, pipeline in enumerate(pipelines):
    
    print(f"Training pipeline {i+1}...")
    
    rolling_prediction_df = pd.DataFrame()
    pipeline.fit(y=y_train, X=X_train, fh=fh)

    y_pred = pipeline.predict(fh, X=X_train.tail(1))
    y_pred.columns = [f"cutoff_hour_{pipeline.cutoff.hour[0]}"]
    rolling_prediction_df = pd.concat([rolling_prediction_df, y_pred], axis=1)
      
      
    for i in range(0, len(y_test), 12):

            new_observation_y, new_observation_X  = y_test[i:i+12], X_test[i:i+12]
            
            new_observation_y = new_observation_y.asfreq('H')
            new_observation_X = new_observation_X.asfreq('H')

            pipeline.update(y=new_observation_y, X=new_observation_X, update_params=True)

            pipeline.cutoff.freq = 'H'

            cutoff_time = pipeline.cutoff
            prediction_for = cutoff_time + pd.DateOffset(hours=i)

            y_pred = pipeline.predict(fh, X=new_observation_X)
            
            y_pred.columns = [f"cutoff_hour_{pipeline.cutoff.hour[0]}"]
            
            rolling_prediction_df = pd.concat([rolling_prediction_df, y_pred], axis=1)
            
    rmse_list = []
    fold_actuals = []
    fold_predictions_list = []

    for col in range(rolling_prediction_df.shape[1]-1):
        
        fold_predictions = rolling_prediction_df.iloc[:, col].dropna()
        
        fold_indices = fold_predictions.index  

        y_test_subset = y_test.loc[fold_indices]  
        
        rmse = np.sqrt(mean_squared_error(y_test_subset, fold_predictions))  
        
        rmse_list.append(rmse)

        fold_actuals.append(y_test_subset)
        fold_predictions_list.append(fold_predictions)

    print(f"Average RMSE for each fold: {np.mean(rmse_list)}")

    asset_dict = {"actuals": fold_actuals, "predictions": fold_predictions_list, "rmse": rmse_list, "pipeline": pipeline, "rolling_prediction_df": rolling_prediction_df}

    pipeline_assets.append(asset_dict)

Training pipeline 1...
Average RMSE for each fold: 95.72728987761006
Training pipeline 2...
Average RMSE for each fold: 89.29048801316956
Training pipeline 3...
Average RMSE for each fold: 99.46342427422609
Training pipeline 4...
Average RMSE for each fold: 90.59810093166509
Training pipeline 5...
Average RMSE for each fold: 96.93036342804002
Training pipeline 6...
Average RMSE for each fold: 98.0828910076149
Training pipeline 7...
Average RMSE for each fold: 89.72298936169489
Training pipeline 8...
Average RMSE for each fold: 98.4681823243143
Training pipeline 9...
Average RMSE for each fold: 95.44288457500416
Training pipeline 10...
Average RMSE for each fold: 90.0493276150256
Training pipeline 11...
Average RMSE for each fold: 99.01450682378139
Training pipeline 12...
Average RMSE for each fold: 98.07196136334963


In [9]:
lowest_average_rmse = float('inf')
best_model = None

# Iterating over the list of dictionaries
for asset_dict in pipeline_assets:
    avg_rmse = sum(asset_dict["rmse"]) / len(asset_dict["rmse"])  # Calculate the average RMSE
    
    # If this model has a lower average RMSE than the current best model, update the best model and lowest RMSE
    if avg_rmse < lowest_average_rmse:
        lowest_average_rmse = avg_rmse
        best_model = asset_dict

print(f'The model with the lowest average RMSE is: {best_model["pipeline"]}')

The model with the lowest average RMSE is: ForecastingPipeline(steps=[('forecaster',
                            TransformedTargetForecaster(steps=[('forecast',
                                                                DirectTabularRegressionForecaster(estimator=LGBMRegressor(boosting_type='dart', device='gpu', learning_rate=0.01,
              max_depth=20, n_estimators=1000),
                                                                                                  window_length=24))]))])


In [10]:
import pickle

with open("tuning_results.pkl", "wb") as f:
    pickle.dump(pipeline_assets, f)