In [1]:
import pandas as pd
import numpy as np
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
import plotly.express as px
from sktime.forecasting.model_selection import temporal_train_test_split

import warnings

# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries used below; consider narrowing it by category or module.
warnings.filterwarnings("ignore")

In [2]:
import sys

# Make the helper modules under ../utils/ importable (pipeline_helpers is
# imported from there in the next cell). The path is relative, so it resolves
# against the kernel's working directory.
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if "../utils/" not in sys.path:
    sys.path.append("../utils/")

In [3]:
import pipeline_helpers as ph

### Pipeline to evaluate the optimized hyperparameters on the dates where AESO predictions are available.
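- The fitted model is updated with the actual pool price one hourly observation at a time, and a new forecast is produced after each update.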

In [4]:
def _read_hourly(url):
    """Load a CSV indexed by its parsed `date` column, ordered by date on an hourly grid."""
    frame = pd.read_csv(url, parse_dates=["date"], index_col="date")
    # asfreq("H") reindexes onto a regular hourly index; hours that are absent
    # from the file become rows of NaN.
    return frame.sort_index().asfreq("H")


# Features (X) and target (y) from the project's processed dataset.
X = _read_hourly(
    "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/complete_data/features.csv"
)
y = _read_hourly(
    "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/complete_data/target.csv"
)

In [5]:
# Train test split
forecast_len = 12  # number of hourly steps in each forecast
test_size = 24 * 4  # 96 hourly rows

# Hold out test_size rows plus forecast_len extra rows at the end
# (presumably so the last forecast origin still has a full horizon of
# actuals to score against — confirm against pipeline_helpers).
y_train, y_test_full, X_train, X_test = temporal_train_test_split(
    y, X, test_size=forecast_len + test_size
)

# y_test is the hold-out without its trailing forecast_len rows.
y_test = y_test_full.iloc[:-forecast_len]

# Re-apply the hourly frequency to each piece after splitting/slicing.
y_train, y_test, X_train, X_test = (
    split.asfreq("H") for split in (y_train, y_test, X_train, X_test)
)

In [6]:
# Forecasting pipeline returned by the project helper (per its name, the
# LightGBM setup with the optimized hyperparameters).
lgbm_pipeline = ph.initialize_optimized_lgbm_forecaster()

# Forecasting horizon covering steps 1..forecast_len ahead. Derived from
# forecast_len (set with the train/test split) instead of a repeated literal
# 12, so the horizon cannot drift from the length used elsewhere.
fh = ForecastingHorizon(np.arange(1, forecast_len + 1))

In [7]:
# Settings for the rolling evaluation.
forecast_len = 12  # same value as set alongside the train/test split
step_length = 1  # presumably the number of steps advanced per rolling update — confirm against pipeline_helpers

In [8]:
# Fit the pipeline on the training target and features for the horizon fh.
lgbm_pipeline.fit(y=y_train, X=X_train, fh=fh)

In [9]:
# Rolling evaluation over the test window; verbose=True makes the helper log
# each update/predict cycle.
# NOTE(review): the sixth positional argument used to be a literal 1. The
# named setting step_length (also 1, so the value passed is unchanged) is used
# instead, on the assumption that this slot is the step length — confirm
# against pipeline_helpers.get_rolling_predictions.
rolling_prediction_df = ph.get_rolling_predictions(
    lgbm_pipeline,
    X_train,
    X_test,
    y_test_full,
    fh,
    step_length,
    forecast_len,
    verbose=True,
)

Updating with actual values at 2023-05-26 13:00:00
Cut off before update: DatetimeIndex(['2023-05-26 12:00:00'], dtype='datetime64[ns]', name='date', freq='H')
Cut off after update: DatetimeIndex(['2023-05-26 13:00:00'], dtype='datetime64[ns]', name='date', freq='H')
Predicting for DatetimeIndex(['2023-05-26 13:00:00'], dtype='datetime64[ns]', name='date', freq=None)
Update and prediction done for 2023-05-26 13:00:00
----------------------------------------------------------------------------------
Updating with actual values at 2023-05-26 14:00:00
Cut off before update: DatetimeIndex(['2023-05-26 13:00:00'], dtype='datetime64[ns]', name='date', freq='H')
Cut off after update: DatetimeIndex(['2023-05-26 14:00:00'], dtype='datetime64[ns]', name='date', freq='H')
Predicting for DatetimeIndex(['2023-05-26 15:00:00'], dtype='datetime64[ns]', name='date', freq=None)
Update and prediction done for 2023-05-26 14:00:00
---------------------------------------------------------------------------

In [10]:
# Break the rolling predictions into per-fold actuals, per-fold predictions and
# per-fold RMSEs. The helper also prints the mean and standard deviation of the
# fold RMSEs (see the cell output).
fold_actuals, fold_predictions_list, rmse_list = ph.get_fold_predictions(rolling_prediction_df, y_test_full)

Average RMSE for each fold: 101.10226170746382
STD RMSE for each fold: 63.18022724178824


In [11]:
# Regroup the rolling predictions by forecast step.
# NOTE(review): presumably one set of predictions per step 1..forecast_len —
# confirm the returned structure against pipeline_helpers.
predictions = ph.generate_step_predictions(rolling_prediction_df, y_test_full, forecast_len)

In [12]:
# Score the step-wise predictions against the actuals. The helper prints one
# RMSE line per forecast step (see the cell output) and returns the actuals
# and RMSEs.
actuals, rmses = ph.generate_step_errors(predictions, y_test_full, forecast_len)

1 Step RMSE for model: 102.52453084157199
2 Step RMSE for model: 114.7255445995059
3 Step RMSE for model: 121.72393309487403
4 Step RMSE for model: 122.37212353865517
5 Step RMSE for model: 126.80482462456261
6 Step RMSE for model: 134.9974078893695
7 Step RMSE for model: 125.10210636432656
8 Step RMSE for model: 122.42183653227043
9 Step RMSE for model: 114.38081712743362
10 Step RMSE for model: 111.21469875102551
11 Step RMSE for model: 114.85788127383957
12 Step RMSE for model: 110.56031491004418


In [13]:
# Build a frame for plotting from the fold actuals, the fold predictions and
# the full target history y.
# NOTE(review): ddf is not used in the cells visible here.
ddf = ph.get_plotting_df(fold_actuals=fold_actuals, fold_predictions_list=fold_predictions_list, y_hist=y)

In [None]:
# Benchmark: AESO's own predictions over the span from the first to the last
# timestamp of y_test. The helper prints their RMSE for that period (see the
# cell output).
ph.get_aeso_predictions(y_test.index[0], y_test.index[-1])

RMSE for the predictions by AESO for the same time period as the test set: 117.54 CAD/MWh
