In [1]:
import pandas as pd
import numpy as np
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
import plotly.express as px

import warnings

# Blanket suppression: with no category or module filter, every warning raised
# after this point is ignored for the rest of the session.
# NOTE(review): presumably done to keep cell output readable — this also hides
# deprecation warnings from pandas/sktime, so consider narrowing it.
warnings.filterwarnings("ignore")

In [2]:
import sys

# Make the sibling `utils` directory importable so the project's helper module
# can be loaded by the next cell. The membership check keeps this cell
# idempotent: re-running it no longer appends a duplicate entry to sys.path.
UTILS_DIR = "../utils/"
if UTILS_DIR not in sys.path:
    sys.path.append(UTILS_DIR)

In [3]:
import pipeline_helpers as ph

### Pipeline to evaluate the optimized hyperparameters on the test set
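- The fitted model is updated with the actual pool price one hourly observation at a time, and a new forecast is produced after each update.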

In [4]:
# Training features: read with `date` parsed as the index, order by date, then
# conform to an hourly frequency.
X_train = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/train/X_train.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

# Training target: same loading and resampling steps as the features.
y_train = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/train/y_train.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

In [5]:
# Test features: read with `date` parsed as the index, order by date, then
# conform to an hourly frequency.
X_test = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/test/X_test.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

# Test target: same loading and resampling steps as the features.
y_test = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/test/y_test.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

In [6]:
# Historical target series: read with `date` parsed as the index, order by
# date, then conform to an hourly frequency.
# NOTE(review): `y_hist` is not referenced by any later cell visible here.
y_hist = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/filtered_target_medium.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

In [7]:
# Restrict the evaluation window to rows dated up to and including 2023-02-28
# (label-based slicing on the datetime index is end-inclusive).
X_test = X_test.loc[:"2023-02-28"]
y_test = y_test.loc[:"2023-02-28"]

In [8]:
lgbm_pipeline = ph.initialize_optimized_lgbm_forecaster()
fh = ForecastingHorizon(np.arange(1, 12 + 1))

In [9]:
forecast_len = 12
step_length = 1

In [10]:
# Fit the pipeline on the training target and features, registering the
# forecasting horizon `fh` at fit time.
lgbm_pipeline.fit(y=y_train, X=X_train, fh=fh)

In [11]:
# Walk through the test window: per the logged output, the helper updates the
# fitted pipeline with actual values at each timestamp and then predicts again.
# `step_length` replaces the bare literal 1 so the stride comes from the named
# constant declared alongside `forecast_len`.
# NOTE(review): assumes the sixth positional parameter is the step length —
# confirm against pipeline_helpers.get_rolling_predictions.
rolling_prediction_df = ph.get_rolling_predictions(
    lgbm_pipeline,
    X_train,
    X_test,
    y_test,
    fh,
    step_length,
    forecast_len,
    verbose=True,
)

Updating with actual values at 2023-02-01 00:00:00
Cut off before update: DatetimeIndex(['2023-01-31 23:00:00'], dtype='datetime64[ns]', name='date', freq='H')
Cut off after update: DatetimeIndex(['2023-02-01'], dtype='datetime64[ns]', name='date', freq='H')
Predicting for DatetimeIndex(['2023-02-01'], dtype='datetime64[ns]', name='date', freq=None)
Update and prediction done for 2023-02-01 00:00:00
----------------------------------------------------------------------------------
Updating with actual values at 2023-02-01 01:00:00
Cut off before update: DatetimeIndex(['2023-02-01'], dtype='datetime64[ns]', name='date', freq='H')
Cut off after update: DatetimeIndex(['2023-02-01 01:00:00'], dtype='datetime64[ns]', name='date', freq='H')
Predicting for DatetimeIndex(['2023-02-01 02:00:00'], dtype='datetime64[ns]', name='date', freq=None)
Update and prediction done for 2023-02-01 01:00:00
----------------------------------------------------------------------------------
Updating with actua

In [12]:
# Score the rolling forecasts against y_test fold by fold. Judging by the
# printed output, the helper reports the mean and standard deviation of the
# per-fold RMSE; it returns the fold actuals, fold predictions and RMSE list.
fold_actuals, fold_predictions_list, rmse_list = ph.get_fold_predictions(rolling_prediction_df, y_test)

Average RMSE for each fold: 110.13433075135933
STD RMSE for each fold: 106.28125998616316


In [13]:
# Derive per-step predictions from the rolling forecasts for a horizon of
# `forecast_len` steps.
# NOTE(review): presumably regroups forecasts by horizon step (1..forecast_len);
# the structure of the returned object is not visible here — confirm in
# pipeline_helpers.
predictions = ph.generate_step_predictions(rolling_prediction_df, y_test, forecast_len)

In [14]:
# Compare the per-step predictions with y_test. The helper prints one RMSE
# line per forecast step (steps 1 through 12 in the output below) and returns
# the matching actuals and RMSE values.
actuals, rmses = ph.generate_step_errors(predictions, y_test, forecast_len)

1 Step RMSE for model: 107.11046737284137
2 Step RMSE for model: 136.06268056051417
3 Step RMSE for model: 149.2198136866857
4 Step RMSE for model: 152.4145569377403
5 Step RMSE for model: 154.82622498156888
6 Step RMSE for model: 156.02809931498643
7 Step RMSE for model: 158.49895748463464
8 Step RMSE for model: 159.54432905756894
9 Step RMSE for model: 161.89549550029065
10 Step RMSE for model: 164.1164286975117
11 Step RMSE for model: 163.75973373221444
12 Step RMSE for model: 163.43778999050693


In [15]:
# Benchmark against AESO's own forecasts over the span from the first to the
# last test timestamp. The helper prints AESO's one-step prediction error for
# comparison; how those forecasts are obtained is not visible in this notebook.
ph.get_aeso_predictions(y_test.index[0], y_test.index[-1])

One step prediction errors for AESO forecasts: 99.49 CAD/MWh.
As these are one step predictions, the error should be lesser than ours since ours is 12 step prediction errors.
