In [16]:
import pandas as pd
import numpy as np
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
import plotly.express as px

import warnings

# Silence every warning category for the rest of the session so that library
# deprecation notices do not clutter the cell outputs.
warnings.simplefilter("ignore")

In [17]:
import sys

# Make the project's helper modules importable from this notebook.
# The entry is relative, so it is resolved against the kernel's working directory.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
if "../utils/" not in sys.path:
    sys.path.append("../utils/")

In [18]:
import pipeline_helpers as ph

### Pipeline to evaluate the default hyperparameters on the test set
- The pool price is updated 12 steps (hours) at a time, and a 12-step forecast is produced after each update.

In [19]:
# Load the processed training features and target from the project repository.
# The "date" column is parsed as datetime and used as the index; each frame is
# then sorted chronologically and conformed to a regular hourly frequency
# (asfreq inserts NaN rows for any hour missing from the file).
# "h" is the current pandas alias for hourly data; the uppercase "H" spelling
# is deprecated since pandas 2.2.
X_train = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/train/X_train.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("h")
)

y_train = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/train/y_train.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("h")
)

In [20]:
# Load the processed test features and target from the project repository.
# The "date" column is parsed as datetime and used as the index; each frame is
# then sorted chronologically and conformed to a regular hourly frequency
# (asfreq inserts NaN rows for any hour missing from the file).
# "h" is the current pandas alias for hourly data; the uppercase "H" spelling
# is deprecated since pandas 2.2.
X_test = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/test/X_test.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("h")
)

y_test = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/test/y_test.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("h")
)

In [21]:
# Uncomment to run on a subset of the data
# X_test = X_test[:"2023-02-03"]
# y_test = y_test[:"2023-02-03"]

In [22]:
# Forecasting horizon covering steps 1 through 12 ahead.
fh = ForecastingHorizon(np.arange(1, 12 + 1))

# Forecaster built by the project helper
# (presumably with default hyperparameters; see pipeline_helpers to confirm).
lgbm_pipeline = ph.initialize_default_lgbm_forecaster()

In [23]:
# Both settings are passed to ph.get_rolling_predictions below and share the
# same value (presumably: steps predicted per cutoff, and steps the cutoff
# advances between predictions; confirm in pipeline_helpers).
forecast_len = step_length = 12

In [24]:
# Fit the forecaster on the training split for the 12-step horizon.
lgbm_pipeline.fit(
    X=X_train,
    y=y_train,
    fh=fh,
)

In [25]:
# Produce rolling forecasts over the test period with the fitted pipeline.
# Arguments stay positional because the helper's parameter names are not
# visible from this notebook.
rolling_prediction_df = ph.get_rolling_predictions(
    lgbm_pipeline,
    X_train,
    X_test,
    y_test,
    fh,
    step_length,
    forecast_len,
    verbose=False,
)

In [26]:
# Bare last expression: renders the rolling-prediction frame as the cell output.
rolling_prediction_df

Unnamed: 0,cutoff_hour_23,cutoff_hour_11,cutoff_hour_23.1,cutoff_hour_11.1,cutoff_hour_23.2,cutoff_hour_11.2,cutoff_hour_23.3,cutoff_hour_11.3,cutoff_hour_23.4,cutoff_hour_11.4,...,cutoff_hour_11.5,cutoff_hour_23.5,cutoff_hour_11.6,cutoff_hour_23.6,cutoff_hour_11.7,cutoff_hour_23.7,cutoff_hour_11.8,cutoff_hour_23.8,cutoff_hour_11.9,cutoff_hour_23.9
2023-02-01 00:00:00,87.273249,,,,,,,,,,...,,,,,,,,,,
2023-02-01 01:00:00,56.974189,,,,,,,,,,...,,,,,,,,,,
2023-02-01 02:00:00,62.107441,,,,,,,,,,...,,,,,,,,,,
2023-02-01 03:00:00,89.923774,,,,,,,,,,...,,,,,,,,,,
2023-02-01 04:00:00,47.414508,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-31 07:00:00,,,,,,,,,,,...,,,,,,,,,,203.903806
2023-05-31 08:00:00,,,,,,,,,,,...,,,,,,,,,,274.721459
2023-05-31 09:00:00,,,,,,,,,,,...,,,,,,,,,,200.501441
2023-05-31 10:00:00,,,,,,,,,,,...,,,,,,,,,,252.264336


In [27]:
# Split the rolling predictions into per-fold actuals/predictions and collect
# the RMSE of each fold (the helper also prints the summary shown below).
(
    fold_actuals,
    fold_predictions_list,
    rmse_list,
) = ph.get_fold_predictions(rolling_prediction_df, y_test)

Average RMSE for each fold: 167.06530665588102
STD RMSE for each fold: 95.56903508365684


In [28]:
# Load the historical target series from the project repository, used as
# context in the plot below. The "date" column is parsed as datetime and used
# as the index; the frame is then sorted chronologically and conformed to a
# regular hourly frequency (asfreq inserts NaN rows for any missing hour).
# "h" is the current pandas alias for hourly data; the uppercase "H" spelling
# is deprecated since pandas 2.2.
y_hist = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/filtered_target_medium.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("h")
)

In [29]:
# Assemble the frame consumed by the animated line chart in the next cell.
ddf = ph.get_plotting_df(
    y_hist=y_hist,
    fold_actuals=fold_actuals,
    fold_predictions_list=fold_predictions_list,
)

In [30]:
# Animated line chart: one frame per "timestep", with the historical price,
# the actual future price and the model's prediction drawn against "periodstep".
fig = px.line(
    ddf,
    x="periodstep",
    y=["HistoricalPrice", "FuturePrice", "Predicted"],
    animation_frame="timestep",
    title="Pool price: history, actuals and LightGBM predictions",
)
# With a list passed as `y`, plotly express labels the axis "value" and the
# legend "variable"; replace both so the figure can be read on its own.
fig.update_layout(
    height=700,
    yaxis_title="Price (CAD/MWh)",
    legend_title_text="Series",
)
fig.show()

In [31]:
# Compare against AESO's own forecasts over the span of the test set.
ph.get_aeso_predictions(
    y_test.index[0],   # first timestamp of the test set
    y_test.index[-1],  # last timestamp of the test set
)

One step prediction errors for AESO forecasts: 118.55 CAD/MWh.
Because these are one-step predictions, their error is expected to be lower than ours, which is measured over 12-step predictions.
