In [1]:
import pandas as pd
import numpy as np
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
import plotly.express as px

import warnings

warnings.filterwarnings("ignore")

In [2]:
import sys

# Put the sibling utils directory on the import path so helper modules stored
# there can be imported. The entry is relative, so it is resolved against the
# working directory of the kernel.
sys.path.extend(["../utils/"])

In [3]:
import pipeline_helpers as ph

### Pipeline to evaluate the default hyperparameters on the test set
- Update the pool price one observation at a time.

In [4]:
# Training features: read the CSV with "date" parsed as the index, order the
# rows by date, then conform the index to an hourly frequency.
X_train = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/train/X_train.csv",
        index_col="date",
        parse_dates=["date"],
    )
    .sort_values(by="date")
    .asfreq("H")
)

# Training target: same loading steps as the features above.
y_train = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/train/y_train.csv",
        index_col="date",
        parse_dates=["date"],
    )
    .sort_values(by="date")
    .asfreq("H")
)

In [5]:
# Test features: read the CSV with "date" parsed as the index, order the rows
# by date, then conform the index to an hourly frequency.
X_test = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/test/X_test.csv",
        index_col="date",
        parse_dates=["date"],
    )
    .sort_values(by="date")
    .asfreq("H")
)

# Test target: same loading steps as the features above.
y_test = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/test/y_test.csv",
        index_col="date",
        parse_dates=["date"],
    )
    .sort_values(by="date")
    .asfreq("H")
)

In [6]:
# Keep only the test rows dated up to and including 2023-02-03 (label-based
# slicing on the datetime index includes the whole end day).
X_test = X_test.loc[:"2023-02-03"]
y_test = y_test.loc[:"2023-02-03"]

In [7]:
# Relative forecasting horizon covering steps 1 through 12.
# NOTE(review): the literal 12 duplicates forecast_len, which is only defined
# in the following cell; keep the two values in sync.
fh = ForecastingHorizon(
    np.arange(1, 12 + 1),
)

# Forecasting pipeline built by the project helper.
# NOTE(review): n_estimators=1 is passed explicitly even though this section is
# titled as evaluating the default hyperparameters; confirm this is intended.
lgbm_pipeline = ph.initialize_default_lgbm_forecaster(
    n_estimators=1,
)

In [8]:
# Evaluation settings: number of steps per forecast, and the step length.
forecast_len, step_length = 12, 1

In [9]:
# Fit the pipeline on the training target and features for the horizon fh.
lgbm_pipeline.fit(
    X=X_train,
    y=y_train,
    fh=fh,
)

In [10]:
# Generate rolling predictions over the test set with the project helper.
# The step length now comes from the `step_length` setting defined with
# `forecast_len`, instead of a hard-coded 1, so changing the setting takes effect.
# NOTE(review): assumes the sixth positional parameter of
# get_rolling_predictions is the step length; confirm against pipeline_helpers.
rolling_prediction_df = ph.get_rolling_predictions(
    lgbm_pipeline,
    X_train,
    X_test,
    y_test,
    fh,
    step_length,
    forecast_len,
    verbose=False,
)

In [11]:
# Split the rolling predictions into per-fold actuals, per-fold predictions and
# one RMSE value per fold (as returned by the project helper).
(
    fold_actuals,
    fold_predictions_list,
    rmse_list,
) = ph.get_fold_predictions(rolling_prediction_df, y_test)

In [12]:
# Report the mean of the per-fold RMSE values.
mean_fold_rmse = np.mean(rmse_list)
print(f"Average RMSE for each fold: {mean_fold_rmse}")

Average RMSE for each fold: 87.36114874604462


In [13]:
# Regroup the rolling predictions by forecast step using the project helper.
predictions = ph.generate_step_predictions(
    rolling_prediction_df,
    y_test,
    forecast_len,
)

In [14]:
# Compute the actual values and the RMSE for each forecast step.
(
    actuals,
    rmses,
) = ph.generate_step_errors(predictions, y_test, forecast_len)

In [15]:
# Print one line per forecast step, pairing step numbers 1..forecast_len with
# the corresponding RMSE values.
steps = range(1, forecast_len + 1)
for step, rmse in zip(steps, rmses):
    print(f"{step} Step RMSE for model: {rmse}")

1 Step RMSE for model: 87.86196660240233
2 Step RMSE for model: 89.52852235578926
3 Step RMSE for model: 92.60853322990205
4 Step RMSE for model: 92.53822239134469
5 Step RMSE for model: 92.99105893164197
6 Step RMSE for model: 92.54250318557827
7 Step RMSE for model: 93.46687159125872
8 Step RMSE for model: 93.10866155638999
9 Step RMSE for model: 93.78179953416338
10 Step RMSE for model: 95.50674211186083
11 Step RMSE for model: 97.41722847570227
12 Step RMSE for model: 97.13119600447668


In [16]:
# Historical target series: read the CSV with "date" parsed as the index, order
# the rows by date, then conform the index to an hourly frequency.
y_hist = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/filtered_target_medium.csv",
        index_col="date",
        parse_dates=["date"],
    )
    .sort_values(by="date")
    .asfreq("H")
)

In [17]:
# Build the frame consumed by the animated plot from the per-fold actuals,
# per-fold predictions and the historical series.
ddf = ph.get_plotting_df(
    y_hist=y_hist,
    fold_actuals=fold_actuals,
    fold_predictions_list=fold_predictions_list,
)

In [18]:
# Animated line chart: the HistoricalPrice, FuturePrice and Predicted columns
# plotted against "periodstep", with one animation frame per "timestep".
# update_layout returns the same figure, so the calls can be chained.
fig = px.line(
    ddf,
    x="periodstep",
    y=["HistoricalPrice", "FuturePrice", "Predicted"],
    animation_frame="timestep",
).update_layout(height=700)
fig.show()

In [19]:
# Benchmark: fetch AESO's own forecasts for the span between the first and last
# timestamps of the test set and score them with RMSE.
aeso_predictions_df = ph.get_aeso_predictions(y_test.index[0], y_test.index[-1])

# Take the square root of the MSE explicitly rather than passing
# `squared=False`: scikit-learn deprecated that keyword in 1.4 and scheduled it
# for removal in 1.6, so this form works on every version. float() keeps the
# printed value a plain number regardless of the NumPy scalar repr.
rmse_aeso_predictions = float(
    np.sqrt(
        mean_squared_error(
            aeso_predictions_df["actual"],
            aeso_predictions_df["forecast"],
        )
    )
)
print(f"RMSE for the predictions by AESO for the same time period as the test set: {round(rmse_aeso_predictions, 2)} CAD/MWh")

RMSE for the predictions by AESO for the same time period as the test set: 51.68 CAD/MWh
