In [1]:
import pandas as pd
import numpy as np
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
import plotly.express as px

import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used
# below — consider narrowing the filter to specific categories/modules.
warnings.filterwarnings("ignore")

In [2]:
import sys
# Add ../utils/ (resolved against the current working directory) to the module
# search path. NOTE(review): presumably this is where pipeline_helpers lives — confirm.
sys.path.append("../utils/")

In [3]:
import pipeline_helpers as ph

### Pipeline to evaluate the optimized hyperparameters on the test set
- The pool price is updated 12 steps (i.e. 12 hourly observations) at a time.

In [4]:
# Training features: read with the "date" column as a parsed datetime index,
# order chronologically, then conform to an hourly frequency.
X_train = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/train/X_train.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

# Training target: same treatment as the features.
y_train = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/train/y_train.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

In [5]:
# Test features: read with the "date" column as a parsed datetime index,
# order chronologically, then conform to an hourly frequency.
X_test = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/test/X_test.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

# Test target: same treatment as the features.
y_test = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/test/y_test.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

In [6]:
# Trim the test set to the period we want to predict: keep every row from the
# start through the end of 2023-02-03 (label slicing on the date index).
X_test = X_test.loc[:"2023-02-03"]
y_test = y_test.loc[:"2023-02-03"]

In [7]:
# Adjust the params to test run the pipeline. If not specified, the default optimized parameters will be used.
# NOTE(review): n_estimators=1 is an explicit override; the metrics recorded further
# down presumably come from this test-run setting rather than the optimized
# defaults — confirm before reporting them.
lgbm_pipeline = ph.initialize_optimized_lgbm_forecaster(n_estimators=1)
# Forecasting horizon built from the integer steps 1 through 12.
# The 12 is hard-coded here and repeated as forecast_len in the next cell; keep the two in sync.
fh = ForecastingHorizon(np.arange(1, 12 + 1))

In [8]:
# Settings handed to the rolling-prediction helper.
# NOTE(review): presumably step_length is how far the window advances per update and
# forecast_len is how many steps are predicted each time — confirm in pipeline_helpers.
step_length = 12
forecast_len = 12

In [9]:
# Fit the pipeline on the training data (y_train as target, X_train as features)
# for the forecasting horizon fh.
lgbm_pipeline.fit(y=y_train, X=X_train, fh=fh)



In [10]:
# Generate rolling predictions over the test period with the fitted pipeline.
# NOTE(review): the exact rolling/update behaviour lives in pipeline_helpers;
# presumably the window moves step_length rows at a time and predicts forecast_len
# rows per move — confirm against the helper.
rolling_prediction_df = ph.get_rolling_predictions(lgbm_pipeline, X_train, X_test, y_test, fh, step_length, forecast_len, verbose=False)



In [11]:
# Turn the rolling predictions into three results: fold actuals, fold predictions
# and a list of RMSE values. The helper also prints the mean and standard deviation
# of those RMSE values (visible in this cell's output).
fold_actuals, fold_predictions_list, rmse_list = ph.get_fold_predictions(rolling_prediction_df, y_test)

Average RMSE for each fold: 86.37176995965294
STD RMSE for each fold: 36.23425632938713


In [12]:
# Mean of the RMSE values collected across all folds
mean_fold_rmse = np.mean(rmse_list)
print(f"Average RMSE for each fold: {mean_fold_rmse}")

Average RMSE for each fold: 86.37176995965294


In [13]:
# Spread (population standard deviation, numpy's default) of the RMSE values across all folds
fold_rmse_std = np.std(rmse_list)
print(f"RMSE Standard Deviation: {fold_rmse_std}")

RMSE Standard Deviation: 36.23425632938713


In [14]:
# Historical target series: read with the "date" column as a parsed datetime
# index, order chronologically, then conform to an hourly frequency.
y_hist = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/filtered_target_medium.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

In [15]:
# Assemble the frame used by the plot in the next cell from the fold results and
# the historical series.
# NOTE(review): the plot reads the columns periodstep, timestep, HistoricalPrice,
# FuturePrice and Predicted — confirm the helper produces them.
ddf = ph.get_plotting_df(fold_actuals=fold_actuals, fold_predictions_list=fold_predictions_list, y_hist=y_hist)

In [16]:
# Animated line chart: one frame per "timestep", plotting the historical, future
# and predicted series against "periodstep". update_layout returns the same
# figure, so the height can be set in the same expression.
fig = px.line(
    ddf,
    x="periodstep",
    y=["HistoricalPrice", "FuturePrice", "Predicted"],
    animation_frame="timestep",
).update_layout(height=700)
fig.show()

In [17]:
# Baseline comparison: AESO's forecasts over the span from the first to the last
# timestamp of y_test. The helper prints AESO's error for that span, and its
# return value (a table of actual vs. forecast prices) is displayed as the cell output.
ph.get_aeso_predictions(y_test.index[0], y_test.index[-1])

One step prediction errors for AESO forecasts: 51.68 CAD/MWh.
          As these are one step predictions, the error should be lesser than ours since ours is 12 step prediction errors.


Unnamed: 0,begin_datetime_utc,begin_datetime_mpt,pool_price,forecast_pool_price,rolling_30day_avg,actual,forecast
0,2023-02-01 07:00,2023-02-01 00:00,65.51,65.67,127.15,65.51,65.67
1,2023-02-01 08:00,2023-02-01 01:00,57.34,59.14,127.13,57.34,59.14
2,2023-02-01 09:00,2023-02-01 02:00,55.57,55.72,127.11,55.57,55.72
3,2023-02-01 10:00,2023-02-01 03:00,56.42,55.72,127.09,56.42,55.72
4,2023-02-01 11:00,2023-02-01 04:00,59.36,59.57,127.07,59.36,59.57
...,...,...,...,...,...,...,...
67,2023-02-04 02:00,2023-02-03 19:00,53.98,56.99,125.02,53.98,56.99
68,2023-02-04 03:00,2023-02-03 20:00,50.09,55.55,124.95,50.09,55.55
69,2023-02-04 04:00,2023-02-03 21:00,38.74,39.5,124.76,38.74,39.50
70,2023-02-04 05:00,2023-02-03 22:00,40.00,44.29,124.59,40.00,44.29
