In [None]:
import pandas as pd
import numpy as np
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
import plotly.express as px
from sktime.forecasting.model_selection import temporal_train_test_split
import time

import warnings

# Suppress every warning for the rest of the kernel session.
# NOTE(review): this blanket filter also hides warnings that may matter
# (e.g. deprecations); consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [None]:
# Put the ../utils/ directory on the import path before importing the project
# helper module (presumably where pipeline_helpers lives — verify).
# The path is relative, so it resolves against the working directory the
# notebook is run from.
import sys
sys.path.append("../utils/")
import pipeline_helpers as ph

### Pipeline to evaluate the optimized hyperparameters on the dates for which AESO predictions are available
- The pool price is updated one step at a time.
- AESO's complete 6-hour predictions were logged for the dates from 26 May 2023 to 31 May 2023.

In [None]:
# Feature matrix: read the CSV with "date" parsed as the index, order the rows
# by date, then conform the index to an hourly frequency.
X = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/complete_data/features.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

# Target: loaded and conformed exactly the same way as the features.
y = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/complete_data/target.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

In [None]:
# Forecast length handed to the pipeline helpers further down.
forecast_len = 12

# Date-based split: everything up to and including 2023-05-25 is used for
# training, everything from 2023-05-26 onwards for testing. Each slice is
# re-conformed to an hourly frequency.
X_train = X.loc[:"2023-05-25"].asfreq("H")
y_train = y.loc[:"2023-05-25"].asfreq("H")

X_test = X.loc["2023-05-26":].asfreq("H")
y_test = y.loc["2023-05-26":].asfreq("H")

In [None]:
# Build the forecaster via the project helper (presumably an LGBM-based
# pipeline with the tuned hyperparameters, going by the helper's name — verify).
lgbm_pipeline = ph.initialize_optimized_lgbm_forecaster()

# Forecasting horizon covering steps 1..forecast_len. Derived from forecast_len
# (set in the train/test split cell) instead of a second hard-coded 12, so the
# horizon cannot drift from the value passed to the downstream helpers.
fh = ForecastingHorizon(np.arange(1, forecast_len + 1))

In [None]:
# Rolling-evaluation settings.
# NOTE(review): step_length is presumably how far the forecast origin advances
# between successive forecasts — confirm against pipeline_helpers.
forecast_len, step_length = 12, 1

In [None]:
# Time the model fit.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; unlike time.time() it is unaffected by system clock adjustments.
# Only the difference end_time - start_time is meaningful.
start_time = time.perf_counter()
lgbm_pipeline.fit(y=y_train, X=X_train, fh=fh)
end_time = time.perf_counter()
print(f"Fit time: {end_time - start_time} seconds")

In [None]:
# Elapsed seconds of the model fit. Reads the start_time/end_time pair set by
# the fit-timing cell, so this must run before the prediction-timing cell
# reassigns those two names.
fit_time = end_time - start_time

In [None]:
# Time the generation of the rolling predictions over the test period
# (this cell measures prediction, not training).
# perf_counter() is a monotonic clock intended for elapsed-time measurement;
# only the difference end_time - start_time is meaningful.
start_time = time.perf_counter()
rolling_prediction_df = ph.get_rolling_predictions(
    lgbm_pipeline,
    X_train,
    X_test,
    y_test,
    fh,
    # Same value (1) as the bare literal previously passed here.
    # NOTE(review): presumably this positional argument is the step length —
    # confirm against the helper's signature.
    step_length,
    forecast_len,
    verbose=True,
)
end_time = time.perf_counter()
print(f"Generating Predictions Time: {end_time - start_time} seconds")

In [None]:
# Elapsed seconds of the rolling-prediction run. Reads the start_time/end_time
# pair most recently assigned, so it must run right after the
# prediction-timing cell.
prediction_time = end_time - start_time

In [None]:
# Unpack the three values returned by the fold helper. rmse_list is averaged
# into the "avg_fold_rmse" column when the results table is built.
fold_actuals, fold_predictions_list, rmse_list = ph.get_fold_predictions(
    rolling_prediction_df,
    y_test,
)

In [None]:
# Regroup the rolling predictions through the step helper
# (presumably one group per forecast step — verify in pipeline_helpers).
predictions = ph.generate_step_predictions(
    rolling_prediction_df,
    y_test,
    forecast_len,
)

In [None]:
# Unpack the actuals and the RMSE values returned by the step-error helper.
# rmses supplies the "<step>_step_rmse" columns of the results table.
actuals, rmses = ph.generate_step_errors(
    predictions,
    y_test,
    forecast_len,
)

In [None]:
# Fetch the AESO predictions between the first and last timestamps of the test
# index. Left as the cell's last expression so the result is displayed.
ph.get_aeso_predictions(
    y_test.index[0],
    y_test.index[-1],
)

In [None]:
# One single-element column per forecast step, keyed "<step>_step_rmse".
# zip() stops at the shorter of the step range and rmses.
data = dict(
    (f"{step}_step_rmse", [rmse])
    for step, rmse in zip(range(1, forecast_len + 1), rmses)
)

# Single-row results table: per-step RMSEs followed by the mean fold RMSE
# (rounded to 2 decimals) and the two timings, in that column order.
error_df = pd.DataFrame(data).assign(
    avg_fold_rmse=round(np.mean(rmse_list), 2),
    fit_time=fit_time,
    prediction_time=prediction_time,
)

# Write the results to the current working directory, without the index.
error_df.to_csv("aeso_test_results.csv", index=False)