In [9]:
import pandas as pd
import numpy as np
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
import plotly.express as px

import warnings

warnings.filterwarnings("ignore")

In [10]:
import sys
# Make the project-local helper modules in ../utils importable.
# NOTE: the path is relative, so it resolves against the kernel's working directory.
sys.path.append("../utils/")

In [11]:
import pipeline_helpers as ph

### Pipeline to evaluate the optimized hyperparameters on the test set
- The pool price is updated one step (one hour) at a time as the evaluation rolls forward through the test set.

In [12]:
# Load the processed training features and target, indexed by the parsed "date" column.
# After sorting, `asfreq("H")` reindexes onto a regular hourly grid; any hour missing
# from the CSV becomes a NaN row.
# NOTE(review): recent pandas releases deprecate the "H" alias in favour of "h";
# kept as-is here for compatibility with the pandas version this notebook was run on.
X_train = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/train/X_train.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

y_train = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/train/y_train.csv",
        parse_dates=["date"],
        index_col="date",
    )
    .sort_values(by="date")
    .asfreq("H")
)

# Feature columns that will be log-transformed: every column except those holding
# at least one value below 3, and except "weekly_profile".
# Built by iterating the columns in order (instead of set arithmetic) so that the
# resulting list is identical on every kernel restart.
low_value_cols = set(X_train.columns[X_train.lt(3).any()])
cols_for_log_transform = [
    col
    for col in X_train.columns
    if col not in low_value_cols and col != "weekly_profile"
]

In [13]:
# Held-out test features: parse "date" as the index, sort chronologically and
# reindex onto an hourly grid (missing hours become NaN rows).
X_test = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/test/X_test.csv",
        index_col="date",
        parse_dates=["date"],
    )
    .sort_values(by="date")
    .asfreq("H")
)

# Held-out test target, prepared the same way as the features.
y_test = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/test/y_test.csv",
        index_col="date",
        parse_dates=["date"],
    )
    .sort_values(by="date")
    .asfreq("H")
)

In [14]:
# Historical target series (later handed to the plotting helper), indexed by
# "date", sorted chronologically and reindexed onto an hourly grid.
y_hist = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/filtered_target_medium.csv",
        index_col="date",
        parse_dates=["date"],
    )
    .sort_values(by="date")
    .asfreq("H")
)

In [15]:
# Restrict the evaluation window: keep every test row up to and including
# 2023-02-28 (label-based slicing on the datetime index includes the end date).
y_test = y_test.loc[:"2023-02-28"]
X_test = X_test.loc[:"2023-02-28"]

In [16]:
from sklearn.preprocessing import StandardScaler
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.transformations.compose import ColumnwiseTransformer
from sktime.transformations.series.boxcox import LogTransformer
from lightgbm import LGBMRegressor

from sktime_custom_reduce import make_reduction
from sktime_custom_pipeline import ForecastingPipeline, TransformedTargetForecaster

def initialize_lgbm_forecaster(
    log_columns=None,
    device="gpu",
    num_threads=12,
    n_estimators=200,
    window_length=24,
):
    """Build the (unfitted) LightGBM forecasting pipeline.

    The outer ``ForecastingPipeline`` log-transforms the selected columns and
    then standard-scales; its final step is a ``TransformedTargetForecaster``
    that applies a log transform and standard scaling before a LightGBM
    regressor wrapped by ``make_reduction`` with the "direct" strategy.
    (Presumably, as in sktime, the outer steps act on the exogenous features
    and the inner steps on the target — confirm against the custom
    ``sktime_custom_pipeline`` module.)

    Parameters
    ----------
    log_columns : list of str, optional
        Feature columns to log-transform. Defaults to the notebook-level
        ``cols_for_log_transform``.
    device : str, default="gpu"
        Passed to ``LGBMRegressor``. Use "cpu" on machines without a
        GPU-enabled LightGBM build.
    num_threads : int, default=12
        Passed to ``LGBMRegressor``.
    n_estimators : int, default=200
        Passed to ``LGBMRegressor``.
    window_length : int, default=24
        Window length handed to ``make_reduction``.

    Returns
    -------
    ForecastingPipeline
        The assembled, unfitted pipeline.
    """
    if log_columns is None:
        # Fall back to the selection computed from X_train in an earlier cell.
        log_columns = cols_for_log_transform

    regressor = LGBMRegressor(
        device=device, num_threads=num_threads, n_estimators=n_estimators
    )

    target_forecaster = TransformedTargetForecaster(
        [
            ("log_column_transformer", LogTransformer()),
            ("std_column_transformer", TabularToSeriesAdaptor(StandardScaler())),
            (
                "forecast",
                make_reduction(
                    regressor,
                    window_length=window_length,
                    strategy="direct",
                ),
            ),
        ]
    )

    pipe = ForecastingPipeline(
        steps=[
            (
                "log_column_transformer",
                ColumnwiseTransformer(LogTransformer(), columns=list(log_columns)),
            ),
            ("std_column_transformer", TabularToSeriesAdaptor(StandardScaler())),
            ("forecaster", target_forecaster),
        ]
    )

    return pipe

# Relative forecasting horizon covering steps 1..12 ahead.
# NOTE: the literal 12 duplicates `forecast_len` (defined in the next cell); keep them in sync.
fh = ForecastingHorizon(np.arange(1, 12 + 1))

# Unfitted pipeline; fitting happens in a later cell.
lgbm_pipeline = initialize_lgbm_forecaster()

In [17]:
# Rolling-evaluation settings.
# step_length: presumably how many steps the window advances between successive
# forecasts — TODO confirm against pipeline_helpers.
step_length = 1
# forecast_len: number of steps forecast per fold; matches the 12-step horizon `fh`.
forecast_len = 12

In [18]:
# Fit the full pipeline on the training window for the 12-step horizon.
lgbm_pipeline.fit(
    y=y_train,
    X=X_train,
    fh=fh,
)



In [19]:
# Roll the fitted pipeline through the test set.
# The sixth positional argument was a bare literal 1 while the `step_length`
# config variable (also 1) went unused; pass the variable so the setting lives
# in one place. NOTE(review): assumes that argument is the step length — confirm
# against pipeline_helpers.get_rolling_predictions.
rolling_prediction_df = ph.get_rolling_predictions(
    lgbm_pipeline,
    X_train,
    X_test,
    y_test,
    fh,
    step_length,
    forecast_len,
    verbose=False,
)



In [20]:
# Per-fold actuals, predictions and RMSEs from the rolling forecasts
# (the helper also prints the mean and std of the fold RMSEs).
fold_actuals, fold_predictions_list, rmse_list = ph.get_fold_predictions(
    rolling_prediction_df,
    y_test,
)

Average RMSE for each fold: 108.87183849443089
STD RMSE for each fold: 110.4556561365339


In [21]:
# Regroup the rolling forecasts by how many steps ahead each prediction was made
# (presumably one entry per step 1..forecast_len — confirm in pipeline_helpers).
predictions = ph.generate_step_predictions(
    rolling_prediction_df,
    y_test,
    forecast_len,
)

In [22]:
# RMSE per forecast step (the helper prints one line per step, 1 through forecast_len).
actuals, rmses = ph.generate_step_errors(
    predictions,
    y_test,
    forecast_len,
)

1 Step RMSE for model: 101.72650850367008
2 Step RMSE for model: 134.6560027563161
3 Step RMSE for model: 148.35716899609062
4 Step RMSE for model: 151.59627591059962
5 Step RMSE for model: 157.6589922649477
6 Step RMSE for model: 163.0940108722065
7 Step RMSE for model: 161.75330051442154
8 Step RMSE for model: 163.44604820394375
9 Step RMSE for model: 165.19338888961204
10 Step RMSE for model: 165.58144819566255
11 Step RMSE for model: 166.12445142534244
12 Step RMSE for model: 168.80087021108417


In [25]:
# Baseline: AESO's own forecast error over the same span as the test set
# (first to last timestamp of y_test).
ph.get_aeso_predictions(
    y_test.index[0],
    y_test.index[-1],
)

One step prediction errors for AESO forecasts: 99.49 CAD/MWh.
As these are one step predictions, the error should be lesser than ours since ours is 12 step prediction errors.


In [30]:
# Long-format frame that drives the animation below.
# NOTE(review): the same point predictions are passed three times — presumably as
# prediction, upper bound and lower bound — so the "Predicted Upper"/"Predicted Lower"
# traces will coincide with "Predicted". Confirm the helper's argument order and
# supply real interval bounds if they are available.
ddf = ph.get_upper_lower_plotting_df(
    fold_actuals,
    fold_predictions_list,
    fold_predictions_list,
    fold_predictions_list,
    y_hist,
)

In [32]:
import plotly.graph_objects as go

# Copy the "index" column to "date", the name shown in the hover tooltip
# (presumably the timestamps of each point — confirm in the plotting helper).
ddf["date"] = ddf["index"]

# One trace per series; these are column names of `ddf`.
labels = [
    "Historical Price",
    "Future Actual Price",
    "Predicted",
    "Predicted Upper",
    "Predicted Lower",
]

# Build one animation frame per timestep, each holding one scatter trace per label.
frames = []
for timestep in ddf["timestep"].unique():
    # Select this timestep's rows once and reuse them for every trace.
    step_df = ddf.loc[ddf["timestep"] == timestep]
    frames.append(
        go.Frame(
            data=[
                go.Scatter(
                    x=step_df["periodstep"],
                    y=step_df[label],
                    mode="markers+lines",
                    name=label,  # Setting the name attribute for each trace
                    customdata=step_df[["date"]],
                    hovertemplate="<br>Label=%{fullData.name}<br>Price=%{y}<br>Date=%{customdata[0]}<extra></extra>",
                )
                for label in labels
            ]
        )
    )

# Frames are played with redraw=False, so the axes are not re-fitted per frame.
# Pin the y-range to cover every plotted value (with 5% padding) so later frames
# cannot run outside the range that would otherwise be fitted to the first frame.
yaxis_config = dict(title_text="Price", title_font=dict(size=20))
price_values = ddf[labels].to_numpy(dtype=float)
finite_prices = price_values[np.isfinite(price_values)]
if finite_prices.size and finite_prices.max() > finite_prices.min():
    y_low, y_high = finite_prices.min(), finite_prices.max()
    y_pad = 0.05 * (y_high - y_low)
    yaxis_config.update(range=[y_low - y_pad, y_high + y_pad], autorange=False)

play_button = dict(
    label="Play",
    method="animate",
    args=[None, {"frame": {"duration": 2000, "redraw": False}}],
)

# Animating to [None] with zero duration in immediate mode halts playback.
pause_button = dict(
    label="Pause",
    method="animate",
    args=[
        [None],
        {
            "frame": {"duration": 0, "redraw": False},
            "mode": "immediate",
            "fromcurrent": True,
        },
    ],
)

fig = go.Figure(
    data=frames[0]["data"],
    layout=go.Layout(
        title=dict(text="Energy Price Forecast Animation", font=dict(size=22)),
        xaxis=dict(ticks="", title_text="", showticklabels=False),
        yaxis=yaxis_config,
        updatemenus=[
            dict(
                type="buttons",
                showactive=False,
                buttons=[play_button, pause_button],
            )
        ],
        annotations=[
            dict(
                x=0.95,
                y=0.95,
                xref="paper",
                yref="paper",
                text=f"Average RMSE of predictions: {round(np.mean(rmse_list), 2)} CAD",
                showarrow=False,
                font=dict(size=20),
            )
        ],
        hovermode="x unified",
        height=700,
    ),
    frames=frames,
)

fig.show()