In [1]:
# Requires the sktime package; if it is not installed yet, run: %pip install sktime

In [2]:
import pandas as pd
from scipy.stats import boxcox
from scipy.special import inv_boxcox
import numpy as np
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.transformations.series.detrend import Deseasonalizer
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.transformations.series.detrend import Detrender
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
from sktime.forecasting.compose import make_reduction
from sklearn.exceptions import ConvergenceWarning
import requests
from sklearn.ensemble import HistGradientBoostingRegressor, GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import ElasticNetCV
from sktime.forecasting.model_selection import (
    ForecastingGridSearchCV,
    ExpandingWindowSplitter
)
from sktime.forecasting.compose import MultiplexForecaster
from sklearn.neighbors import KNeighborsRegressor
from sktime.forecasting.ets import AutoETS
from sktime.transformations.series.boxcox import LogTransformer


import warnings
warnings.filterwarnings('ignore')

In [3]:
def initialize_arima_forecaster():
    """Build the ARIMA forecasting pipeline.

    The target is log-transformed, an additive seasonal component with a
    period of 24 observations is removed, and the remainder is modelled by an
    ``ARIMA`` forecaster created with its default arguments.

    Returns
    -------
    TransformedTargetForecaster
        The unfitted three-step pipeline.
    """
    steps = [
        ("log_transformer", LogTransformer()),
        ("deseasonalizer_daily", Deseasonalizer(sp=24, model="additive")),
        ("residual_forecaster", ARIMA()),
    ]
    return TransformedTargetForecaster(steps)

def initialize_elasticnet_forecaster():
    """Build the ElasticNet forecasting pipeline.

    The target is log-transformed and additively deseasonalized with a period
    of 24 observations. The final step is an ``ElasticNetCV`` regressor turned
    into a forecaster by ``make_reduction`` with the "direct" strategy and a
    window of the 24 most recent observations.

    Returns
    -------
    TransformedTargetForecaster
        The unfitted three-step pipeline.
    """
    reduced_regressor = make_reduction(
        ElasticNetCV(n_jobs=-1),
        window_length=24,
        strategy="direct",
    )
    return TransformedTargetForecaster(
        [
            ("log_transformer", LogTransformer()),
            ("deseasonalizer_daily", Deseasonalizer(sp=24, model="additive")),
            ("forecast", reduced_regressor),
        ]
    )

def initialize_rf_forecaster(random_state=None):
    """Build the random-forest forecasting pipeline.

    The target is log-transformed and additively deseasonalized with a period
    of 24 observations. The final step is a 200-tree ``RandomForestRegressor``
    turned into a forecaster by ``make_reduction`` with the "direct" strategy
    and a window of the 24 most recent observations.

    Parameters
    ----------
    random_state : int, RandomState instance or None, default=None
        Forwarded to ``RandomForestRegressor``. Pass an integer to make the
        fitted forest reproducible across runs; the default ``None`` keeps the
        unseeded behaviour.

    Returns
    -------
    TransformedTargetForecaster
        The unfitted three-step pipeline.
    """
    regressor = RandomForestRegressor(
        n_estimators=200,
        n_jobs=-1,
        random_state=random_state,
    )
    reduced_regressor = make_reduction(
        regressor,
        window_length=24,
        strategy="direct",
    )
    return TransformedTargetForecaster(
        [
            ("log_transformer", LogTransformer()),
            ("deseasonalizer_daily", Deseasonalizer(sp=24, model="additive")),
            ("forecast", reduced_regressor),
        ]
    )

def initialize_gb_forecaster(random_state=None):
    """Build the gradient-boosting forecasting pipeline.

    The target is log-transformed and additively deseasonalized with a period
    of 24 observations. The final step is a ``GradientBoostingRegressor`` with
    200 boosting stages, turned into a forecaster by ``make_reduction`` with
    the "direct" strategy and a window of the 24 most recent observations.

    Parameters
    ----------
    random_state : int, RandomState instance or None, default=None
        Forwarded to ``GradientBoostingRegressor``. Pass an integer for
        reproducible fits; the default ``None`` keeps the unseeded behaviour.

    Returns
    -------
    TransformedTargetForecaster
        The unfitted three-step pipeline.
    """
    regressor = GradientBoostingRegressor(
        n_estimators=200,
        random_state=random_state,
    )
    reduced_regressor = make_reduction(
        regressor,
        window_length=24,
        strategy="direct",
    )
    return TransformedTargetForecaster(
        [
            ("log_transformer", LogTransformer()),
            ("deseasonalizer_daily", Deseasonalizer(sp=24, model="additive")),
            ("forecast", reduced_regressor),
        ]
    )


def initialize_hist_forecaster(random_state=None):
    """Build the histogram-gradient-boosting forecasting pipeline.

    The target is log-transformed and additively deseasonalized with a period
    of 24 observations. The final step is a ``HistGradientBoostingRegressor``
    turned into a forecaster by ``make_reduction`` with the "direct" strategy
    and a window of the 24 most recent observations.

    Parameters
    ----------
    random_state : int, RandomState instance or None, default=None
        Forwarded to ``HistGradientBoostingRegressor``. Pass an integer for
        reproducible fits; the default ``None`` keeps the unseeded behaviour.

    Returns
    -------
    TransformedTargetForecaster
        The unfitted three-step pipeline.
    """
    regressor = HistGradientBoostingRegressor(random_state=random_state)
    reduced_regressor = make_reduction(
        regressor,
        window_length=24,
        strategy="direct",
    )
    return TransformedTargetForecaster(
        [
            ("log_transformer", LogTransformer()),
            ("deseasonalizer_daily", Deseasonalizer(sp=24, model="additive")),
            ("forecast", reduced_regressor),
        ]
    )

def initialize_ets_forecaster():
    """Build the ETS forecasting pipeline.

    The target is log-transformed and additively deseasonalized with a period
    of 24 observations, then forecast by ``AutoETS`` with automatic model
    selection (``auto=True``) and a seasonal period of 24.

    ``AutoETS`` is itself a forecaster, so it is placed in the pipeline
    directly. ``make_reduction`` is meant to turn a regressor into a
    forecaster and is therefore not applied here.

    Returns
    -------
    TransformedTargetForecaster
        The unfitted three-step pipeline.
    """
    ets_forecaster = AutoETS(auto=True, sp=24, n_jobs=-1)
    return TransformedTargetForecaster(
        [
            ("log_transformer", LogTransformer()),
            ("deseasonalizer_daily", Deseasonalizer(sp=24, model="additive")),
            ("forecast", ets_forecaster),
        ]
    )

In [4]:
# Instantiate one unfitted pipeline per candidate model.
arima_pipeline = initialize_arima_forecaster()
elasticnet_pipeline = initialize_elasticnet_forecaster()
rf_pipeline = initialize_rf_forecaster()
gb_pipeline = initialize_gb_forecaster()
hist_pipeline = initialize_hist_forecaster()
ets_pipeline = initialize_ets_forecaster()

# Name -> pipeline lookup (the ARIMA pipeline is not part of it).
forecasting_models = dict(
    elasticnet_pipeline=elasticnet_pipeline,
    rf_pipeline=rf_pipeline,
    gb_pipeline=gb_pipeline,
    hist_pipeline=hist_pipeline,
    ets_pipeline=ets_pipeline,
)

# Only these four pipelines are selectable through the multiplexer;
# the ARIMA and ETS pipelines are left out.
multiplexed_names = [
    "elasticnet_pipeline",
    "rf_pipeline",
    "gb_pipeline",
    "hist_pipeline",
]

forecaster = MultiplexForecaster(
    forecasters=[(name, forecasting_models[name]) for name in multiplexed_names]
)

# Grid over the multiplexer's switch parameter, one entry per selectable pipeline.
forecaster_param_grid = {"selected_forecaster": list(multiplexed_names)}

In [8]:
# Location of the processed supply/load/price table.
PRICE_DATA_URL = 'https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/supply_load_price.csv'

# Read the table indexed by its timestamp column, order it chronologically,
# keep everything from February 2022 onward and conform it to an hourly
# frequency.
price_old_df = (
    pd.read_csv(PRICE_DATA_URL, parse_dates=['Date (MST)'], index_col='Date (MST)')
    .sort_values(by='Date (MST)')
    .loc['2022-02':]
    .asfreq('H')
)

# Univariate target series.
price_df = price_old_df['price']

# Hold out the last 48 observations as the test set.
y_train, y_test = temporal_train_test_split(price_df, test_size=48)

In [9]:
# Expanding-window cross-validation on the training series: the initial
# training window covers the first 98.5% of y_train, the window moves forward
# with a step length of 1, and every fold forecasts 1 to 12 steps ahead.
initial_window = int(len(y_train) * 0.985)
cv = ExpandingWindowSplitter(
    initial_window=initial_window,
    step_length=1,
    fh=np.arange(1, 13),
)
n_splits = cv.get_n_splits(y_train)
print(f"Number of Folds = {n_splits}")

Number of Folds = 141


In [None]:
from sktime.forecasting.model_evaluation import evaluate
from sktime.performance_metrics.forecasting import MeanSquaredScaledError

list_models = ["elasticnet_pipeline", "rf_pipeline", "gb_pipeline", "hist_pipeline"]

# NOTE(review): the metric is MeanSquaredScaledError(square_root=True), i.e. a
# *scaled* root mean squared error, even though the result variables below are
# named "rmse".
scorer = MeanSquaredScaledError(square_root=True)
score_column = "test_MeanSquaredScaledError"

# Per-model mean and standard deviation of the fold scores, in list_models order.
rmse_cv_results = []
rmse_cv_std = []
for model_name in list_models:
    # Point the multiplexer at the pipeline under evaluation.
    forecaster.set_params(selected_forecaster=model_name)
    # NOTE(review): return_data=True keeps per-fold data in `results`, but only
    # the score column is read in this cell; drop it if nothing else needs it.
    results = evaluate(
        forecaster=forecaster,
        y=y_train,
        cv=cv,
        strategy="refit",
        return_data=True,
        scoring=scorer,
        backend="loky",
    )
    fold_scores = results[score_column]
    rmse_cv_results.append(fold_scores.mean())
    rmse_cv_std.append(fold_scores.std())

In [None]:
# One row per model with the mean and standard deviation of its
# cross-validation scores, ordered from lowest to highest mean score.
cv_summary_columns = {
    "Model": list_models,
    "RMSE_CV": rmse_cv_results,
    "RMSE_CV_STD": rmse_cv_std,
}
rmse_cv_results_df = pd.DataFrame(cv_summary_columns)
rmse_cv_results_df = rmse_cv_results_df.sort_values(by="RMSE_CV")
rmse_cv_results_df

Unnamed: 0,Model,RMSE_CV,RMSE_CV_STD
2,gb_pipeline,2.204175,0.051909
3,hist_pipeline,2.229803,0.100026
1,rf_pipeline,2.259823,0.089031
0,elasticnet_pipeline,2.309153,0.013852
