In [9]:
# pip install sktime


In [10]:
import pandas as pd
from scipy.stats import boxcox
from scipy.special import inv_boxcox
import numpy as np
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.transformations.series.detrend import Deseasonalizer
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.transformations.series.detrend import Detrender
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
from sktime.forecasting.compose import make_reduction
from sklearn.exceptions import ConvergenceWarning
import requests
from sklearn.ensemble import (
    HistGradientBoostingRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.linear_model import ElasticNetCV
from sktime.forecasting.model_selection import (
    ForecastingGridSearchCV,
    ExpandingWindowSplitter,
)
from sktime.forecasting.compose import MultiplexForecaster
from sklearn.neighbors import KNeighborsRegressor
from sktime.forecasting.ets import AutoETS
from sktime.transformations.series.boxcox import LogTransformer


import warnings

warnings.filterwarnings("ignore")

In [11]:
from sklearn.preprocessing import StandardScaler
from sktime.forecasting.compose import ForecastingPipeline
from sktime.transformations.series.adapt import TabularToSeriesAdaptor


def initialize_arima_forecaster():
    """Build the unfitted ARIMA(1, 0, 1) forecasting pipeline.

    The outer ``ForecastingPipeline`` has a ``"standardize"`` step
    (``StandardScaler`` wrapped in ``TabularToSeriesAdaptor``) followed by the
    ``"forecaster"`` step. That forecaster is a ``TransformedTargetForecaster``
    which chains a log transform, an additive deseasonalizer with period 24
    and an ARIMA model named ``"residual_forecaster"``.

    Returns
    -------
    ForecastingPipeline
        A new, unfitted pipeline instance.
    """
    deseasonalizer_daily = Deseasonalizer(sp=24, model="additive")

    pipe = ForecastingPipeline(
        steps=[
            ("standardize", TabularToSeriesAdaptor(StandardScaler())),
            (
                "forecaster",
                TransformedTargetForecaster(
                    [
                        ("log_transformer", LogTransformer()),
                        ("deseasonalizer_daily", deseasonalizer_daily),
                        # The (p, d, q) order has to be passed as a single
                        # tuple. Written as ARIMA(1, 0, 1) the three numbers
                        # are bound to the first three constructor parameters
                        # separately instead of forming the order.
                        ("residual_forecaster", ARIMA(order=(1, 0, 1))),
                    ]
                ),
            ),
        ]
    )

    return pipe


def initialize_elasticnet_forecaster():
    """Build the unfitted ElasticNetCV direct-reduction pipeline.

    Structure, outermost first:

    * ``ForecastingPipeline`` with a ``"standardize"`` step
      (``StandardScaler`` wrapped in ``TabularToSeriesAdaptor``) and a
      ``"forecaster"`` step;
    * the ``"forecaster"`` step is a ``TransformedTargetForecaster`` chaining
      a log transform, an additive deseasonalizer with period 24 and the
      ``"forecast"`` step;
    * the ``"forecast"`` step is ``ElasticNetCV(n_jobs=-1)`` reduced to a
      forecaster with the ``"direct"`` strategy and a window length of 24.

    Returns
    -------
    ForecastingPipeline
        A new, unfitted pipeline instance.
    """
    reduced_regressor = make_reduction(
        ElasticNetCV(n_jobs=-1),
        window_length=24,
        strategy="direct",
    )

    # Transformations applied to the target before the reduced regressor.
    target_steps = [
        ("log_transformer", LogTransformer()),
        ("deseasonalizer_daily", Deseasonalizer(sp=24, model="additive")),
        ("forecast", reduced_regressor),
    ]

    outer_steps = [
        ("standardize", TabularToSeriesAdaptor(StandardScaler())),
        ("forecaster", TransformedTargetForecaster(target_steps)),
    ]
    return ForecastingPipeline(steps=outer_steps)


def initialize_rf_forecaster():
    """Build the unfitted random-forest direct-reduction pipeline.

    The returned ``ForecastingPipeline`` has a ``"standardize"`` step
    (``StandardScaler`` wrapped in ``TabularToSeriesAdaptor``) and a
    ``"forecaster"`` step. The latter is a ``TransformedTargetForecaster``
    made of a log transform, an additive deseasonalizer with period 24 and a
    ``"forecast"`` step: ``RandomForestRegressor(n_estimators=200, n_jobs=-1)``
    reduced with the ``"direct"`` strategy and a window length of 24.

    Returns
    -------
    ForecastingPipeline
        A new, unfitted pipeline instance.
    """
    forest = RandomForestRegressor(n_estimators=200, n_jobs=-1)
    forest_forecaster = make_reduction(forest, window_length=24, strategy="direct")

    target_forecaster = TransformedTargetForecaster(
        [
            ("log_transformer", LogTransformer()),
            ("deseasonalizer_daily", Deseasonalizer(sp=24, model="additive")),
            ("forecast", forest_forecaster),
        ]
    )

    return ForecastingPipeline(
        steps=[
            ("standardize", TabularToSeriesAdaptor(StandardScaler())),
            ("forecaster", target_forecaster),
        ]
    )


def initialize_gb_forecaster():
    """Build the unfitted gradient-boosting direct-reduction pipeline.

    Outer ``ForecastingPipeline``: a ``"standardize"`` step (``StandardScaler``
    wrapped in ``TabularToSeriesAdaptor``) and a ``"forecaster"`` step.
    Inner ``TransformedTargetForecaster``: log transform, additive
    deseasonalizer with period 24, then ``"forecast"``, which is
    ``GradientBoostingRegressor(n_estimators=200)`` reduced with the
    ``"direct"`` strategy and a window length of 24.

    Returns
    -------
    ForecastingPipeline
        A new, unfitted pipeline instance.
    """
    # Innermost piece first: the tabular regressor turned into a forecaster.
    boosted_forecaster = make_reduction(
        GradientBoostingRegressor(n_estimators=200),
        window_length=24,
        strategy="direct",
    )

    target_steps = [
        ("log_transformer", LogTransformer()),
        ("deseasonalizer_daily", Deseasonalizer(sp=24, model="additive")),
        ("forecast", boosted_forecaster),
    ]
    target_forecaster = TransformedTargetForecaster(target_steps)

    standardizer = TabularToSeriesAdaptor(StandardScaler())
    return ForecastingPipeline(
        steps=[("standardize", standardizer), ("forecaster", target_forecaster)]
    )


def initialize_hist_forecaster():
    """Build the unfitted histogram-gradient-boosting direct-reduction pipeline.

    The pipeline mirrors the other tree-based builders in this notebook:
    a ``ForecastingPipeline`` whose ``"standardize"`` step is a
    ``StandardScaler`` wrapped in ``TabularToSeriesAdaptor`` and whose
    ``"forecaster"`` step is a ``TransformedTargetForecaster`` (log transform,
    additive deseasonalizer with period 24, then a default
    ``HistGradientBoostingRegressor`` reduced with the ``"direct"`` strategy
    and a window length of 24).

    Returns
    -------
    ForecastingPipeline
        A new, unfitted pipeline instance.
    """
    hist_forecaster = make_reduction(
        HistGradientBoostingRegressor(), window_length=24, strategy="direct"
    )
    seasonal_adjuster = Deseasonalizer(sp=24, model="additive")

    inner = TransformedTargetForecaster(
        [
            ("log_transformer", LogTransformer()),
            ("deseasonalizer_daily", seasonal_adjuster),
            ("forecast", hist_forecaster),
        ]
    )
    outer = ForecastingPipeline(
        steps=[
            ("standardize", TabularToSeriesAdaptor(StandardScaler())),
            ("forecaster", inner),
        ]
    )
    return outer


def initialize_ets_forecaster():
    """Build the unfitted AutoETS forecasting pipeline.

    The pipeline is a ``TransformedTargetForecaster`` that applies a log
    transform and an additive deseasonalizer with period 24 to the target and
    then forecasts with ``AutoETS(auto=True, sp=24, n_jobs=-1)``.

    Unlike the regressor-based builders in this notebook, no
    ``ForecastingPipeline``/``StandardScaler`` wrapper is added here.

    Returns
    -------
    TransformedTargetForecaster
        A new, unfitted pipeline instance.
    """
    deseasonalizer_daily = Deseasonalizer(sp=24, model="additive")
    pipe = TransformedTargetForecaster(
        [
            ("log_transformer", LogTransformer()),
            ("deseasonalizer_daily", deseasonalizer_daily),
            # AutoETS is already a forecaster, so it is the final step as-is.
            # make_reduction is meant for regressors (it turns a tabular or
            # time-series regressor into a forecaster); wrapping a forecaster
            # in it would hand AutoETS windowed tabular arrays at fit time.
            ("forecast", AutoETS(auto=True, sp=24, n_jobs=-1)),
        ]
    )

    return pipe

In [12]:
# Instantiate one unfitted pipeline per model family.
arima_pipeline = initialize_arima_forecaster()
elasticnet_pipeline = initialize_elasticnet_forecaster()
rf_pipeline = initialize_rf_forecaster()
gb_pipeline = initialize_gb_forecaster()
hist_pipeline = initialize_hist_forecaster()
ets_pipeline = initialize_ets_forecaster()

# Name -> pipeline lookup. The ARIMA pipeline is not part of this mapping.
forecasting_models = dict(
    elasticnet_pipeline=elasticnet_pipeline,
    rf_pipeline=rf_pipeline,
    gb_pipeline=gb_pipeline,
    hist_pipeline=hist_pipeline,
    ets_pipeline=ets_pipeline,
)

# The multiplexer switches between the regressor-based pipelines only;
# neither the ETS nor the ARIMA pipeline is registered with it.
multiplexed_names = (
    "elasticnet_pipeline",
    "rf_pipeline",
    "gb_pipeline",
    "hist_pipeline",
)
forecaster = MultiplexForecaster(
    forecasters=[(name, forecasting_models[name]) for name in multiplexed_names]
)

In [47]:
# Load the processed supply/load/price table, order it by timestamp, keep
# December 2022 onward and put it on a regular hourly index.
# NOTE(review): asfreq leaves NaN rows for any hour missing from the file --
# TODO confirm there are none, since the downstream steps would receive them.
price_old_df = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/supply_load_price.csv",
        index_col="Date (MST)",
        parse_dates=["Date (MST)"],
    )
    .sort_values(by="Date (MST)")
    .loc["2022-12":]
    .asfreq("H")
)

# Every column except "price" is used as exogenous input; "price" is the target.
X = price_old_df.drop(columns=["price"])
price_ail_df = price_old_df["price"]

# Chronological split: the last 48 rows form the test set.
y_train, y_test, X_train, X_test = temporal_train_test_split(
    price_ail_df, X, test_size=48
)

In [18]:
# Expanding-window cross-validation over the training series: the first
# training window covers 99.6% of y_train, grows by one observation per fold,
# and each fold is scored on the following 12 steps.
initial_train_size = int(len(y_train) * 0.996)
cv = ExpandingWindowSplitter(
    fh=np.arange(1, 13),
    initial_window=initial_train_size,
    step_length=1,
)
n_splits = cv.get_n_splits(y_train)
print(f"Number of Folds = {n_splits}")

Number of Folds = 1


In [20]:
# from plotting import plot_windows, get_windows
# train_windows, test_windows = get_windows(y_train, cv)
# plot_windows(y_train, train_windows, test_windows)


In [19]:
from sktime.forecasting.model_evaluation import evaluate
from sktime.performance_metrics.forecasting import MeanSquaredScaledError

# Names of the MultiplexForecaster candidates to cross-validate.
list_models = ["elasticnet_pipeline"]

# NOTE: the scorer is MeanSquaredScaledError(square_root=True), i.e. a scaled
# error, so the values collected under the "rmse" names below are scaled
# scores rather than RMSE in the units of the target.
rmse_cv_results = []
rmse_cv_std = []
for model_name in list_models:
    # Point the multiplexer at the candidate before evaluating it.
    forecaster.set_params(selected_forecaster=model_name)
    results = evaluate(
        forecaster=forecaster,
        y=y_train,
        X=X_train,
        cv=cv,
        strategy="refit",
        scoring=MeanSquaredScaledError(square_root=True),
        return_data=True,
        backend="loky",
    )
    # Aggregate the per-fold scores into a mean and a spread.
    fold_scores = results["test_MeanSquaredScaledError"]
    rmse, rmse_std = fold_scores.mean(), fold_scores.std()
    rmse_cv_results.append(rmse)
    rmse_cv_std.append(rmse_std)

In [22]:
# One row per evaluated model, lowest mean cross-validation score first.
cv_summary_columns = {
    "Model": list_models,
    "RMSE_CV": rmse_cv_results,
    "RMSE_CV_STD": rmse_cv_std,
}
rmse_cv_results_df = pd.DataFrame(cv_summary_columns).sort_values(by=["RMSE_CV"])
rmse_cv_results_df


Unnamed: 0,Model,RMSE_CV,RMSE_CV_STD
0,elasticnet_pipeline,1.024729,0.544594


In [60]:
# Fit the random-forest pipeline on the training split for horizon steps 1..12.
rf_pipeline.fit(
    y=y_train,
    X=X_train,
    fh=ForecastingHorizon(np.arange(1, 13)),
)

In [21]:
# Fit the ElasticNetCV pipeline on the training split for horizon steps 1..12.
elasticnet_pipeline.fit(
    y=y_train,
    X=X_train,
    fh=ForecastingHorizon(np.arange(1, 13)),
)

In [22]:
# Show the hyper-parameters of the fitted "forecaster" step (the
# TransformedTargetForecaster inside the outer pipeline).
fitted_target_forecaster = elasticnet_pipeline.forecaster_
fitted_target_forecaster.get_params()


{'steps': [('log_transformer', LogTransformer()),
  ('deseasonalizer_daily', Deseasonalizer(sp=24)),
  ('forecast',
   DirectTabularRegressionForecaster(estimator=ElasticNetCV(n_jobs=-1),
                                     window_length=24))],
 'log_transformer': LogTransformer(),
 'deseasonalizer_daily': Deseasonalizer(sp=24),
 'forecast': DirectTabularRegressionForecaster(estimator=ElasticNetCV(n_jobs=-1),
                                   window_length=24),
 'log_transformer__offset': 0,
 'log_transformer__scale': 1,
 'deseasonalizer_daily__model': 'additive',
 'deseasonalizer_daily__sp': 24,
 'forecast__estimator': ElasticNetCV(n_jobs=-1),
 'forecast__pooling': 'local',
 'forecast__transformers': None,
 'forecast__window_length': 24,
 'forecast__windows_identical': True,
 'forecast__estimator__alphas': None,
 'forecast__estimator__copy_X': True,
 'forecast__estimator__cv': None,
 'forecast__estimator__eps': 0.001,
 'forecast__estimator__fit_intercept': True,
 'forecast__estimato

In [40]:
# Drill down to the fitted direct-reduction forecaster and list what it
# learned; its "estimators" entry holds one regressor per horizon step.
fitted_reducer = elasticnet_pipeline.forecaster_.forecaster_
fitted_reducer.get_fitted_params()


{'estimators': [ElasticNetCV(n_jobs=-1),
  ElasticNetCV(n_jobs=-1),
  ElasticNetCV(n_jobs=-1),
  ElasticNetCV(n_jobs=-1),
  ElasticNetCV(n_jobs=-1),
  ElasticNetCV(n_jobs=-1),
  ElasticNetCV(n_jobs=-1),
  ElasticNetCV(n_jobs=-1),
  ElasticNetCV(n_jobs=-1),
  ElasticNetCV(n_jobs=-1),
  ElasticNetCV(n_jobs=-1),
  ElasticNetCV(n_jobs=-1)],
 'transformers': None,
 'window_length': 24}

In [52]:
import pandas as pd


def create_lagged_columns(X, lag_range=24):
    """Generate ``"<column>_lag<k>"`` labels for every column and lag.

    Parameters
    ----------
    X : iterable
        Column names (iterating a DataFrame also yields its column names).
    lag_range : int, default=24
        Highest lag to label; lags run from 1 to ``lag_range`` inclusive.

    Returns
    -------
    list of str
        Labels grouped by column, in the iteration order of ``X``, with lags
        ascending within each column.
    """
    return [
        f"{column}_lag{lag}"
        for column in X
        for lag in range(1, lag_range + 1)
    ]

# Labels for the reducer's input features: the target first, then every
# exogenous column, each with lags 1..24.
# NOTE(review): this assumes the reducer flattens each 24-step window with the
# most recent observation first. If the installed sktime version orders the
# window oldest-first, "_lag1" and "_lag24" are swapped -- TODO confirm.
feature_columns = ['price', *X.columns.values.tolist()]
l = create_lagged_columns(feature_columns, lag_range=24)

In [54]:
# Pair each coefficient of the first per-step ElasticNetCV model
# (estimators_[0]) with its lag label.
first_step_estimator = elasticnet_pipeline.forecaster_.forecaster_.estimators_[0]
coef_df = pd.DataFrame(
    {
        'Coefficient': first_step_estimator.coef_,
        'Label': l,
    }
)

Unnamed: 0,Coefficient,Label
0,0.038054,price_lag1
1,0.026322,price_lag2
2,0.000529,price_lag3
3,-0.000000,price_lag4
4,-0.000000,price_lag5
...,...,...
1195,0.000000,renewable_energy_penetration_lag20
1196,0.000000,renewable_energy_penetration_lag21
1197,0.000000,renewable_energy_penetration_lag22
1198,0.000000,renewable_energy_penetration_lag23


In [59]:
# Show the ten largest coefficients of the first-step model.
# NOTE(review): the ranking is by signed value, so strongly negative
# coefficients never appear here -- rank by absolute value if "top features"
# is meant as importance.
ranked_coef_df = coef_df.sort_values(by='Coefficient', ascending=False)
ranked_coef_df.iloc[:10]

Unnamed: 0,Coefficient,Label
23,0.52352,price_lag24
815,0.075052,hydro_supply_mix_lag24
1079,0.061297,gas_cost_lag24
31,0.042804,ail_lag8
0,0.038054,price_lag1
778,0.037429,solar_supply_mix_lag11
14,0.026615,price_lag15
1,0.026322,price_lag2
573,0.024159,hydro_reserve_margin_lag22
998,0.023719,renewable_energy_ratio_lag15


In [None]:
def top_coefficients(estimator, labels, n=10):
    """Return the ``n`` largest coefficients of a fitted linear estimator.

    Parameters
    ----------
    estimator : object
        Fitted estimator exposing a ``coef_`` array.
    labels : list of str
        One label per coefficient, in the same order as ``coef_``.
    n : int, default=10
        Number of rows to keep.

    Returns
    -------
    pandas.DataFrame
        Columns ``Coefficient`` and ``Label``, sorted by coefficient in
        descending (signed) order and truncated to ``n`` rows.
    """
    return (
        pd.DataFrame({'Coefficient': estimator.coef_, 'Label': labels})
        .sort_values(by=['Coefficient'], ascending=False)
        .head(n)
    )


# The direct reducer holds one fitted ElasticNetCV per horizon step. Index 0
# is inspected separately via coef_df, so this cell covers every remaining
# estimator in a single table instead of one copy-pasted cell per index.
horizon_estimators = elasticnet_pipeline.forecaster_.forecaster_.estimators_

top_coefs_by_estimator = pd.concat(
    {
        idx: top_coefficients(horizon_estimators[idx], l)
        for idx in range(1, len(horizon_estimators))
    },
    names=["estimator", "feature_index"],
)

# Kept for backward compatibility: the original cells left coef_df_2 bound to
# the full (unsorted) coefficient table of the last estimator.
coef_df_2 = pd.DataFrame({'Coefficient': horizon_estimators[-1].coef_, 'Label': l})

top_coefs_by_estimator