In [20]:
# pip install sktime

- Uses the features selected during the EDA.
- Cross-validates the tuned `lgbm_pipeline`.

In [21]:
import pandas as pd
from scipy.stats import boxcox
from scipy.special import inv_boxcox
import numpy as np
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.transformations.series.detrend import Deseasonalizer
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.transformations.series.detrend import Detrender
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
from sktime.forecasting.compose import make_reduction
from sklearn.exceptions import ConvergenceWarning
import requests
from sklearn.ensemble import (
    HistGradientBoostingRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.linear_model import ElasticNetCV
from sktime.forecasting.model_selection import (
    ForecastingGridSearchCV,
    ExpandingWindowSplitter,
)
from sktime.forecasting.compose import MultiplexForecaster
from sklearn.neighbors import KNeighborsRegressor
from sktime.forecasting.ets import AutoETS
from sktime.transformations.series.boxcox import LogTransformer


import warnings
warnings.filterwarnings("ignore")

In [22]:
from sklearn.preprocessing import StandardScaler
from sktime.forecasting.compose import ForecastingPipeline
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.forecasting.arima import AutoARIMA
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor


def initialize_elasticnet_forecaster():
    """Build the ElasticNet forecasting pipeline.

    An ``ElasticNetCV(n_jobs=-1)`` regressor is converted into a forecaster
    with ``make_reduction`` (24-step window, "direct" strategy) and wrapped as
    the single "forecast" step of a ``TransformedTargetForecaster``.

    Returns:
        TransformedTargetForecaster: the newly constructed pipeline.
    """
    regressor = ElasticNetCV(n_jobs=-1)
    reduced_forecaster = make_reduction(
        regressor, window_length=24, strategy="direct"
    )
    return TransformedTargetForecaster([("forecast", reduced_forecaster)])


def initialize_rf_forecaster():
    """Build the random-forest forecasting pipeline.

    A ``RandomForestRegressor`` with 100 trees (``n_jobs=-1``) is converted
    into a forecaster with ``make_reduction`` (24-step window, "direct"
    strategy) and wrapped as the single "forecast" step of a
    ``TransformedTargetForecaster``.

    Returns:
        TransformedTargetForecaster: the newly constructed pipeline.
    """
    regressor = RandomForestRegressor(n_estimators=100, n_jobs=-1)
    reduced_forecaster = make_reduction(
        regressor, window_length=24, strategy="direct"
    )
    return TransformedTargetForecaster([("forecast", reduced_forecaster)])


def initialize_gb_forecaster():
    """Build the gradient-boosting forecasting pipeline.

    A ``GradientBoostingRegressor`` with 200 estimators is converted into a
    forecaster with ``make_reduction`` (24-step window, "direct" strategy) and
    wrapped as the single "forecast" step of a ``TransformedTargetForecaster``.

    Returns:
        TransformedTargetForecaster: the newly constructed pipeline.
    """
    regressor = GradientBoostingRegressor(n_estimators=200)
    reduced_forecaster = make_reduction(
        regressor, window_length=24, strategy="direct"
    )
    return TransformedTargetForecaster([("forecast", reduced_forecaster)])


def initialize_hist_forecaster():
    """Build the histogram-gradient-boosting forecasting pipeline.

    A default ``HistGradientBoostingRegressor`` is converted into a forecaster
    with ``make_reduction`` (24-step window, "direct" strategy) and wrapped as
    the single "forecast" step of a ``TransformedTargetForecaster``.

    The pipeline contains no deseasonalising step: an earlier version built a
    ``Deseasonalizer`` here but never added it to the pipeline, so that unused
    object has been removed.

    Returns:
        TransformedTargetForecaster: the newly constructed pipeline.
    """
    pipe = TransformedTargetForecaster(
        [
            (
                "forecast",
                make_reduction(
                    HistGradientBoostingRegressor(),
                    window_length=24,
                    strategy="direct",
                ),
            ),
        ]
    )

    return pipe


def initialize_cat_forecaster():
    """Build the CatBoost forecasting pipeline.

    A ``CatBoostRegressor`` (``verbose=0``, 100 estimators) is converted into a
    forecaster with ``make_reduction`` (24-step window, "direct" strategy) and
    wrapped as the single "forecast" step of a ``TransformedTargetForecaster``.

    Returns:
        TransformedTargetForecaster: the newly constructed pipeline.
    """
    regressor = CatBoostRegressor(verbose=0, n_estimators=100)
    reduced_forecaster = make_reduction(
        regressor, window_length=24, strategy="direct"
    )
    return TransformedTargetForecaster([("forecast", reduced_forecaster)])


import ast

# Tuned LightGBM hyperparameters, kept in their serialized dict-literal form.
string_dict = "{'colsample_bytree': 0.7281869913357373, 'learning_rate': 0.08299509553463873, 'max_depth': 20, 'min_child_weight': 0.19733949139352877, 'n_estimators': 3, 'num_leaves': 16, 'reg_alpha': 0.8332138510570211, 'reg_lambda': 0.1975545061747418, 'subsample': 0.8293201208815428}"

# ast.literal_eval only accepts Python literals, so it yields the same dict as
# eval() would for this string without being able to execute arbitrary code.
lgbm_dictionary = ast.literal_eval(string_dict)


def initialize_lgbm_forecaster():
    """Build the LightGBM forecasting pipeline.

    Only ``max_depth``, ``reg_alpha`` and ``reg_lambda`` are read from the
    module-level ``lgbm_dictionary``; ``n_estimators`` is fixed at 200 and no
    other ``LGBMRegressor`` argument is passed. The regressor is converted into
    a forecaster with ``make_reduction`` (24-step window, "direct" strategy)
    and wrapped as the single "forecast" step of a
    ``TransformedTargetForecaster``.

    Returns:
        TransformedTargetForecaster: the newly constructed pipeline.
    """
    tuned_params = {
        name: lgbm_dictionary[name]
        for name in ("max_depth", "reg_alpha", "reg_lambda")
    }
    regressor = LGBMRegressor(n_estimators=200, **tuned_params)
    reduced_forecaster = make_reduction(
        regressor, window_length=24, strategy="direct"
    )
    return TransformedTargetForecaster([("forecast", reduced_forecaster)])


def initialize_xgb_forecaster():
    """Build the XGBoost forecasting pipeline.

    An ``XGBRegressor`` with the ``"reg:squarederror"`` objective is converted
    into a forecaster with ``make_reduction`` (24-step window, "direct"
    strategy) and wrapped as the single "forecast" step of a
    ``TransformedTargetForecaster``.

    Returns:
        TransformedTargetForecaster: the newly constructed pipeline.
    """
    regressor = XGBRegressor(objective="reg:squarederror")
    reduced_forecaster = make_reduction(
        regressor, window_length=24, strategy="direct"
    )
    return TransformedTargetForecaster([("forecast", reduced_forecaster)])

In [23]:
# Registry of candidate pipelines, keyed by the name used when selecting
# models for evaluation. Each pipeline is constructed exactly once, in the
# order listed.
forecasting_models = {
    "elasticnet_pipeline": initialize_elasticnet_forecaster(),
    "rf_pipeline": initialize_rf_forecaster(),
    "gb_pipeline": initialize_gb_forecaster(),
    "hist_pipeline": initialize_hist_forecaster(),
    "cat_pipeline": initialize_cat_forecaster(),
    "lgbm_pipeline": initialize_lgbm_forecaster(),
    "xgb_pipeline": initialize_xgb_forecaster(),
}

# Also expose every pipeline under its own top-level name; each name refers to
# the same object that is stored in the registry.
elasticnet_pipeline = forecasting_models["elasticnet_pipeline"]
rf_pipeline = forecasting_models["rf_pipeline"]
gb_pipeline = forecasting_models["gb_pipeline"]
hist_pipeline = forecasting_models["hist_pipeline"]
cat_pipeline = forecasting_models["cat_pipeline"]
lgbm_pipeline = forecasting_models["lgbm_pipeline"]
xgb_pipeline = forecasting_models["xgb_pipeline"]

In [24]:
# Columns that are not created in this notebook; they are expected to already
# exist in the loaded dataset.
_dataset_feature_columns = [
    "other_tng",
    "gas_tng_ratio",
    "renewable_energy_ratio",
    "other_avail",
    "other_reserve_margin",
    "gas_reserve_margin",
    "storage_avail",
    "gas_tng",
    "hydro_avail",
    "wind_avail",
    "other_supply_mix",
    "renewable_energy_penetration",
    "gas_price",
    "gas_supply_mix",
    "relative_gas_reserve",
    "load_on_gas_reserve",
    "gas_cost",
]

# Rolling / exponentially weighted statistics of `price` that this notebook
# adds to the frame after loading the data.
_price_window_columns = [
    "rolling_mean",
    "rolling_std",
    "rolling_min",
    "rolling_max",
    "rolling_median",
    "exp_moving_avg",
]

# Exogenous feature columns handed to the forecasters, in a fixed order.
selected_features = _dataset_feature_columns + _price_window_columns

In [25]:
window = 24  # rows per rolling window (hours, given the hourly frequency set below)

# Load the table with "Date (MST)" parsed as the datetime index, order it by
# time, and conform it to an hourly frequency.
price_old_df = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/supply_load_price.csv",
        parse_dates=["Date (MST)"],
        index_col="Date (MST)",
    )
    .sort_values(by="Date (MST)")
    .asfreq("H")
)

hourly_price = price_old_df["price"]
price_windows = hourly_price.rolling(window)

# For each statistic: take it over a `window`-row rolling window, average two
# consecutive results, then shift by -window // 2 rows (-12 here).
# NOTE(review): the negative shift moves values to earlier rows, so each
# rolling_* value is built partly from prices recorded after that row's
# timestamp. These columns are later used as predictors of `price` -- confirm
# that this look-ahead is intended.
for stat_name in ("mean", "std", "min", "max", "median"):
    windowed_stat = getattr(price_windows, stat_name)()
    price_old_df[f"rolling_{stat_name}"] = (
        windowed_stat.rolling(2).mean().shift(-window // 2)
    )

# Exponentially weighted mean of price with span 24 (not shifted).
price_old_df["exp_moving_avg"] = hourly_price.ewm(span=24).mean()

In [26]:
# Restrict to the modelling period and keep only the feature columns plus the
# target.
price_old_df_filtered = price_old_df.loc["2022-12-01":"2023-03-28", selected_features + ["price"]]

y = price_old_df_filtered["price"]

# Take an explicit copy before modifying columns: assigning into a selection of
# another frame triggers pandas' SettingWithCopyWarning, which the global
# warnings filter in this notebook would otherwise hide.
X = price_old_df_filtered[selected_features].copy()

# Scale ratios to percentages.
percent_columns = [
    "gas_supply_mix",
    "other_supply_mix",
    "gas_reserve_margin",
    "other_reserve_margin",
]
X[percent_columns] = X[percent_columns] * 100

In [27]:
# The first training window covers 90% of the rows; every fold then grows the
# window by 12 rows and forecasts steps 1 through 12 ahead.
initial_train_size = int(len(X) * 0.9)
forecast_steps = np.arange(1, 13)

cv = ExpandingWindowSplitter(
    initial_window=initial_train_size,
    step_length=12,
    fh=forecast_steps,
)

n_splits = cv.get_n_splits(y)
print(f"Number of Folds = {n_splits}")

Number of Folds = 23


In [28]:
from sktime.forecasting.model_evaluation import evaluate
from sktime.performance_metrics.forecasting import MeanSquaredScaledError, MeanSquaredError


# Names (keys of `forecasting_models`) of the pipelines to cross-validate.
list_models = ["lgbm_pipeline"]

# Per-model mean and standard deviation of the fold RMSE values, in the same
# order as `list_models`.
rmse_cv_results = []
rmse_cv_std = []
for model_name in list_models:
    print(model_name)
    results = evaluate(
        forecaster=forecasting_models[model_name],
        y=y,
        X=X,
        cv=cv,
        strategy="refit",
        return_data=True,
        scoring=MeanSquaredError(square_root=True),
        backend="loky",
        error_score="raise",
    )

    # One score per fold; square_root=True makes each of them an RMSE.
    fold_rmse = results["test_MeanSquaredError"]
    rmse_cv_results.append(fold_rmse.mean())
    rmse_cv_std.append(fold_rmse.std())

lgbm_pipeline


In [29]:
# One row per evaluated model, ordered from lowest to highest mean CV RMSE.
summary_columns = {
    "Model": list_models,
    "RMSE_CV": rmse_cv_results,
    "RMSE_CV_STD": rmse_cv_std,
}
rmse_cv_results_df = pd.DataFrame(summary_columns).sort_values(by="RMSE_CV")
rmse_cv_results_df

Unnamed: 0,Model,RMSE_CV,RMSE_CV_STD
0,lgbm_pipeline,120.594785,66.50479
