In [2]:
# One-time setup: uncomment to install sktime into the kernel's environment.
# %pip install sktime

- Uses the features selected during the EDA.
- Uses the tuned `lgbm_pipeline`; its hyperparameters are searched with `ForecastingRandomizedSearchCV` below.

In [3]:
import pandas as pd
from scipy.stats import boxcox
from scipy.special import inv_boxcox
import numpy as np
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.transformations.series.detrend import Deseasonalizer
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.transformations.series.detrend import Detrender
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
from sktime.forecasting.compose import make_reduction
from sklearn.exceptions import ConvergenceWarning
import requests
from sklearn.ensemble import (
    HistGradientBoostingRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.linear_model import ElasticNetCV
from sktime.forecasting.model_selection import (
    ForecastingGridSearchCV,
    ExpandingWindowSplitter,
)
from sktime.forecasting.compose import MultiplexForecaster
from sklearn.neighbors import KNeighborsRegressor
from sktime.forecasting.ets import AutoETS
from sktime.transformations.series.boxcox import LogTransformer


import warnings
# Suppresses every warning for the rest of the session, whatever its category.
# NOTE(review): this also hides deprecation and convergence notices — consider
# narrowing the filter to the specific categories that are known to be noisy.
warnings.filterwarnings("ignore")

In [4]:
from sklearn.preprocessing import StandardScaler
from sktime.forecasting.compose import ForecastingPipeline
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.forecasting.arima import AutoARIMA
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor


def _build_direct_reduction_pipeline(regressor, window_length=24, strategy="direct"):
    """Wrap a tabular regressor in a one-step forecasting pipeline.

    The regressor is converted into a forecaster with ``make_reduction`` and
    placed as the only step of a ``TransformedTargetForecaster``. The step is
    named ``"forecast"``, so the regressor's hyperparameters are addressed as
    ``forecast__estimator__<param>`` when tuning.

    Parameters
    ----------
    regressor : estimator
        Unfitted scikit-learn compatible regressor.
    window_length : int, default=24
        Number of past observations handed to ``make_reduction``.
    strategy : str, default="direct"
        Reduction strategy handed to ``make_reduction``.

    Returns
    -------
    TransformedTargetForecaster
        A new, unfitted pipeline.
    """
    return TransformedTargetForecaster(
        [
            (
                "forecast",
                make_reduction(
                    regressor,
                    window_length=window_length,
                    strategy=strategy,
                ),
            ),
        ]
    )


def initialize_elasticnet_forecaster():
    """Return an unfitted pipeline built around ``ElasticNetCV(n_jobs=-1)``."""
    return _build_direct_reduction_pipeline(ElasticNetCV(n_jobs=-1))


def initialize_rf_forecaster():
    """Return an unfitted pipeline built around a 100-tree ``RandomForestRegressor``."""
    return _build_direct_reduction_pipeline(
        RandomForestRegressor(n_estimators=100, n_jobs=-1)
    )


def initialize_gb_forecaster():
    """Return an unfitted pipeline built around a 200-estimator ``GradientBoostingRegressor``."""
    return _build_direct_reduction_pipeline(
        GradientBoostingRegressor(n_estimators=200)
    )


def initialize_hist_forecaster():
    """Return an unfitted pipeline built around a default ``HistGradientBoostingRegressor``."""
    return _build_direct_reduction_pipeline(HistGradientBoostingRegressor())


def initialize_cat_forecaster():
    """Return an unfitted pipeline built around a silent, 100-estimator ``CatBoostRegressor``."""
    return _build_direct_reduction_pipeline(
        CatBoostRegressor(verbose=0, n_estimators=100)
    )


def initialize_lgbm_forecaster():
    """Return an unfitted pipeline built around a default ``LGBMRegressor``."""
    return _build_direct_reduction_pipeline(LGBMRegressor())


def initialize_xgb_forecaster():
    """Return an unfitted pipeline built around ``XGBRegressor`` with a squared-error objective."""
    return _build_direct_reduction_pipeline(
        XGBRegressor(objective="reg:squarederror")
    )

In [5]:
# Build one fresh, unfitted pipeline per candidate regressor, keyed by the name
# used to refer to it.
forecasting_models = {
    "elasticnet_pipeline": initialize_elasticnet_forecaster(),
    "rf_pipeline": initialize_rf_forecaster(),
    "gb_pipeline": initialize_gb_forecaster(),
    "hist_pipeline": initialize_hist_forecaster(),
    "cat_pipeline": initialize_cat_forecaster(),
    "lgbm_pipeline": initialize_lgbm_forecaster(),
    "xgb_pipeline": initialize_xgb_forecaster(),
}

# Module-level aliases for the same objects, so other cells can use a pipeline
# by its variable name instead of going through the dictionary.
elasticnet_pipeline = forecasting_models["elasticnet_pipeline"]
rf_pipeline = forecasting_models["rf_pipeline"]
gb_pipeline = forecasting_models["gb_pipeline"]
hist_pipeline = forecasting_models["hist_pipeline"]
cat_pipeline = forecasting_models["cat_pipeline"]
lgbm_pipeline = forecasting_models["lgbm_pipeline"]
xgb_pipeline = forecasting_models["xgb_pipeline"]

In [6]:
# Names of the exogenous columns used as X. Order matters: it becomes the
# column order of the feature frame.
selected_features = [
    # Columns expected to already exist in the loaded dataset.
    *(
        "other_tng",
        "gas_tng_ratio",
        "renewable_energy_ratio",
        "other_avail",
        "other_reserve_margin",
        "gas_reserve_margin",
        "storage_avail",
        "gas_tng",
        "hydro_avail",
        "wind_avail",
        "other_supply_mix",
        "renewable_energy_penetration",
        "gas_price",
        "gas_supply_mix",
        "relative_gas_reserve",
        "load_on_gas_reserve",
        "gas_cost",
    ),
    # Statistics derived from the `price` column in the data-loading cell.
    *(
        "rolling_mean",
        "rolling_std",
        "rolling_min",
        "rolling_max",
        "rolling_median",
        "exp_moving_avg",
    ),
]

In [7]:
# Load the processed supply/load/price table, order it by timestamp and put it
# on a regular hourly index (timestamps absent from the CSV become NaN rows).
price_old_df = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/supply_load_price.csv",
        parse_dates=["Date (MST)"],
        index_col="Date (MST)",
    )
    .sort_values(by="Date (MST)")
    .asfreq("H")
)

window = 24

# Each statistic is taken over a `window`-row rolling window of `price`,
# averaged with the previous row's value (rolling(2).mean()) and then moved
# 12 rows earlier (`-window // 2` with window = 24).
# NOTE(review): because of the negative shift, a row's value depends on prices
# from later timestamps — confirm this look-ahead is acceptable for features
# that are used to forecast `price`.
price_rolling = price_old_df["price"].rolling(window)
centered_stats = {
    "rolling_mean": price_rolling.mean,
    "rolling_std": price_rolling.std,
    "rolling_min": price_rolling.min,
    "rolling_max": price_rolling.max,
    "rolling_median": price_rolling.median,
}
for column, compute in centered_stats.items():
    price_old_df[column] = compute().rolling(2).mean().shift(-window // 2)

# Exponentially weighted mean of the price; no shift is applied to this one.
price_old_df["exp_moving_avg"] = price_old_df["price"].ewm(span=24).mean()

In [8]:
# Restrict the data to the modelling period (label slicing, both end dates
# included) and keep only the selected features plus the target.
price_old_df_filtered = price_old_df.loc["2022-12-01":"2023-03-28", selected_features + ["price"]]

y = price_old_df_filtered["price"]

# Take an explicit copy so the scaling below writes to an independent frame.
# Assigning columns on a frame derived from a slice is chained assignment and
# raises SettingWithCopyWarning.
X = price_old_df_filtered[selected_features].copy()

# Scale ratios to percentages.
ratio_columns = [
    "gas_supply_mix",
    "other_supply_mix",
    "gas_reserve_margin",
    "other_reserve_margin",
]
X[ratio_columns] = X[ratio_columns] * 100

In [9]:
# Expanding-window cross-validation: the first training window holds 90% of
# the rows, each later fold adds 12 more, and every fold is evaluated on the
# next 12 steps.
initial_train_size = int(len(X) * 0.9)
forecast_steps = np.arange(1, 13)

cv = ExpandingWindowSplitter(
    initial_window=initial_train_size,
    step_length=12,
    fh=forecast_steps,
)

n_splits = cv.get_n_splits(y)
print(f"Number of Folds = {n_splits}")

Number of Folds = 23


In [12]:
# List every parameter the pipeline exposes. The nested
# `forecast__estimator__*` names are the keys a hyperparameter search must use.
lgbm_pipeline.get_params()

{'steps': [('forecast',
   DirectTabularRegressionForecaster(estimator=LGBMRegressor(), window_length=24))],
 'forecast': DirectTabularRegressionForecaster(estimator=LGBMRegressor(), window_length=24),
 'forecast__estimator': LGBMRegressor(),
 'forecast__pooling': 'local',
 'forecast__transformers': None,
 'forecast__window_length': 24,
 'forecast__windows_identical': True,
 'forecast__estimator__boosting_type': 'gbdt',
 'forecast__estimator__class_weight': None,
 'forecast__estimator__colsample_bytree': 1.0,
 'forecast__estimator__importance_type': 'split',
 'forecast__estimator__learning_rate': 0.1,
 'forecast__estimator__max_depth': -1,
 'forecast__estimator__min_child_samples': 20,
 'forecast__estimator__min_child_weight': 0.001,
 'forecast__estimator__min_split_gain': 0.0,
 'forecast__estimator__n_estimators': 100,
 'forecast__estimator__n_jobs': -1,
 'forecast__estimator__num_leaves': 31,
 'forecast__estimator__objective': None,
 'forecast__estimator__random_state': None,
 'forec

In [13]:
from sklearn.datasets import make_regression
from scipy.stats import uniform, randint
from sktime.forecasting.model_selection import ForecastingRandomizedSearchCV
from sktime.performance_metrics.forecasting import MeanSquaredError

# Search space for the regressor inside the pipeline. Keys follow the
# `<step>__estimator__<param>` naming exposed by `lgbm_pipeline.get_params()`.
learning_rate_min, learning_rate_max = 0.01, 0.2
param_dist = {
    # L1 / L2 regularisation strengths, each drawn uniformly from [0, 1].
    "forecast__estimator__reg_alpha": uniform(0, 1),
    "forecast__estimator__reg_lambda": uniform(0, 1),
    # Maximum tree depth: an integer from 5 to 29.
    "forecast__estimator__max_depth": range(5, 30),
    # Learning rate drawn uniformly between the two bounds above
    # (scipy's `uniform` takes the lower bound and the interval width).
    "forecast__estimator__learning_rate": uniform(
        loc=learning_rate_min, scale=learning_rate_max - learning_rate_min
    ),
}

# Score candidates by root mean squared error.
rmse_metric = MeanSquaredError(square_root=True)

# NOTE(review): n_iter=1 samples a single candidate, so no comparison between
# parameter settings takes place — confirm this is intended (e.g. a smoke run).
random_search = ForecastingRandomizedSearchCV(
    lgbm_pipeline,
    cv=cv,
    param_distributions=param_dist,
    n_iter=1,
    scoring=rmse_metric,
    n_jobs=-1,
    verbose=1,
    random_state=1,
    error_score="raise",
)

In [14]:
# Run the search: every sampled candidate is fitted on each expanding-window
# fold of `cv` and scored over forecasting steps 1..12.
random_search.fit(y, X, fh=np.arange(1, 13))

Fitting 23 folds for each of 1 candidates, totalling 23 fits


In [15]:
# Mean score over the CV folds for the best candidate; the metric is
# MeanSquaredError with square_root=True, i.e. RMSE.
random_search.best_score_

123.62147381296215

In [16]:
# Hyperparameter values of the best-ranked candidate.
random_search.best_params_

{'forecast__estimator__learning_rate': 0.08923418089348906,
 'forecast__estimator__max_depth': 17,
 'forecast__estimator__reg_alpha': 0.9325573593386588,
 'forecast__estimator__reg_lambda': 0.12812444792935673}

In [17]:
# Per-candidate summary table: mean test score, mean fit and prediction time,
# the sampled parameters and the resulting rank.
random_search.cv_results_

Unnamed: 0,mean_test_MeanSquaredError,mean_fit_time,mean_pred_time,params,rank_test_MeanSquaredError
0,123.621474,24.164921,0.005962,{'forecast__estimator__learning_rate': 0.08923...,1.0
