In [2]:
# pip install sktime

- Using the selected features from the EDA.
- Using 'lgbm_pipeline', and 'elasticnet_pipeline'
- lgbm scores improved. elasticnet scores worsened.

In [3]:
import pandas as pd
from scipy.stats import boxcox
from scipy.special import inv_boxcox
import numpy as np
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.transformations.series.detrend import Deseasonalizer
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.transformations.series.detrend import Detrender
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
from sktime.forecasting.compose import make_reduction
from sklearn.exceptions import ConvergenceWarning
import requests
from sklearn.ensemble import (
    HistGradientBoostingRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.linear_model import ElasticNetCV
from sktime.forecasting.model_selection import (
    ForecastingGridSearchCV,
    ExpandingWindowSplitter,
)
from sktime.forecasting.compose import MultiplexForecaster
from sklearn.neighbors import KNeighborsRegressor
from sktime.forecasting.ets import AutoETS
from sktime.transformations.series.boxcox import LogTransformer


import warnings
warnings.filterwarnings("ignore")

In [4]:
from sklearn.preprocessing import StandardScaler
from sktime.forecasting.compose import ForecastingPipeline
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.forecasting.arima import AutoARIMA
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor


def initialize_elasticnet_forecaster():
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                ElasticNetCV(n_jobs=-1),
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )

    return pipe


def initialize_rf_forecaster():
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                RandomForestRegressor(n_estimators=100, n_jobs=-1),
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )

    return pipe


def initialize_gb_forecaster():
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                GradientBoostingRegressor(n_estimators=200),
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )
           
    return pipe


def initialize_hist_forecaster():
    deseasonalizer_daily = Deseasonalizer(sp=24, model="additive")
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                HistGradientBoostingRegressor(),
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )
            
    return pipe

def initialize_cat_forecaster():
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                CatBoostRegressor(verbose=0, n_estimators=100),
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )

    return pipe


def initialize_lgbm_forecaster():
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                LGBMRegressor(
                        learning_rate=best['learning_rate'],
                        max_depth=best['max_depth'],
                        n_estimators=best['n_estimators'],
                        num_leaves=best['num_leaves'],
                        min_child_weight=best['min_child_weight'],
                        colsample_bytree=best['colsample_bytree'],
                        subsample=best['subsample'],
                        reg_alpha=best['reg_alpha'],
                        reg_lambda=best['reg_lambda'],
                    )
                                
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )

    return pipe


def initialize_xgb_forecaster():
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                XGBRegressor(objective="reg:squarederror"),
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )

    return pipe

In [5]:
elasticnet_pipeline = initialize_elasticnet_forecaster()
rf_pipeline = initialize_rf_forecaster()
gb_pipeline = initialize_gb_forecaster()
hist_pipeline = initialize_hist_forecaster()
cat_pipeline = initialize_cat_forecaster()
lgbm_pipeline = initialize_lgbm_forecaster()
xgb_pipeline = initialize_xgb_forecaster()


forecasting_models = {
    "elasticnet_pipeline": elasticnet_pipeline,
    "rf_pipeline": rf_pipeline,
    "gb_pipeline": gb_pipeline,
    "hist_pipeline": hist_pipeline,
    "cat_pipeline": cat_pipeline,
    "lgbm_pipeline": lgbm_pipeline,
    "xgb_pipeline": xgb_pipeline,
}

In [6]:
selected_features = [
    "other_tng",
    "gas_tng_ratio",
    "renewable_energy_ratio",
    "other_avail",
    "other_reserve_margin",
    "gas_reserve_margin",
    "storage_avail",
    "gas_tng",
    "hydro_avail",
    "wind_avail",
    "other_supply_mix",
    "renewable_energy_penetration",
    "gas_price",
    "gas_supply_mix",
    "relative_gas_reserve",
    "load_on_gas_reserve",
    "gas_cost",
    "rolling_mean",
    "rolling_std",
    "rolling_min",
    "rolling_max",
    "rolling_median",
    "exp_moving_avg",
]

In [7]:
price_old_df = pd.read_csv(
    "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/supply_load_price.csv",
    parse_dates=["Date (MST)"],
    index_col="Date (MST)",
)

window = 24
price_old_df = price_old_df.sort_values(by="Date (MST)")
price_old_df = price_old_df.asfreq("H")

price_old_df['rolling_mean'] = price_old_df['price'].rolling(window).mean().rolling(2).mean().shift(-window // 2)
price_old_df['rolling_std'] = price_old_df['price'].rolling(window).std().rolling(2).mean().shift(-window // 2)
price_old_df['rolling_min'] = price_old_df['price'].rolling(window).min().rolling(2).mean().shift(-window // 2)
price_old_df['rolling_max'] = price_old_df['price'].rolling(window).max().rolling(2).mean().shift(-window // 2)
price_old_df['rolling_median'] = price_old_df['price'].rolling(window).median().rolling(2).mean().shift(-window // 2)
price_old_df['exp_moving_avg'] = price_old_df['price'].ewm(span=24).mean()

In [8]:
price_old_df_filtered = price_old_df.loc["2022-12-01":"2023-03-28", selected_features + ["price"]]

y = price_old_df_filtered["price"]
X = price_old_df_filtered[selected_features]

# # scale ratios to percentages
X["gas_supply_mix"] = X["gas_supply_mix"] * 100
X["other_supply_mix"] = X["other_supply_mix"] * 100
X['gas_reserve_margin'] = X['gas_reserve_margin'] * 100
X['other_reserve_margin'] = X['other_reserve_margin'] * 100

In [9]:
cv = ExpandingWindowSplitter(
    initial_window=int(len(X) * 0.9), step_length=12, fh=np.arange(1, 13)
)

n_splits = cv.get_n_splits(y)
print(f"Number of Folds = {n_splits}")

Number of Folds = 23


In [10]:
forecasting_models['lgbm_pipeline'].fit(y, X, fh=np.arange(1, 13))

In [11]:
forecasting_models['lgbm_pipeline'].get_params()

{'steps': [('forecast',
   DirectTabularRegressionForecaster(estimator=LGBMRegressor(), window_length=24))],
 'forecast': DirectTabularRegressionForecaster(estimator=LGBMRegressor(), window_length=24),
 'forecast__estimator': LGBMRegressor(),
 'forecast__pooling': 'local',
 'forecast__transformers': None,
 'forecast__window_length': 24,
 'forecast__windows_identical': True,
 'forecast__estimator__boosting_type': 'gbdt',
 'forecast__estimator__class_weight': None,
 'forecast__estimator__colsample_bytree': 1.0,
 'forecast__estimator__importance_type': 'split',
 'forecast__estimator__learning_rate': 0.1,
 'forecast__estimator__max_depth': -1,
 'forecast__estimator__min_child_samples': 20,
 'forecast__estimator__min_child_weight': 0.001,
 'forecast__estimator__min_split_gain': 0.0,
 'forecast__estimator__n_estimators': 100,
 'forecast__estimator__n_jobs': -1,
 'forecast__estimator__num_leaves': 31,
 'forecast__estimator__objective': None,
 'forecast__estimator__random_state': None,
 'forec

In [12]:
import pandas as pd
def create_lagged_columns(X, lag_range=24):
    lagged_names = []
    for col in X:
        for lag in range(lag_range, 0, -1):
            lagged_names.append(f"{col}_lag{lag}")
    return lagged_names
labels = create_lagged_columns(['price'] + X.columns.values.tolist(), lag_range=24)
len(labels)

576

In [19]:
y_transformed = forecasting_models['lgbm_pipeline'].transform(y)  # does all the trasnformations on y
y_enc, X_enc = forecasting_models['lgbm_pipeline'].forecaster_._transform(y_transformed, X)  # TransfomedTargetForecaster does not transform X, only y. 

In [20]:
y_enc = pd.DataFrame(y_enc)
y_enc.columns = ['price1', 'price2', 'price3', 'price4','price5', 'price6','price7', 'price8','price9', 'price10', 'price11', 'price12']

In [21]:
X_enc = pd.DataFrame(X_enc, columns=labels)

In [33]:
import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
from hyperopt import hp, tpe, Trials, fmin

# Assuming you have a dataframe 'df' with columns 'price', 'demand', 'supply'

# Split data into X and y
X = X_enc
y = y_enc['price1']

# Define objective function for Hyperopt optimization
def objective(params, n_folds=5):
    global X
    global y
    
    model = forecasting_models['lgbm_pipeline'].forecaster_.estimators_[0]
    
    tscv = TimeSeriesSplit(n_splits=n_folds)
    scores = []
    
    for train_index, test_index in tscv.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        
        model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=50, verbose=False)
        y_pred = model.predict(X_test)
        scores.append(mean_squared_error(y_test, y_pred))

    return np.mean(scores)

# Define search space for hyperparameters
space = {
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
    'max_depth': hp.choice('max_depth', range(5, 30)),
    'n_estimators': hp.choice('n_estimators', range(20, 205)),
    'num_leaves': hp.choice('num_leaves', range(20, 100)),
    'min_child_weight': hp.loguniform('min_child_weight', np.log(0.001), np.log(10)),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.6, 1.0),
    'subsample': hp.uniform('subsample', 0.7, 1.0),
    'reg_alpha': hp.uniform('reg_alpha', 0.0, 1.0),
    'reg_lambda': hp.uniform('reg_lambda', 0.0, 1.0)
}

# Run optimization
trials = Trials()
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=200,
            trials=trials)

print("Best: ", best)

100%|██████████| 200/200 [18:55<00:00,  5.68s/trial, best loss: 11631.817632872873]
Best:  {'colsample_bytree': 0.7281869913357373, 'learning_rate': 0.08299509553463873, 'max_depth': 20, 'min_child_weight': 0.19733949139352877, 'n_estimators': 3, 'num_leaves': 16, 'reg_alpha': 0.8332138510570211, 'reg_lambda': 0.1975545061747418, 'subsample': 0.8293201208815428}
