In [20]:
# pip install sktime

In [21]:
import pandas as pd
from scipy.stats import boxcox
from scipy.special import inv_boxcox
import numpy as np
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.transformations.series.detrend import Deseasonalizer
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.transformations.series.detrend import Detrender
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
from sktime.forecasting.compose import make_reduction
from sklearn.exceptions import ConvergenceWarning
import requests
from sklearn.ensemble import (
    HistGradientBoostingRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.linear_model import ElasticNetCV
from sktime.forecasting.model_selection import (
    ForecastingGridSearchCV,
    ExpandingWindowSplitter,
)
from sktime.forecasting.compose import MultiplexForecaster
from sklearn.neighbors import KNeighborsRegressor
from sktime.forecasting.ets import AutoETS
from sktime.transformations.series.boxcox import LogTransformer


import warnings

In [22]:
from sklearn.preprocessing import StandardScaler
from sktime.forecasting.compose import ForecastingPipeline
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.forecasting.arima import AutoARIMA


def initialize_arima_forecaster():
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "deseasonalizer_daily",
                            Deseasonalizer(sp=24, model="additive"),
                        ),
                        ("residual_forecaster", AutoARIMA(suppress_warnings=True)),
                    ]
                ),

    return pipe


def initialize_elasticnet_forecaster():
    deseasonalizer_weekly = Deseasonalizer(sp=24 * 7, model="additive")
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                ElasticNetCV(n_jobs=-1),
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )

    return pipe


def initialize_rf_forecaster():
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                RandomForestRegressor(n_estimators=100, n_jobs=-1),
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )

    return pipe


def initialize_gb_forecaster():
    deseasonalizer_daily = Deseasonalizer(sp=24, model="additive")
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                GradientBoostingRegressor(n_estimators=200),
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )
           
    return pipe


def initialize_hist_forecaster():
    deseasonalizer_daily = Deseasonalizer(sp=24, model="additive")
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                HistGradientBoostingRegressor(),
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )
            
    return pipe


from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor


def initialize_cat_forecaster():
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                CatBoostRegressor(verbose=0, n_estimators=100),
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )

    return pipe


def initialize_lgbm_forecaster():
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                LGBMRegressor(),
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )

    return pipe


def initialize_xgb_forecaster():
    pipe = TransformedTargetForecaster(
                    [
                        (
                            "forecast",
                            make_reduction(
                                XGBRegressor(objective="reg:squarederror"),
                                window_length=24,
                                strategy="direct",
                            ),
                        ),
                    ]
                )

    return pipe

In [23]:
arima_pipeline = initialize_arima_forecaster()
elasticnet_pipeline = initialize_elasticnet_forecaster()
rf_pipeline = initialize_rf_forecaster()
gb_pipeline = initialize_gb_forecaster()
hist_pipeline = initialize_hist_forecaster()
cat_pipeline = initialize_cat_forecaster()
lgbm_pipeline = initialize_lgbm_forecaster()
xgb_pipeline = initialize_xgb_forecaster()


forecasting_models = {
    "arima_pipeline": arima_pipeline,
    "elasticnet_pipeline": elasticnet_pipeline,
    "rf_pipeline": rf_pipeline,
    "gb_pipeline": gb_pipeline,
    "hist_pipeline": hist_pipeline,
    "arima_pipeline": arima_pipeline,
    "cat_pipeline": cat_pipeline,
    "lgbm_pipeline": lgbm_pipeline,
    "xgb_pipeline": xgb_pipeline,
}

  warn(


In [24]:
selected_features = [
    "ail",
    "gas_price",
    "gas_tng",
    "coal_tng",
    "wind_tng",
    "gas_avail",
    "wind_avail",
    "gas_reserve_margin",
    "coal_reserve_margin",
    "wind_reserve_margin",
    "other_reserve_margin",
    "gas_supply_mix",
    "coal_supply_mix",
    "wind_supply_mix",
    "other_supply_mix",
    "total_reserve_margin",
    "demand_supply_ratio",
    "fossil_fuel_ratio",
    "rolling_mean",
    "rolling_std",
    "rolling_min",
    "rolling_max",
    "rolling_median",
    "exp_moving_avg",
]

In [25]:
price_old_df = pd.read_csv(
    "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/supply_load_price.csv",
    parse_dates=["Date (MST)"],
    index_col="Date (MST)",
)

window = 24
price_old_df = price_old_df.sort_values(by="Date (MST)")
price_old_df = price_old_df.asfreq("H")

price_old_df['rolling_mean'] = price_old_df['price'].rolling(window).mean().rolling(2).mean().shift(-window // 2)
price_old_df['rolling_std'] = price_old_df['price'].rolling(window).std().rolling(2).mean().shift(-window // 2)
price_old_df['rolling_min'] = price_old_df['price'].rolling(window).min().rolling(2).mean().shift(-window // 2)
price_old_df['rolling_max'] = price_old_df['price'].rolling(window).max().rolling(2).mean().shift(-window // 2)
price_old_df['rolling_median'] = price_old_df['price'].rolling(window).median().rolling(2).mean().shift(-window // 2)
price_old_df['exp_moving_avg'] = price_old_df['price'].ewm(span=24).mean()

In [26]:
price_old_df_filtered = price_old_df.loc["2023-01-01":"2023-01-31", selected_features + ["price"]]

y = price_old_df_filtered["price"]
X = price_old_df_filtered[selected_features]

# scale ratios to percentages
X["gas_supply_mix"] = X["gas_supply_mix"] * 100
X["coal_supply_mix"] = X["coal_supply_mix"] * 100
X["wind_supply_mix"] = X["wind_supply_mix"] * 100
X["other_supply_mix"] = X["other_supply_mix"] * 100
X['gas_reserve_margin'] = X['gas_reserve_margin'] * 100
X['coal_reserve_margin'] = X['coal_reserve_margin'] * 100
X['wind_reserve_margin'] = X['wind_reserve_margin'] * 100
X['other_reserve_margin'] = X['other_reserve_margin'] * 100
X['total_reserve_margin'] = X['total_reserve_margin'] * 100
X['demand_supply_ratio'] = X['demand_supply_ratio'] * 100
X['fossil_fuel_ratio'] = X['fossil_fuel_ratio'] * 100

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["gas_supply_mix"] = X["gas_supply_mix"] * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["coal_supply_mix"] = X["coal_supply_mix"] * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X["wind_supply_mix"] = X["wind_supply_mix"] * 100
A value is trying to be set on a copy of a slice from a D

In [27]:
# standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# y_scaled = scaler.fit_transform(y.values.reshape(-1, 1))

In [28]:
cv = ExpandingWindowSplitter(
    initial_window=int(len(X) * 0.98), step_length=1, fh=np.arange(1, 13)
)

n_splits = cv.get_n_splits(y)
print(f"Number of Folds = {n_splits}")

Number of Folds = 4


In [29]:
# from sktime.utils.plotting import plot_windows, get_windows
# train_windows, test_windows = get_windows(y, cv)
# plot_windows(y, train_windows, test_windows)

In [30]:
# from sktime.forecasting.model_evaluation import evaluate
# from sktime.performance_metrics.forecasting import MeanSquaredScaledError, MeanSquaredError

# list_models = ["arima_pipeline"]

# rmse_cv_results = []
# rmse_cv_std = []
# for i in list_models:
#     print(i)
#     results = evaluate(
#         forecaster=forecasting_models[i],
#         y=y,
#         X=X,
#         cv=cv,
#         strategy="refit",
#         return_data=True,
#         scoring=MeanSquaredError(square_root=True),
#         backend="loky",
#         error_score='raise'
#     )
    
#     rmse = results["test_MeanSquaredError"].mean()
#     rmse_std = results["test_MeanSquaredError"].std()
#     rmse_cv_results.append(rmse)
#     rmse_cv_std.append(rmse_std)

In [31]:
# rmse_cv_results_df = pd.DataFrame(
#     {"Model": list_models, "RMSE_CV": rmse_cv_results, "RMSE_CV_STD": rmse_cv_std}
# ).sort_values(by=["RMSE_CV"])
# rmse_cv_results_df

In [37]:
X.head(1)

Unnamed: 0_level_0,ail,gas_price,gas_tng,coal_tng,wind_tng,gas_avail,wind_avail,gas_reserve_margin,coal_reserve_margin,wind_reserve_margin,...,other_supply_mix,total_reserve_margin,demand_supply_ratio,fossil_fuel_ratio,rolling_mean,rolling_std,rolling_min,rolling_max,rolling_median,exp_moving_avg
Date (MST),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-01,9824,4.12,7618.023188,817.17632,780.206753,8253.0,2619.0,7.693891,0.344351,70.209746,...,2.434244,21.349706,70.236648,76.677939,113.835833,62.836213,64.86,285.83,80.735,111.61075


In [32]:
from sktime.forecasting.model_evaluation import evaluate
from sktime.performance_metrics.forecasting import MeanSquaredScaledError, MeanSquaredError

list_models = ["elasticnet_pipeline", "rf_pipeline", "gb_pipeline", "hist_pipeline", "cat_pipeline", "lgbm_pipeline", "xgb_pipeline"]

rmse_cv_results = []
rmse_cv_std = []
for i in list_models:
    print(i)
    results = evaluate(
        forecaster=forecasting_models[i],
        y=y,
        X=X,
        cv=cv,
        strategy="refit",
        return_data=True,
        scoring=MeanSquaredError(square_root=True),
        backend="loky",
        error_score='raise'
    )
    
    rmse = results["test_MeanSquaredError"].mean()
    rmse_std = results["test_MeanSquaredError"].std()
    rmse_cv_results.append(rmse)
    rmse_cv_std.append(rmse_std)

elasticnet_pipeline


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

rf_pipeline
gb_pipeline
hist_pipeline
cat_pipeline
lgbm_pipeline
xgb_pipeline


In [33]:
rmse_cv_results, rmse_cv_std

([29.458267479196778,
  60.129900451832796,
  52.70391035699318,
  60.031925563112935,
  59.55841435233603,
  60.778947733046735,
  50.56408694995894],
 [4.0051691355474945,
  1.1349828858055542,
  6.14955438043001,
  5.789058397986934,
  12.158510122597134,
  3.1130814605135075,
  3.8658570818877327])

In [34]:
rmse_cv_results_df = pd.DataFrame(
    {"Model": list_models, "RMSE_CV": rmse_cv_results, "RMSE_CV_STD": rmse_cv_std}
).sort_values(by=["RMSE_CV"])
rmse_cv_results_df

Unnamed: 0,Model,RMSE_CV,RMSE_CV_STD
0,elasticnet_pipeline,29.458267,4.005169
6,xgb_pipeline,50.564087,3.865857
2,gb_pipeline,52.70391,6.149554
4,cat_pipeline,59.558414,12.15851
3,hist_pipeline,60.031926,5.789058
1,rf_pipeline,60.1299,1.134983
5,lgbm_pipeline,60.778948,3.113081


In [35]:
price_old_df = pd.read_csv(
    "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/supply_load_price.csv",
    parse_dates=["Date (MST)"],
    index_col="Date (MST)",
)

In [36]:
train = price_old_df.loc["2023-01-01":"2023-01-31", selected_features+["price"]]
test = price_old_df.loc["2023-02-01":"2023-02-02", selected_features+["price"]]
train = train.sort_values(by="Date (MST)")
train = train.asfreq("H")
test = test.sort_values(by="Date (MST)")
test = test.asfreq("H")
X_train = train[selected_features]
y_train = train["price"]
X_test = test[selected_features]
y_test = test["price"]

KeyError: "['rolling_mean', 'rolling_std', 'rolling_min', 'rolling_max', 'rolling_median', 'exp_moving_avg'] not in index"

In [None]:
# fit and predict for all models
fh = np.arange(1, 13)
for i in list_models:
    print(i)
    forecasting_models[i].fit(y_train, X_train, fh=fh)
    y_pred = forecasting_models[i].predict(fh, X_train.tail(1))
    # get rmse between y_pred and y_test[:12]
    rmse = mean_squared_error(y_test[:12], y_pred, squared=False)
    print(rmse)