In [28]:
import pandas as pd
from scipy.stats import boxcox
from scipy.special import inv_boxcox
import numpy as np
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.compose import (
    TransformedTargetForecaster,
    DirectTimeSeriesRegressionForecaster,
    DirectTabularRegressionForecaster,
)
from sktime.transformations.series.detrend import Deseasonalizer
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.transformations.series.detrend import Detrender
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.base import ForecastingHorizon
from sklearn.metrics import mean_squared_error
from sktime.forecasting.compose import make_reduction
from sklearn.exceptions import ConvergenceWarning
import requests
from sklearn.ensemble import (
    HistGradientBoostingRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.linear_model import ElasticNetCV
from xgboost import XGBRegressor
import pandas as pd
import numpy as np
from sktime.forecasting.model_selection import SlidingWindowSplitter
from sktime.forecasting.compose import YfromX
from sktime.transformations.series.boxcox import LogTransformer
from sktime.transformations.compose import ColumnwiseTransformer
from sktime.forecasting.compose import ForecastingPipeline

import warnings

# Silence every warning category for the rest of the session. Note that this
# also hides deprecation notices emitted by the libraries used below.
warnings.filterwarnings("ignore")

class TimeSeriesForecasting:
    """Bank of per-step forecasting pipelines fed with lagged target values.

    ``fit`` trains ``len_models`` pipelines. Pipeline ``j`` (1-based) is a
    random forest wrapped in sktime's ``YfromX`` that reads the columns
    ``lag_j .. lag_{j + num_lags - 1}`` plus the exogenous columns of ``X``.
    ``predict`` returns one row per pipeline, and ``update`` rolls the stored
    lag window forward by one hour after a new observation arrives.

    Parameters
    ----------
    len_models : int, default=12
        Number of pipelines to train (one output row each in ``predict``).
    num_lags : int, default=24
        Number of consecutive lag columns given to each pipeline.
    random_state : int or None, default=None
        Forwarded to every ``RandomForestRegressor``. ``None`` keeps the
        previous unseeded behaviour; pass an int for repeatable fits.
    """

    # Sampling-frequency alias used for every ``asfreq`` call. The lowercase
    # spelling is used because pandas deprecated the uppercase "H" alias.
    _FREQ = "h"
    # Amount by which the stored lag window advances per observation.
    _STEP = pd.Timedelta(hours=1)

    def __init__(
        self,
        len_models=12,
        num_lags=24,
        random_state=None,
    ):
        self.len_models = len_models
        self.num_lags = num_lags
        self.random_state = random_state
        self.models = {}  # "model_<j>" -> fitted pipeline
        self.X_sub_train = {}  # "X_train_<j>" -> training features of pipeline j
        self.selected_cols = []  # exogenous column names, set by transform()
        self.forecast_len = 1
        # Every pipeline is fitted with a one-step-ahead horizon.
        self.fh = ForecastingHorizon(np.arange(1, 2))
        self.next_y_window = None  # single-row lag window for the next step

    def _step_features(self, frame, step):
        """Return the feature columns of pipeline ``step`` from ``frame``, at hourly frequency."""
        lag_cols = [f"lag_{i}" for i in range(step, step + self.num_lags)]
        return frame[lag_cols + self.selected_cols].copy().asfreq(self._FREQ)

    def create_sliding_window_data_refined(self, ts, window_length):
        """Turn a series into a supervised table of lag columns and a target.

        Parameters
        ----------
        ts : pandas.Series
            Target series; its index entries must provide ``to_pydatetime``.
        window_length : int
            Number of lag columns to generate per row.

        Returns
        -------
        pandas.DataFrame
            Indexed by ``date`` (timestamp of the target) with columns
            ``lag_<window_length> .. lag_1`` (oldest to most recent) and
            ``target``.

        Raises
        ------
        ValueError
            If the splitter yields no window (previously this surfaced as a
            ``KeyError`` on the missing ``date`` column).
        """
        splitter = SlidingWindowSplitter(
            fh=[1], window_length=window_length, step_length=1
        )
        # The oldest value of each window gets the highest lag number.
        lag_names = [f"lag_{k}" for k in range(window_length, 0, -1)]

        # Collect plain records and build the frame once; concatenating inside
        # the loop would copy the accumulated frame on every iteration.
        records = []
        for train, test in splitter.split_series(ts):
            features = ts.loc[train.index]
            target = ts.loc[test.index]
            record = {"date": pd.to_datetime(target.index[0].to_pydatetime())}
            record.update(zip(lag_names, features.to_numpy()))
            record["target"] = target.iloc[0]
            records.append(record)

        if not records:
            raise ValueError(
                "Series is too short to build a sliding window of length "
                f"{window_length} plus one target value."
            )

        return pd.DataFrame(records).set_index("date")

    def fit(self, y, X):
        """Fit one pipeline per step on lagged values of ``y`` and on ``X``.

        Parameters
        ----------
        y : pandas.Series
            Target series with a datetime index.
        X : pandas.DataFrame
            Exogenous features on the same index; all columns are used.
        """
        X_transformed, y_transformed = self.transform(y, X)

        for j in range(1, self.len_models + 1):
            model_name = f"model_{j}"
            X_train_name = f"X_train_{j}"

            self.X_sub_train[X_train_name] = self._step_features(X_transformed, j)

            self.models[model_name] = ForecastingPipeline(
                steps=[
                    (
                        "forecaster",
                        TransformedTargetForecaster(
                            steps=[
                                (
                                    "targetforecaster",
                                    YfromX(
                                        RandomForestRegressor(
                                            n_estimators=200,
                                            n_jobs=-1,
                                            random_state=self.random_state,
                                        )
                                    ),
                                ),
                            ]
                        ),
                    ),
                ]
            )

            self.models[model_name].fit(
                y=y_transformed, X=self.X_sub_train[X_train_name], fh=self.fh
            )

    def transform(self, y, X):
        """Build the lagged design matrix and remember the next-step window.

        Side effects: sets ``self.selected_cols`` to the columns of ``X`` and
        ``self.next_y_window`` to a single-row frame of lags for the step
        after the last available row.

        Returns
        -------
        tuple
            ``(X_transformed, y_transformed)``: the lag and exogenous columns,
            and the ``target`` column, both restricted to the timestamps
            present in ``X``.
        """
        window_length = self.num_lags + self.len_models - 1

        data_df = self.create_sliding_window_data_refined(y, window_length)

        # Inner join on the index: rows without exogenous data are dropped.
        data_df = pd.merge(data_df, X, left_index=True, right_index=True)

        x_feature_names = [f"lag_{i}" for i in range(1, window_length + 1)]

        # Roll the last window forward by one step: every lag moves one slot
        # to the right and the last target becomes the new lag_1.
        last_row = data_df[x_feature_names].iloc[[-1]]
        last_row = last_row.shift(1, axis=1)
        last_row.iloc[:, 0] = data_df["target"].iloc[-1]
        last_row.index = last_row.index + self._STEP

        self.next_y_window = last_row

        self.selected_cols = X.columns.tolist()
        x_feature_names += self.selected_cols

        X_transformed = data_df[x_feature_names]
        y_transformed = data_df["target"]
        return X_transformed, y_transformed

    def predict(self, X):
        """Return one forecast row per fitted pipeline.

        Parameters
        ----------
        X : pandas.DataFrame
            Exogenous features only, indexed like ``self.next_y_window``.

        Returns
        -------
        pandas.DataFrame
            Index named ``date`` (pipeline cutoff plus ``j`` hours) and a
            single column ``cutoff_hour_<hour of model_1's cutoff>``.
        """
        # Combine the stored lag window with the supplied exogenous features.
        X = pd.concat([self.next_y_window, X], axis=1)

        rows = []
        for j in range(1, self.len_models + 1):
            model = self.models[f"model_{j}"]

            # NOTE(review): pipeline j receives lag_j.. of the next-step window
            # while its output is labelled cutoff + j hours - confirm that this
            # alignment is intended for steps beyond the first.
            prediction_for = model.cutoff + pd.DateOffset(hours=j)

            y_pred = model.predict(fh=1, X=self._step_features(X, j))

            column = f"cutoff_hour_{self.models['model_1'].cutoff.hour[0]}"
            rows.append(
                pd.DataFrame(
                    {column: y_pred.iloc[0]},
                    index=pd.Index(prediction_for),
                )
            )

        # DataFrame.append no longer exists in pandas 2; concatenate once.
        predictions_df = pd.concat(rows) if rows else pd.DataFrame()
        predictions_df.index.name = "date"
        return predictions_df

    def update(self, new_observation_y, new_observation_X):
        """Feed one new observation to every pipeline and advance the window.

        Parameters
        ----------
        new_observation_y : pandas.Series
            Newly observed target value(s); the first one enters the window.
        new_observation_X : pandas.DataFrame
            Exogenous features for the same timestamp.
        """
        X = pd.concat([self.next_y_window, new_observation_X], axis=1)

        # Loop-invariant, so done once rather than per pipeline.
        new_observation_y = new_observation_y.asfreq(self._FREQ)

        for j in range(1, self.len_models + 1):
            # update_params=False: the pipeline is not refitted here.
            self.models[f"model_{j}"].update(
                y=new_observation_y,
                X=self._step_features(X, j),
                update_params=False,
            )

        # Update the next y window
        shifted = self.next_y_window.shift(1, axis=1)
        shifted.iloc[:, 0] = new_observation_y.iloc[0]
        shifted.index = shifted.index + self._STEP
        self.next_y_window = shifted
        

In [29]:
# Load the hourly price/load table. NOTE: the URL tracks the `main` branch, so
# the downloaded contents may differ between runs.
price_old_df = (
    pd.read_csv(
        "https://raw.githubusercontent.com/slalom-ubc-mds/Power-Price-Prediction/main/data/processed/supply_load_price.csv",
        parse_dates=["Date (MST)"],
        index_col="Date (MST)",
    )
    # Sort first, then regularise to an hourly grid (missing hours become NaN),
    # then keep December 2022 onwards.
    .sort_index()
    .asfreq("h")
    .loc["2022-12":]
)

# Target and exogenous features share the hourly index set above, so no
# further asfreq call is needed on them.
y = price_old_df["price"]
selected_cols = ["ail"]
X = price_old_df[selected_cols]

# split y and X into train and test (the last 24 rows are held out)
y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, test_size=24)

forecasting = TimeSeriesForecasting(
    len_models=12,
    num_lags=24,
)

In [30]:
# Train every per-step pipeline on the training split.
forecasting.fit(X=X_train, y=y_train)

In [31]:
# Forecast using the exogenous features of the first held-out row.
forecasting.predict(X=X_test.iloc[[0]])

Unnamed: 0_level_0,cutoff_hour_22
date,Unnamed: 1_level_1
2023-03-30 23:00:00,50.773
2023-03-31 00:00:00,52.0676
2023-03-31 01:00:00,59.5881
2023-03-31 02:00:00,53.44165
2023-03-31 03:00:00,55.41535
2023-03-31 04:00:00,55.44245
2023-03-31 05:00:00,58.65515
2023-03-31 06:00:00,63.85875
2023-03-31 07:00:00,67.91025
2023-03-31 08:00:00,63.9912


In [32]:
# Feed the first held-out observation (target and features) back into the
# forecaster so its stored lag window advances by one step.
forecasting.update(
    new_observation_y=y_test.iloc[[0]],
    new_observation_X=X_test.iloc[[0]],
)

In [33]:
# Display the stored single-row lag window after the update above.
forecasting.next_y_window

Unnamed: 0,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,lag_8,lag_9,lag_10,...,lag_26,lag_27,lag_28,lag_29,lag_30,lag_31,lag_32,lag_33,lag_34,lag_35
2023-03-31,50.9,50.0,48.55,49.03,49.87,54.42,59.16,55.54,56.21,57.21,...,62.73,60.97,59.74,75.17,95.0,64.48,52.41,49.09,57.46,57.82
