In [3]:
# Data processing
# ==============================================================================
import numpy as np
import pandas as pd

# Plots
# ==============================================================================
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
import plotly.express as px
plt.style.use('fivethirtyeight')
plt.rcParams['lines.linewidth'] = 1.5
%matplotlib inline

# Modelling and Forecasting
# ==============================================================================
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline

from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregMultiOutput import ForecasterAutoregMultiOutput
from skforecast.model_selection import grid_search_forecaster
from skforecast.model_selection import backtesting_forecaster

from joblib import dump, load

In [6]:
# Load the Building 4 records from CSV (path is relative to the notebook)
# ==============================================================================
building4 = pd.read_csv(filepath_or_buffer='Building4.csv')

In [7]:
# Parse the 'datetime' strings, index the frame by them, and put it on an
# hourly grid
# ==============================================================================
# NOTE(review): asfreq('H') adds an all-NaN row for every hour that is absent
# from the CSV - confirm the target column is gap-free before modelling.
building4 = (
    building4
    .assign(
        datetime=lambda frame: pd.to_datetime(
            frame['datetime'], format='%d/%m/%Y %H:%M'
        )
    )
    .set_index('datetime')
    .asfreq('H')
)

In [8]:
# Inspect the datetime-indexed frame (the rich display is abbreviated to the
# first and last rows).
building4

Unnamed: 0_level_0,X,Unnamed..0,series_name,Year,Month,Day_of_Month,Week,Day,Hour,series_value,NumericMonth
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019-07-03 14:00:00,92533.0,92533.0,Building4,2019.0,Jul,3.0,27.0,3.0,14.0,2.000000,7.0
2019-07-03 15:00:00,92534.0,92534.0,Building4,2019.0,Jul,3.0,27.0,3.0,15.0,1.393068,7.0
2019-07-03 16:00:00,92535.0,92535.0,Building4,2019.0,Jul,3.0,27.0,3.0,16.0,1.650786,7.0
2019-07-03 17:00:00,92536.0,92536.0,Building4,2019.0,Jul,3.0,27.0,3.0,17.0,1.000000,7.0
2019-07-03 18:00:00,92537.0,92537.0,Building4,2019.0,Jul,3.0,27.0,3.0,18.0,1.000000,7.0
...,...,...,...,...,...,...,...,...,...,...,...
2020-10-01 05:00:00,103443.0,103443.0,Building4,2020.0,Oct,1.0,40.0,4.0,5.0,1.686056,10.0
2020-10-01 06:00:00,103444.0,103444.0,Building4,2020.0,Oct,1.0,40.0,4.0,6.0,1.000000,10.0
2020-10-01 07:00:00,103445.0,103445.0,Building4,2020.0,Oct,1.0,40.0,4.0,7.0,1.681818,10.0
2020-10-01 08:00:00,103446.0,103446.0,Building4,2020.0,Oct,1.0,40.0,4.0,8.0,1.275536,10.0


In [None]:
#Further Split Train-Val for Buildings
# ==============================================================================
end_train='2018-12-31 23:00'
start_val='2019-01-01 00:00'
end_val='2019-12-31 23:00'
start_test='2020-01-01 00:00'


building0train=building0.loc[:end_train, :]
building0val=building0.loc[start_val:end_val, :]
building0test=building0.loc[start_test:, :]

In [None]:
# Plot time series
# ==============================================================================
fig, ax = plt.subplots(figsize=(11, 4))
building3train['series_value'].plot(ax=ax, label='train')
building3val['series_value'].plot(ax=ax, label='validation')
building3test['series_value'].plot(ax=ax, label='test')
ax.set_title('Building 3 Series Value')
ax.legend();

In [None]:
# Create forecaster
# ==============================================================================
forecaster = ForecasterAutoreg(
                regressor = LGBMRegressor(random_state=123),
                lags = 24
                )

forecaster

In [None]:
# Grid search of hyperparameters and lags
# ==============================================================================
# Regressor hyperparameters

param_grid = {
    'n_estimators': [100, 500],
    'max_depth': [3, 5, 10],
    'learning_rate': [0.01, 0.1]
    }
lags_grid = [24, 48, 72, [1, 2, 3, 23, 24, 25, 71, 72, 73]]

results_grid = grid_search_forecaster(
                        forecaster         = forecaster,
                        y                  = building3.loc[:end_val, 'series_value'],
                        metric             = 'mean_squared_error',
                        initial_train_size = int(len(building3train)),
                        lags_grid          = lags_grid,
                        param_grid         = param_grid,
                        steps              = 36,
                        )

In [None]:
# Backtesting
# ==============================================================================
metric, predictions = backtesting_forecaster(
    forecaster = forecaster,
    y          = building1['series_value'],
    initial_train_size = len(building1.loc[:end_val]),
    fixed_train_size   = False,
    steps      = 36,
    refit      = False,
    metric     = 'mean_squared_error',
    verbose    = False
    )

print(f"Backtest error: {metric}")