In [5]:
# Data processing
# ==============================================================================
import numpy as np
import pandas as pd

# Plots
# ==============================================================================
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
import plotly.express as px
# Apply the 'fivethirtyeight' matplotlib style sheet to all figures
plt.style.use('fivethirtyeight')
# Set the default line width to 1.5 (assigned after the style sheet is applied)
plt.rcParams['lines.linewidth'] = 1.5
# Render matplotlib figures inline in the notebook output
%matplotlib inline

# Modelling and Forecasting
# ==============================================================================
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline

from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregMultiOutput import ForecasterAutoregMultiOutput
from skforecast.model_selection import grid_search_forecaster
from skforecast.model_selection import backtesting_forecaster

from joblib import dump, load

In [6]:
# Load the raw Building 1 records from CSV
# ==============================================================================
building1 = pd.read_csv(filepath_or_buffer='Building1.csv')

In [7]:
# Parse the timestamps, index the frame by them and enforce an hourly frequency
# ==============================================================================
# NOTE(review): asfreq adds all-NaN rows for any hour missing from the file —
# confirm whether such gaps exist and need filling before modelling.
building1 = (
    building1
    .assign(datetime=pd.to_datetime(building1['datetime'], format='%d/%m/%Y %H:%M'))
    .set_index('datetime')
    .asfreq('H')
)


In [8]:
# Display the datetime-indexed frame to inspect its columns and date range
building1

Unnamed: 0_level_0,X,Unnamed..0,series_name,Year,Month,Day_of_Month,Week,Day,Hour,series_value,NumericMonth
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019-01-10 10:00:00,37223.0,37223.0,Building1,2019.0,Jan,10.0,2.0,4.0,10.0,15.533333,1.0
2019-01-10 11:00:00,37224.0,37224.0,Building1,2019.0,Jan,10.0,2.0,4.0,11.0,20.500000,1.0
2019-01-10 12:00:00,37225.0,37225.0,Building1,2019.0,Jan,10.0,2.0,4.0,12.0,20.800000,1.0
2019-01-10 13:00:00,37226.0,37226.0,Building1,2019.0,Jan,10.0,2.0,4.0,13.0,20.225000,1.0
2019-01-10 14:00:00,37227.0,37227.0,Building1,2019.0,Jan,10.0,2.0,4.0,14.0,22.750000,1.0
...,...,...,...,...,...,...,...,...,...,...,...
2020-10-01 05:00:00,52313.0,52313.0,Building1,2020.0,Oct,1.0,40.0,4.0,5.0,10.725000,10.0
2020-10-01 06:00:00,52314.0,52314.0,Building1,2020.0,Oct,1.0,40.0,4.0,6.0,10.500000,10.0
2020-10-01 07:00:00,52315.0,52315.0,Building1,2020.0,Oct,1.0,40.0,4.0,7.0,11.125000,10.0
2020-10-01 08:00:00,52316.0,52316.0,Building1,2020.0,Oct,1.0,40.0,4.0,8.0,11.600000,10.0


In [None]:
# Split Building 1 into train / validation / test partitions by date
# ==============================================================================
# The frame loaded in this notebook is `building1` (there is no `building0`),
# and its hourly index runs from 2019-01-10 10:00 to 2020-10-01 08:00 as shown
# in the displayed frame. The cut-offs therefore have to fall inside that
# window; the earlier 2018/2019 boundaries would leave the training set empty.
# NOTE(review): the boundaries below (about 12 months train, 6 months
# validation, 3 months test) are a proposal — adjust if a different split is
# wanted.
end_train  = '2019-12-31 23:00'
start_val  = '2020-01-01 00:00'
end_val    = '2020-06-30 23:00'
start_test = '2020-07-01 00:00'

# Label-based slicing on the DatetimeIndex; both ends of each slice are inclusive.
building1train = building1.loc[:end_train, :]
building1val   = building1.loc[start_val:end_val, :]
building1test  = building1.loc[start_test:, :]

# Fail early with a clear message instead of passing an empty partition on to
# the grid search / backtesting cells.
assert not building1train.empty, "Training split is empty - check end_train"
assert not building1val.empty, "Validation split is empty - check start_val/end_val"
assert not building1test.empty, "Test split is empty - check start_test"


In [None]:
# Plot the train / validation / test partitions of the Building 1 series
# ==============================================================================
# The partitions are sliced straight from `building1` with the date cut-offs, so
# this cell only depends on the loaded frame and the split boundaries (the
# notebook never defines any `building0*` frames).
fig, ax = plt.subplots(figsize=(11, 4))
building1.loc[:end_train, 'series_value'].plot(ax=ax, label='train')
building1.loc[start_val:end_val, 'series_value'].plot(ax=ax, label='validation')
building1.loc[start_test:, 'series_value'].plot(ax=ax, label='test')
ax.set_title('Building 1 Series Value')
ax.set_ylabel('series_value')
ax.legend();

In [9]:
# Build the autoregressive forecaster: LightGBM regressor on lags 1..24
# ==============================================================================
forecaster = ForecasterAutoreg(regressor=LGBMRegressor(random_state=123), lags=24)

forecaster

ForecasterAutoreg 
Regressor: LGBMRegressor(random_state=123) 
Lags: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24] 
Window size: 24 
Included exogenous: False 
Type of exogenous variable: None 
Exogenous variables names: None 
Training range: None 
Training index type: None 
Training index frequency: None 
Regressor parameters: {'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 1.0, 'importance_type': 'split', 'learning_rate': 0.1, 'max_depth': -1, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 100, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_state': 123, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'silent': 'warn', 'subsample': 1.0, 'subsample_for_bin': 200000, 'subsample_freq': 0} 
Creation date: 2022-09-20 00:46:46 
Last fit date: None 
Skforecast version: 0.4.3 

In [None]:
# Grid search over regressor hyperparameters and lag sets
# ==============================================================================
# Candidate LightGBM hyperparameter values to search over.
param_grid = {
    'n_estimators': [100, 500],
    'max_depth': [3, 5, 10],
    'learning_rate': [0.01, 0.1]
    }

# An integer n selects lags 1..n (as in the forecaster created with lags=24);
# the explicit list selects only the lags it names.
lags_grid = [24, 48, 72, [1, 2, 3, 23, 24, 25, 71, 72, 73]]

# The search sees data up to `end_val` only. The initial training window is
# sized from the same `end_train` cut-off used for the split, computed directly
# on `building1` so the cell does not depend on a separately named training
# frame.
# NOTE(review): the backtesting cell reuses `forecaster` afterwards, which
# presumably relies on grid_search_forecaster leaving it fitted with the best
# configuration (its `return_best` default) — confirm for the installed
# skforecast version.
results_grid = grid_search_forecaster(
                        forecaster         = forecaster,
                        y                  = building1.loc[:end_val, 'series_value'],
                        metric             = 'mean_squared_error',
                        initial_train_size = len(building1.loc[:end_train]),
                        lags_grid          = lags_grid,
                        param_grid         = param_grid,
                        steps              = 36,
                        )

In [None]:
# Backtest the forecaster on the observations that follow `end_val`
# ==============================================================================
# Everything up to `end_val` forms the initial training window; the remainder of
# the series is predicted in blocks of 36 steps without refitting.
metric, predictions = backtesting_forecaster(
    forecaster         = forecaster,
    y                  = building1['series_value'],
    initial_train_size = len(building1.loc[:end_val]),
    steps              = 36,
    metric             = 'mean_squared_error',
    refit              = False,
    fixed_train_size   = False,
    verbose            = False,
)

print(f"Backtest error: {metric}")