In [1]:
# Data processing
# ==============================================================================
import numpy as np
import pandas as pd

# Plots
# ==============================================================================
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
import plotly.express as px
# Apply the 'fivethirtyeight' matplotlib stylesheet and set the default line
# width to 1.5 for figures drawn after this point.
plt.style.use('fivethirtyeight')
plt.rcParams['lines.linewidth'] = 1.5
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

# Modelling and Forecasting
# ==============================================================================
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline

from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregMultiOutput import ForecasterAutoregMultiOutput
from skforecast.model_selection import grid_search_forecaster
from skforecast.model_selection import backtesting_forecaster

from joblib import dump, load

  import pandas.util.testing as tm


In [2]:
# Load the Building 6 dataset
# ==============================================================================
# The path is relative, so it is resolved against the kernel's working directory.
building6 = pd.read_csv(filepath_or_buffer='Building6.csv')

In [3]:
# Parse timestamps, index by them, and conform to an hourly frequency
# ==============================================================================
# The 'datetime' column is parsed as day/month/year hour:minute.
# NOTE(review): asfreq('H') adds a NaN-filled row for every hour that is absent
# from the CSV; confirm the modelling steps tolerate (or impute) such gaps.
building6 = (
    building6
    .assign(datetime=pd.to_datetime(building6['datetime'], format='%d/%m/%Y %H:%M'))
    .set_index('datetime')
    .asfreq('H')
)


In [4]:
# Preview the frame after the datetime index / hourly frequency step
# ==============================================================================
building6

Unnamed: 0_level_0,X,Unnamed..0,series_name,Year,Month,Day_of_Month,Week,Day,Hour,series_value,NumericMonth
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019-07-25 11:00:00,113864.0,113864.0,Building6,2019.0,Jul,25.0,30.0,4.0,11.0,36.80,7.0
2019-07-25 12:00:00,113865.0,113865.0,Building6,2019.0,Jul,25.0,30.0,4.0,12.0,35.40,7.0
2019-07-25 13:00:00,113866.0,113866.0,Building6,2019.0,Jul,25.0,30.0,4.0,13.0,35.50,7.0
2019-07-25 14:00:00,113867.0,113867.0,Building6,2019.0,Jul,25.0,30.0,4.0,14.0,36.00,7.0
2019-07-25 15:00:00,113868.0,113868.0,Building6,2019.0,Jul,25.0,30.0,4.0,15.0,35.20,7.0
...,...,...,...,...,...,...,...,...,...,...,...
2020-10-01 05:00:00,124249.0,124249.0,Building6,2020.0,Oct,1.0,40.0,4.0,5.0,29.95,10.0
2020-10-01 06:00:00,124250.0,124250.0,Building6,2020.0,Oct,1.0,40.0,4.0,6.0,28.60,10.0
2020-10-01 07:00:00,124251.0,124251.0,Building6,2020.0,Oct,1.0,40.0,4.0,7.0,33.10,10.0
2020-10-01 08:00:00,124252.0,124252.0,Building6,2020.0,Oct,1.0,40.0,4.0,8.0,35.60,10.0


In [None]:
# Chronological train / validation / test partition
# ==============================================================================
# The three partitions are cut with label-based `.loc` slices, which include
# both endpoints, so consecutive boundaries are one hour apart to avoid overlap.
# NOTE(review): `building0` is not created in the cells shown in this notebook
# (only `building6` is loaded) — confirm where it comes from, and that its index
# is a DatetimeIndex, before a Restart & Run All.
end_train, start_val = '2018-12-31 23:00', '2019-01-01 00:00'
end_val, start_test = '2019-12-31 23:00', '2020-01-01 00:00'

building0train = building0.loc[:end_train]
building0val = building0.loc[start_val:end_val]
building0test = building0.loc[start_test:]

In [None]:
# Draw the train / validation / test partitions on a shared axis
# ==============================================================================
# NOTE(review): the `building3*` frames are not defined in the cells shown in
# this notebook — confirm they exist before a Restart & Run All.
fig, ax = plt.subplots(figsize=(11, 4))
for partition_frame, partition_label in (
    (building3train, 'train'),
    (building3val, 'validation'),
    (building3test, 'test'),
):
    partition_frame['series_value'].plot(ax=ax, label=partition_label)
ax.set_title('Building 3 Series Value')
ax.legend();

In [None]:
# Build the autoregressive forecaster
# ==============================================================================
# LightGBM regressor with a fixed random_state for repeatable fits; lags=24 is
# only the starting lag setting (the grid search cell tries other lag options).
forecaster = ForecasterAutoreg(regressor=LGBMRegressor(random_state=123), lags=24)

forecaster

In [None]:
# Hyperparameter and lag search
# ==============================================================================
# Candidate regressor settings: 2 x 3 x 2 = 12 combinations per lag option.
param_grid = dict(
    n_estimators=[100, 500],
    max_depth=[3, 5, 10],
    learning_rate=[0.01, 0.1],
)
# Candidate lag options: three integers plus one explicit list of lags.
lags_grid = [24, 48, 72, [1, 2, 3, 23, 24, 25, 71, 72, 73]]

# The search sees the series up to `end_val`; the initial training window is
# as long as the training partition.
results_grid = grid_search_forecaster(
    forecaster=forecaster,
    y=building3.loc[:end_val, 'series_value'],
    param_grid=param_grid,
    lags_grid=lags_grid,
    steps=36,
    metric='mean_squared_error',
    initial_train_size=len(building3train),
)

In [None]:
# Backtest the forecaster
# ==============================================================================
# The initial training window is every row up to and including `end_val`;
# predictions are produced 36 steps at a time with refit disabled.
# NOTE(review): this cell evaluates `building1`, whereas the grid search cell
# tunes on `building3` — confirm which series is intended.
metric, predictions = backtesting_forecaster(
    forecaster=forecaster,
    y=building1['series_value'],
    initial_train_size=len(building1.loc[:end_val]),
    fixed_train_size=False,
    steps=36,
    refit=False,
    metric='mean_squared_error',
    verbose=False,
)

print(f"Backtest error: {metric}")