# Load necessary packages

In [1]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

import numpy as np
import pandas as pd

import datetime

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

hex_salmon = '#F68F83'
hex_gold = '#BC9661'
hex_indigo = '#2D2E5F'
hex_maroon = '#8C4750'
hex_white = '#FAFAFA'
hex_blue = '#7EB5D2'

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.dates import DateFormatter
import matplotlib.dates as dates

# import matplotlib.font_manager as font_manager
# mpl.font_manager._rebuild()

mpl.rcParams['font.family'] = 'SF Mono'
mpl.rcParams['font.weight'] = 'medium'
mpl.rcParams['axes.titleweight'] = 'semibold'
mpl.rcParams['axes.labelweight'] = 'medium'
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=[hex_indigo, hex_salmon, hex_maroon])
mpl.rcParams["figure.titlesize"] = 'large'
mpl.rcParams["figure.titleweight"] = 'semibold'

import plotly.express as px
import plotly.graph_objects as go

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

from termcolor import colored

from IPython.display import display, Markdown, Latex

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LogisticRegression, Ridge, ElasticNet, LassoCV, RidgeCV, ElasticNetCV
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import roc_auc_score, accuracy_score

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

green = '🟢'
red = '🔴'

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #


# Organise data

## Import features

In [2]:

! pip install 'git+git://github.com/HR/github-clone#egg=ghclone' &> /dev/null

! ghclone https://github.com/timovijn/ElectricityPriceForecasting/tree/master/forecasting


zsh:1: command not found: ghclone


In [3]:

features = pd.read_pickle(f'./data/ID/ID3.pkl')

display(Markdown('***')), display(Markdown(f'**“features” {(features.shape)}:** <p> *Holds all input features*')), display(Markdown(f'(Check) No NaN’s: {green if features.isna().any(axis=1).sum() == 0 else red}'))
display(features)


***

**“features” (34872, 2):** <p> *Holds all input features*

(Check) No NaN’s: 🟢

Unnamed: 0_level_0,ID3,MCP
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01 00:00:00+00:00,24.2549,38.50
2015-01-01 01:00:00+00:00,28.6447,38.22
2015-01-01 02:00:00+00:00,28.3007,35.60
2015-01-01 03:00:00+00:00,28.0543,33.00
2015-01-01 04:00:00+00:00,28.1546,27.41
...,...,...
2018-12-31 19:00:00+00:00,68.9794,58.28
2018-12-31 20:00:00+00:00,51.6558,50.01
2018-12-31 21:00:00+00:00,50.4283,47.48
2018-12-31 22:00:00+00:00,50.1262,50.95


In [4]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

daterange = ['2017.12.12', '2017.12.20']

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

fig1 = go.Scatter       (      
                        x = features[(features.index >= daterange[0]) & (features.index <= daterange[1])].index,
                        y = features[(features.index >= daterange[0]) & (features.index <= daterange[1])]['MCP'],
                        name = 'MCP',
                        line_color = hex_indigo,

                        mode = 'lines+markers',
                        )

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

fig2 = go.Scatter       (      
                        x = features[(features.index >= daterange[0]) & (features.index <= daterange[1])].index,
                        y = features[(features.index >= daterange[0]) & (features.index <= daterange[1])]['ID3'],
                        name = 'ID3',
                        line_color = hex_salmon,

                        mode = 'lines+markers',
                        )

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

data = [fig1, fig2]

fig = go.Figure(data = data)

fig.update_layout       (      
                        autosize = False,
                        width = 1250,
                        height = 500,
    
                        title = 'Day-ahead (MCP) and intraday (ID3)',
                        xaxis_title = 'Timestamp',
                        yaxis_title = 'Price (€)',

                        font_family = 'SF Mono',

                        hovermode = 'x',
                        xaxis_showspikes = True,
                        yaxis_showspikes = True,
                        )

fig.show()


## Select features

In [5]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

feature_y = []
feature_X = []

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

lags_y = []
lagged_y = []

feature = ['ID3']
feature_y = feature_y + feature
lag = list(range(0, 1, 1))
lags_y = lags_y + lag
lagged_y = lagged_y + feature * len(lag)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

lags_X = []
lagged_X = []

feature = ['ID3']
feature_X = feature_X + feature
lag = list(range(-72, -3, 1))
lags_X = lags_X + lag
lagged_X = lagged_X + feature * len(lag)

feature = ['MCP']
feature_X = feature_X + feature
lag = list(range(-24, 1, 1))
lags_X = lags_X + lag
lagged_X = lagged_X + feature * len(lag)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X = features[feature_X]
y = features[feature_y]

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

header =    [
            ['y'] * len(lagged_y) + ['X'] * len(lagged_X),
            lagged_y + lagged_X, 
            lags_y + lags_X,
            ]

frame = pd.DataFrame(index = X.index, columns = header)

frame.columns = frame.columns.rename('Set', level = 0)
frame.columns = frame.columns.rename('Feature', level = 1)
frame.columns = frame.columns.rename('Lag', level = 2)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X3 = pd.DataFrame(index = X.index)
y3 = pd.DataFrame(index = y.index)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

for i in range(len(lagged_y)):
    c = lagged_y[i]
    l = lags_y[i]
    y3[f'{c} ({l})'] = y[f'{c}'].shift(-l)

frame['y'] = y3.values

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

for i in range(len(lagged_X)):
    c = lagged_X[i]
    l = lags_X[i]
    X3[f'{c} ({l})'] = X[f'{c}'].shift(-l)

frame['X'] = X3.values

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

frame = frame.dropna()

display(Markdown('***')), display(Markdown(f'**“X” {(X.shape)}:** <p> *Holds all selected input features*'))
display(X)

display(Markdown('***')), display(Markdown(f'**“y” {(y.shape)}:** <p> *Holds all selected output features*'))
display(y)

display(Markdown('***')), display(Markdown(f'**“frame” {(frame.shape)}:** <p> *Holds all selected input and output features and their lags*'))
display(frame)


***

**“X” (34872, 2):** <p> *Holds all selected input features*

Unnamed: 0_level_0,ID3,MCP
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01 00:00:00+00:00,24.2549,38.50
2015-01-01 01:00:00+00:00,28.6447,38.22
2015-01-01 02:00:00+00:00,28.3007,35.60
2015-01-01 03:00:00+00:00,28.0543,33.00
2015-01-01 04:00:00+00:00,28.1546,27.41
...,...,...
2018-12-31 19:00:00+00:00,68.9794,58.28
2018-12-31 20:00:00+00:00,51.6558,50.01
2018-12-31 21:00:00+00:00,50.4283,47.48
2018-12-31 22:00:00+00:00,50.1262,50.95


***

**“y” (34872, 1):** <p> *Holds all selected output features*

Unnamed: 0_level_0,ID3
Timestamp,Unnamed: 1_level_1
2015-01-01 00:00:00+00:00,24.2549
2015-01-01 01:00:00+00:00,28.6447
2015-01-01 02:00:00+00:00,28.3007
2015-01-01 03:00:00+00:00,28.0543
2015-01-01 04:00:00+00:00,28.1546
...,...
2018-12-31 19:00:00+00:00,68.9794
2018-12-31 20:00:00+00:00,51.6558
2018-12-31 21:00:00+00:00,50.4283
2018-12-31 22:00:00+00:00,50.1262


***

**“frame” (34800, 95):** <p> *Holds all selected input and output features and their lags*

Set,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP
Lag,0,-72,-71,-70,-69,-68,-67,-66,-65,-64,...,-9,-8,-7,-6,-5,-4,-3,-2,-1,0
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2015-01-04 00:00:00+00:00,21.838,24.2549,28.6447,28.3007,28.0543,28.1546,27.5,26.7808,25.8523,24.4,...,39.94,42.44,53.1,49.82,47.62,43.79,42.44,42.03,40.91,36.26
2015-01-04 01:00:00+00:00,20.2474,28.6447,28.3007,28.0543,28.1546,27.5,26.7808,25.8523,24.4,25.625,...,42.44,53.1,49.82,47.62,43.79,42.44,42.03,40.91,36.26,32.28
2015-01-04 02:00:00+00:00,19.5168,28.3007,28.0543,28.1546,27.5,26.7808,25.8523,24.4,25.625,29.1,...,53.1,49.82,47.62,43.79,42.44,42.03,40.91,36.26,32.28,27.63
2015-01-04 03:00:00+00:00,20.0229,28.0543,28.1546,27.5,26.7808,25.8523,24.4,25.625,29.1,26.3571,...,49.82,47.62,43.79,42.44,42.03,40.91,36.26,32.28,27.63,27.03
2015-01-04 04:00:00+00:00,18.8012,28.1546,27.5,26.7808,25.8523,24.4,25.625,29.1,26.3571,26.0735,...,47.62,43.79,42.44,42.03,40.91,36.26,32.28,27.63,27.03,27.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 19:00:00+00:00,68.9794,56.2728,54.3653,52.2032,50.0651,50.0817,49.5975,51.4901,45.1253,43.9398,...,62.64,64.56,65.01,63.91,59.68,56.6,63.28,68.01,66.98,58.28
2018-12-31 20:00:00+00:00,51.6558,54.3653,52.2032,50.0651,50.0817,49.5975,51.4901,45.1253,43.9398,44.2858,...,64.56,65.01,63.91,59.68,56.6,63.28,68.01,66.98,58.28,50.01
2018-12-31 21:00:00+00:00,50.4283,52.2032,50.0651,50.0817,49.5975,51.4901,45.1253,43.9398,44.2858,44.5556,...,65.01,63.91,59.68,56.6,63.28,68.01,66.98,58.28,50.01,47.48
2018-12-31 22:00:00+00:00,50.1262,50.0651,50.0817,49.5975,51.4901,45.1253,43.9398,44.2858,44.5556,44.1,...,63.91,59.68,56.6,63.28,68.01,66.98,58.28,50.01,47.48,50.95


## Split train and test

In [6]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X_train, X_test, y_train, y_test = train_test_split     (
                                                        frame['X'],
                                                        frame['y'],
                                                        test_size = 2/10,
                                                        random_state = 0,
                                                        shuffle = False
                                                        )

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X_train, X_val, y_train, y_val = train_test_split       (
                                                        X_train,
                                                        y_train,
                                                        test_size = 2/8,
                                                        random_state = 0,
                                                        shuffle = False
                                                        )

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

frame_train = pd.merge      (
                            pd.concat([y_train], keys = ['y'], names = ['Set'], axis = 1),
                            pd.concat([X_train], keys = ['X'], names = ['Set'], axis = 1),
                            left_index = True, right_index = True,
                            )

display(Markdown('***')), display(Markdown(f'**“frame_train” {(frame_train.shape)}:** <p> *Holds all input and output features of the train set*'))
display(frame_train)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

frame_test = pd.merge       (
                            pd.concat([y_test], keys = ['y'], names = ['Set'], axis = 1),
                            pd.concat([X_test], keys = ['X'], names = ['Set'], axis = 1),
                            left_index = True, right_index = True,
                            )
display(Markdown('***')), display(Markdown(f'**“frame_test” {(frame_train.shape)}:** <p> *Holds all input and output features of the test set*'))
display(frame_test)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

frame_val = pd.merge        (
                            pd.concat([y_val], keys = ['y'], names = ['Set'], axis = 1),
                            pd.concat([X_val], keys = ['X'], names = ['Set'], axis = 1),
                            left_index = True, right_index = True,
                            )
display(Markdown('***')), display(Markdown(f'**“frame_val” {(frame_val.shape)}:** <p> *Holds all input and output features of the validation set*'))
display(frame_val)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #


***

**“frame_train” (20880, 95):** <p> *Holds all input and output features of the train set*

Set,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP
Lag,0,-72,-71,-70,-69,-68,-67,-66,-65,-64,...,-9,-8,-7,-6,-5,-4,-3,-2,-1,0
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2015-01-04 00:00:00+00:00,21.838,24.2549,28.6447,28.3007,28.0543,28.1546,27.5,26.7808,25.8523,24.4,...,39.94,42.44,53.1,49.82,47.62,43.79,42.44,42.03,40.91,36.26
2015-01-04 01:00:00+00:00,20.2474,28.6447,28.3007,28.0543,28.1546,27.5,26.7808,25.8523,24.4,25.625,...,42.44,53.1,49.82,47.62,43.79,42.44,42.03,40.91,36.26,32.28
2015-01-04 02:00:00+00:00,19.5168,28.3007,28.0543,28.1546,27.5,26.7808,25.8523,24.4,25.625,29.1,...,53.1,49.82,47.62,43.79,42.44,42.03,40.91,36.26,32.28,27.63
2015-01-04 03:00:00+00:00,20.0229,28.0543,28.1546,27.5,26.7808,25.8523,24.4,25.625,29.1,26.3571,...,49.82,47.62,43.79,42.44,42.03,40.91,36.26,32.28,27.63,27.03
2015-01-04 04:00:00+00:00,18.8012,28.1546,27.5,26.7808,25.8523,24.4,25.625,29.1,26.3571,26.0735,...,47.62,43.79,42.44,42.03,40.91,36.26,32.28,27.63,27.03,27.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-05-27 19:00:00+00:00,35.697,41.3179,40.8523,35.3517,30.8615,28.1405,31.7037,30.681,30.726,24.6634,...,35.34,30,25.19,25.13,26.4,24.68,25.1,27.31,34.3,37.13
2017-05-27 20:00:00+00:00,30.1949,40.8523,35.3517,30.8615,28.1405,31.7037,30.681,30.726,24.6634,24.0226,...,30,25.19,25.13,26.4,24.68,25.1,27.31,34.3,37.13,35.97
2017-05-27 21:00:00+00:00,25.9212,35.3517,30.8615,28.1405,31.7037,30.681,30.726,24.6634,24.0226,22.9759,...,25.19,25.13,26.4,24.68,25.1,27.31,34.3,37.13,35.97,34.34
2017-05-27 22:00:00+00:00,26.1836,30.8615,28.1405,31.7037,30.681,30.726,24.6634,24.0226,22.9759,23.3225,...,25.13,26.4,24.68,25.1,27.31,34.3,37.13,35.97,34.34,45.64


***

**“frame_test” (20880, 95):** <p> *Holds all input and output features of the test set*

Set,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP
Lag,0,-72,-71,-70,-69,-68,-67,-66,-65,-64,...,-9,-8,-7,-6,-5,-4,-3,-2,-1,0
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2018-03-15 00:00:00+00:00,17.0579,31.7753,32.4653,33.8847,34.1606,39.3655,41.4889,44.6716,49.4267,50.1638,...,49.81,54.02,57.12,62.81,60.94,48.86,47.44,47.4,35,25.36
2018-03-15 01:00:00+00:00,10.9428,32.4653,33.8847,34.1606,39.3655,41.4889,44.6716,49.4267,50.1638,48.5884,...,54.02,57.12,62.81,60.94,48.86,47.44,47.4,35,25.36,20.41
2018-03-15 02:00:00+00:00,10.2218,33.8847,34.1606,39.3655,41.4889,44.6716,49.4267,50.1638,48.5884,47.5879,...,57.12,62.81,60.94,48.86,47.44,47.4,35,25.36,20.41,18.42
2018-03-15 03:00:00+00:00,10.789,34.1606,39.3655,41.4889,44.6716,49.4267,50.1638,48.5884,47.5879,47.6674,...,62.81,60.94,48.86,47.44,47.4,35,25.36,20.41,18.42,14.06
2018-03-15 04:00:00+00:00,22.0832,39.3655,41.4889,44.6716,49.4267,50.1638,48.5884,47.5879,47.6674,48.7564,...,60.94,48.86,47.44,47.4,35,25.36,20.41,18.42,14.06,12.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 19:00:00+00:00,68.9794,56.2728,54.3653,52.2032,50.0651,50.0817,49.5975,51.4901,45.1253,43.9398,...,62.64,64.56,65.01,63.91,59.68,56.6,63.28,68.01,66.98,58.28
2018-12-31 20:00:00+00:00,51.6558,54.3653,52.2032,50.0651,50.0817,49.5975,51.4901,45.1253,43.9398,44.2858,...,64.56,65.01,63.91,59.68,56.6,63.28,68.01,66.98,58.28,50.01
2018-12-31 21:00:00+00:00,50.4283,52.2032,50.0651,50.0817,49.5975,51.4901,45.1253,43.9398,44.2858,44.5556,...,65.01,63.91,59.68,56.6,63.28,68.01,66.98,58.28,50.01,47.48
2018-12-31 22:00:00+00:00,50.1262,50.0651,50.0817,49.5975,51.4901,45.1253,43.9398,44.2858,44.5556,44.1,...,63.91,59.68,56.6,63.28,68.01,66.98,58.28,50.01,47.48,50.95


***

**“frame_val” (6960, 95):** <p> *Holds all input and output features of the validation set*

Set,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP
Lag,0,-72,-71,-70,-69,-68,-67,-66,-65,-64,...,-9,-8,-7,-6,-5,-4,-3,-2,-1,0
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2017-05-28 00:00:00+00:00,24.7119,31.7037,30.681,30.726,24.6634,24.0226,22.9759,23.3225,20.9981,20.1191,...,24.68,25.1,27.31,34.3,37.13,35.97,34.34,45.64,40.1,44
2017-05-28 01:00:00+00:00,20.0886,30.681,30.726,24.6634,24.0226,22.9759,23.3225,20.9981,20.1191,20.1793,...,25.1,27.31,34.3,37.13,35.97,34.34,45.64,40.1,44,33.37
2017-05-28 02:00:00+00:00,19.6431,30.726,24.6634,24.0226,22.9759,23.3225,20.9981,20.1191,20.1793,22.1358,...,27.31,34.3,37.13,35.97,34.34,45.64,40.1,44,33.37,25.71
2017-05-28 03:00:00+00:00,15.6021,24.6634,24.0226,22.9759,23.3225,20.9981,20.1191,20.1793,22.1358,23.9598,...,34.3,37.13,35.97,34.34,45.64,40.1,44,33.37,25.71,25.6
2017-05-28 04:00:00+00:00,13.9775,24.0226,22.9759,23.3225,20.9981,20.1191,20.1793,22.1358,23.9598,23.6334,...,37.13,35.97,34.34,45.64,40.1,44,33.37,25.71,25.6,25.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-03-14 19:00:00+00:00,38.4234,45.5119,43.8196,35.6613,33.8746,32.4164,31.7753,32.4653,33.8847,34.1606,...,57.98,53.83,49.58,48.45,49.82,49.81,54.02,57.12,62.81,60.94
2018-03-14 20:00:00+00:00,35.9622,43.8196,35.6613,33.8746,32.4164,31.7753,32.4653,33.8847,34.1606,39.3655,...,53.83,49.58,48.45,49.82,49.81,54.02,57.12,62.81,60.94,48.86
2018-03-14 21:00:00+00:00,27.4425,35.6613,33.8746,32.4164,31.7753,32.4653,33.8847,34.1606,39.3655,41.4889,...,49.58,48.45,49.82,49.81,54.02,57.12,62.81,60.94,48.86,47.44
2018-03-14 22:00:00+00:00,24.1728,33.8746,32.4164,31.7753,32.4653,33.8847,34.1606,39.3655,41.4889,44.6716,...,48.45,49.82,49.81,54.02,57.12,62.81,60.94,48.86,47.44,47.4


## Scaling

In [7]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

frame_train_unscaled = frame_train
frame_test_unscaled = frame_test
frame_val_unscaled = frame_val

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

y_scaler = StandardScaler()
y_scaler.fit(frame_train['y'])

frame_train['y'] = y_scaler.transform(frame_train['y'])
frame_test['y'] = y_scaler.transform(frame_test['y'])
frame_val['y'] = y_scaler.transform(frame_val['y'])

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X_scaler = StandardScaler()
X_scaler.fit(frame_train['X'])

frame_train['X'] = X_scaler.transform(frame_train['X'])
frame_test['X'] = X_scaler.transform(frame_test['X'])
frame_val['X'] = X_scaler.transform(frame_val['X'])

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

display(Markdown('***')), display(Markdown(f'**“frame_train” {(frame_train.shape)}:** <p> *Holds all (scaled) input and output features of the train set*'))
display(frame_train)

display(Markdown('***')), display(Markdown(f'**“frame_test” {(frame_test.shape)}:** <p> *Holds all (scaled) input and output features of the train set*'))
display(frame_test)

display(Markdown('***')), display(Markdown(f'**“frame_val” {(frame_val.shape)}:** <p> *Holds all (scaled) input and output features of the train set*'))
display(frame_val)


***

**“frame_train” (20880, 95):** <p> *Holds all (scaled) input and output features of the train set*

Set,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP
Lag,0,-72,-71,-70,-69,-68,-67,-66,-65,-64,...,-9,-8,-7,-6,-5,-4,-3,-2,-1,0
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2015-01-04 00:00:00+00:00,-1.062439,-0.912976,-0.637497,-0.659101,-0.674570,-0.668260,-0.709326,-0.754441,-0.812699,-0.903817,...,0.258138,0.463220,1.337506,1.068645,0.888302,0.574239,0.463552,0.429959,0.338083,-0.043289
2015-01-04 01:00:00+00:00,-1.162249,-0.637465,-0.659092,-0.674559,-0.668275,-0.709346,-0.754463,-0.812717,-0.903844,-0.826937,...,0.463170,1.337460,1.068513,0.888218,0.574187,0.463518,0.429925,0.338100,-0.043292,-0.369714
2015-01-04 02:00:00+00:00,-1.208088,-0.659060,-0.674551,-0.668265,-0.709362,-0.754483,-0.812740,-0.903862,-0.826963,-0.608849,...,1.337430,1.068463,0.888091,0.574111,0.463468,0.429891,0.338067,-0.043279,-0.369717,-0.751090
2015-01-04 03:00:00+00:00,-1.176330,-0.674518,-0.668256,-0.709351,-0.754499,-0.812760,-0.903887,-0.826981,-0.608872,-0.780988,...,1.068427,0.888038,0.573994,0.463395,0.429842,0.338033,-0.043310,-0.369706,-0.751092,-0.800300
2015-01-04 04:00:00+00:00,-1.252990,-0.668224,-0.709343,-0.754489,-0.812777,-0.903908,-0.827004,-0.608888,-0.781014,-0.798787,...,0.887998,0.573935,0.463280,0.429770,0.337986,-0.043341,-0.369736,-0.751084,-0.800302,-0.799480
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-05-27 19:00:00+00:00,-0.192823,0.157918,0.128674,-0.216560,-0.498388,-0.669150,-0.445494,-0.509666,-0.506822,-0.887287,...,-0.119123,-0.557000,-0.951390,-0.956237,-0.852043,-0.993085,-0.958615,-0.777330,-0.204044,0.028065
2017-05-27 20:00:00+00:00,-0.538070,0.128695,-0.216553,-0.498379,-0.669165,-0.445511,-0.509686,-0.506837,-0.887314,-0.927499,...,-0.557072,-0.951474,-0.956311,-0.852081,-0.993107,-0.958638,-0.777358,-0.204032,0.028062,-0.067074
2017-05-27 21:00:00+00:00,-0.806230,-0.216528,-0.498371,-0.669155,-0.445524,-0.509703,-0.506856,-0.887332,-0.927527,-0.993192,...,-0.951555,-0.956395,-0.852158,-0.993142,-0.958661,-0.777383,-0.204063,0.028076,-0.067077,-0.200761
2017-05-27 22:00:00+00:00,-0.789766,-0.498341,-0.669147,-0.445515,-0.509717,-0.506874,-0.887356,-0.927546,-0.993221,-0.971441,...,-0.956476,-0.852241,-0.993215,-0.958697,-0.777410,-0.204092,0.028044,-0.067063,-0.200764,0.726024


***

**“frame_test” (6960, 95):** <p> *Holds all (scaled) input and output features of the train set*

Set,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP
Lag,0,-72,-71,-70,-69,-68,-67,-66,-65,-64,...,-9,-8,-7,-6,-5,-4,-3,-2,-1,0
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2018-03-15 00:00:00+00:00,-1.362377,-0.440985,-0.397710,-0.308635,-0.291328,0.035358,0.168636,0.368395,0.666832,0.713097,...,1.067607,1.412910,1.667185,2.133983,1.980734,0.990060,0.873635,0.870390,-0.146633,-0.937267
2018-03-15 01:00:00+00:00,-1.746088,-0.397681,-0.308628,-0.291319,0.035349,0.168626,0.368386,0.666828,0.713092,0.614227,...,1.412882,1.667145,2.133822,1.980621,0.990000,0.873597,0.870354,-0.146620,-0.937269,-1.343248
2018-03-15 02:00:00+00:00,-1.791327,-0.308601,-0.291312,0.035356,0.168617,0.368377,0.666822,0.713088,0.614221,0.551438,...,1.667122,2.133789,1.980463,0.989913,0.873540,0.870317,-0.146651,-0.937263,-1.343250,-1.506460
2018-03-15 03:00:00+00:00,-1.755736,-0.291285,0.035362,0.168623,0.368370,0.666816,0.713083,0.614216,0.551431,0.556426,...,2.133777,1.980428,0.989784,0.873456,0.870259,-0.146681,-0.937291,-1.343246,-1.506462,-1.864051
2018-03-15 04:00:00+00:00,-1.047054,0.035384,0.168629,0.368376,0.666811,0.713077,0.614209,0.551425,0.556420,0.624767,...,1.980412,0.989732,0.873330,0.870175,-0.146719,-0.937314,-1.343273,-1.506460,-1.864053,-2.024803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 19:00:00+00:00,1.895559,1.096502,0.976774,0.841074,0.706880,0.707923,0.677539,0.796326,0.396875,0.322483,...,2.119834,2.277309,2.314243,2.224197,1.877396,1.624863,2.172777,2.560758,2.476246,1.762710
2018-12-31 20:00:00+00:00,0.808549,0.976782,0.841077,0.706885,0.707919,0.677533,0.796322,0.396868,0.322473,0.344197,...,2.277299,2.314214,2.224032,1.877285,1.624791,2.172730,2.560716,2.476281,1.762705,1.084435
2018-12-31 21:00:00+00:00,0.731525,0.841087,0.706888,0.707923,0.677528,0.796317,0.396859,0.322466,0.344187,0.361129,...,2.314205,2.224002,1.877131,1.624688,2.172647,2.560665,2.476238,1.762734,1.084431,0.876934
2018-12-31 22:00:00+00:00,0.712568,0.706900,0.707927,0.677533,0.796313,0.396851,0.322456,0.344180,0.361120,0.332538,...,2.223991,1.877094,1.624540,2.172529,2.560575,2.476188,1.762694,1.084454,0.876930,1.161530


***

**“frame_val” (6960, 95):** <p> *Holds all (scaled) input and output features of the train set*

Set,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP
Lag,0,-72,-71,-70,-69,-68,-67,-66,-65,-64,...,-9,-8,-7,-6,-5,-4,-3,-2,-1,0
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2017-05-28 00:00:00+00:00,-0.882112,-0.445478,-0.509700,-0.506879,-0.887395,-0.927592,-0.993265,-0.971489,-1.117350,-1.172481,...,-0.993382,-0.958855,-0.777529,-0.204185,0.027972,-0.067125,-0.200782,0.726040,0.271650,0.591517
2017-05-28 01:00:00+00:00,-1.172213,-0.509670,-0.506871,-0.887384,-0.927609,-0.993287,-0.971514,-1.117370,-1.172512,-1.168701,...,-0.958936,-0.777610,-0.204280,0.027910,-0.067165,-0.200811,0.726005,0.271666,0.591513,-0.280316
2017-05-28 02:00:00+00:00,-1.200162,-0.506841,-0.887374,-0.927598,-0.993306,-0.971536,-1.117397,-1.172533,-1.168732,-1.045919,...,-0.777688,-0.204352,0.027808,-0.067224,-0.200849,0.725969,0.271633,0.591532,-0.280319,-0.908561
2017-05-28 03:00:00+00:00,-1.453726,-0.887339,-0.927589,-0.993294,-0.971554,-1.117420,-1.172560,-1.168753,-1.045948,-0.931442,...,-0.204416,0.027740,-0.067324,-0.200904,0.725914,0.271601,0.591498,-0.280307,-0.908564,-0.917583
2017-05-28 04:00:00+00:00,-1.555668,-0.927552,-0.993284,-0.971542,-1.117439,-1.172584,-1.168780,-1.045968,-0.931470,-0.951930,...,0.027681,-0.067393,-0.201000,0.725834,0.271554,0.591463,-0.280338,-0.908557,-0.917585,-0.917583
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-03-14 19:00:00+00:00,-0.021753,0.421136,0.314905,-0.197131,-0.309276,-0.400782,-0.441001,-0.397678,-0.308585,-0.291251,...,1.737653,1.397328,1.048831,0.956288,1.068734,1.067975,1.413304,1.667594,2.134238,1.980873
2018-03-14 20:00:00+00:00,-0.176188,0.314923,-0.197124,-0.309267,-0.400795,-0.441017,-0.397696,-0.308599,-0.291270,0.035408,...,1.397299,1.048780,0.956160,1.068645,1.067914,1.413262,1.667555,2.134270,1.980868,0.990116
2018-03-14 21:00:00+00:00,-0.710770,-0.197099,-0.309260,-0.400786,-0.441031,-0.397713,-0.308616,-0.291283,0.035394,0.168669,...,1.048744,0.956108,1.068513,1.067825,1.413194,1.667512,2.134229,1.980899,0.990112,0.873653
2018-03-14 22:00:00+00:00,-0.915941,-0.309233,-0.400779,-0.441022,-0.397726,-0.308631,-0.291300,0.035384,0.168657,0.368412,...,0.956069,1.068463,1.067693,1.413096,1.667439,2.134182,1.980858,0.990134,0.873649,0.870372


## Convert to two dimensional

In [8]:

def two_dim_tensor(frame_train, frame_test, frame_val):

    X_train = frame_train['X']
    y_train = frame_train['y']

    X_test = frame_test['X']
    y_test = frame_test['y']

    X_val = frame_val['X']
    y_val = frame_val['y']

    return X_train, y_train, X_test, y_test, X_val, y_val


## Convert to three dimensional

In [9]:

def three_dim_tensor(frame_train, frame_test, frame_val):

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    X_train = list()

    for index, row in frame_train['X'].iterrows():
        X_train.extend(row.tolist())

    X_train = np.array(X_train)

    X_train = X_train.reshape((len(frame_train), len(lag), len(feature_X)))

    y_train = frame_train['y']

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    X_test = list()

    for index, row in frame_test['X'].iterrows():
        X_test.extend(row.tolist())

    X_test = np.array(X_test)

    X_test = X_test.reshape((len(frame_test), len(lag), len(feature_X)))

    y_test = frame_test['y']

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    X_val = list()

    for index, row in frame_val['X'].iterrows():
        X_val.extend(row.tolist())

    X_val = np.array(X_val)

    X_val = X_val.reshape((len(frame_val), len(lag), len(feature_X)))

    y_val = frame_val['y']

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    return X_train, y_train, X_test, y_test, X_val, y_val
    

In [10]:

def plotModelResults(model, X_train=X_train, X_test=X_test, plot_intervals=False, plot_anomalies=False):
    """
        Plots modelled vs fact values, prediction intervals and anomalies
    
    """

    def mean_absolute_percentage_error(y_true, y_pred): 
        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    
    prediction = model.predict(X_test)
    
    plt.figure(figsize=(15, 7))
    plt.plot(prediction, "g", label="prediction", linewidth=2.0)
    plt.plot(frame_test_unscaled['y'].values, label="actual", linewidth=2.0)
    
    if plot_intervals:
        cv = cross_val_score(model, X_train, y_train, 
                                    cv=tscv, 
                                    scoring="neg_mean_absolute_error")
        mae = cv.mean() * (-1)
        deviation = cv.std()
        
        scale = 1.96
        lower = prediction - (mae + scale * deviation)
        upper = prediction + (mae + scale * deviation)
        
        plt.plot(lower, "r--", label="upper bond / lower bond", alpha=0.5)
        plt.plot(upper, "r--", alpha=0.5)
        
        if plot_anomalies:
            anomalies = np.array([np.NaN]*len(y_test))
            anomalies[y_test<lower] = y_test[y_test<lower]
            anomalies[y_test>upper] = y_test[y_test>upper]
            plt.plot(anomalies, "o", markersize=10, label = "Anomalies")
    
    error = mean_absolute_percentage_error(prediction, y_test.values)
    # plt.title("Mean absolute percentage error {0:.2f}%".format(error))
    plt.legend(loc="best")
    plt.tight_layout()
    plt.grid(True);
    
def plotCoefficients(model):
    """
        Plots sorted coefficient values of the model
    """
    
    coefs = pd.DataFrame(np.transpose(model.coef_), X_train.columns)
    coefs.columns = ["coef"]
    coefs["abs"] = coefs.coef.apply(np.abs)
    coefs = coefs.sort_values(by="abs", ascending=False).drop(["abs"], axis=1)
    
    plt.figure(figsize=(15, 7))
    coefs.coef.plot(kind='bar')
    plt.grid(True, axis='y')
    plt.hlines(y=0, xmin=0, xmax=len(coefs), linestyles='dashed');
    

# Different models

## Linear regression

In [11]:

def model_LINREG():

    model = LinearRegression()

    return model


## LASSO regression

In [12]:

def model_LASSO():

    # for time-series cross-validation set 5 folds 
    tscv = TimeSeriesSplit(n_splits = 5)

    model = LassoCV(cv = tscv)

    return model


# Learning

## Create model

In [13]:

LATENT_DIM = 5
BATCH_SIZE = 50
EPOCHS = 10

HORIZON = len(lagged_y)


In [14]:

def model_GRU():

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    model = Sequential()

    model.add(GRU(LATENT_DIM, input_shape = (len(lag), len(feature_X))))

    model.add(RepeatVector(HORIZON))

    model.add(GRU(LATENT_DIM, return_sequences = True))

    model.add(TimeDistributed(Dense(1)))

    model.add(Flatten())

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #
    
    return model


In [15]:

def model_MLP():

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    model = Sequential()

    model.add(Dense(200, activation = 'relu', kernel_initializer = 'he_normal', input_shape = (len(frame_train['X'].columns),)))

    model.add(Dense(100, activation = 'relu', kernel_initializer = 'he_normal'))

    model.add(Dense(HORIZON))

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    return model


In [16]:

def learn(model):

    from keras.callbacks import EarlyStopping

    earlystop = EarlyStopping       (
                                    monitor = 'val_loss',
                                    mode = 'min', 
                                    patience = 10,
                                    verbose = 1,
                                    )

    history = model.fit     (    
                            X_train,
                            y_train,
                            batch_size = BATCH_SIZE,
                            epochs = EPOCHS,
                            validation_data =   ( 
                                                X_val,
                                                y_val,
                                                ),
                            callbacks = [earlystop],
                            verbose = 1,
                            )

    # summarize history for accuracy
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc = 'upper left')
    plt.show()

    # summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc = 'upper left')
    plt.show()

    return model, history


In [17]:

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import TimeSeriesSplit

from keras.models import Sequential
from keras.layers import Dense, GRU
from keras.optimizers import SGD, Adam
from keras.utils.vis_utils import plot_model
from keras.layers import GRU, Dense, RepeatVector, TimeDistributed, Flatten

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

model_sel = input(r'Enter model: ')

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

if model_sel == 'GRU':

    X_train, y_train, X_test, y_test, X_val, y_val = three_dim_tensor(frame_train, frame_test, frame_val)

    model = model_GRU()

    model.compile(optimizer = 'Adam', loss = 'mse', metrics = ['accuracy'])

    model.summary()

    model, history = learn(model)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

if model_sel == 'MLP':

    X_train, y_train, X_test, y_test, X_val, y_val = two_dim_tensor(frame_train, frame_test, frame_val)

    model = model_MLP()

    model.compile(optimizer = 'Adam', loss = 'mse', metrics = ['accuracy'])

    model.summary()

    model, history = learn(model)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

if model_sel == 'LINREG':

    X_train, y_train, X_test, y_test, X_val, y_val = two_dim_tensor(frame_train, frame_test, frame_val)

    model = model_LINREG()

    model.fit(X_train, y_train)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

if model_sel == 'LASSO':

    X_train, y_train, X_test, y_test, X_val, y_val = two_dim_tensor(frame_train, frame_test, frame_val)

    model = model_LASSO()

    model.fit(X_train, y_train)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #


In [26]:
display(Markdown('***')), display(Markdown(f'**“X_train” {(X_train.shape)}:** <p> *Seperate dataset for training set to rule-out data leakage*'))
display(X_train)

display(Markdown('***')), display(Markdown(f'**“X_test” {(X_test.shape)}:** <p> *Seperate dataset for test set to rule-out data leakage*'))
display(X_test)

display(Markdown('***')), display(Markdown(f'**“X_val” {(X_val.shape)}:** <p> *Seperate dataset for validation set to rule-out data leakage*'))
display(X_val)

***

**“X_train” (20880, 94):** <p> *Seperate dataset for training set to rule-out data leakage*

Feature,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP
Lag,-72,-71,-70,-69,-68,-67,-66,-65,-64,-63,...,-9,-8,-7,-6,-5,-4,-3,-2,-1,0
Timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2015-01-04 00:00:00+00:00,-0.912976,-0.637497,-0.659101,-0.674570,-0.668260,-0.709326,-0.754441,-0.812699,-0.903817,-0.826913,...,0.258138,0.463220,1.337506,1.068645,0.888302,0.574239,0.463552,0.429959,0.338083,-0.043289
2015-01-04 01:00:00+00:00,-0.637465,-0.659092,-0.674559,-0.668275,-0.709346,-0.754463,-0.812717,-0.903844,-0.826937,-0.608828,...,0.463170,1.337460,1.068513,0.888218,0.574187,0.463518,0.429925,0.338100,-0.043292,-0.369714
2015-01-04 02:00:00+00:00,-0.659060,-0.674551,-0.668265,-0.709362,-0.754483,-0.812740,-0.903862,-0.826963,-0.608849,-0.780965,...,1.337430,1.068463,0.888091,0.574111,0.463468,0.429891,0.338067,-0.043279,-0.369717,-0.751090
2015-01-04 03:00:00+00:00,-0.674518,-0.668256,-0.709351,-0.754499,-0.812760,-0.903887,-0.826981,-0.608872,-0.780988,-0.798764,...,1.068427,0.888038,0.573994,0.463395,0.429842,0.338033,-0.043310,-0.369706,-0.751092,-0.800300
2015-01-04 04:00:00+00:00,-0.668224,-0.709343,-0.754489,-0.812777,-0.903908,-0.827004,-0.608888,-0.781014,-0.798787,-0.706827,...,0.887998,0.573935,0.463280,0.429770,0.337986,-0.043341,-0.369736,-0.751084,-0.800302,-0.799480
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-05-27 19:00:00+00:00,0.157918,0.128674,-0.216560,-0.498388,-0.669150,-0.445494,-0.509666,-0.506822,-0.887287,-0.927474,...,-0.119123,-0.557000,-0.951390,-0.956237,-0.852043,-0.993085,-0.958615,-0.777330,-0.204044,0.028065
2017-05-27 20:00:00+00:00,0.128695,-0.216553,-0.498379,-0.669165,-0.445511,-0.509686,-0.506837,-0.887314,-0.927499,-0.993166,...,-0.557072,-0.951474,-0.956311,-0.852081,-0.993107,-0.958638,-0.777358,-0.204032,0.028062,-0.067074
2017-05-27 21:00:00+00:00,-0.216528,-0.498371,-0.669155,-0.445524,-0.509703,-0.506856,-0.887332,-0.927527,-0.993192,-0.971416,...,-0.951555,-0.956395,-0.852158,-0.993142,-0.958661,-0.777383,-0.204063,0.028076,-0.067077,-0.200761
2017-05-27 22:00:00+00:00,-0.498341,-0.669147,-0.445515,-0.509717,-0.506874,-0.887356,-0.927546,-0.993221,-0.971441,-1.117292,...,-0.956476,-0.852241,-0.993215,-0.958697,-0.777410,-0.204092,0.028044,-0.067063,-0.200764,0.726024


***

**“X_test” (6960, 94):** <p> *Seperate dataset for test set to rule-out data leakage*

Feature,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP
Lag,-72,-71,-70,-69,-68,-67,-66,-65,-64,-63,...,-9,-8,-7,-6,-5,-4,-3,-2,-1,0
Timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-03-15 00:00:00+00:00,-0.440985,-0.397710,-0.308635,-0.291328,0.035358,0.168636,0.368395,0.666832,0.713097,0.614231,...,1.067607,1.412910,1.667185,2.133983,1.980734,0.990060,0.873635,0.870390,-0.146633,-0.937267
2018-03-15 01:00:00+00:00,-0.397681,-0.308628,-0.291319,0.035349,0.168626,0.368386,0.666828,0.713092,0.614227,0.551443,...,1.412882,1.667145,2.133822,1.980621,0.990000,0.873597,0.870354,-0.146620,-0.937269,-1.343248
2018-03-15 02:00:00+00:00,-0.308601,-0.291312,0.035356,0.168617,0.368377,0.666822,0.713088,0.614221,0.551438,0.556432,...,1.667122,2.133789,1.980463,0.989913,0.873540,0.870317,-0.146651,-0.937263,-1.343250,-1.506460
2018-03-15 03:00:00+00:00,-0.291285,0.035362,0.168623,0.368370,0.666816,0.713083,0.614216,0.551431,0.556426,0.624771,...,2.133777,1.980428,0.989784,0.873456,0.870259,-0.146681,-0.937291,-1.343246,-1.506462,-1.864051
2018-03-15 04:00:00+00:00,0.035384,0.168629,0.368376,0.666811,0.713077,0.614209,0.551425,0.556420,0.624767,1.066106,...,1.980412,0.989732,0.873330,0.870175,-0.146719,-0.937314,-1.343273,-1.506460,-1.864053,-2.024803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 19:00:00+00:00,1.096502,0.976774,0.841074,0.706880,0.707923,0.677539,0.796326,0.396875,0.322483,0.344205,...,2.119834,2.277309,2.314243,2.224197,1.877396,1.624863,2.172777,2.560758,2.476246,1.762710
2018-12-31 20:00:00+00:00,0.976782,0.841077,0.706885,0.707919,0.677533,0.796322,0.396868,0.322473,0.344197,0.361137,...,2.277299,2.314214,2.224032,1.877285,1.624791,2.172730,2.560716,2.476281,1.762705,1.084435
2018-12-31 21:00:00+00:00,0.841087,0.706888,0.707923,0.677528,0.796317,0.396859,0.322466,0.344187,0.361129,0.332547,...,2.314205,2.224002,1.877131,1.624688,2.172647,2.560665,2.476238,1.762734,1.084431,0.876934
2018-12-31 22:00:00+00:00,0.706900,0.707927,0.677533,0.796313,0.396851,0.322456,0.344180,0.361120,0.332538,0.389029,...,2.223991,1.877094,1.624540,2.172529,2.560575,2.476188,1.762694,1.084454,0.876930,1.161530


***

**“X_val” (6960, 94):** <p> *Seperate dataset for validation set to rule-out data leakage*

Feature,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP
Lag,-72,-71,-70,-69,-68,-67,-66,-65,-64,-63,...,-9,-8,-7,-6,-5,-4,-3,-2,-1,0
Timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-05-28 00:00:00+00:00,-0.445478,-0.509700,-0.506879,-0.887395,-0.927592,-0.993265,-0.971489,-1.117350,-1.172481,-1.168673,...,-0.993382,-0.958855,-0.777529,-0.204185,0.027972,-0.067125,-0.200782,0.726040,0.271650,0.591517
2017-05-28 01:00:00+00:00,-0.509670,-0.506871,-0.887384,-0.927609,-0.993287,-0.971514,-1.117370,-1.172512,-1.168701,-1.045892,...,-0.958936,-0.777610,-0.204280,0.027910,-0.067165,-0.200811,0.726005,0.271666,0.591513,-0.280316
2017-05-28 02:00:00+00:00,-0.506841,-0.887374,-0.927598,-0.993306,-0.971536,-1.117397,-1.172533,-1.168732,-1.045919,-0.931417,...,-0.777688,-0.204352,0.027808,-0.067224,-0.200849,0.725969,0.271633,0.591532,-0.280319,-0.908561
2017-05-28 03:00:00+00:00,-0.887339,-0.927589,-0.993294,-0.971554,-1.117420,-1.172560,-1.168753,-1.045948,-0.931442,-0.951905,...,-0.204416,0.027740,-0.067324,-0.200904,0.725914,0.271601,0.591498,-0.280307,-0.908564,-0.917583
2017-05-28 04:00:00+00:00,-0.927552,-0.993284,-0.971542,-1.117439,-1.172584,-1.168780,-1.045968,-0.931470,-0.951930,-0.928360,...,0.027681,-0.067393,-0.201000,0.725834,0.271554,0.591463,-0.280338,-0.908557,-0.917585,-0.917583
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-03-14 19:00:00+00:00,0.421136,0.314905,-0.197131,-0.309276,-0.400782,-0.441001,-0.397678,-0.308585,-0.291251,0.035420,...,1.737653,1.397328,1.048831,0.956288,1.068734,1.067975,1.413304,1.667594,2.134238,1.980873
2018-03-14 20:00:00+00:00,0.314923,-0.197124,-0.309267,-0.400795,-0.441017,-0.397696,-0.308599,-0.291270,0.035408,0.168680,...,1.397299,1.048780,0.956160,1.068645,1.067914,1.413262,1.667555,2.134270,1.980868,0.990116
2018-03-14 21:00:00+00:00,-0.197099,-0.309260,-0.400786,-0.441031,-0.397713,-0.308616,-0.291283,0.035394,0.168669,0.368420,...,1.048744,0.956108,1.068513,1.067825,1.413194,1.667512,2.134229,1.980899,0.990112,0.873653
2018-03-14 22:00:00+00:00,-0.309233,-0.400779,-0.441022,-0.397726,-0.308631,-0.291300,0.035384,0.168657,0.368412,0.666841,...,0.956069,1.068463,1.067693,1.413096,1.667439,2.134182,1.980858,0.990134,0.873649,0.870372


In [18]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

predictions = pd.DataFrame().reindex_like(frame_test['y'])

predictions = pd.concat([predictions], keys = ['Prediction'], names = ['Set'], axis = 1)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

frame_test = pd.merge(predictions, frame_test, left_index = True, right_index = True)

frame_test['Prediction'] = model.predict(X_test)

frame_test['Prediction'] = y_scaler.inverse_transform(frame_test['Prediction'])

frame_test['y'] = y_scaler.inverse_transform(frame_test['y'])

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

display(Markdown('***')), display(Markdown(f'**“frame_test” {(frame_test.shape)}:** <p> *Holds all input and output features of the test set, and the predictions*'))
display(frame_test)


***

**“frame_test” (6960, 96):** <p> *Holds all input and output features of the test set, and the predictions*

Set,Prediction,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP,MCP
Lag,0,0,-72,-71,-70,-69,-68,-67,-66,-65,...,-9,-8,-7,-6,-5,-4,-3,-2,-1,0
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2018-03-15 00:00:00+00:00,26.629672,17.057930,-0.440985,-0.397710,-0.308635,-0.291328,0.035358,0.168636,0.368395,0.666832,...,1.067607,1.412910,1.667185,2.133983,1.980734,0.990060,0.873635,0.870390,-0.146633,-0.937267
2018-03-15 01:00:00+00:00,20.983410,10.942751,-0.397681,-0.308628,-0.291319,0.035349,0.168626,0.368386,0.666828,0.713092,...,1.412882,1.667145,2.133822,1.980621,0.990000,0.873597,0.870354,-0.146620,-0.937269,-1.343248
2018-03-15 02:00:00+00:00,20.354549,10.221772,-0.308601,-0.291312,0.035356,0.168617,0.368377,0.666822,0.713088,0.614221,...,1.667122,2.133789,1.980463,0.989913,0.873540,0.870317,-0.146651,-0.937263,-1.343250,-1.506460
2018-03-15 03:00:00+00:00,20.919065,10.788994,-0.291285,0.035362,0.168623,0.368370,0.666816,0.713083,0.614216,0.551431,...,2.133777,1.980428,0.989784,0.873456,0.870259,-0.146681,-0.937291,-1.343246,-1.506462,-1.864051
2018-03-15 04:00:00+00:00,22.765967,22.083204,0.035384,0.168629,0.368376,0.666811,0.713077,0.614209,0.551425,0.556420,...,1.980412,0.989732,0.873330,0.870175,-0.146719,-0.937314,-1.343273,-1.506460,-1.864053,-2.024803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 19:00:00+00:00,59.766915,68.979449,1.096502,0.976774,0.841074,0.706880,0.707923,0.677539,0.796326,0.396875,...,2.119834,2.277309,2.314243,2.224197,1.877396,1.624863,2.172777,2.560758,2.476246,1.762710
2018-12-31 20:00:00+00:00,53.954577,51.655845,0.976782,0.841077,0.706885,0.707919,0.677533,0.796322,0.396868,0.322473,...,2.277299,2.314214,2.224032,1.877285,1.624791,2.172730,2.560716,2.476281,1.762705,1.084435
2018-12-31 21:00:00+00:00,55.069311,50.428313,0.841087,0.706888,0.707923,0.677528,0.796317,0.396859,0.322466,0.344187,...,2.314205,2.224002,1.877131,1.624688,2.172647,2.560665,2.476238,1.762734,1.084431,0.876934
2018-12-31 22:00:00+00:00,56.733664,50.126203,0.706900,0.707927,0.677533,0.796313,0.396851,0.322456,0.344180,0.361120,...,2.223991,1.877094,1.624540,2.172529,2.560575,2.476188,1.762694,1.084454,0.876930,1.161530


# Results

## Plot prediction

In [19]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

fig1 = go.Scatter(      x = frame_test.index,
                        y = frame_test['y']['ID3'][0],
                        name = 'Actual',
                        line_color = hex_maroon,

                        # mode = 'lines+markers',
                        )

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

fig2 = go.Scatter(      x = frame_test.index,
                        y = frame_test['Prediction']['ID3'][0],
                        name = 'Predicted',
                        line_color = hex_gold,

                        # mode = 'lines+markers',
                        )

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

data = [fig1, fig2]

fig = go.Figure(data = data)

fig.update_layout(      autosize = False,
                        width = 1250,
                        height = 500,
    
                        title = 'Forecast of test set',
                        xaxis_title = 'Timestamp',
                        yaxis_title = 'ID3 (€)',

                        font_family = 'SF Mono',

                        hovermode = 'x',
                        xaxis_showspikes = True,
                        yaxis_showspikes = True,
                        )

fig.show()


## Metrics

### SMAPE

In [20]:

def smape(A, F):
    return 100/len(A) * np.sum(2 * np.abs(F - A) / (np.abs(A) + np.abs(F)))

print()
for l in lags_y:
    print(f'SMAPE for hour {l}:', smape(frame_test['y']['ID3'][l], frame_test['Prediction']['ID3'][l]))
print()



SMAPE for hour 0: 14.714508442640104



### MAE

In [21]:

from sklearn.metrics import mean_absolute_error

print()
for l in lags_y:
    print(f'MAE for hour {l}:', mean_absolute_error(frame_test['y']['ID3'][l], frame_test['Prediction']['ID3'][l]))
print()



MAE for hour 0: 7.814000718925994



### MSE

In [22]:

from sklearn.metrics import mean_squared_error

print()
for l in lags_y:
    print(f'MSE for hour {l}:', mean_squared_error(frame_test['y']['ID3'][l], frame_test['Prediction']['ID3'][l], squared = True))
print()



MSE for hour 0: 129.93928645280636



### RMSE

In [23]:

from sklearn.metrics import mean_squared_error

print()
for l in lags_y:
    print(f'RMSE for hour {l}:', mean_squared_error(frame_test['y']['ID3'][l], frame_test['Prediction']['ID3'][l], squared = False))
print()



RMSE for hour 0: 11.39909147488546

