# Load necessary packages

In [1]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

import numpy as np
import pandas as pd

import datetime

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

hex_salmon = '#F68F83'
hex_gold = '#BC9661'
hex_indigo = '#2D2E5F'
hex_maroon = '#8C4750'
hex_white = '#FAFAFA'
hex_blue = '#7EB5D2'

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.dates import DateFormatter
import matplotlib.dates as dates

# import matplotlib.font_manager as font_manager
# mpl.font_manager._rebuild()

mpl.rcParams['font.family'] = 'SF Mono'
mpl.rcParams['font.weight'] = 'medium'
mpl.rcParams['axes.titleweight'] = 'semibold'
mpl.rcParams['axes.labelweight'] = 'medium'
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=[hex_indigo, hex_salmon, hex_maroon])
mpl.rcParams["figure.titlesize"] = 'large'
mpl.rcParams["figure.titleweight"] = 'semibold'

import plotly.express as px
import plotly.graph_objects as go

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

from termcolor import colored

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LogisticRegression, Ridge, ElasticNet, LassoCV, RidgeCV, ElasticNetCV
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import roc_auc_score, accuracy_score

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #


# Organise data

## Import features

In [2]:

! pip install 'git+git://github.com/HR/github-clone#egg=ghclone' &> /dev/null

! ghclone https://github.com/timovijn/ElectricityPriceForecasting/tree/master/LSTM


zsh:1: command not found: ghclone


In [3]:

features = pd.read_pickle(f'./data/ID/ID3.pkl')

display(features, features.shape)


Unnamed: 0_level_0,ID3,MCP
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01 00:00:00+00:00,24.2549,38.50
2015-01-01 01:00:00+00:00,28.6447,38.22
2015-01-01 02:00:00+00:00,28.3007,35.60
2015-01-01 03:00:00+00:00,28.0543,33.00
2015-01-01 04:00:00+00:00,28.1546,27.41
...,...,...
2018-12-31 19:00:00+00:00,68.9794,58.28
2018-12-31 20:00:00+00:00,51.6558,50.01
2018-12-31 21:00:00+00:00,50.4283,47.48
2018-12-31 22:00:00+00:00,50.1262,50.95


(34872, 2)

In [4]:

features = features.dropna()

display(features, features.shape)


Unnamed: 0_level_0,ID3,MCP
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01 00:00:00+00:00,24.2549,38.50
2015-01-01 01:00:00+00:00,28.6447,38.22
2015-01-01 02:00:00+00:00,28.3007,35.60
2015-01-01 03:00:00+00:00,28.0543,33.00
2015-01-01 04:00:00+00:00,28.1546,27.41
...,...,...
2018-12-31 19:00:00+00:00,68.9794,58.28
2018-12-31 20:00:00+00:00,51.6558,50.01
2018-12-31 21:00:00+00:00,50.4283,47.48
2018-12-31 22:00:00+00:00,50.1262,50.95


(34872, 2)

In [5]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

daterange = ['2017.12.12', '2017.12.20']

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

fig1 = go.Scatter(      x = features[(features.index >= daterange[0]) & (features.index <= daterange[1])].index,
                        y = features[(features.index >= daterange[0]) & (features.index <= daterange[1])]['MCP'],
                        name = 'MCP',
                        line_color = hex_indigo,

                        mode = 'lines+markers',
                        )

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

fig2 = go.Scatter(      x = features[(features.index >= daterange[0]) & (features.index <= daterange[1])].index,
                        y = features[(features.index >= daterange[0]) & (features.index <= daterange[1])]['ID3'],
                        name = 'ID3',
                        line_color = hex_salmon,

                        mode = 'lines+markers',
                        )

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

data = [fig1, fig2]

fig = go.Figure(data = data)

fig.update_layout(      autosize = False,
                        width = 1250,
                        height = 500,
    
                        title = 'Day-ahead (MCP) and intraday (ID3)',
                        xaxis_title = 'Timestamp',
                        yaxis_title = 'Price (€)',

                        font_family = 'SF Mono',

                        hovermode = 'x',
                        xaxis_showspikes = True,
                        yaxis_showspikes = True,
                        )

fig.show()


## Select features

In [6]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X = features[['ID3', 'MCP']]
y = features[['ID3']]

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

lagged = ['ID3', 'MCP']

lag_X = range(-72, -3, 1)
lag_y = range(0, 1, 1)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X2 = pd.DataFrame(index = X.index, columns = pd.MultiIndex.from_product([['X'], lag_X, X.columns], names = ['Feature', 'Type', 'Lag']))
X2 = X2.rename_axis('Timestamp')

y2 = pd.DataFrame(index = y.index, columns = pd.MultiIndex.from_product([['y'], lag_y, y.columns], names = ['Feature', 'Type', 'Lag']))
y2 = y2.rename_axis('Timestamp')

frame = pd.merge(y2, X2, left_index = True, right_index = True)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X3 = pd.DataFrame(index = X.index)
y3 = pd.DataFrame(index = y.index)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

for c in lagged:
    for l in lag_X:
        X3[f'{c} ({l})'] = X[f'{c}'].shift(-l)

frame['X'] = X3.values

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

for c in y.columns:
    for l in lag_y:
        y3[f'{c} ({l})'] = y.shift(-l)

frame['y'] = y3.values

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

frame = frame.dropna()

display(frame)


Feature,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Type,0,-72,-72,-71,-71,-70,-70,-69,-69,-68,...,-8,-8,-7,-7,-6,-6,-5,-5,-4,-4
Lag,ID3,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP,ID3,...,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2015-01-04 00:00:00+00:00,21.838,24.2549,28.6447,28.3007,28.0543,28.1546,27.5,26.7808,25.8523,24.4,...,45.36,45.6,44.96,42.44,39.94,42.44,53.1,49.82,47.62,43.79
2015-01-04 01:00:00+00:00,20.2474,28.6447,28.3007,28.0543,28.1546,27.5,26.7808,25.8523,24.4,25.625,...,45.6,44.96,42.44,39.94,42.44,53.1,49.82,47.62,43.79,42.44
2015-01-04 02:00:00+00:00,19.5168,28.3007,28.0543,28.1546,27.5,26.7808,25.8523,24.4,25.625,29.1,...,44.96,42.44,39.94,42.44,53.1,49.82,47.62,43.79,42.44,42.03
2015-01-04 03:00:00+00:00,20.0229,28.0543,28.1546,27.5,26.7808,25.8523,24.4,25.625,29.1,26.3571,...,42.44,39.94,42.44,53.1,49.82,47.62,43.79,42.44,42.03,40.91
2015-01-04 04:00:00+00:00,18.8012,28.1546,27.5,26.7808,25.8523,24.4,25.625,29.1,26.3571,26.0735,...,39.94,42.44,53.1,49.82,47.62,43.79,42.44,42.03,40.91,36.26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 19:00:00+00:00,68.9794,56.2728,54.3653,52.2032,50.0651,50.0817,49.5975,51.4901,45.1253,43.9398,...,49.12,53.79,57.47,61.52,62.64,64.56,65.01,63.91,59.68,56.6
2018-12-31 20:00:00+00:00,51.6558,54.3653,52.2032,50.0651,50.0817,49.5975,51.4901,45.1253,43.9398,44.2858,...,53.79,57.47,61.52,62.64,64.56,65.01,63.91,59.68,56.6,63.28
2018-12-31 21:00:00+00:00,50.4283,52.2032,50.0651,50.0817,49.5975,51.4901,45.1253,43.9398,44.2858,44.5556,...,57.47,61.52,62.64,64.56,65.01,63.91,59.68,56.6,63.28,68.01
2018-12-31 22:00:00+00:00,50.1262,50.0651,50.0817,49.5975,51.4901,45.1253,43.9398,44.2858,44.5556,44.1,...,61.52,62.64,64.56,65.01,63.91,59.68,56.6,63.28,68.01,66.98


## Split train and test

In [7]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X_train, X_test, y_train, y_test = train_test_split(
    frame['X'],
    frame['y'],
    test_size = 2/10,
    random_state = 0,
    shuffle = False
    )

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size = 2/8,
    random_state = 0,
    shuffle = False
    )

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X_train.columns = pd.MultiIndex.from_product([['X'], lag_X, X.columns], names = ['Lag', 'Feature', 'Lag'])
y_train.columns = pd.MultiIndex.from_product([['y'], lag_y, y.columns], names = ['Lag', 'Feature', 'Lag'])

frame_train = pd.merge(y_train, X_train, left_index = True, right_index = True)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X_test.columns = pd.MultiIndex.from_product([['X'], lag_X, X.columns], names = ['Lag', 'Feature', 'Lag'])
y_test.columns = pd.MultiIndex.from_product([['y'], lag_y, y.columns], names = ['Lag', 'Feature', 'Lag'])

frame_test = pd.merge(y_test, X_test, left_index = True, right_index = True)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X_val.columns = pd.MultiIndex.from_product([['X'], lag_X, X.columns], names = ['Lag', 'Feature', 'Lag'])
y_val.columns = pd.MultiIndex.from_product([['y'], lag_y, y.columns], names = ['Lag', 'Feature', 'Lag'])

frame_val = pd.merge(y_val, X_val, left_index = True, right_index = True)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

print()
print(f'Train input', frame_train['X'].shape, 'output', frame_train['y'].shape)
print()
print(f'Test input', frame_test['X'].shape, 'output', frame_test['y'].shape)
print()
print(f'Validation input', frame_val['X'].shape, 'output', frame_val['y'].shape)
print()

display(frame_train)



Train input (20880, 138) output (20880, 1)

Test input (6960, 138) output (6960, 1)

Validation input (6960, 138) output (6960, 1)



Lag,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,0,-72,-72,-71,-71,-70,-70,-69,-69,-68,...,-8,-8,-7,-7,-6,-6,-5,-5,-4,-4
Lag,ID3,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP,ID3,...,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2015-01-04 00:00:00+00:00,21.838,24.2549,28.6447,28.3007,28.0543,28.1546,27.5,26.7808,25.8523,24.4,...,45.36,45.6,44.96,42.44,39.94,42.44,53.1,49.82,47.62,43.79
2015-01-04 01:00:00+00:00,20.2474,28.6447,28.3007,28.0543,28.1546,27.5,26.7808,25.8523,24.4,25.625,...,45.6,44.96,42.44,39.94,42.44,53.1,49.82,47.62,43.79,42.44
2015-01-04 02:00:00+00:00,19.5168,28.3007,28.0543,28.1546,27.5,26.7808,25.8523,24.4,25.625,29.1,...,44.96,42.44,39.94,42.44,53.1,49.82,47.62,43.79,42.44,42.03
2015-01-04 03:00:00+00:00,20.0229,28.0543,28.1546,27.5,26.7808,25.8523,24.4,25.625,29.1,26.3571,...,42.44,39.94,42.44,53.1,49.82,47.62,43.79,42.44,42.03,40.91
2015-01-04 04:00:00+00:00,18.8012,28.1546,27.5,26.7808,25.8523,24.4,25.625,29.1,26.3571,26.0735,...,39.94,42.44,53.1,49.82,47.62,43.79,42.44,42.03,40.91,36.26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-05-27 19:00:00+00:00,35.697,41.3179,40.8523,35.3517,30.8615,28.1405,31.7037,30.681,30.726,24.6634,...,26.1,29.14,30.13,34.79,35.34,30,25.19,25.13,26.4,24.68
2017-05-27 20:00:00+00:00,30.1949,40.8523,35.3517,30.8615,28.1405,31.7037,30.681,30.726,24.6634,24.0226,...,29.14,30.13,34.79,35.34,30,25.19,25.13,26.4,24.68,25.1
2017-05-27 21:00:00+00:00,25.9212,35.3517,30.8615,28.1405,31.7037,30.681,30.726,24.6634,24.0226,22.9759,...,30.13,34.79,35.34,30,25.19,25.13,26.4,24.68,25.1,27.31
2017-05-27 22:00:00+00:00,26.1836,30.8615,28.1405,31.7037,30.681,30.726,24.6634,24.0226,22.9759,23.3225,...,34.79,35.34,30,25.19,25.13,26.4,24.68,25.1,27.31,34.3


## Scaling

In [8]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

frame_train_unscaled = frame_train
frame_test_unscaled = frame_test
frame_val_unscaled = frame_val

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

y_scaler = StandardScaler()
y_scaler.fit(frame_train['y'])

frame_train['y'] = y_scaler.transform(frame_train['y'])
frame_test['y'] = y_scaler.transform(frame_test['y'])
frame_val['y'] = y_scaler.transform(frame_val['y'])

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

X_scaler = StandardScaler()
X_scaler.fit(frame_train['X'])

frame_train['X'] = X_scaler.transform(frame_train['X'])
frame_test['X'] = X_scaler.transform(frame_test['X'])
frame_val['X'] = X_scaler.transform(frame_val['X'])

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

display(frame_train)

display(frame_test)

display(frame_val)


Lag,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,0,-72,-72,-71,-71,-70,-70,-69,-69,-68,...,-8,-8,-7,-7,-6,-6,-5,-5,-4,-4
Lag,ID3,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP,ID3,...,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2015-01-04 00:00:00+00:00,-1.062439,-0.912976,-0.637497,-0.659101,-0.674570,-0.668260,-0.709326,-0.754441,-0.812699,-0.903817,...,0.702387,0.722134,0.669718,0.463113,0.258138,0.463220,1.337506,1.068645,0.888302,0.574239
2015-01-04 01:00:00+00:00,-1.162249,-0.637465,-0.659092,-0.674559,-0.668275,-0.709346,-0.754463,-0.812717,-0.903844,-0.826937,...,0.722071,0.669644,0.463040,0.258078,0.463170,1.337460,1.068513,0.888218,0.574187,0.463518
2015-01-04 02:00:00+00:00,-1.208088,-0.659060,-0.674551,-0.668265,-0.709362,-0.754483,-0.812740,-0.903862,-0.826963,-0.608849,...,0.669581,0.462964,0.258003,0.463113,1.337430,1.068463,0.888091,0.574111,0.463468,0.429891
2015-01-04 03:00:00+00:00,-1.176330,-0.674518,-0.668256,-0.709351,-0.754499,-0.812760,-0.903887,-0.826981,-0.608872,-0.780988,...,0.462902,0.257925,0.463040,1.337383,1.068427,0.888038,0.573994,0.463395,0.429842,0.338033
2015-01-04 04:00:00+00:00,-1.252990,-0.668224,-0.709343,-0.754489,-0.812777,-0.903908,-0.827004,-0.608888,-0.781014,-0.798787,...,0.257863,0.462964,1.337320,1.068377,0.887998,0.573935,0.463280,0.429770,0.337986,-0.043341
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-05-27 19:00:00+00:00,-0.192823,0.157918,0.128674,-0.216560,-0.498388,-0.669150,-0.445494,-0.509666,-0.506822,-0.887287,...,-0.877230,-0.627845,-0.546564,-0.164295,-0.119123,-0.557000,-0.951390,-0.956237,-0.852043,-0.993085
2017-05-27 20:00:00+00:00,-0.538070,0.128695,-0.216553,-0.498379,-0.669165,-0.445511,-0.509686,-0.506837,-0.887314,-0.927499,...,-0.627903,-0.546650,-0.164375,-0.119187,-0.557072,-0.951474,-0.956311,-0.852081,-0.993107,-0.958638
2017-05-27 21:00:00+00:00,-0.806230,-0.216528,-0.498371,-0.669155,-0.445524,-0.509703,-0.506856,-0.887332,-0.927527,-0.993192,...,-0.546708,-0.164456,-0.119266,-0.557142,-0.951555,-0.956395,-0.852158,-0.993142,-0.958661,-0.777383
2017-05-27 22:00:00+00:00,-0.789766,-0.498341,-0.669147,-0.445515,-0.509717,-0.506874,-0.887356,-0.927546,-0.993221,-0.971441,...,-0.164516,-0.119348,-0.557226,-0.951630,-0.956476,-0.852241,-0.993215,-0.958697,-0.777410,-0.204092


Lag,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,0,-72,-72,-71,-71,-70,-70,-69,-69,-68,...,-8,-8,-7,-7,-6,-6,-5,-5,-4,-4
Lag,ID3,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP,ID3,...,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2018-03-15 00:00:00+00:00,-1.362377,-0.440985,-0.397710,-0.308635,-0.291328,0.035358,0.168636,0.368395,0.666832,0.713097,...,1.397057,1.048557,0.955950,1.068377,1.067607,1.412910,1.667185,2.133983,1.980734,0.990060
2018-03-15 01:00:00+00:00,-1.746088,-0.397681,-0.308628,-0.291319,0.035349,0.168626,0.368386,0.666828,0.713092,0.614227,...,1.048492,0.955879,1.068311,1.067557,1.412882,1.667145,2.133822,1.980621,0.990000,0.873597
2018-03-15 02:00:00+00:00,-1.791327,-0.308601,-0.291312,0.035356,0.168617,0.368377,0.666822,0.713088,0.614221,0.551438,...,0.955814,1.068241,1.067491,1.412836,1.667122,2.133789,1.980463,0.989913,0.873540,0.870317
2018-03-15 03:00:00+00:00,-1.755736,-0.291285,0.035362,0.168623,0.368370,0.666816,0.713083,0.614216,0.551431,0.556426,...,1.068176,1.067420,1.412774,1.667080,2.133777,1.980428,0.989784,0.873456,0.870259,-0.146681
2018-03-15 04:00:00+00:00,-1.047054,0.035384,0.168629,0.368376,0.666811,0.713077,0.614209,0.551425,0.556420,0.624767,...,1.067355,1.412707,1.667020,2.133740,1.980412,0.989732,0.873330,0.870175,-0.146719,-0.937314
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 19:00:00+00:00,1.895559,1.096502,0.976774,0.841074,0.706880,0.707923,0.677539,0.796326,0.396875,0.322483,...,1.010765,1.393843,1.695726,2.027942,2.119834,2.277309,2.314243,2.224197,1.877396,1.624863
2018-12-31 20:00:00+00:00,0.808549,0.976782,0.841077,0.706885,0.707919,0.677533,0.796322,0.396868,0.322473,0.344197,...,1.393777,1.695661,2.027886,2.119797,2.277299,2.314214,2.224032,1.877285,1.624791,2.172730
2018-12-31 21:00:00+00:00,0.731525,0.841087,0.706888,0.707923,0.677528,0.796317,0.396859,0.322466,0.344187,0.361129,...,1.695593,2.027825,2.119743,2.277264,2.314205,2.224002,1.877131,1.624688,2.172647,2.560665
2018-12-31 22:00:00+00:00,0.712568,0.706900,0.707927,0.677533,0.796313,0.396851,0.322456,0.344180,0.361120,0.332538,...,2.027756,2.119682,2.277212,2.314171,2.223991,1.877094,1.624540,2.172529,2.560575,2.476188


Lag,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,0,-72,-72,-71,-71,-70,-70,-69,-69,-68,...,-8,-8,-7,-7,-6,-6,-5,-5,-4,-4
Lag,ID3,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP,ID3,...,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2017-05-28 00:00:00+00:00,-0.882112,-0.445478,-0.509700,-0.506879,-0.887395,-0.927592,-0.993265,-0.971489,-1.117350,-1.172481,...,-0.557370,-0.951807,-0.956639,-0.852393,-0.993382,-0.958855,-0.777529,-0.204185,0.027972,-0.067125
2017-05-28 01:00:00+00:00,-1.172213,-0.509670,-0.506871,-0.887384,-0.927609,-0.993287,-0.971514,-1.117370,-1.172512,-1.168701,...,-0.951864,-0.956728,-0.852480,-0.993457,-0.958936,-0.777610,-0.204280,0.027910,-0.067165,-0.200811
2017-05-28 02:00:00+00:00,-1.200162,-0.506841,-0.887374,-0.927598,-0.993306,-0.971536,-1.117397,-1.172533,-1.168732,-1.045919,...,-0.956784,-0.852568,-0.993546,-0.959011,-0.777688,-0.204352,0.027808,-0.067224,-0.200849,0.725969
2017-05-28 03:00:00+00:00,-1.453726,-0.887339,-0.927589,-0.993294,-0.971554,-1.117420,-1.172560,-1.168753,-1.045948,-0.931442,...,-0.852625,-0.993635,-0.959100,-0.777760,-0.204416,0.027740,-0.067324,-0.200904,0.725914,0.271601
2017-05-28 04:00:00+00:00,-1.555668,-0.927552,-0.993284,-0.971542,-1.117439,-1.172584,-1.168780,-1.045968,-0.931470,-0.951930,...,-0.993691,-0.959189,-0.777847,-0.204482,0.027681,-0.067393,-0.201000,0.725834,0.271554,0.591463
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-03-14 19:00:00+00:00,-0.021753,0.421136,0.314905,-0.197131,-0.309276,-0.400782,-0.441001,-0.397678,-0.308585,-0.291251,...,1.060794,2.616698,3.543523,2.805435,1.737653,1.397328,1.048831,0.956288,1.068734,1.067975
2018-03-14 20:00:00+00:00,-0.176188,0.314923,-0.197124,-0.309267,-0.400795,-0.441017,-0.397696,-0.308599,-0.291270,0.035408,...,2.616626,3.543476,2.805388,1.737612,1.397299,1.048780,0.956160,1.068645,1.067914,1.413262
2018-03-14 21:00:00+00:00,-0.710770,-0.197099,-0.309260,-0.400786,-0.441031,-0.397713,-0.308616,-0.291283,0.035394,0.168669,...,3.543400,2.805334,1.737553,1.397253,1.048744,0.956108,1.068513,1.067825,1.413194,1.667512
2018-03-14 22:00:00+00:00,-0.915941,-0.309233,-0.400779,-0.441022,-0.397726,-0.308631,-0.291300,0.035384,0.168657,0.368412,...,2.805261,1.737489,1.397191,1.048693,0.956069,1.068463,1.067693,1.413096,1.667439,2.134182


## Convert to two dimensional

In [9]:

def two_dim_tensor(frame_train, frame_test, frame_val):

    X_train = frame_train['X']
    y_train = frame_train['y']

    X_test = frame_test['X']
    y_test = frame_test['y']

    X_val = frame_val['X']
    X_val = frame_val['y']

    return X_train, y_train, X_test, y_test, X_val, y_val


## Convert to three dimensional

In [10]:

def three_dim_tensor(frame_train, frame_test, frame_val):

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    X_train = list()

    for index, row in frame_train['X'].iterrows():
        X_train.extend(row.tolist())

    X_train = np.array(X_train)

    X_train = X_train.reshape((len(frame_train), len(lag_X), len(X.columns)))

    y_train = frame_train['y']

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    X_test = list()

    for index, row in frame_test['X'].iterrows():
        X_test.extend(row.tolist())

    X_test = np.array(X_test)

    X_test = X_test.reshape((len(frame_test), len(lag_X), len(X.columns)))

    y_test = frame_test['y']

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    X_val = list()

    for index, row in frame_val['X'].iterrows():
        X_val.extend(row.tolist())

    X_val = np.array(X_val)

    X_val = X_val.reshape((len(frame_val), len(lag_X), len(X.columns)))

    y_val = frame_val['y']

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    return X_train, y_train, X_test, y_test, X_val, y_val
    

In [11]:

def plotModelResults(model, X_train=X_train, X_test=X_test, plot_intervals=False, plot_anomalies=False):
    """
        Plots modelled vs fact values, prediction intervals and anomalies
    
    """

    def mean_absolute_percentage_error(y_true, y_pred): 
        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    
    prediction = model.predict(X_test)
    
    plt.figure(figsize=(15, 7))
    plt.plot(prediction, "g", label="prediction", linewidth=2.0)
    plt.plot(frame_test_unscaled['y'].values, label="actual", linewidth=2.0)
    
    if plot_intervals:
        cv = cross_val_score(model, X_train, y_train, 
                                    cv=tscv, 
                                    scoring="neg_mean_absolute_error")
        mae = cv.mean() * (-1)
        deviation = cv.std()
        
        scale = 1.96
        lower = prediction - (mae + scale * deviation)
        upper = prediction + (mae + scale * deviation)
        
        plt.plot(lower, "r--", label="upper bond / lower bond", alpha=0.5)
        plt.plot(upper, "r--", alpha=0.5)
        
        if plot_anomalies:
            anomalies = np.array([np.NaN]*len(y_test))
            anomalies[y_test<lower] = y_test[y_test<lower]
            anomalies[y_test>upper] = y_test[y_test>upper]
            plt.plot(anomalies, "o", markersize=10, label = "Anomalies")
    
    error = mean_absolute_percentage_error(prediction, y_test.values)
    # plt.title("Mean absolute percentage error {0:.2f}%".format(error))
    plt.legend(loc="best")
    plt.tight_layout()
    plt.grid(True);
    
def plotCoefficients(model):
    """
        Plots sorted coefficient values of the model
    """
    
    coefs = pd.DataFrame(np.transpose(model.coef_), X_train.columns)
    coefs.columns = ["coef"]
    coefs["abs"] = coefs.coef.apply(np.abs)
    coefs = coefs.sort_values(by="abs", ascending=False).drop(["abs"], axis=1)
    
    plt.figure(figsize=(15, 7))
    coefs.coef.plot(kind='bar')
    plt.grid(True, axis='y')
    plt.hlines(y=0, xmin=0, xmax=len(coefs), linestyles='dashed');
    

# Different models

## Linear regression

In [12]:

def model_LINREG():

    model = LinearRegression()

    # plotModelResults(   lr,
    #                     plot_intervals = True,
    #                     plot_anomalies = False
    #                     )

    # plotCoefficients(lr)

    return model


## LASSO regression

In [13]:

def model_LASSO():

    # for time-series cross-validation set 5 folds 
    tscv = TimeSeriesSplit(n_splits = 5)

    model = LassoCV(cv = tscv)
    
    # plotModelResults(   lasso, 
    #                     X_train = X_train, 
    #                     X_test = X_test, 
    #                     plot_intervals = True, plot_anomalies = False
    #                     )

    # plotCoefficients(lasso)

    return model


# Learning

## Create model

In [14]:

LATENT_DIM = 5
BATCH_SIZE = 50
EPOCHS = 100

HORIZON = len(lag_y)


In [15]:

def model_GRU():

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    model = Sequential()

    model.add(GRU(LATENT_DIM, input_shape = (len(lag_X), len(X.columns))))

    model.add(RepeatVector(HORIZON))

    model.add(GRU(LATENT_DIM, return_sequences = True))

    model.add(TimeDistributed(Dense(1)))

    model.add(Flatten())

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #
    
    return model


In [16]:

def model_MLP():

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    model = Sequential()

    model.add(Dense(200, activation = 'relu', kernel_initializer = 'he_normal', input_shape = (frame_train['X'].shape[1],)))

    model.add(Dense(100, activation = 'relu', kernel_initializer = 'he_normal'))

    model.add(Dense(HORIZON))

    # ———————————————————————————————————————————————————————————————————————————————————————————————————— #

    return model


In [17]:

def learn(model):

    from keras.callbacks import EarlyStopping

    earlystop = EarlyStopping(monitor = 'val_loss', mode = 'min', patience = 5, verbose = 1)

    history = model.fit(    X_train,
                            np.array(frame_train['y']),
                            batch_size = BATCH_SIZE,
                            epochs = EPOCHS,
                            validation_data = (X_val, frame_val['y']),
                            callbacks = [earlystop],
                            verbose = 1
                            )

    # summarize history for accuracy
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc = 'upper left')
    plt.show()

    # summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc = 'upper left')
    plt.show()

    return model, history


In [18]:

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import TimeSeriesSplit

from keras.models import Sequential
from keras.layers import Dense, GRU
from keras.optimizers import SGD, Adam
from keras.utils.vis_utils import plot_model
from keras.layers import GRU, Dense, RepeatVector, TimeDistributed, Flatten

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

model_sel = input(r'Enter model: ')

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

if model_sel == 'GRU':

    X_train, y_train, X_test, y_test, X_val, y_val = three_dim_tensor(frame_train, frame_test, frame_val)

    model = model_GRU()

    model.compile(optimizer = 'Adam', loss = 'mse', metrics = ['accuracy'])

    model.summary()

    model, history = learn(model)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

if model_sel == 'MLP':

    X_train, y_train, X_test, y_test, X_val, y_val = three_dim_tensor(frame_train, frame_test, frame_val)

    model = model_MLP()

    model.compile(optimizer = 'Adam', loss = 'mse', metrics = ['accuracy'])

    model.summary()

    model, history = learn(model)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

if model_sel == 'LINREG':

    X_train, y_train, X_test, y_test, X_val, y_val = two_dim_tensor(frame_train, frame_test, frame_val)

    model = model_LINREG()

    model.fit(X_train, y_train)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

if model_sel == 'LASSO':

    X_train, y_train, X_test, y_test, X_val, y_val = two_dim_tensor(frame_train, frame_test, frame_val)

    model = model_LASSO()

    model.fit(X_train, y_train)

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #


In [19]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

predictions = pd.DataFrame(index = frame_test.index, columns = pd.MultiIndex.from_product([['Prediction'], y.columns, lag_y], names = ['Type', 'Feature', 'Lag']))

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

frame_test = pd.merge(frame_test, predictions, left_index = True, right_index = True)

frame_test['Prediction'] = model.predict(X_test)

frame_test['Prediction'] = y_scaler.inverse_transform(frame_test['Prediction'])

frame_test['y'] = y_scaler.inverse_transform(frame_test['y'])

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

display(frame_test)


Lag,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,Prediction
Feature,0,-72,-72,-71,-71,-70,-70,-69,-69,-68,...,-8,-7,-7,-6,-6,-5,-5,-4,-4,ID3
Lag,ID3,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP,ID3,...,MCP,ID3,MCP,ID3,MCP,ID3,MCP,ID3,MCP,0
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2018-03-15 00:00:00+00:00,17.057930,-0.440985,-0.397710,-0.308635,-0.291328,0.035358,0.168636,0.368395,0.666832,0.713097,...,1.048557,0.955950,1.068377,1.067607,1.412910,1.667185,2.133983,1.980734,0.990060,26.750848
2018-03-15 01:00:00+00:00,10.942751,-0.397681,-0.308628,-0.291319,0.035349,0.168626,0.368386,0.666828,0.713092,0.614227,...,0.955879,1.068311,1.067557,1.412882,1.667145,2.133822,1.980621,0.990000,0.873597,26.358180
2018-03-15 02:00:00+00:00,10.221772,-0.308601,-0.291312,0.035356,0.168617,0.368377,0.666822,0.713088,0.614221,0.551438,...,1.068241,1.067491,1.412836,1.667122,2.133789,1.980463,0.989913,0.873540,0.870317,25.637505
2018-03-15 03:00:00+00:00,10.788994,-0.291285,0.035362,0.168623,0.368370,0.666816,0.713083,0.614216,0.551431,0.556426,...,1.067420,1.412774,1.667080,2.133777,1.980428,0.989784,0.873456,0.870259,-0.146681,27.969164
2018-03-15 04:00:00+00:00,22.083204,0.035384,0.168629,0.368376,0.666811,0.713077,0.614209,0.551425,0.556420,0.624767,...,1.412707,1.667020,2.133740,1.980412,0.989732,0.873330,0.870175,-0.146719,-0.937314,29.844357
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 19:00:00+00:00,68.979449,1.096502,0.976774,0.841074,0.706880,0.707923,0.677539,0.796326,0.396875,0.322483,...,1.393843,1.695726,2.027942,2.119834,2.277309,2.314243,2.224197,1.877396,1.624863,63.011821
2018-12-31 20:00:00+00:00,51.655845,0.976782,0.841077,0.706885,0.707919,0.677533,0.796322,0.396868,0.322473,0.344197,...,1.695661,2.027886,2.119797,2.277299,2.314214,2.224032,1.877285,1.624791,2.172730,59.695068
2018-12-31 21:00:00+00:00,50.428313,0.841087,0.706888,0.707923,0.677528,0.796317,0.396859,0.322466,0.344187,0.361129,...,2.027825,2.119743,2.277264,2.314205,2.224002,1.877131,1.624688,2.172647,2.560665,59.962164
2018-12-31 22:00:00+00:00,50.126203,0.706900,0.707927,0.677533,0.796313,0.396851,0.322456,0.344180,0.361120,0.332538,...,2.119682,2.277212,2.314171,2.223991,1.877094,1.624540,2.172529,2.560575,2.476188,59.204608


# Results

## Plot prediction

In [20]:

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

fig1 = go.Scatter(      x = frame_test.index,
                        y = frame_test['y'][0]['ID3'],
                        name = 'Actual',
                        line_color = hex_maroon,

                        # mode = 'lines+markers',
                        )

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

fig2 = go.Scatter(      x = frame_test.index,
                        y = frame_test['Prediction']['ID3'][0],
                        name = 'Predicted',
                        line_color = hex_gold,

                        # mode = 'lines+markers',
                        )

# ———————————————————————————————————————————————————————————————————————————————————————————————————— #

data = [fig1, fig2]

fig = go.Figure(data = data)

fig.update_layout(      autosize = False,
                        width = 1250,
                        height = 500,
    
                        title = 'Forecast of test set',
                        xaxis_title = 'Timestamp',
                        yaxis_title = 'ID3 (€)',

                        font_family = 'SF Mono',

                        hovermode = 'x',
                        xaxis_showspikes = True,
                        yaxis_showspikes = True,
                        )

fig.show()


## Metrics

### SMAPE

In [21]:

def smape(A, F):
    return 100/len(A) * np.sum(2 * np.abs(F - A) / (np.abs(A) + np.abs(F)))

print()
for l in lag_y:
    print(f'SMAPE for hour {l}:', smape(frame_test['y'][l]['ID3'], frame_test['Prediction']['ID3'][l]))
print()



SMAPE for hour 0: 15.535558705945805



### MAE

In [22]:

from sklearn.metrics import mean_absolute_error

print()
for l in lag_y:
    print(f'MAE for hour {l}:', mean_absolute_error(frame_test['y'][l]['ID3'], frame_test['Prediction']['ID3'][l]))
print()



MAE for hour 0: 8.21496056594141



### MSE

In [23]:

from sklearn.metrics import mean_squared_error

print()
for l in lag_y:
    print(f'MSE for hour {l}:', mean_squared_error(frame_test['y'][l]['ID3'], frame_test['Prediction']['ID3'][l], squared = True))
print()



MSE for hour 0: 141.78878448844654



### RMSE

In [24]:

from sklearn.metrics import mean_squared_error

print()
for l in lag_y:
    print(f'RMSE for hour {l}:', mean_squared_error(frame_test['y'][l]['ID3'], frame_test['Prediction']['ID3'][l], squared = False))
print()



RMSE for hour 0: 11.907509583806622

