# Load necessary packages

In [1]:
import pandas as pd
import numpy as np

# Colour palette (hex codes) used by the matplotlib colour cycle and the plotly traces in this notebook.
hex_salmon = '#F68F83'
hex_gold = '#BC9661'
hex_indigo = '#2D2E5F'
hex_maroon = '#8C4750'
hex_white = '#FAFAFA'
hex_blue = '#7EB5D2'

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.dates import DateFormatter
import matplotlib.dates as dates

import plotly.express as px
import plotly.graph_objects as go

import matplotlib.font_manager as font_manager

# `_rebuild` is a private matplotlib helper that is absent from newer releases; call it only when
# the installed version still provides it so this cell does not fail with AttributeError.
_rebuild_font_cache = getattr(font_manager, '_rebuild', None)
if callable(_rebuild_font_cache):
    _rebuild_font_cache()

# Global figure styling: font, weights and the default line colour cycle.
mpl.rcParams['font.family'] = 'SF Mono'
mpl.rcParams['font.weight'] = 'medium'
mpl.rcParams['axes.titleweight'] = 'semibold'
mpl.rcParams['axes.labelweight'] = 'medium'
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=[hex_indigo, hex_salmon, hex_maroon])
mpl.rcParams["figure.titlesize"] = 'large'
mpl.rcParams["figure.titleweight"] = 'semibold'

from termcolor import colored

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LogisticRegression, Ridge, ElasticNet, LassoCV, RidgeCV, ElasticNetCV
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import roc_auc_score, accuracy_score

import tensorflow as tf

import datetime

# Organise data

## Import features

In [2]:
! pip install 'git+git://github.com/HR/github-clone#egg=ghclone' &> /dev/null

! ghclone https://github.com/timovijn/ElectricityPriceForecasting/tree/master/LSTM

zsh:1: command not found: ghclone


In [3]:
# Read the two pickled frames and join them on their shared index.
intraday = pd.read_pickle('./data/ID/ID3.pkl')
day_ahead = pd.read_pickle('./data/DA/DA.pkl')
features = intraday.merge(day_ahead, left_index = True, right_index = True)

display(features, features.shape)

Unnamed: 0_level_0,ID3,Volume,MCP
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-12-31 23:00:00+00:00,48.8259,51.0,42.00
2017-01-01 00:00:00+00:00,46.11,218.5,49.94
2017-01-01 01:00:00+00:00,46.8041,204.3,51.95
2017-01-01 02:00:00+00:00,47.9673,269.8,40.99
2017-01-01 03:00:00+00:00,48.6605,280.4,39.00
...,...,...,...
2017-12-31 18:00:00+00:00,71.1385,5.0,34.70
2017-12-31 19:00:00+00:00,80.7264,60.0,33.70
2017-12-31 20:00:00+00:00,67.2939,70.0,28.10
2017-12-31 21:00:00+00:00,24.2249,10.0,32.00


(8712, 3)

In [48]:
daterange = ['2017.12.12', '2017.12.20']

# Select the plotting window once and reuse it for both traces.
in_window = (features.index >= daterange[0]) & (features.index <= daterange[1])
window = features[in_window]

# One trace per price series, each with its own palette colour.
trace_colours = (('MCP', hex_indigo), ('ID3', hex_salmon))

data = [
    go.Scatter(x = window.index, y = window[column], name = column, line_color = colour)
    for column, colour in trace_colours
]
fig1, fig2 = data

fig = go.Figure(data = data)

layout_options = dict(
    autosize = False,
    width = 1250,
    height = 500,
    title = 'Day-ahead (MCP) and intraday (ID3)',
    xaxis_title = 'Timestamp',
    yaxis_title = 'Price (€)',
)
fig.update_layout(**layout_options)

fig.show()

## Select features

In [5]:
# Inputs and target are both taken from the ID3 column.
X = features[['ID3']]
y = features[['ID3']]

# Columns for which lagged copies are generated (must match X.columns for the labelling below).
lagged = ['ID3']

# Lags are row offsets relative to each timestamp: -72 ... -4 are past observations used as inputs,
# 0 is the value to predict.
lag_X = range(-72, -3, 1)
lag_y = range(0, 1, 1)

# Column levels, outermost first: block ('X' or 'y'), lag, source column.
column_levels = ['Type', 'Lag', 'Feature']

X2 = pd.DataFrame(index = X.index, columns = pd.MultiIndex.from_product([['X'], lag_X, X.columns], names = column_levels))
X2 = X2.rename_axis('Timestamp')

y2 = pd.DataFrame(index = y.index, columns = pd.MultiIndex.from_product([['y'], lag_y, y.columns], names = column_levels))
y2 = y2.rename_axis('Timestamp')

frame = pd.merge(y2, X2, left_index = True, right_index = True)

X3 = pd.DataFrame(index = X.index)
y3 = pd.DataFrame(index = y.index)

# Iterate lag-major / column-minor so the positional `.values` assignment below lines up with the
# from_product([block, lags, columns]) labels even when more than one column is lagged.
# shift(-l) with a negative l moves earlier rows down, i.e. exposes past values at the current row.
for l in lag_X:
    for c in lagged:
        X3[f'{c} ({l})'] = X[c].shift(-l)

frame['X'] = X3.values

for l in lag_y:
    for c in y.columns:
        # Shift the single column; assigning the whole shifted frame only works with one target column.
        y3[f'{c} ({l})'] = y[c].shift(-l)

frame['y'] = y3.values

# Rows without a complete lag history (or with missing prices) are discarded.
frame = frame.dropna()

display(frame)

Feature,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Type,0,-72,-71,-70,-69,-68,-67,-66,-65,-64,...,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4
Lag,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2017-01-06 06:00:00+00:00,77.1078,35,47.5295,47.2877,44.0313,40.0338,37.0793,35.2597,35.2113,37.3807,...,88.5123,75.4617,67.2939,61.9335,64.6584,64.463,64.463,35,36.2262,35.7301
2017-01-06 07:00:00+00:00,76.0859,47.5295,47.2877,44.0313,40.0338,37.0793,35.2597,35.2113,37.3807,38.3373,...,75.4617,67.2939,61.9335,64.6584,64.463,64.463,35,36.2262,35.7301,35.8832
2017-01-06 08:00:00+00:00,68.5947,47.2877,44.0313,40.0338,37.0793,35.2597,35.2113,37.3807,38.3373,40.1271,...,67.2939,61.9335,64.6584,64.463,64.463,35,36.2262,35.7301,35.8832,35
2017-01-06 09:00:00+00:00,67.7811,44.0313,40.0338,37.0793,35.2597,35.2113,37.3807,38.3373,40.1271,33.1769,...,61.9335,64.6584,64.463,64.463,35,36.2262,35.7301,35.8832,35,35
2017-01-06 10:00:00+00:00,65.2342,40.0338,37.0793,35.2597,35.2113,37.3807,38.3373,40.1271,33.1769,32.7695,...,64.6584,64.463,64.463,35,36.2262,35.7301,35.8832,35,35,77.1078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-12-31 18:00:00+00:00,71.1385,48.4236,39.5776,38.2564,33.2816,33.0097,33.0153,35.5607,34.9896,34.9917,...,30.5337,32.7857,28.7826,27.2456,28.4701,31.194,34.8165,38.2763,41.7275,51.4587
2017-12-31 19:00:00+00:00,80.7264,39.5776,38.2564,33.2816,33.0097,33.0153,35.5607,34.9896,34.9917,40.8571,...,32.7857,28.7826,27.2456,28.4701,31.194,34.8165,38.2763,41.7275,51.4587,49.8577
2017-12-31 20:00:00+00:00,67.2939,38.2564,33.2816,33.0097,33.0153,35.5607,34.9896,34.9917,40.8571,45.6066,...,28.7826,27.2456,28.4701,31.194,34.8165,38.2763,41.7275,51.4587,49.8577,57.5725
2017-12-31 21:00:00+00:00,24.2249,33.2816,33.0097,33.0153,35.5607,34.9896,34.9917,40.8571,45.6066,48.0521,...,27.2456,28.4701,31.194,34.8165,38.2763,41.7275,51.4587,49.8577,57.5725,53.0437


## Split train and test

In [6]:
def _label_split(part, block, lags, source_columns):
    """Return a copy of `part` whose columns carry three-level (block, lag, source column) labels."""
    labelled = part.copy()
    labelled.columns = pd.MultiIndex.from_product(
        [[block], lags, source_columns],
        names = ['Type', 'Lag', 'Feature']
    )
    return labelled

# shuffle = False keeps the rows in their original order: the last 30 % become the test set.
X_train, X_test, y_train, y_test = train_test_split(
    frame['X'],
    frame['y'],
    test_size = 0.3,
    random_state = 0,
    shuffle = False
    )

# The remaining rows are halved, again in order: first half training, second half validation.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size = 0.5,
    random_state = 0,
    shuffle = False
    )

# Selecting frame['X'] / frame['y'] dropped the outer column level; restore it so the parts can be
# merged back into frames that are addressable as frame_*['X'] and frame_*['y'].
X_train, X_val, X_test = (_label_split(part, 'X', lag_X, X.columns) for part in (X_train, X_val, X_test))
y_train, y_val, y_test = (_label_split(part, 'y', lag_y, y.columns) for part in (y_train, y_val, y_test))

frame_train = pd.merge(y_train, X_train, left_index = True, right_index = True)
frame_test = pd.merge(y_test, X_test, left_index = True, right_index = True)
frame_val = pd.merge(y_val, X_val, left_index = True, right_index = True)

for split_name, split_frame in (('Train', frame_train), ('Test', frame_test), ('Validation', frame_val)):
    print()
    print(f'{split_name} input', split_frame['X'].shape, 'output', split_frame['y'].shape)
print()

display(frame_train)


Train input (2551, 69) output (2551, 1)

Test input (2188, 69) output (2188, 1)

Validation input (2552, 69) output (2552, 1)



Lag,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,0,-72,-71,-70,-69,-68,-67,-66,-65,-64,...,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4
Lag,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2017-01-06 06:00:00+00:00,77.1078,35,47.5295,47.2877,44.0313,40.0338,37.0793,35.2597,35.2113,37.3807,...,88.5123,75.4617,67.2939,61.9335,64.6584,64.463,64.463,35,36.2262,35.7301
2017-01-06 07:00:00+00:00,76.0859,47.5295,47.2877,44.0313,40.0338,37.0793,35.2597,35.2113,37.3807,38.3373,...,75.4617,67.2939,61.9335,64.6584,64.463,64.463,35,36.2262,35.7301,35.8832
2017-01-06 08:00:00+00:00,68.5947,47.2877,44.0313,40.0338,37.0793,35.2597,35.2113,37.3807,38.3373,40.1271,...,67.2939,61.9335,64.6584,64.463,64.463,35,36.2262,35.7301,35.8832,35
2017-01-06 09:00:00+00:00,67.7811,44.0313,40.0338,37.0793,35.2597,35.2113,37.3807,38.3373,40.1271,33.1769,...,61.9335,64.6584,64.463,64.463,35,36.2262,35.7301,35.8832,35,35
2017-01-06 10:00:00+00:00,65.2342,40.0338,37.0793,35.2597,35.2113,37.3807,38.3373,40.1271,33.1769,32.7695,...,64.6584,64.463,64.463,35,36.2262,35.7301,35.8832,35,35,77.1078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-05-29 14:00:00+00:00,63.9458,35.1413,37.3205,36.7381,37.5947,38.8846,39.1165,34.1211,27.947,28.564,...,30.3077,32.0393,34.0221,42.5405,51.6231,55.872,54.8865,55.4461,55.2771,57.044
2017-05-29 15:00:00+00:00,63.9414,37.3205,36.7381,37.5947,38.8846,39.1165,34.1211,27.947,28.564,30.2886,...,32.0393,34.0221,42.5405,51.6231,55.872,54.8865,55.4461,55.2771,57.044,55.4829
2017-05-29 16:00:00+00:00,70.7921,36.7381,37.5947,38.8846,39.1165,34.1211,27.947,28.564,30.2886,29.7792,...,34.0221,42.5405,51.6231,55.872,54.8865,55.4461,55.2771,57.044,55.4829,58.5075
2017-05-29 17:00:00+00:00,73.3632,37.5947,38.8846,39.1165,34.1211,27.947,28.564,30.2886,29.7792,29.9737,...,42.5405,51.6231,55.872,54.8865,55.4461,55.2771,57.044,55.4829,58.5075,62.9276


## Scaling

In [7]:
# Keep independent copies: plain assignment would only alias the frames, and the in-place scaling
# below would then overwrite the "unscaled" values as well.
frame_train_unscaled = frame_train.copy()
frame_test_unscaled = frame_test.copy()
frame_val_unscaled = frame_val.copy()

# Both scalers are fitted on the training split only and then applied to every split.
y_scaler = StandardScaler()
y_scaler.fit(frame_train['y'])

frame_train['y'] = y_scaler.transform(frame_train['y'])
frame_test['y'] = y_scaler.transform(frame_test['y'])
frame_val['y'] = y_scaler.transform(frame_val['y'])

X_scaler = StandardScaler()
X_scaler.fit(frame_train['X'])

frame_train['X'] = X_scaler.transform(frame_train['X'])
frame_test['X'] = X_scaler.transform(frame_test['X'])
frame_val['X'] = X_scaler.transform(frame_val['X'])

display(frame_train)

display(frame_test)

display(frame_val)

Lag,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,0,-72,-71,-70,-69,-68,-67,-66,-65,-64,...,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4
Lag,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2017-01-06 06:00:00+00:00,2.909464,-0.201647,0.788699,0.765305,0.504526,0.187914,-0.042731,-0.184179,-0.187941,-0.019554,...,3.719022,2.780474,2.174813,1.749933,1.948613,1.938044,1.948483,-0.232371,-0.143027,-0.180451
2017-01-06 07:00:00+00:00,2.832989,0.793563,0.769568,0.508707,0.191256,-0.042034,-0.184044,-0.187932,-0.018826,0.054920,...,2.755557,2.172472,1.775434,1.950405,1.934249,1.938044,-0.231638,-0.141684,-0.179645,-0.169195
2017-01-06 08:00:00+00:00,2.272429,0.774350,0.512016,0.193706,-0.040275,-0.183654,-0.187802,-0.019681,0.055738,0.194277,...,2.152558,1.773450,1.978455,1.936032,1.934249,-0.231766,-0.140904,-0.178378,-0.168341,-0.234107
2017-01-06 09:00:00+00:00,2.211550,0.515700,0.195845,-0.039104,-0.182869,-0.187420,-0.019317,0.054501,0.195262,-0.346862,...,1.756821,1.976290,1.963900,1.936032,-0.231941,-0.141462,-0.177617,-0.167051,-0.233526,-0.234107
2017-01-06 10:00:00+00:00,2.020964,0.198180,-0.037830,-0.182487,-0.186661,-0.018570,0.054967,0.193312,-0.346528,-0.378576,...,1.957991,1.961747,1.963900,-0.231547,-0.141787,-0.178001,-0.166284,-0.232371,-0.233526,2.860606
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-05-29 14:00:00+00:00,1.924557,-0.190425,-0.018752,-0.065989,0.000116,0.098475,0.115486,-0.272481,-0.754206,-0.706012,...,-0.577974,-0.451820,-0.304102,0.323205,0.990228,1.305356,1.239870,1.279764,1.263007,1.386014
2017-05-29 15:00:00+00:00,1.924227,-0.017333,-0.064815,0.001509,0.101202,0.116526,-0.272468,-0.751301,-0.706109,-0.571740,...,-0.450136,-0.304225,0.330563,0.991405,1.302616,1.232783,1.281273,1.267270,1.393406,1.271279
2017-05-29 16:00:00+00:00,2.436859,-0.063593,0.002934,0.103154,0.119377,-0.272270,-0.751953,-0.703450,-0.571676,-0.611402,...,-0.303755,0.329872,1.007260,1.303994,1.230164,1.273989,1.268772,1.397940,1.278189,1.493572
2017-05-29 17:00:00+00:00,2.629257,0.004446,0.104956,0.121429,-0.272095,-0.752795,-0.704036,-0.569705,-0.611385,-0.596254,...,0.325123,1.005964,1.323824,1.231496,1.271303,1.261548,1.399509,1.282484,1.501417,1.818431


Lag,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,0,-72,-71,-70,-69,-68,-67,-66,-65,-64,...,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4
Lag,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2017-09-19 05:00:00+00:00,0.970977,0.115289,0.299605,0.322441,0.356042,0.251086,0.136042,-0.009488,-0.168638,-0.182968,...,0.712947,0.678103,0.698710,0.527552,-0.225534,-0.257673,-0.094878,0.196333,0.196443,-0.025019
2017-09-19 06:00:00+00:00,0.962887,0.302382,0.325059,0.359402,0.254863,0.137127,-0.009110,-0.168727,-0.182436,0.261859,...,0.670488,0.697690,0.537508,-0.225136,-0.257804,-0.095653,0.197286,0.198491,-0.023559,0.208231
2017-09-19 07:00:00+00:00,0.745200,0.327944,0.362157,0.257664,0.140120,-0.008341,-0.168570,-0.182455,0.262926,0.505518,...,0.689914,0.536632,-0.224752,-0.257427,-0.096055,0.195129,0.199446,-0.021968,0.210672,0.260585
2017-09-19 08:00:00+00:00,0.647524,0.365201,0.260041,0.142287,-0.006349,-0.168147,-0.182317,0.260630,0.506878,0.603087,...,0.530182,-0.224946,-0.257454,-0.095574,0.194242,0.197278,-0.021127,0.212749,0.263246,0.764806
2017-09-19 09:00:00+00:00,0.439552,0.262650,0.144235,-0.004992,-0.167256,-0.181924,0.261383,0.503334,0.604564,0.657359,...,-0.225129,-0.257618,-0.093543,0.194909,0.196388,-0.022251,0.213711,0.265433,0.769586,0.956689
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-12-31 18:00:00+00:00,2.462779,0.864574,0.159770,0.053655,-0.337884,-0.358767,-0.358343,-0.160832,-0.205220,-0.205560,...,-0.561291,-0.396260,-0.694469,-0.802036,-0.712038,-0.512063,-0.245219,0.009934,0.262986,0.975524
2017-12-31 19:00:00+00:00,3.180237,0.161950,0.055274,-0.338359,-0.359188,-0.358331,-0.160664,-0.205122,-0.205055,0.251114,...,-0.395033,-0.694243,-0.808988,-0.711952,-0.511769,-0.245284,0.010792,0.265172,0.981191,0.857859
2017-12-31 20:00:00+00:00,2.175096,0.057009,-0.338197,-0.359780,-0.358749,-0.160224,-0.205016,-0.204958,0.252167,0.620903,...,-0.690564,-0.808659,-0.717759,-0.511555,-0.245435,0.009517,0.266161,0.984869,0.863031,1.424856
2017-12-31 21:00:00+00:00,-1.047737,-0.338141,-0.359697,-0.359339,-0.159278,-0.204671,-0.204852,0.249926,0.622401,0.811303,...,-0.804038,-0.717512,-0.514813,-0.245051,0.008940,0.263678,0.986226,0.866464,1.432412,1.092012


Lag,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Feature,0,-72,-71,-70,-69,-68,-67,-66,-65,-64,...,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4
Lag,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2017-05-29 19:00:00+00:00,2.183323,0.125325,-0.271799,-0.758713,-0.707577,-0.570552,-0.609666,-0.594122,-0.849356,-0.861232,...,1.309329,1.248890,1.292092,1.260231,1.388784,1.276697,1.507799,1.833078,1.902788,1.892939
2017-05-29 20:00:00+00:00,0.895189,-0.271459,-0.760113,-0.710094,-0.572431,-0.610199,-0.594556,-0.845965,-0.861516,-0.754109,...,1.236577,1.290540,1.279505,1.390217,1.274006,1.499445,1.834870,1.908380,1.902463,2.396429
2017-05-29 21:00:00+00:00,0.414891,-0.761857,-0.711314,-0.574202,-0.612351,-0.595057,-0.846747,-0.858063,-0.754264,-0.907130,...,1.277885,1.277965,1.411143,1.275365,1.496382,1.824968,1.910211,1.908055,2.408070,2.585396
2017-05-29 22:00:00+00:00,0.365220,-0.712849,-0.574916,-0.614342,-0.597104,-0.847796,-0.858862,-0.751359,-0.907470,-0.920065,...,1.265413,1.409485,1.294831,1.497884,1.821362,1.899953,1.909885,2.414711,2.597832,2.389167
2017-05-29 23:00:00+00:00,-0.237009,-0.575870,-0.615206,-0.599011,-0.851583,-0.859937,-0.752011,-0.903781,-0.920420,-1.030681,...,1.395852,1.293277,1.520179,1.823073,1.896222,1.899629,2.416801,2.604867,2.400778,2.147415
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-09-19 00:00:00+00:00,0.194503,-0.391340,-0.546453,-0.591022,-0.505192,-0.171984,0.105674,0.287813,0.315336,0.350726,...,0.054325,0.059706,0.296008,0.701644,0.675063,0.710433,0.672482,0.690746,0.527993,-0.227702
2017-09-19 01:00:00+00:00,-0.028556,-0.547286,-0.591799,-0.506591,-0.171119,0.106692,0.288603,0.312772,0.351899,0.250202,...,0.057181,0.295348,0.713814,0.676038,0.708686,0.668078,0.691952,0.530729,-0.227095,-0.259961
2017-09-19 02:00:00+00:00,0.208930,-0.592825,-0.507054,-0.170672,0.109476,0.290019,0.313596,0.349148,0.251255,0.136201,...,0.290883,0.712780,0.687882,0.709683,0.666402,0.687456,0.531854,-0.225927,-0.259488,-0.098272
2017-09-19 03:00:00+00:00,0.262234,-0.507719,-0.169887,0.111473,0.294064,0.315066,0.350024,0.249018,0.137116,-0.009321,...,0.704880,0.686872,0.721955,0.667371,0.685748,0.528115,-0.225190,-0.258388,-0.097120,0.191917


## Convert to two dimensional

In [8]:
def two_dim_tensor(frame_train, frame_test, frame_val):
    """Split each frame into its two-dimensional input ('X') and target ('y') column blocks.

    Parameters
    ----------
    frame_train, frame_test, frame_val : pandas.DataFrame
        Frames whose outermost column level contains the keys 'X' and 'y'.

    Returns
    -------
    tuple
        (X_train, y_train, X_test, y_test, X_val, y_val), each the corresponding column block.
    """

    X_train, y_train = frame_train['X'], frame_train['y']

    X_test, y_test = frame_test['X'], frame_test['y']

    # The validation target must be bound to y_val; assigning it to X_val discarded the validation
    # inputs and left y_val to be resolved from whatever global happened to exist.
    X_val, y_val = frame_val['X'], frame_val['y']

    return X_train, y_train, X_test, y_test, X_val, y_val

## Convert to three dimensional

In [9]:
def _stack_lags(frame):
    """Reshape the 'X' block of `frame` into a (samples, lags, features) array."""
    block = frame['X']
    columns = block.columns

    # After selecting 'X' the outermost remaining column level holds the lags; the columns are
    # expected in lag-major, feature-minor order.
    if isinstance(columns, pd.MultiIndex):
        n_lags = columns.get_level_values(0).nunique()
    else:
        n_lags = len(columns)
    n_features = block.shape[1] // n_lags if n_lags else 0

    values = block.to_numpy()
    if values.dtype == object:
        # Frames pre-allocated with empty columns can carry object dtype; return numeric arrays.
        values = values.astype(float)

    return values.reshape((len(block), n_lags, n_features))


def three_dim_tensor(frame_train, frame_test, frame_val):
    """Convert the 'X' block of each frame into a three-dimensional array.

    The tensor dimensions are derived from each frame's own column labels instead of notebook
    globals, and the values are reshaped in one vectorised step rather than row by row.

    Parameters
    ----------
    frame_train, frame_test, frame_val : pandas.DataFrame
        Frames with an 'X' column block whose sub-columns are ordered lag-major, feature-minor.

    Returns
    -------
    tuple of numpy.ndarray
        (X_train, X_test, X_val), each of shape (samples, lags, features).
    """

    X_train = _stack_lags(frame_train)

    X_test = _stack_lags(frame_test)

    X_val = _stack_lags(frame_val)

    return X_train, X_test, X_val

# Linear regression

In [10]:
def plotModelResults(model, X_train=X_train, X_test=X_test, plot_intervals=False, plot_anomalies=False):
    """
        Plots modelled vs fact values, prediction intervals and anomalies

        model:          fitted estimator exposing predict()
        X_train:        inputs used for the cross-validated error estimate
        X_test:         inputs to predict and plot
        plot_intervals: draw a band of +/- (CV mean absolute error + 1.96 * CV std) around the prediction
        plot_anomalies: additionally mark actual values outside that band (needs plot_intervals)

        Reads y_train, y_test, tscv and cross_val_score from the notebook namespace at call time.
    
    """

    def mean_absolute_percentage_error(y_true, y_pred): 
        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    
    # Flatten both series to 1-D so an (n,) prediction never broadcasts against an (n, 1) target.
    prediction = np.ravel(model.predict(X_test))
    actual = np.ravel(y_test)
    
    plt.figure(figsize=(15, 7))
    plt.plot(prediction, "g", label="prediction", linewidth=2.0)
    # Plot the target the model is scored against, so both curves live in the same space.
    plt.plot(actual, label="actual", linewidth=2.0)
    
    if plot_intervals:
        cv = cross_val_score(model, X_train, y_train, 
                                    cv=tscv, 
                                    scoring="neg_mean_absolute_error")
        # The scorer returns negated errors; flip the sign to obtain the MAE.
        mae = cv.mean() * (-1)
        deviation = cv.std()
        
        scale = 1.96
        lower = prediction - (mae + scale * deviation)
        upper = prediction + (mae + scale * deviation)
        
        plt.plot(lower, "r--", label="upper bound / lower bound", alpha=0.5)
        plt.plot(upper, "r--", alpha=0.5)
        
        if plot_anomalies:
            anomalies = np.full(len(actual), np.nan)
            below = actual < lower
            above = actual > upper
            anomalies[below] = actual[below]
            anomalies[above] = actual[above]
            plt.plot(anomalies, "o", markersize=10, label = "Anomalies")
    
    # Actual values are the reference (denominator) of the percentage error.
    error = mean_absolute_percentage_error(actual, prediction)
    # plt.title("Mean absolute percentage error {0:.2f}%".format(error))
    plt.legend(loc="best")
    plt.tight_layout()
    plt.grid(True);
    
def plotCoefficients(model):
    """
        Draws the model coefficients as a bar chart, ordered by decreasing magnitude
    """

    weights = pd.DataFrame(np.transpose(model.coef_), X_train.columns)
    weights.columns = ["coef"]

    # A temporary magnitude column drives the ordering and is dropped again before plotting.
    ranked = (
        weights
        .assign(**{"abs": weights["coef"].apply(np.abs)})
        .sort_values(by="abs", ascending=False)
        .drop(["abs"], axis=1)
    )

    plt.figure(figsize=(15, 7))
    ranked["coef"].plot(kind='bar')
    plt.grid(True, axis='y')
    plt.hlines(y=0, xmin=0, xmax=len(ranked), linestyles='dashed');

In [11]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import TimeSeriesSplit

# Time-series cross-validation with five folds
N_CV_SPLITS = 5
tscv = TimeSeriesSplit(n_splits = N_CV_SPLITS)

# Two-dimensional inputs and targets for the linear models
(X_train, y_train,
 X_test, y_test,
 X_val, y_val) = two_dim_tensor(frame_train, frame_test, frame_val)

In [12]:
# Ordinary least-squares baseline fitted on the training inputs and targets.
model = LinearRegression()
model.fit(X_train, y_train)

# Optional diagnostics, currently disabled:
# plotModelResults(   model,
#                     plot_intervals = True,
#                     plot_anomalies = False
#                     )

# plotCoefficients(model)

LinearRegression()

## LASSO regression

In [None]:
# LassoCV is a single-output estimator: pass the target as a 1-D array rather than an (n, 1) frame.
lasso = LassoCV(cv = tscv)
lasso.fit(X_train, np.ravel(y_train))

plotModelResults(   lasso, 
                    X_train = X_train, 
                    X_test = X_test, 
                    plot_intervals = True, plot_anomalies = False
                    )

plotCoefficients(lasso)

# Learning

## Create model

In [None]:
# Number of units in each GRU layer.
LATENT_DIM = 5
# Mini-batch size and maximum number of epochs passed to model.fit.
BATCH_SIZE = 50
EPOCHS = 100

# Number of target steps per sample (one per entry of lag_y); sizes the network output.
HORIZON = len(lag_y)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, GRU
from keras.optimizers import SGD, Adam
from keras.utils.vis_utils import plot_model
from keras.layers import GRU, Dense, RepeatVector, TimeDistributed, Flatten

In [None]:
def model_GRU():
    """Build the GRU network together with three-dimensional inputs for every split.

    Layer stack: GRU -> RepeatVector(HORIZON) -> GRU (returning sequences) ->
    TimeDistributed(Dense(1)) -> Flatten. Reads LATENT_DIM, HORIZON, lag_X, X and the
    frame_* splits from the notebook namespace.

    Returns (model, X_train, X_test, X_val); the model is not compiled.
    """

    n_lags, n_features = len(lag_X), len(X.columns)

    layers = [
        GRU(LATENT_DIM, input_shape = (n_lags, n_features)),
        RepeatVector(HORIZON),
        GRU(LATENT_DIM, return_sequences = True),
        TimeDistributed(Dense(1)),
        Flatten(),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)

    X_train, X_test, X_val = three_dim_tensor(frame_train, frame_test, frame_val)

    return model, X_train, X_test, X_val

In [None]:
def model_MLP():
    """Build the dense network together with two-dimensional inputs for every split.

    Layer stack: Dense(200, relu) -> Dense(100, relu) -> Dense(HORIZON). Reads HORIZON and the
    frame_* splits from the notebook namespace.

    Returns (model, X_train, X_test, X_val); the model is not compiled.
    """

    n_inputs = frame_train['X'].shape[1]
    hidden_options = dict(activation = 'relu', kernel_initializer = 'he_normal')

    layers = [
        Dense(200, input_shape = (n_inputs,), **hidden_options),
        Dense(100, **hidden_options),
        Dense(HORIZON),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)

    # The dense network consumes the flat 'X' blocks directly.
    X_train, X_test, X_val = (split['X'] for split in (frame_train, frame_test, frame_val))

    return model, X_train, X_test, X_val

In [None]:
model_sel = input(r'Enter model: ')

# Accepted model names and the builder each one maps to.
model_builders = {'GRU': model_GRU, 'MLP': model_MLP}

# Fail early on an unknown name instead of silently compiling whatever `model` was left over from
# an earlier cell.
if model_sel not in model_builders:
    raise ValueError(f'Unknown model {model_sel!r}; expected one of {sorted(model_builders)}')

model, X_train, X_test, X_val = model_builders[model_sel]()

# 'mae' is tracked as a regression metric; 'accuracy' is kept because the training-history plot
# reads history.history['accuracy'].
model.compile(optimizer = 'Adam', loss = 'mse', metrics = ['accuracy', 'mae'])

model.summary()

In [None]:
from keras.callbacks import EarlyStopping

# Stop once the validation loss has not improved for 5 epochs and roll the model back to the best
# epoch; without restore_best_weights the weights of the last (worse) epoch would be kept.
earlystop = EarlyStopping(monitor = 'val_loss', mode = 'min', patience = 5, restore_best_weights = True, verbose = 1)

# Training and validation targets are passed in the same form (plain arrays).
history = model.fit(X_train,
          np.array(frame_train['y']),
          batch_size = BATCH_SIZE,
          epochs = EPOCHS,
          validation_data = (X_val, np.array(frame_val['y'])),
          callbacks = [earlystop],
          verbose = 1)

In [None]:
# One figure per tracked quantity (accuracy first, then loss): training curve vs validation curve
for quantity in ('accuracy', 'loss'):
    for history_key in (quantity, 'val_' + quantity):
        plt.plot(history.history[history_key])
    plt.title('model ' + quantity)
    plt.ylabel(quantity)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc = 'upper left')
    plt.show()

In [13]:
# Empty 'Prediction' block (one column per target column and lag) appended to the test frame.
prediction_columns = pd.MultiIndex.from_product(
    [['Prediction'], y.columns, lag_y],
    names = ['Type', 'Feature', 'Lag']
)
predictions = pd.DataFrame(index = frame_test.index, columns = prediction_columns)

frame_test = frame_test.merge(predictions, left_index = True, right_index = True)

frame_test['Prediction'] = model.predict(X_test)

# Bring the predictions and the targets back to the original price scale.
for block in ('Prediction', 'y'):
    frame_test[block] = y_scaler.inverse_transform(frame_test[block])

display(frame_test)

Lag,y,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,Prediction
Feature,0,-72,-71,-70,-69,-68,-67,-66,-65,-64,...,-12,-11,-10,-9,-8,-7,-6,-5,-4,ID3
Lag,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,...,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,ID3,0
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2017-09-19 05:00:00+00:00,51.202420,0.115289,0.299605,0.322441,0.356042,0.251086,0.136042,-0.009488,-0.168638,-0.182968,...,0.678103,0.698710,0.527552,-0.225534,-0.257673,-0.094878,0.196333,0.196443,-0.025019,41.884256
2017-09-19 06:00:00+00:00,51.094308,0.302382,0.325059,0.359402,0.254863,0.137127,-0.009110,-0.168727,-0.182436,0.261859,...,0.697690,0.537508,-0.225136,-0.257804,-0.095653,0.197286,0.198491,-0.023559,0.208231,44.151943
2017-09-19 07:00:00+00:00,48.185192,0.327944,0.362157,0.257664,0.140120,-0.008341,-0.168570,-0.182455,0.262926,0.505518,...,0.536632,-0.224752,-0.257427,-0.096055,0.195129,0.199446,-0.021968,0.210672,0.260585,41.540313
2017-09-19 08:00:00+00:00,46.879881,0.365201,0.260041,0.142287,-0.006349,-0.168147,-0.182317,0.260630,0.506878,0.603087,...,-0.224946,-0.257454,-0.095574,0.194242,0.197278,-0.021127,0.212749,0.263246,0.764806,44.036466
2017-09-19 09:00:00+00:00,44.100597,0.262650,0.144235,-0.004992,-0.167256,-0.181924,0.261383,0.503334,0.604564,0.657359,...,-0.257618,-0.093543,0.194909,0.196388,-0.022251,0.213711,0.265433,0.769586,0.956689,46.501296
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-12-31 18:00:00+00:00,71.138462,0.864574,0.159770,0.053655,-0.337884,-0.358767,-0.358343,-0.160832,-0.205220,-0.205560,...,-0.396260,-0.694469,-0.802036,-0.712038,-0.512063,-0.245219,0.009934,0.262986,0.975524,44.827011
2017-12-31 19:00:00+00:00,80.726384,0.161950,0.055274,-0.338359,-0.359188,-0.358331,-0.160664,-0.205122,-0.205055,0.251114,...,-0.694243,-0.808988,-0.711952,-0.511769,-0.245284,0.010792,0.265172,0.981191,0.857859,44.062464
2017-12-31 20:00:00+00:00,67.293939,0.057009,-0.338197,-0.359780,-0.358749,-0.160224,-0.205016,-0.204958,0.252167,0.620903,...,-0.808659,-0.717759,-0.511555,-0.245435,0.009517,0.266161,0.984869,0.863031,1.424856,47.083834
2017-12-31 21:00:00+00:00,24.224867,-0.338141,-0.359697,-0.359339,-0.159278,-0.204671,-0.204852,0.249926,0.622401,0.811303,...,-0.717512,-0.514813,-0.245051,0.008940,0.263678,0.986226,0.866464,1.432412,1.092012,42.786070


# Results

## Plot prediction

In [14]:
# (legend label, series, colour) for each trace drawn over the test-set timestamps
series_to_plot = (
    ('Actual', frame_test['y'][0]['ID3'], hex_maroon),
    ('Predicted', frame_test['Prediction']['ID3'][0], hex_gold),
)

data = [
    go.Scatter(x = frame_test.index, y = values, name = label, line_color = colour)
    for label, values, colour in series_to_plot
]
fig1, fig2 = data

fig = go.Figure(data = data)

layout_options = dict(
    title = 'Forecast of test set',
    xaxis_title = 'Timestamp',
    yaxis_title = 'ID3 (€)',
)
fig.update_layout(**layout_options)

fig.show()

## Metrics

In [46]:
def smape(A, F):
    """Symmetric mean absolute percentage error between actuals A and forecasts F, in percent."""
    scale = 100/len(A)
    doubled_error = 2 * np.abs(F - A)
    magnitude = np.abs(A) + np.abs(F)
    return scale * np.sum(doubled_error / magnitude)

# SMAPE per forecast step, framed by blank lines
print()
for step in lag_y:
    actual_prices = frame_test['y'][step]['ID3']
    predicted_prices = frame_test['Prediction']['ID3'][step]
    print(f'SMAPE for hour {step}:', smape(actual_prices, predicted_prices))
print()


SMAPE for hour 0: 19.016145313817898



In [47]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error per forecast step, framed by blank lines
print()
for step in lag_y:
    actual_prices = frame_test['y'][step]['ID3']
    predicted_prices = frame_test['Prediction']['ID3'][step]
    print(f'MAE for hour {step}:', mean_absolute_error(actual_prices, predicted_prices))
print()


MAE for hour 0: 8.99515816414378

