# Load necessary packages

In [1]:
import pandas as pd
import numpy as np

# Colour palette as hex codes. hex_indigo, hex_salmon and hex_maroon feed the
# Matplotlib colour cycle configured further down in this cell.
hex_salmon = '#F68F83'
hex_gold = '#BC9661'
hex_indigo = '#2D2E5F'
hex_maroon = '#8C4750'
hex_white = '#FAFAFA'
hex_blue = '#7EB5D2'

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.dates import DateFormatter
import matplotlib.dates as dates

import matplotlib.font_manager as font_manager

# `_rebuild` is a private Matplotlib helper that refreshes the font cache so a
# newly installed font is picked up. Newer Matplotlib releases no longer ship
# it, so only call it when it exists instead of failing the whole import cell.
if hasattr(font_manager, '_rebuild'):
    font_manager._rebuild()

# Global plot styling: typeface, font weights, colour cycle and figure titles.
mpl.rcParams['font.family'] = 'SF Mono'
mpl.rcParams['font.weight'] = 'medium'
mpl.rcParams['axes.titleweight'] = 'semibold'
mpl.rcParams['axes.labelweight'] = 'medium'
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=[hex_indigo, hex_salmon, hex_maroon])
mpl.rcParams["figure.titlesize"] = 'large'
mpl.rcParams["figure.titleweight"] = 'semibold'

from termcolor import colored

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LogisticRegression, Ridge, ElasticNet, LassoCV, RidgeCV, ElasticNetCV
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import roc_auc_score, accuracy_score

import tensorflow as tf

from entsoe import EntsoePandasClient

# Organise data

## Import features

In [2]:
# Read the pre-computed feature table from the working directory.
# NOTE(review): unpickling can execute arbitrary code - only load a features.pkl
# that comes from a trusted source.
features = pd.read_pickle('./features.pkl')

display(features)

Unnamed: 0,ID3,VOL,MCP,LOAD,LOAD_F,LOAD_FE,ID3 (-4),ID3 (-5),ID3 (-6),ID3 (-7),...,HOD 14,HOD 15,HOD 16,HOD 17,HOD 18,HOD 19,HOD 20,HOD 21,HOD 22,HOD 23
2015-01-08 01:00:00+00:00,22.953776,439.5,32.32,9008.00,8505.25,502.75,29.934792,61.666667,61.118812,61.370370,...,0,0,0,0,0,0,0,0,0,0
2015-01-08 02:00:00+00:00,23.168355,261.5,31.10,8889.25,8222.25,667.00,29.853669,29.934792,61.666667,61.118812,...,0,0,0,0,0,0,0,0,0,0
2015-01-08 03:00:00+00:00,21.000000,420.5,30.17,8929.25,8122.25,807.00,24.012378,29.853669,29.934792,61.666667,...,0,0,0,0,0,0,0,0,0,0
2015-01-08 04:00:00+00:00,30.000000,460.6,24.54,9423.75,8323.50,1100.25,23.269810,24.012378,29.853669,29.934792,...,0,0,0,0,0,0,0,0,0,0
2015-01-08 05:00:00+00:00,30.000000,250.0,32.00,10884.50,9015.00,1869.50,22.953776,23.269810,24.012378,29.853669,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-30 14:00:00+00:00,53.790740,446.6,46.19,13842.50,15329.25,1486.75,76.370821,87.755884,78.709213,52.958116,...,1,0,0,0,0,0,0,0,0,0
2018-12-30 15:00:00+00:00,59.477646,131.6,47.64,14319.25,15644.50,1325.25,63.690401,76.370821,87.755884,78.709213,...,0,1,0,0,0,0,0,0,0,0
2018-12-30 16:00:00+00:00,59.883829,310.1,55.94,15120.75,16285.75,1165.00,56.170316,63.690401,76.370821,87.755884,...,0,0,1,0,0,0,0,0,0,0
2018-12-30 17:00:00+00:00,59.471501,220.9,58.40,14728.75,15555.75,827.00,51.675229,56.170316,63.690401,76.370821,...,0,0,0,1,0,0,0,0,0,0


## Select features

In [3]:
# Scratch check: materialise range(0, 2) to see which integers it yields.
list(range(0,2))

[0, 1]

In [4]:
# Model inputs (ID3 and LOAD) and the forecast target (ID3).
X = features[['ID3', 'LOAD']]
y = features[['ID3']]

# Lags are row offsets relative to each timestamp: negative values look back,
# zero is the row itself and positive values look ahead.
lag_X = range(-4, -7, -1)   # -4, -5, -6
lag_y = range(0, 3, 1)      # 0, 1, 2

# Empty placeholder frames that carry the (Type, Feature, Lag) column layout.
X2 = pd.DataFrame(index = X.index, columns = pd.MultiIndex.from_product([['X'], X.columns, lag_X], names = ['Type', 'Feature', 'Lag']))
X2 = X2.rename_axis('Timestamp')

y2 = pd.DataFrame(index = y.index, columns = pd.MultiIndex.from_product([['y'], y.columns, lag_y], names = ['Type', 'Feature', 'Lag']))
y2 = y2.rename_axis('Timestamp')

frame = pd.merge(y2, X2, left_index = True, right_index = True)

X3 = pd.DataFrame(index = X.index)
y3 = pd.DataFrame(index = y.index)

# Columns are generated feature-major / lag-minor, the same order that
# MultiIndex.from_product used above, so the values can be written in
# positionally. shift(-l) moves the value found l rows away onto the current row.
for c in X.columns:
    for l in lag_X:
        X3[f'{c} ({l})'] = X[c].shift(-l)

frame['X'] = X3.values

for c in y.columns:
    for l in lag_y:
        # Shift the column named by `c`; shifting the whole `y` frame only
        # worked while `y` happened to contain a single column.
        y3[f'{c} ({l})'] = y[c].shift(-l)

frame['y'] = y3.values

# Rows at either end have no complete set of lagged values; discard them.
frame = frame.dropna()

display(frame)

Type,y,y,y,X,X,X,X,X,X
Feature,ID3,ID3,ID3,ID3,ID3,ID3,LOAD,LOAD,LOAD
Lag,0,1,2,-4,-5,-6,-4,-5,-6
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3
2015-01-08 07:00:00+00:00,43.588694,43.537764,48.252186,21.000000,23.168355,22.953776,8929.25,8889.25,9008.00
2015-01-08 08:00:00+00:00,43.537764,48.252186,48.683607,30.000000,21.000000,23.168355,9423.75,8929.25,8889.25
2015-01-08 09:00:00+00:00,48.252186,48.683607,46.580903,30.000000,30.000000,21.000000,10884.50,9423.75,8929.25
2015-01-08 10:00:00+00:00,48.683607,46.580903,39.971304,43.153846,30.000000,30.000000,13364.50,10884.50,9423.75
2015-01-08 11:00:00+00:00,46.580903,39.971304,44.970497,43.588694,43.153846,30.000000,15053.25,13364.50,10884.50
...,...,...,...,...,...,...,...,...,...
2018-12-30 12:00:00+00:00,56.170316,51.675229,53.790740,78.709213,52.958116,56.241202,12507.50,11786.75,11064.00
2018-12-30 13:00:00+00:00,51.675229,53.790740,59.477646,87.755884,78.709213,52.958116,13035.50,12507.50,11786.75
2018-12-30 14:00:00+00:00,53.790740,59.477646,59.883829,76.370821,87.755884,78.709213,13448.25,13035.50,12507.50
2018-12-30 15:00:00+00:00,59.477646,59.883829,59.471501,63.690401,76.370821,87.755884,13715.50,13448.25,13035.50


## Split train and test

In [5]:
# Ordered (unshuffled) split: the first 70% of rows train the model and the
# remaining 30% are held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    frame['X'], frame['y'],
    test_size = 0.3, random_state = 0, shuffle = False)

# Selecting frame['X'] / frame['y'] drops the top 'Type' level; build the full
# three-level headers once and reattach them to both halves.
level_names = ['Type', 'Feature', 'Lag']
input_columns = pd.MultiIndex.from_product([['X'], X.columns, lag_X], names = level_names)
target_columns = pd.MultiIndex.from_product([['y'], y.columns, lag_y], names = level_names)

X_train.columns = input_columns
y_train.columns = target_columns
frame_train = pd.merge(y_train, X_train, left_index = True, right_index = True)

X_test.columns = input_columns
y_test.columns = target_columns
frame_test = pd.merge(y_test, X_test, left_index = True, right_index = True)

print()
print('Train input', frame_train['X'].shape, 'output', frame_train['y'].shape)
print()
print('Test input', frame_test['X'].shape, 'output', frame_test['y'].shape)
print()

display(frame_train)


Train input (23685, 6) output (23685, 3)

Test input (10152, 6) output (10152, 3)



Type,y,y,y,X,X,X,X,X,X
Feature,ID3,ID3,ID3,ID3,ID3,ID3,LOAD,LOAD,LOAD
Lag,0,1,2,-4,-5,-6,-4,-5,-6
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3
2015-01-08 07:00:00+00:00,43.588694,43.537764,48.252186,21.000000,23.168355,22.953776,8929.25,8889.25,9008.00
2015-01-08 08:00:00+00:00,43.537764,48.252186,48.683607,30.000000,21.000000,23.168355,9423.75,8929.25,8889.25
2015-01-08 09:00:00+00:00,48.252186,48.683607,46.580903,30.000000,30.000000,21.000000,10884.50,9423.75,8929.25
2015-01-08 10:00:00+00:00,48.683607,46.580903,39.971304,43.153846,30.000000,30.000000,13364.50,10884.50,9423.75
2015-01-08 11:00:00+00:00,46.580903,39.971304,44.970497,43.588694,43.153846,30.000000,15053.25,13364.50,10884.50
...,...,...,...,...,...,...,...,...,...
2017-10-28 11:00:00+00:00,43.600365,42.941124,43.582655,35.416372,32.603013,31.291775,13372.25,12739.50,11720.00
2017-10-28 12:00:00+00:00,42.941124,43.582655,40.537582,35.416084,35.416372,32.603013,13757.25,13372.25,12739.50
2017-10-28 13:00:00+00:00,43.582655,40.537582,41.788302,35.730954,35.416084,35.416372,13883.50,13757.25,13372.25
2017-10-28 14:00:00+00:00,40.537582,41.788302,45.834303,37.779619,35.730954,35.416084,13822.25,13883.50,13757.25


## Scaling

In [6]:
# Take real copies: a plain assignment would only alias the frames, and the
# in-place scaling below would then overwrite the "unscaled" values as well.
frame_train_unscaled = frame_train.copy()
frame_test_unscaled = frame_test.copy()

# Both scalers are fitted on the training rows only and then applied to the
# test rows, so no test-set statistics leak into the transformation.
y_scaler = StandardScaler()
y_scaler.fit(frame_train['y'])

frame_train['y'] = y_scaler.transform(frame_train['y'])
frame_test['y'] = y_scaler.transform(frame_test['y'])

X_scaler = StandardScaler()
X_scaler.fit(frame_train['X'])

frame_train['X'] = X_scaler.transform(frame_train['X'])
frame_test['X'] = X_scaler.transform(frame_test['X'])

display(frame_train)

Type,y,y,y,X,X,X,X,X,X
Feature,ID3,ID3,ID3,ID3,ID3,ID3,LOAD,LOAD,LOAD
Lag,0,1,2,-4,-5,-6,-4,-5,-6
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3
2015-01-08 07:00:00+00:00,0.334322,0.330982,0.639457,-1.143655,-1.001694,-1.015671,-1.394783,-1.411118,-1.362106
2015-01-08 08:00:00+00:00,0.330989,0.639481,0.667687,-0.554741,-1.143577,-1.001631,-1.191205,-1.394651,-1.410990
2015-01-08 09:00:00+00:00,0.639488,0.667712,0.530094,-0.554741,-0.554674,-1.143511,-0.589834,-1.191080,-1.394524
2015-01-08 10:00:00+00:00,0.667720,0.530117,0.097585,0.305979,-0.554674,-0.554621,0.431147,-0.589731,-1.190959
2015-01-08 11:00:00+00:00,0.530124,0.097604,0.424714,0.334433,0.306030,-0.554621,1.126382,0.431214,-0.589629
...,...,...,...,...,...,...,...,...,...
2017-10-28 11:00:00+00:00,0.335086,0.291940,0.333899,-0.200322,-0.384350,-0.470098,0.434338,0.173919,-0.245688
2017-10-28 12:00:00+00:00,0.291947,0.333920,0.134640,-0.200340,-0.200261,-0.384300,0.592837,0.434404,0.173998
2017-10-28 13:00:00+00:00,0.333927,0.134659,0.216483,-0.179737,-0.200280,-0.200216,0.644812,0.592898,0.434475
2017-10-28 14:00:00+00:00,0.134666,0.216503,0.481239,-0.045683,-0.179677,-0.200235,0.619596,0.644871,0.592963


In [7]:
# Turn the flat input columns into a 3-D array indexed as
# [sample, lag position, feature] - the layout the recurrent layers consume.
# One 2-D slice per lag is taken (features ordered as in X.columns) and the
# slices are stacked along a new middle axis; this replaces a row-by-row
# Python loop over iterrows() that produced the same array.
# NOTE(review): lag_X runs -4, -5, -6, so the time axis is ordered newest to
# oldest; recurrent layers are usually fed oldest-first - confirm this is intended.
inputs_train = frame_train['X']

X_train = np.stack(
    [inputs_train.xs(lag, axis = 1, level = 'Lag')[list(X.columns)].to_numpy(dtype = float)
     for lag in lag_X],
    axis = 1)

In [9]:
# Same reshaping as for the training inputs: a 3-D array indexed as
# [sample, lag position, feature], built by stacking one 2-D slice per lag
# (features ordered as in X.columns) instead of looping over iterrows().
# NOTE(review): lag_X runs -4, -5, -6, so the time axis is ordered newest to
# oldest; recurrent layers are usually fed oldest-first - confirm this is intended.
inputs_test = frame_test['X']

X_test = np.stack(
    [inputs_test.xs(lag, axis = 1, level = 'Lag')[list(X.columns)].to_numpy(dtype = float)
     for lag in lag_X],
    axis = 1)

# Learning

## Create model

In [11]:
# Training hyper-parameters.
LATENT_DIM = 5    # number of units given to each GRU layer of the model
BATCH_SIZE = 32   # batch_size passed to model.fit
EPOCHS = 10       # epochs passed to model.fit

In [12]:
from keras.models import Sequential
from keras.layers import Dense, GRU
from keras.optimizers import SGD, Adam
from keras.utils.vis_utils import plot_model
from keras.layers import GRU, Dense, RepeatVector, TimeDistributed, Flatten

In [13]:
# Seed TensorFlow before any layer is created so that weight initialisation
# is repeatable from one run to the next.
tf.random.set_seed(0)

# Take the tensor dimensions from the feature definition rather than
# hard-coding them, so changing the lags or input columns keeps the model in sync.
T = len(lag_X)                 # input time steps per sample
N_FEATURES = len(X.columns)    # values per input time step
HORIZON = len(lag_y)           # forecast steps produced per sample

# Encoder-decoder GRU: the first GRU condenses the input sequence into one
# state vector, RepeatVector copies it once per forecast step, the second GRU
# unrolls those copies, and a shared Dense(1) maps each step to one value.
# Flatten turns the (HORIZON, 1) output into a vector of length HORIZON.
model = Sequential([
    GRU(LATENT_DIM, input_shape = (T, N_FEATURES)),
    RepeatVector(HORIZON),
    GRU(LATENT_DIM, return_sequences = True),
    TimeDistributed(Dense(1)),
    Flatten(),
])

In [14]:
# Configure training with the Adam optimiser and a mean-squared-error loss,
# then print the layer-by-layer parameter table.
model.compile(loss = 'mse', optimizer = 'Adam')

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, 5)                 135       
_________________________________________________________________
repeat_vector (RepeatVector) (None, 3, 5)              0         
_________________________________________________________________
gru_1 (GRU)                  (None, 3, 5)              180       
_________________________________________________________________
time_distributed (TimeDistri (None, 3, 1)              6         
_________________________________________________________________
flatten (Flatten)            (None, 3)                 0         
Total params: 321
Trainable params: 321
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Train on the scaled training tensor and targets. The returned History object
# is kept so the per-epoch loss can be inspected afterwards instead of being
# thrown away as an unnamed cell output.
history = model.fit(
    X_train,
    frame_train['y'].to_numpy(),
    batch_size = BATCH_SIZE,
    epochs = EPOCHS,
    verbose = 1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f80d5b4bac0>

In [18]:
# Column header for the forecasts: one column per target feature and lag.
prediction_columns = pd.MultiIndex.from_product([['Prediction'], y.columns, lag_y], names = ['Type', 'Feature', 'Lag'])

column_types = frame_test.columns.get_level_values('Type')

if 'Prediction' in column_types:
    # Re-run of this cell: the targets were already converted back to original
    # units the first time, so only discard the stale forecast columns.
    # Inverse-transforming `y` again would corrupt it.
    frame_test = frame_test.loc[:, column_types != 'Prediction']
else:
    # First run: bring the scaled targets back to their original units.
    frame_test['y'] = y_scaler.inverse_transform(frame_test['y'])

# The model predicts in scaled units; undo the target scaling before storing.
predictions = pd.DataFrame(
    y_scaler.inverse_transform(model.predict(X_test)),
    index = frame_test.index,
    columns = prediction_columns)

frame_test = pd.merge(frame_test, predictions, left_index = True, right_index = True)

display(frame_test)

Type,y,y,y,X,X,X,X,X,X,Prediction,Prediction,Prediction
Feature,ID3,ID3,ID3,ID3,ID3,ID3,LOAD,LOAD,LOAD,ID3,ID3,ID3
Lag,0,1,2,-4,-5,-6,-4,-5,-6,0,1,2
Timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
2017-10-28 16:00:00+00:00,45.834303,49.971656,53.021902,0.292060,0.335247,-0.045584,0.489401,0.540307,0.619721,39.174580,38.289906,37.928768
2017-10-28 17:00:00+00:00,49.971656,53.021902,17.579661,0.334038,0.292111,0.335281,0.500207,0.489465,0.540374,40.284458,39.211720,38.837242
2017-10-28 18:00:00+00:00,53.021902,17.579661,18.143989,0.134784,0.334088,0.292145,0.624948,0.500272,0.489534,39.677898,38.668415,38.299530
2017-10-28 19:00:00+00:00,17.579661,18.143989,17.683110,0.216625,0.134838,0.334122,0.860124,0.625008,0.500340,40.428520,39.286270,38.888439
2017-10-28 20:00:00+00:00,18.143989,17.683110,17.911371,0.481374,0.216677,0.134876,0.892235,0.860175,0.625072,41.569611,40.303303,39.883938
...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-30 12:00:00+00:00,56.170316,51.675229,53.790740,2.632540,0.947559,1.162399,0.078332,-0.218300,-0.515736,59.286526,58.473721,57.997204
2018-12-30 13:00:00+00:00,51.675229,53.790740,59.477646,3.224508,2.632547,0.947579,0.295702,0.078412,-0.218210,63.539383,62.770802,62.212860
2018-12-30 14:00:00+00:00,53.790740,59.477646,59.883829,2.479528,3.224504,2.632531,0.465626,0.295774,0.078493,58.008450,55.679783,55.043499
2018-12-30 15:00:00+00:00,59.477646,59.883829,59.471501,1.649786,2.479538,3.224475,0.575649,0.465691,0.295849,51.685860,49.184338,48.668789


# Results

## Plot prediction

In [25]:
import plotly.express as px
import plotly.graph_objects as go

# Overlay the observed lag-0 ID3 series and its forecast on the test period.
actual_trace = go.Scatter(
    x = frame_test.index,
    y = frame_test[('y', 'ID3', 0)],
    name = 'Actual')

predicted_trace = go.Scatter(
    x = frame_test.index,
    y = frame_test[('Prediction', 'ID3', 0)],
    name = 'Predicted')

fig = go.Figure(data = [actual_trace, predicted_trace])

fig.update_layout(
    title = 'Forecast of test set',
    xaxis_title = 'Timestamp',
    yaxis_title = 'ID3 (€)')

fig.show()

## Metrics

In [28]:
def smape(A, F):
    """Symmetric mean absolute percentage error between actuals and forecasts.

    Computes 100 / len(A) * sum(2 * |F - A| / (|A| + |F|)).

    A term whose actual and forecast are both zero has a zero denominator; it
    is counted as zero error (a perfect forecast) instead of turning the whole
    metric into NaN.

    Parameters:
        A: actual values (NumPy array or pandas Series).
        F: forecast values, same length as A.

    Returns:
        The sMAPE as a percentage between 0 and 200.

    Raises:
        ZeroDivisionError: if A is empty.
    """
    abs_error = np.abs(F - A)
    scale = np.abs(A) + np.abs(F)
    zero_scale = scale == 0
    # Swap zero denominators for 1 before dividing so no 0/0 is ever evaluated,
    # then force those terms to 0.
    terms = np.where(zero_scale, 0.0, 2 * abs_error / np.where(zero_scale, 1.0, scale))
    return 100/len(A) * np.sum(terms)

# Report the sMAPE separately for each of the three forecast steps.
for step in (0, 1, 2):
    actual = frame_test['y']['ID3'][step]
    forecast = frame_test['Prediction']['ID3'][step]
    print(smape(actual, forecast))

21.03815937675948
23.17602821868029
24.29416872739463
