# Load necessary packages

In [1]:
import pandas as pd
import numpy as np

hex_salmon = '#F68F83'
hex_gold = '#BC9661'
hex_indigo = '#2D2E5F'
hex_maroon = '#8C4750'
hex_white = '#FAFAFA'
hex_blue = '#7EB5D2'

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.dates import DateFormatter
import matplotlib.dates as dates

import matplotlib.font_manager as font_manager
mpl.font_manager._rebuild()

mpl.rcParams['font.family'] = 'SF Mono'
mpl.rcParams['font.weight'] = 'medium'
mpl.rcParams['axes.titleweight'] = 'semibold'
mpl.rcParams['axes.labelweight'] = 'medium'
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=[hex_indigo, hex_salmon, hex_maroon])
mpl.rcParams["figure.titlesize"] = 'large'
mpl.rcParams["figure.titleweight"] = 'semibold'

from termcolor import colored

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LogisticRegression, Ridge, ElasticNet, LassoCV, RidgeCV, ElasticNetCV
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import roc_auc_score, accuracy_score

import tensorflow as tf

from entsoe import EntsoePandasClient

# Organise data

## Import features

In [12]:
! pip install 'git+git://github.com/HR/github-clone#egg=ghclone' &> /dev/null

! ghclone https://github.com/timovijn/ElectricityPriceForecasting/tree/master/LSTM

zsh:1: command not found: ghclone


In [2]:
features = pd.read_pickle(f"./features.pkl")

display(features)

Unnamed: 0,ID3,VOL,MCP,LOAD,LOAD_F,LOAD_FE,ID3 (-4),ID3 (-5),ID3 (-6),ID3 (-7),...,HOD 14,HOD 15,HOD 16,HOD 17,HOD 18,HOD 19,HOD 20,HOD 21,HOD 22,HOD 23
2015-01-08 01:00:00+00:00,22.953776,439.5,32.32,9008.00,8505.25,502.75,29.934792,61.666667,61.118812,61.370370,...,0,0,0,0,0,0,0,0,0,0
2015-01-08 02:00:00+00:00,23.168355,261.5,31.10,8889.25,8222.25,667.00,29.853669,29.934792,61.666667,61.118812,...,0,0,0,0,0,0,0,0,0,0
2015-01-08 03:00:00+00:00,21.000000,420.5,30.17,8929.25,8122.25,807.00,24.012378,29.853669,29.934792,61.666667,...,0,0,0,0,0,0,0,0,0,0
2015-01-08 04:00:00+00:00,30.000000,460.6,24.54,9423.75,8323.50,1100.25,23.269810,24.012378,29.853669,29.934792,...,0,0,0,0,0,0,0,0,0,0
2015-01-08 05:00:00+00:00,30.000000,250.0,32.00,10884.50,9015.00,1869.50,22.953776,23.269810,24.012378,29.853669,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-30 14:00:00+00:00,53.790740,446.6,46.19,13842.50,15329.25,1486.75,76.370821,87.755884,78.709213,52.958116,...,1,0,0,0,0,0,0,0,0,0
2018-12-30 15:00:00+00:00,59.477646,131.6,47.64,14319.25,15644.50,1325.25,63.690401,76.370821,87.755884,78.709213,...,0,1,0,0,0,0,0,0,0,0
2018-12-30 16:00:00+00:00,59.883829,310.1,55.94,15120.75,16285.75,1165.00,56.170316,63.690401,76.370821,87.755884,...,0,0,1,0,0,0,0,0,0,0
2018-12-30 17:00:00+00:00,59.471501,220.9,58.40,14728.75,15555.75,827.00,51.675229,56.170316,63.690401,76.370821,...,0,0,0,1,0,0,0,0,0,0


## Select features

In [3]:
X = features[['ID3 (-4)', 'ID3 (-5)']]
y = features[['ID3']]

X.columns = pd.MultiIndex.from_product([["X"], X.columns], names = ['Type', 'Feature'])
y.columns = pd.MultiIndex.from_product([["y"], y.columns], names = ['Type', 'Feature'])

frame = pd.merge(y, X, left_index = True, right_index = True)

frame = frame.rename_axis('Timestamp')

display(frame)

Type,y,X,X
Feature,ID3,ID3 (-4),ID3 (-5)
Timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2015-01-08 01:00:00+00:00,22.953776,29.934792,61.666667
2015-01-08 02:00:00+00:00,23.168355,29.853669,29.934792
2015-01-08 03:00:00+00:00,21.000000,24.012378,29.853669
2015-01-08 04:00:00+00:00,30.000000,23.269810,24.012378
2015-01-08 05:00:00+00:00,30.000000,22.953776,23.269810
...,...,...,...
2018-12-30 14:00:00+00:00,53.790740,76.370821,87.755884
2018-12-30 15:00:00+00:00,59.477646,63.690401,76.370821
2018-12-30 16:00:00+00:00,59.883829,56.170316,63.690401
2018-12-30 17:00:00+00:00,59.471501,51.675229,56.170316


## Split train and test

In [4]:
X_train, X_test, y_train, y_test = train_test_split(
    frame['X'],
    frame['y'],
    test_size = 0.3,
    random_state = 0,
    shuffle = False)

X_train.columns = pd.MultiIndex.from_product([["X"], X_train.columns], names = ['Type', 'Feature'])
y_train.columns = pd.MultiIndex.from_product([["y"], y_train.columns], names = ['Type', 'Feature'])

frame_train = pd.merge(y_train, X_train, left_index = True, right_index = True)

X_test.columns = pd.MultiIndex.from_product([["X"], X_test.columns], names = ['Type', 'Feature'])
y_test.columns = pd.MultiIndex.from_product([["y"], y_test.columns], names = ['Type', 'Feature'])

frame_test = pd.merge(y_test, X_test, left_index = True, right_index = True)

print()
print(f'Train input', frame_train['X'].shape, 'output', frame_train['y'].shape)
print()
print(f'Test input', frame_test['X'].shape, 'output', frame_test['y'].shape)
print()

display(frame_train)


Train input (23691, 2) output (23691, 1)

Test input (10154, 2) output (10154, 1)



Type,y,X,X
Feature,ID3,ID3 (-4),ID3 (-5)
Timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2015-01-08 01:00:00+00:00,22.953776,29.934792,61.666667
2015-01-08 02:00:00+00:00,23.168355,29.853669,29.934792
2015-01-08 03:00:00+00:00,21.000000,24.012378,29.853669
2015-01-08 04:00:00+00:00,30.000000,23.269810,24.012378
2015-01-08 05:00:00+00:00,30.000000,22.953776,23.269810
...,...,...,...
2017-10-28 11:00:00+00:00,43.600365,35.416372,32.603013
2017-10-28 12:00:00+00:00,42.941124,35.416084,35.416372
2017-10-28 13:00:00+00:00,43.582655,35.730954,35.416084
2017-10-28 14:00:00+00:00,40.537582,37.779619,35.730954


## Scaling

In [5]:
frame_train_unscaled = frame_train
frame_test_unscaled = frame_test

scaler = StandardScaler()
scaler.fit(frame_train['X'])

frame_train['X'] = scaler.transform(frame_train['X'])
frame_test['X'] = scaler.transform(frame_test['X'])

display(frame_train)

Type,y,X,X
Feature,ID3,ID3 (-4),ID3 (-5)
Timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2015-01-08 01:00:00+00:00,22.953776,-0.558572,1.518263
2015-01-08 02:00:00+00:00,23.168355,-0.563882,-0.558585
2015-01-08 03:00:00+00:00,21.000000,-0.946177,-0.563894
2015-01-08 04:00:00+00:00,30.000000,-0.994775,-0.946206
2015-01-08 05:00:00+00:00,30.000000,-1.015459,-0.994807
...,...,...,...
2017-10-28 11:00:00+00:00,43.600365,-0.199819,-0.383950
2017-10-28 12:00:00+00:00,42.941124,-0.199838,-0.199816
2017-10-28 13:00:00+00:00,43.582655,-0.179231,-0.199835
2017-10-28 14:00:00+00:00,40.537582,-0.045152,-0.179226


In [6]:
# time_steps = 24
# for_periods = 1
# lag = 23                 # Extra lag

# X_train = []
# y_train = []

# for i in range(time_steps+lag, len(frame_train)):
#     X_train.append(frame_train['X'][i-lag-time_steps : i-lag, 0])
#     y_train.append(frame_train['y'][i : i+for_periods, 0])

# X_train = np.array(X_train)
# y_train = np.array([item for sublist in y_train for item in sublist])
# X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

# Learning

## Create model

In [11]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD, Adam
from keras.utils.vis_utils import plot_model

model_MLP = Sequential()
model_MLP.add(Dense(200, activation = 'relu', kernel_initializer = 'he_normal', input_shape = (frame_train['X'].shape[1],)))
model_MLP.add(Dense(100, activation = 'relu', kernel_initializer = 'he_normal'))
model_MLP.add(Dense(1))

optimizer = Adam(clipvalue = 0.5)

# Compiling
model_MLP.compile(optimizer = optimizer, loss = 'mean_squared_error')

! plot_model(model_MLP, show_shapes = True, show_layer_names = True)

zsh:1: number expected


## Train model

In [8]:
model_MLP.fit(frame_train['X'], frame_train['y'], epochs = 50, batch_size = 16, verbose = 1)

frame_test['Prediction'] = model_MLP.predict(frame_test['X'])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


# Results

## Plot prediction

In [9]:
import plotly.express as px
import plotly.graph_objects as go

fig1 = go.Scatter(      x = frame_test.index,
                        y = frame_test['y']['ID3'],
                        name = 'Actual'
                        # color = evaluation['Type'],
                        # title = "Log of Appliance Energy Consumption in Wh vs Time"
                    )

fig2 = go.Scatter(      x = frame_test.index,
                        y = frame_test['Prediction'],
                        name = 'Predicted'
                        # color = evaluation['Type'],
                        # title = "Log of Appliance Energy Consumption in Wh vs Time"
                    )

data = [fig1, fig2]

fig = go.Figure(data = data)

fig.update_layout(title = 'Forecast of test set',
                   xaxis_title = 'Timestamp',
                   yaxis_title = 'ID3 (€)')

fig.show()

## Metrics

In [10]:
def smape(A, F):
    return 100/len(A) * np.sum(2 * np.abs(F - A) / (np.abs(A) + np.abs(F)))

smape(frame_test['y']['ID3'], frame_test['Prediction'])

20.97909189747305