# Load necessary packages

In [1]:
import pandas as pd
import numpy as np

# Colour palette (hex codes) referenced by the Matplotlib colour cycle and the Plotly traces
# further down; not every colour is used by the cells visible in this notebook.
hex_salmon = '#F68F83'
hex_gold = '#BC9661'
hex_indigo = '#2D2E5F'
hex_maroon = '#8C4750'
hex_white = '#FAFAFA'
hex_blue = '#7EB5D2'

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.dates import DateFormatter
import matplotlib.dates as dates

import plotly.express as px
import plotly.graph_objects as go

import matplotlib.font_manager as font_manager
# `_rebuild` is a private Matplotlib helper that does not exist in newer releases.
# Only call it when it is available so this first cell does not die with an
# AttributeError; without it Matplotlib falls back to its normal font lookup.
if hasattr(mpl.font_manager, '_rebuild'):
    mpl.font_manager._rebuild()

# Global plot styling: font, weights and the default colour cycle
mpl.rcParams['font.family'] = 'SF Mono'
mpl.rcParams['font.weight'] = 'medium'
mpl.rcParams['axes.titleweight'] = 'semibold'
mpl.rcParams['axes.labelweight'] = 'medium'
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=[hex_indigo, hex_salmon, hex_maroon])
mpl.rcParams["figure.titlesize"] = 'large'
mpl.rcParams["figure.titleweight"] = 'semibold'

from termcolor import colored

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, LogisticRegression, Ridge, ElasticNet, LassoCV, RidgeCV, ElasticNetCV
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import roc_auc_score, accuracy_score

import tensorflow as tf

import datetime

# Organise data

## Import features

In [2]:
! pip install 'git+git://github.com/HR/github-clone#egg=ghclone' &> /dev/null

! ghclone https://github.com/timovijn/ElectricityPriceForecasting/tree/master/LSTM

zsh:1: command not found: ghclone


In [3]:
# Load the pickled ID3 price frame (timestamp index, one 'ID3' column).
# NOTE(review): unpickling executes arbitrary code - only load files from a trusted source.
features = pd.read_pickle("./ID3/ID3.pkl")
display(features)

Unnamed: 0_level_0,ID3
Timestamp,Unnamed: 1_level_1
2020-07-20 22:00:00+00:00,31.84
2020-07-20 23:00:00+00:00,30.47
2020-07-21 00:00:00+00:00,28.73
2020-07-21 01:00:00+00:00,27.78
2020-07-21 02:00:00+00:00,28.01
...,...
2021-02-22 18:00:00+00:00,63.62
2021-02-22 19:00:00+00:00,48.16
2021-02-22 20:00:00+00:00,41.61
2021-02-22 21:00:00+00:00,41.88


In [None]:
# features = pd.read_pickle(f"./features.pkl")

# display(features)

In [None]:
# NOTE(review): the ID3.pkl frame displayed above spans 2020-07 to 2021-02 and shows only
# an 'ID3' column, whereas this cell selects a 2017 window and an 'MCP' column - it looks
# like it was written for the commented-out features.pkl. Confirm before running.
daterange = ['2017.01.09', '2017.02.05']

# Build the inclusive date mask once and reuse the selected rows for both traces
in_window = (features.index >= daterange[0]) & (features.index <= daterange[1])
window = features[in_window]

fig1 = go.Scatter(x = window.index, y = window['MCP'], name = 'MCP', line_color = hex_indigo)
fig2 = go.Scatter(x = window.index, y = window['ID3'], name = 'ID3', line_color = hex_salmon)

data = [fig1, fig2]

fig = go.Figure(data = data)

fig.update_layout(
    autosize = False,
    width = 1250,
    height = 500,
    title = 'Prices',
    xaxis_title = 'Timestamp',
    yaxis_title = 'Price (€)',
)

fig.show()

## Select features

In [None]:
# Inputs and targets are both derived from the ID3 price series
X = features[['ID3']]
y = features[['ID3']]

lagged = ['ID3']

# Offsets relative to each timestamp t. A column for offset l holds the value at t + l
# (see shift(-l) below), so the inputs cover t-72 ... t-4 and the targets cover t ... t+2.
lag_X = range(-72, -3, 1)
lag_y = range(0, 3, 1)

# Level names in the same order as the levels handed to from_product
level_names = ['Type', 'Lag', 'Feature']

X2 = pd.DataFrame(index = X.index, columns = pd.MultiIndex.from_product([['X'], lag_X, X.columns], names = level_names))
X2 = X2.rename_axis('Timestamp')

y2 = pd.DataFrame(index = y.index, columns = pd.MultiIndex.from_product([['y'], lag_y, y.columns], names = level_names))
y2 = y2.rename_axis('Timestamp')

# Empty, fully labelled frame; the shifted values are written into it below
frame = pd.merge(y2, X2, left_index = True, right_index = True)

X3 = pd.DataFrame(index = X.index)
y3 = pd.DataFrame(index = y.index)

# Lag is the outer loop and feature the inner one so that the flat column order matches
# the (lag, feature) order produced by from_product above; with the loops the other way
# round the values would be mislabelled as soon as there is more than one feature.
for l in lag_X:
    for c in lagged:
        X3[f'{c} ({l})'] = X[c].shift(-l)

frame['X'] = X3.values

for l in lag_y:
    for c in y.columns:
        # Shift the single column c (not the whole frame) so this also works with several targets
        y3[f'{c} ({l})'] = y[c].shift(-l)

frame['y'] = y3.values

# Rows at either end of the series lack a complete input/target window after shifting
frame = frame.dropna()

display(frame)

## Split train and test

In [None]:
# Chronological split (shuffle = False keeps the time order, and makes random_state
# irrelevant, so it is no longer passed): the last 30 % of the rows form the test set,
# and the remaining 70 % is halved into a training and a validation part.
X_train, X_test, y_train, y_test = train_test_split(
    frame['X'],
    frame['y'],
    test_size = 0.3,
    shuffle = False)

X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size = 0.5,
    shuffle = False)

# Selecting frame['X'] / frame['y'] drops the top column level, so the full three-level
# labels are restored here. The level names follow the order of the levels themselves
# (they were previously given as ['Lag', 'Feature', 'Lag']).
split_level_names = ['Type', 'Lag', 'Feature']
X_split_columns = pd.MultiIndex.from_product([['X'], lag_X, X.columns], names = split_level_names)
y_split_columns = pd.MultiIndex.from_product([['y'], lag_y, y.columns], names = split_level_names)


def _labelled_split(X_part, y_part):
    """Relabel one split's inputs and targets in place and join them on the index."""
    X_part.columns = X_split_columns
    y_part.columns = y_split_columns
    return pd.merge(y_part, X_part, left_index = True, right_index = True)


frame_train = _labelled_split(X_train, y_train)
frame_test = _labelled_split(X_test, y_test)
frame_val = _labelled_split(X_val, y_val)

for split_name, split_frame in (('Train', frame_train), ('Test', frame_test), ('Validation', frame_val)):
    print()
    print(f'{split_name} input', split_frame['X'].shape, 'output', split_frame['y'].shape)
print()

display(frame_train)

## Scaling

In [None]:
# Keep independent copies of the unscaled data. A plain assignment would only create a
# second name for the same frame, which the in-place column assignments below would
# then scale as well.
frame_train_unscaled = frame_train.copy()
frame_test_unscaled = frame_test.copy()
frame_val_unscaled = frame_val.copy()

# Both scalers are fitted on the training split only and then applied to all three splits
y_scaler = StandardScaler()
y_scaler.fit(frame_train['y'])

frame_train['y'] = y_scaler.transform(frame_train['y'])
frame_test['y'] = y_scaler.transform(frame_test['y'])
frame_val['y'] = y_scaler.transform(frame_val['y'])

X_scaler = StandardScaler()
X_scaler.fit(frame_train['X'])

frame_train['X'] = X_scaler.transform(frame_train['X'])
frame_test['X'] = X_scaler.transform(frame_test['X'])
frame_val['X'] = X_scaler.transform(frame_val['X'])

display(frame_train)

display(frame_test)

display(frame_val)

In [None]:
def _frame_to_tensor(frame):
    """Reshape the 'X' columns of one split into a (rows, len(lag_X), len(X.columns)) array."""
    n_steps = len(lag_X)
    n_features = len(X.columns)

    # Row-major reshape: within each row, every run of n_features consecutive columns
    # becomes one time step. This replaces the row-by-row iterrows/extend flattening.
    return frame['X'].to_numpy(dtype = float).reshape((len(frame), n_steps, n_features))


def three_dim_tensor(frame_train, frame_test, frame_val):
    """Convert the input columns of the three splits into 3-D arrays.

    Relies on the notebook-level ``lag_X`` (input offsets) and ``X`` (feature frame) to
    determine the second and third dimension.

    Parameters
    ----------
    frame_train, frame_test, frame_val : pandas.DataFrame
        Frames whose top-level column key 'X' selects the input columns.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, X_test, X_val)``, each shaped
        ``(len(frame), len(lag_X), len(X.columns))``.
    """
    return tuple(_frame_to_tensor(frame) for frame in (frame_train, frame_test, frame_val))

# Learning

## Create model

In [None]:
# Number of units in each GRU layer built by model_GRU
LATENT_DIM = 5
# Mini-batch size passed to model.fit
BATCH_SIZE = 50
# Upper bound on training epochs; the EarlyStopping callback may end training sooner
EPOCHS = 100

# Number of forecast steps: one model output per target offset in lag_y
HORIZON = len(lag_y)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, GRU
from keras.optimizers import SGD, Adam
from keras.utils.vis_utils import plot_model
from keras.layers import GRU, Dense, RepeatVector, TimeDistributed, Flatten

In [None]:
def model_GRU():
    """Build the GRU encoder-decoder model and the 3-D inputs it consumes.

    Reads the notebook-level ``lag_X``, ``X``, ``LATENT_DIM``, ``HORIZON`` and the three
    ``frame_*`` splits.

    Returns
    -------
    tuple
        ``(model, X_train, X_test, X_val)`` - the uncompiled Sequential model followed by
        the arrays produced by ``three_dim_tensor``.
    """
    n_steps = len(lag_X)
    n_features = len(X.columns)

    layers = [
        # Encoder: reads the whole input window and emits a single state vector
        GRU(LATENT_DIM, input_shape = (n_steps, n_features)),
        # Repeat that vector once per forecast step
        RepeatVector(HORIZON),
        # Decoder: one hidden state per forecast step
        GRU(LATENT_DIM, return_sequences = True),
        # One scalar per forecast step, flattened to (batch, HORIZON)
        TimeDistributed(Dense(1)),
        Flatten(),
    ]
    model = Sequential(layers)

    X_train, X_test, X_val = three_dim_tensor(frame_train, frame_test, frame_val)

    return model, X_train, X_test, X_val

In [None]:
def model_MLP():
    """Build the fully connected model and return it with the flat 2-D inputs.

    Reads the notebook-level ``HORIZON`` and the three ``frame_*`` splits.

    Returns
    -------
    tuple
        ``(model, X_train, X_test, X_val)`` - the uncompiled Sequential model followed by
        the 'X' columns of the train, test and validation frames.
    """
    n_inputs = frame_train['X'].shape[1]

    model = Sequential([
        Dense(200, activation = 'relu', kernel_initializer = 'he_normal', input_shape = (n_inputs,)),
        Dense(100, activation = 'relu', kernel_initializer = 'he_normal'),
        # One linear output per forecast step
        Dense(HORIZON),
    ])

    return model, frame_train['X'], frame_test['X'], frame_val['X']

In [None]:
# Accepted answers mapped to the function that builds the model and its inputs
model_builders = {'GRU': model_GRU, 'MLP': model_MLP}

model_sel = input(r'Enter model: ').strip()

# Fail immediately on an unknown answer instead of continuing with an undefined
# (or stale, from an earlier run) `model`
if model_sel not in model_builders:
    raise ValueError(f'Unknown model {model_sel!r}; expected one of {sorted(model_builders)}')

model, X_train, X_test, X_val = model_builders[model_sel]()

# NOTE(review): 'accuracy' is kept because a later cell plots history['accuracy'], but it
# is not an informative metric for a regression trained on MSE.
model.compile(optimizer = 'Adam', loss = 'mse', metrics = ['accuracy'])

model.summary()

In [None]:
from keras.callbacks import EarlyStopping

# Stop training once the validation loss has not improved for 5 consecutive epochs
earlystop = EarlyStopping(monitor = 'val_loss', mode = 'min', patience = 5, verbose = 1)

history = model.fit(
    x = X_train,
    y = np.array(frame_train['y']),
    validation_data = (X_val, frame_val['y']),
    batch_size = BATCH_SIZE,
    epochs = EPOCHS,
    callbacks = [earlystop],
    verbose = 1,
)

In [None]:
# One figure per recorded metric, each showing the training curve and the validation curve
for train_key, val_key, title in (
    ('accuracy', 'val_accuracy', 'model accuracy'),
    ('loss', 'val_loss', 'model loss'),
):
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(train_key)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc = 'upper left')
    plt.show()

In [None]:
prediction_columns = pd.MultiIndex.from_product([['Prediction'], y.columns, lag_y], names = ['Type', 'Feature', 'Lag'])

# Attach the empty 'Prediction' columns and undo the target scaling only the first time
# this cell runs. Re-running it used to merge a second set of 'Prediction' columns and
# inverse-transform the already unscaled targets again.
if 'Prediction' not in frame_test.columns.get_level_values(0):
    predictions = pd.DataFrame(index = frame_test.index, columns = prediction_columns)

    frame_test = pd.merge(frame_test, predictions, left_index = True, right_index = True)

    frame_test['y'] = y_scaler.inverse_transform(frame_test['y'])

# Predictions are refreshed on every run, then converted back to the original units
frame_test['Prediction'] = model.predict(X_test)

frame_test['Prediction'] = y_scaler.inverse_transform(frame_test['Prediction'])

display(frame_test)

# Results

## Plot prediction

In [None]:
# First forecast step (offset 0): actual ID3 value and the model's prediction.
# The two column groups are keyed in a different order: 'y' is (lag, feature), while
# 'Prediction' is (feature, lag).
actual_step0 = frame_test['y'][0]['ID3']
predicted_step0 = frame_test['Prediction']['ID3'][0]

fig1 = go.Scatter(x = frame_test.index, y = actual_step0, name = 'Actual', line_color = hex_maroon)
fig2 = go.Scatter(x = frame_test.index, y = predicted_step0, name = 'Predicted', line_color = hex_gold)

data = [fig1, fig2]

fig = go.Figure(data = data)

fig.update_layout(
    title = 'Forecast of test set',
    xaxis_title = 'Timestamp',
    yaxis_title = 'ID3 (€)',
)

fig.show()

## Metrics

In [None]:
def smape(A, F):
    """Symmetric mean absolute percentage error, in percent.

    Computes ``100 / n * sum(2 * |F - A| / (|A| + |F|))``.

    Parameters
    ----------
    A : array-like
        Actual values.
    F : array-like
        Forecast values; paired with ``A`` by position.

    Returns
    -------
    float
        The sMAPE in percent, or NaN when the input is empty.

    Notes
    -----
    A term whose actual and forecast are both zero has a zero denominator. It is
    counted as an error of 0 (a perfect forecast) rather than turning the whole
    result into NaN.
    """
    actual = np.asarray(A, dtype = float)
    forecast = np.asarray(F, dtype = float)

    # No observations: the mean is undefined
    if actual.size == 0:
        return float('nan')

    denominator = np.abs(actual) + np.abs(forecast)
    numerator = 2 * np.abs(forecast - actual)

    # Divide only where the denominator is non-zero; the remaining terms stay at 0
    terms = np.divide(numerator, denominator, out = np.zeros_like(denominator), where = denominator != 0)

    return 100 / len(actual) * np.sum(terms)

# sMAPE for each of the three forecast steps (offsets 0, 1 and 2)
for step in (0, 1, 2):
    print(smape(frame_test['y'][step]['ID3'], frame_test['Prediction']['ID3'][step]))

In [None]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error of the first forecast step (offset 0)
actual_first_step = frame_test['y'][0]['ID3']
predicted_first_step = frame_test['Prediction']['ID3'][0]

display(mean_absolute_error(actual_first_step, predicted_first_step))