<h1> Parameter Tuning </h1>
    
    Look at n_neurons & n_hidden layers

# Importing libraries:

In [None]:
!pip install pandas
!pip install sklearn

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn



# Import data; set X and y; fill NaN values and split into training and test data:

In [None]:
from sklearn.model_selection import train_test_split

# number of input feature columns; reused for the slicing / model input shape
features_num = 15

# Load the data set with its first column as index, keep only the rows whose
# index is above 2018070000, replace the index by a fresh 0..n-1 range and
# forward-fill missing values.
# - reset_index(drop=True) discards the old index whatever its name is (the
#   previous reset_index + drop('index') only worked for an unnamed index).
# - .ffill() replaces the deprecated fillna(method='ffill').
# NOTE(review): forward fill cannot fill NaNs in the very first rows — confirm
# the filtered data does not start with missing values.
data = (
    pd.read_csv('Data_set_1_smaller.csv', index_col = 0)
      .loc[lambda frame: frame.index > 2018070000, :]
      .reset_index(drop = True)
      .ffill()
)

# chronological split (no shuffling): last 15 % of the rows become the test set
data_train, data_test = train_test_split(
         data, test_size = 0.15, shuffle=False)

# Apply feature scaling:

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scaling of every column (features and the target alike).
# The scaler learns its per-column min / range from the training rows only and
# the same mapping is then applied to both splits.
sc_X = MinMaxScaler().fit(data_train)
data_train = sc_X.transform(data_train)
data_test = sc_X.transform(data_test)

# Importing packages:

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import LeakyReLU
from keras import initializers
from keras import optimizers
from keras.callbacks import EarlyStopping

# Accumulators filled while tuning; one entry per parameter combination.

# not filled anywhere in the visible cells (presumably cross-validation errors)
mae_cv, mse_cv = [], []

# errors on the whole test set
mae_gen, mse_gen, rmse_gen = [], [], []

# errors restricted to normal (non-spike) regions
mae_nor, mse_nor, rmse_nor = [], [], []

# errors restricted to spike regions
mae_spi, mse_spi, rmse_spi = [], [], []

# training histories, predictions and timings
hist_list, y_pred_list, prediction_list = [], [], []
time_count = []

# Prepare the data in the shape the LSTM needs, create the regressor & tune:

In [None]:
from sklearn.model_selection import ParameterGrid

# fixed settings
steps = 96         # number of past rows in every input window
batch_size = 100   # batch size used for fitting and predicting

# Search grid.
# n_hidden = 0 -> a single LSTM layer (no hidden layer),
# n_hidden = 1 -> two stacked LSTM layers (one hidden layer).
# The values must include 0: regressor_tunning only builds the single-layer
# network for n_hidden == 0, so a grid of [1, 2] would train the same
# architecture twice.
parameters = {'n_hidden': [0, 1],
              'units': [50, 100, 150, 200]}

all_param = ParameterGrid(parameters)

# function to split data into correct shape for RNN
def split_data(X, y, steps):
    """Build sliding windows for a recurrent network.

    For every position ``i`` from ``steps`` to ``len(y) - 1`` the sample is the
    ``steps`` rows of ``X`` that precede ``i`` and the label is ``y[i]``.

    Parameters
    ----------
    X : 2-D array, indexed as ``X[rows, columns]``.
    y : 1-D sequence of labels, aligned row by row with ``X``.
    steps : int, window length.

    Returns
    -------
    (windows, labels) : two numpy arrays with ``len(y) - steps`` entries each
        (both empty when ``len(y) <= steps``).
    """
    label_positions = range(steps, len(y))
    windows = [X[end - steps:end, :] for end in label_positions]
    labels = [y[end] for end in label_positions]
    return np.array(windows), np.array(labels)

# function to cut data set so it can be divisible by the batch_size
def cut_data(data, batch_size):
    """Trim trailing rows so that the row count is a multiple of ``batch_size``.

    Returns ``data`` itself when its first dimension is already divisible by
    ``batch_size``; otherwise returns ``data`` without its last
    ``data.shape[0] % batch_size`` rows.
    """
    leftover = data.shape[0] % batch_size
    if leftover:
        return data[:data.shape[0] - leftover]
    return data

# divide features and labels: the first `features_num` columns are the inputs
# (the same constant defines the model's input shape), the last column is the
# target
X_train = data_train[:, :features_num]
y_train = data_train[:, -1]
X_test = data_test[:, :features_num]
y_test = data_test[:, -1]

# hold out the last 15 % of the training rows as validation set (no shuffling)
X_train, X_val, y_train, y_val = train_test_split(
         X_train, y_train, test_size = 0.15, shuffle=False)

# turn every split into (samples, steps, features) windows with aligned labels
X_train, y_train = split_data(X_train, y_train, steps)
X_test, y_test = split_data(X_test, y_test, steps)
X_val, y_val = split_data(X_val, y_val, steps)

# the network is built with a fixed batch size, so every split is trimmed to a
# whole number of batches; cut_data drops the trailing samples
X_train, y_train = cut_data(X_train, batch_size), cut_data(y_train, batch_size)
X_test, y_test = cut_data(X_test, batch_size), cut_data(y_test, batch_size)
X_val, y_val = cut_data(X_val, batch_size), cut_data(y_val, batch_size)

# design the LSTM
def regressor_tunning(kernel_initializer = 'he_normal',
                      bias_initializer = initializers.Ones(),
                      n_hidden_layers = None,
                      n_units = None):
    """Build and compile the stateful LSTM regressor.

    The network stacks ``n_hidden_layers + 1`` LSTM layers, each followed by
    LeakyReLU(0.2) and Dropout(0.2), and ends with a single linear output unit.
    Previously every non-zero ``n_hidden`` produced the same two-layer network,
    so tuning the depth had no effect; 0 and 1 still build exactly the networks
    they built before.

    Parameters
    ----------
    kernel_initializer, bias_initializer :
        Passed to every LSTM layer.
    n_hidden_layers : int, optional
        Number of LSTM layers in addition to the first one. Defaults to the
        notebook-level variable ``n_hidden``.
    n_units : int, optional
        Units per LSTM layer. Defaults to the notebook-level variable ``units``.

    The input shape is read from the notebook-level ``batch_size``, ``steps``
    and ``features_num``.

    Returns
    -------
    The compiled model (loss: mse, metrics: mse and mae, optimizer: Adamax).

    Raises
    ------
    ValueError
        If the number of hidden layers is negative.
    """
    depth = n_hidden if n_hidden_layers is None else n_hidden_layers
    width = units if n_units is None else n_units
    if depth < 0:
        raise ValueError('number of hidden layers must be >= 0, got {}'.format(depth))

    n_lstm_layers = depth + 1
    model = Sequential()
    for layer_idx in range(n_lstm_layers):
        is_last = layer_idx == n_lstm_layers - 1
        layer_options = dict(units = width,
                             stateful = True,
                             # every LSTM but the last must hand the full
                             # sequence on to the next LSTM layer
                             return_sequences = not is_last,
                             kernel_initializer = kernel_initializer,
                             bias_initializer = bias_initializer)
        if layer_idx == 0:
            # only the first layer declares the (fixed-batch) input shape
            layer_options['batch_input_shape'] = (batch_size, steps, features_num)
        model.add(LSTM(**layer_options))
        model.add(LeakyReLU(alpha = 0.2))
        model.add(Dropout(0.2))
    model.add(Dense(1, activation='linear'))
    model.compile(loss = 'mse', metrics = ['mse', 'mae'], optimizer = 'Adamax')
    return model

from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse


def _error_metrics(y_true, y_hat):
    """Return (rmse, mse, mae) for two aligned 1-D arrays; NaNs if they are empty."""
    if len(y_true) == 0:
        return np.nan, np.nan, np.nan
    mse_value = mse(y_true, y_hat)
    # square root taken by hand: the `squared=False` flag is gone in recent scikit-learn
    return np.sqrt(mse_value), mse_value, mae(y_true, y_hat)


# Undo the MinMax scaling of the target. y_test was taken from the LAST column
# of the scaled data, so the last column's statistics are used. This is done
# once, on a copy: rescaling y_test in place inside the loop would rescale it
# again on every iteration.
price_range = sc_X.data_range_[-1]
price_min = sc_X.data_min_[-1]
y_test_price = y_test * price_range + price_min

# Spike indicator (loop-invariant, so read once): keep the last len(y_test) rows.
# NOTE(review): cut_data dropped trailing test samples to reach a multiple of
# batch_size, while these flags are taken from the very end of the file. If the
# file's last row matches the last row of the data set, flags and targets are
# offset by the number of dropped samples — confirm the alignment.
y_spike_occ = pd.read_csv('Spike_binary_1std.csv', usecols = [6])
y_spike_occ = y_spike_occ.iloc[-len(y_test):, 0].values
if len(y_spike_occ) != len(y_test):
    raise ValueError('Spike_binary_1std.csv has fewer rows than the test set')

# Boolean masks keep targets and predictions aligned. The previous
# multiply-then-drop-zeros selection needed a `.replace` on a numpy array
# (AttributeError) and could drop a different number of values from each side.
# Assumes the column is a 0/1 flag — TODO confirm.
spike_mask = y_spike_occ.astype(bool)
normal_mask = ~spike_mask

for params in all_param:

    # regressor_tunning reads these two notebook-level variables
    units = params['units']
    n_hidden = params['n_hidden']

    model = regressor_tunning()

    # fitting the LSTM to the training set
    history = model.fit(X_train,
                        y_train,
                        batch_size = batch_size,
                        epochs = 180,
                        shuffle = False,
                        validation_data = (X_val, y_val))
    hist_list.append(history.history)

    # Predict with the model that was just trained (re-creating it here, as
    # before, evaluated an untrained network) and map back to price units.
    y_pred = model.predict(X_test, batch_size = batch_size)
    y_pred = (y_pred * price_range) + price_min
    y_pred_list.append(y_pred)
    y_pred_flat = y_pred.ravel()

    # metrics on the whole test set
    rmse_error, mse_error, mae_error = _error_metrics(y_test_price, y_pred_flat)
    rmse_gen.append(rmse_error)
    mse_gen.append(mse_error)
    mae_gen.append(mae_error)

    # metrics on spike regions
    rmse_spike, mse_spike, mae_spike = _error_metrics(
        y_test_price[spike_mask], y_pred_flat[spike_mask])
    rmse_spi.append(rmse_spike)
    mse_spi.append(mse_spike)
    mae_spi.append(mae_spike)

    # metrics on normal regions
    rmse_normal, mse_normal, mae_normal = _error_metrics(
        y_test_price[normal_mask], y_pred_flat[normal_mask])
    rmse_nor.append(rmse_normal)
    mse_nor.append(mse_normal)
    mae_nor.append(mae_normal)

In [None]:
# One row per parameter combination: the combination itself plus its RMSE / MAE
# on the whole test set, on spike regions and on normal regions.
results = pd.DataFrame(dict(all_param = all_param,
                            rmse_general = rmse_gen,
                            mae_general = mae_gen,
                            rmse_spike = rmse_spi,
                            mae_spike = mae_spi,
                            rmse_normal = rmse_nor,
                            mae_normal = mae_nor))

# persist the table, then display it
results.to_csv('Results_tuning_3_n_neurons_n_hidden.csv')
results

In [None]:
# Metrics-only table (no parameter column) used for the highlighted display;
# this rebinds `results`.
results = pd.DataFrame(dict(rmse_general = rmse_gen,
                            mae_general = mae_gen,
                            rmse_spike = rmse_spi,
                            mae_spike = mae_spi,
                            rmse_normal = rmse_nor,
                            mae_normal = mae_nor))

def highlight_min(s):
    '''
    Return one CSS string per element of a Series: a yellow background for
    every element equal to the Series minimum, an empty string otherwise.
    '''
    lowest = s.min()
    css = []
    for is_lowest in (s == lowest):
        css.append('background-color: yellow' if is_lowest else '')
    return css

# apply highlight_min column by column (axis 0 is Styler.apply's default)
results.style.apply(highlight_min, axis = 0)

In [None]:
one = []
two = []
three = []
four = []
five = []
six = []
seven = []
eight = []


for i in range(6):
    one.append(rmse_spi[8 * i])
    two.append(rmse_spi[1 + (8 * i)])
    three.append(rmse_spi[2 + (8 * i)])
    four.append(rmse_spi[3 + (8 * i)])
    five.append(rmse_spi[4 + (8 * i)])
    six.append(rmse_spi[5 + (8 * i)])
    seven.append(rmse_spi[6 + (8 * i)])
    eight.append(rmse_spi[7 + (8 * i)])

In [None]:
plt.figure(figsize=(9,4))
plt.plot(one, label = '10 neurons')
plt.plot(two, label = '15 neurons')
plt.plot(three, label = '20 neurons')
plt.plot(four, label = '25 neurons')
plt.plot(five, label = '30 neurons')
plt.plot(six, label = '35 neurons')
plt.plot(seven, label = '40 neurons')
plt.plot(eight, label = '45 neurons')
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth='0.5')
plt.grid(which='minor', linestyle=':', linewidth='0.5')
plt.xticks([0,1,2,3,4,5], [1,2,3,4,5,6])
plt.xlabel('Number of hidden layers')
plt.ylabel('RMSE (£/MWh)')
plt.title('RMSE on spike regions for different combinations of: \nNumber hidden layers & Number of neurons per layer ')
plt.legend(bbox_to_anchor=(1.0, 1.02))
plt.tight_layout()
plt.savefig('RMSE_spike_n_hidden_n_neurons.png')

In [None]:
one = []
two = []
three = []
four = []
five = []
six = []
seven = []
eight = []


for i in range(6):
    one.append(rmse_nor[8 * i])
    two.append(rmse_nor[1 + (8 * i)])
    three.append(rmse_nor[2 + (8 * i)])
    four.append(rmse_nor[3 + (8 * i)])
    five.append(rmse_nor[4 + (8 * i)])
    six.append(rmse_nor[5 + (8 * i)])
    seven.append(rmse_nor[6 + (8 * i)])
    eight.append(rmse_nor[7 + (8 * i)])
    
plt.figure(figsize=(9,4))
plt.plot(one, label = '10 neurons')
plt.plot(two, label = '15 neurons')
plt.plot(three, label = '20 neurons')
plt.plot(four, label = '25 neurons')
plt.plot(five, label = '30 neurons')
plt.plot(six, label = '35 neurons')
plt.plot(seven, label = '40 neurons')
plt.plot(eight, label = '45 neurons')
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth='0.5')
plt.grid(which='minor', linestyle=':', linewidth='0.5')
plt.xticks([0,1,2,3,4,5], [1,2,3,4,5,6])
plt.xlabel('Number of hidden layers')
plt.ylabel('RMSE (£/MWh)')
plt.title('RMSE on normal regions for different combinations of: \nNumber hidden layers & Number of neurons per layer ')
plt.legend(bbox_to_anchor=(1.0, 1.02))
plt.tight_layout()
plt.savefig('RMSE_normal_n_hidden_n_neurons.png')

In [None]:
one = []
two = []
three = []
four = []
five = []
six = []
seven = []
eight = []


for i in range(6):
    one.append(rmse_gen[8 * i])
    two.append(rmse_gen[1 + (8 * i)])
    three.append(rmse_gen[2 + (8 * i)])
    four.append(rmse_gen[3 + (8 * i)])
    five.append(rmse_gen[4 + (8 * i)])
    six.append(rmse_gen[5 + (8 * i)])
    seven.append(rmse_gen[6 + (8 * i)])
    eight.append(rmse_gen[7 + (8 * i)])
    
plt.figure(figsize=(9,4))
plt.plot(one, label = '10 neurons')
plt.plot(two, label = '15 neurons')
plt.plot(three, label = '20 neurons')
plt.plot(four, label = '25 neurons')
plt.plot(five, label = '30 neurons')
plt.plot(six, label = '35 neurons')
plt.plot(seven, label = '40 neurons')
plt.plot(eight, label = '45 neurons')
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth='0.5')
plt.grid(which='minor', linestyle=':', linewidth='0.5')
plt.xticks([0,1,2,3,4,5], [1,2,3,4,5,6])
plt.xlabel('Number of hidden layers')
plt.ylabel('RMSE (£/MWh)')
plt.title('RMSE for all test set for different combinations of: \nNumber hidden layers & Number of neurons per layer ')
plt.legend(bbox_to_anchor=(1.0, 1.02))
plt.tight_layout()
plt.savefig('RMSE_general_n_hidden_n_neurons.png')

In [None]:
one = []
two = []
three = []
four = []
five = []
six = []
seven = []
eight = []


for i in range(6):
    one.append(mae_spi[8 * i])
    two.append(mae_spi[1 + (8 * i)])
    three.append(mae_spi[2 + (8 * i)])
    four.append(mae_spi[3 + (8 * i)])
    five.append(mae_spi[4 + (8 * i)])
    six.append(mae_spi[5 + (8 * i)])
    seven.append(mae_spi[6 + (8 * i)])
    eight.append(mae_spi[7 + (8 * i)])
    
plt.figure(figsize=(9,4))
plt.plot(one, label = '10 neurons')
plt.plot(two, label = '15 neurons')
plt.plot(three, label = '20 neurons')
plt.plot(four, label = '25 neurons')
plt.plot(five, label = '30 neurons')
plt.plot(six, label = '35 neurons')
plt.plot(seven, label = '40 neurons')
plt.plot(eight, label = '45 neurons')
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth='0.5')
plt.grid(which='minor', linestyle=':', linewidth='0.5')
plt.xticks([0,1,2,3,4,5], [1,2,3,4,5,6])
plt.xlabel('Number of hidden layers')
plt.ylabel('MAE (£/MWh)')
plt.title('MAE on spike regions for different combinations of: \nNumber hidden layers & Number of neurons per layer ')
plt.legend(bbox_to_anchor=(1.0, 1.02))
plt.tight_layout()
plt.savefig('MAE_spike_n_hidden_n_neurons.png')

In [None]:
one = []
two = []
three = []
four = []
five = []
six = []
seven = []
eight = []


for i in range(6):
    one.append(mae_nor[8 * i])
    two.append(mae_nor[1 + (8 * i)])
    three.append(mae_nor[2 + (8 * i)])
    four.append(mae_nor[3 + (8 * i)])
    five.append(mae_nor[4 + (8 * i)])
    six.append(mae_nor[5 + (8 * i)])
    seven.append(mae_nor[6 + (8 * i)])
    eight.append(mae_nor[7 + (8 * i)])
    
plt.figure(figsize=(9,4))
plt.plot(one, label = '10 neurons')
plt.plot(two, label = '15 neurons')
plt.plot(three, label = '20 neurons')
plt.plot(four, label = '25 neurons')
plt.plot(five, label = '30 neurons')
plt.plot(six, label = '35 neurons')
plt.plot(seven, label = '40 neurons')
plt.plot(eight, label = '45 neurons')
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth='0.5')
plt.grid(which='minor', linestyle=':', linewidth='0.5')
plt.xticks([0,1,2,3,4,5], [1,2,3,4,5,6])
plt.xlabel('Number of hidden layers')
plt.ylabel('MAE (£/MWh)')
plt.title('MAE on normal regions for different combinations of: \nNumber hidden layers & Number of neurons per layer ')
plt.legend(bbox_to_anchor=(1.0, 1.02))
plt.tight_layout()
plt.savefig('MAE_normal_n_hidden_n_neurons.png')

In [None]:
one = []
two = []
three = []
four = []
five = []
six = []
seven = []
eight = []


for i in range(6):
    one.append(mae_gen[8 * i])
    two.append(mae_gen[1 + (8 * i)])
    three.append(mae_gen[2 + (8 * i)])
    four.append(mae_gen[3 + (8 * i)])
    five.append(mae_gen[4 + (8 * i)])
    six.append(mae_gen[5 + (8 * i)])
    seven.append(mae_gen[6 + (8 * i)])
    eight.append(mae_gen[7 + (8 * i)])
    
plt.figure(figsize=(9,4))
plt.plot(one, label = '10 neurons')
plt.plot(two, label = '15 neurons')
plt.plot(three, label = '20 neurons')
plt.plot(four, label = '25 neurons')
plt.plot(five, label = '30 neurons')
plt.plot(six, label = '35 neurons')
plt.plot(seven, label = '40 neurons')
plt.plot(eight, label = '45 neurons')
plt.minorticks_on()
plt.grid(which='major', linestyle='-', linewidth='0.5')
plt.grid(which='minor', linestyle=':', linewidth='0.5')
plt.xticks([0,1,2,3,4,5], [1,2,3,4,5,6])
plt.xlabel('Number of hidden layers')
plt.ylabel('MAE (£/MWh)')
plt.title('MAE for all test set for different combinations of: \nNumber hidden layers & Number of neurons per layer ')
plt.legend(bbox_to_anchor=(1.0, 1.02))
plt.tight_layout()
plt.savefig('MAE_general_n_hidden_n_neurons.png')