In [14]:
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras import optimizers
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.utils import model_to_dot, plot_model
from IPython.display import clear_output
from keras.layers import Reshape
from keras.layers import GlobalMaxPooling1D
from PIL import Image
import os
import numpy as np
from bayes_opt import BayesianOptimization
import time
import pandas as pd


#We are inheriting all the properties available in that class (Data_prep) by passing it as an argument to the predictor class.
from data_prep import Data_prep as prep

class Con_1D_BiLSTM_v2(prep):
    '''Parallel Conv1D / bidirectional-LSTM forecaster with Bayesian hyperparameter search.

    Inherits the data loading, splitting, scaling and reshaping helpers from the
    parent ``Data_prep`` class and adds model construction, training and
    optimisation on top of them.
    '''

    # Column layout of the per-trial results table (and of the CSV written from it).
    # 'biLSTM2_units' is kept for schema compatibility although no trial fills it in.
    _RESULT_COLUMNS = ["Iteration", "Start Time", "End Time", "con_1D_filters", "con_1D_kernel_size", "biLSTM_units", "biLSTM_dropout_rate", 'biLSTM2_units', "learn_rate", "step1", "step2", "step3", "rate1", "rate2", "rate3", "Train Loss", "Val Loss", "Train MSE", "Val MSE"]

    # File the results table is written to after every optimisation trial.
    _RESULTS_CSV = 'parallel_conv1D_biLSTM.csv'

    def __init__(self):
        # The parent constructor initialises attributes the inherited methods rely on,
        # so it has to be invoked explicitly.
        super().__init__()
        self.results_df = pd.DataFrame(columns=self._RESULT_COLUMNS)
        # (timesteps, features) of the most recently prepared training inputs;
        # filled in by prepare_data and used as the default model input shape.
        self._model_input_shape = None

    def prepare_data(self, filepath, weeks=2, date_1 = '2016-01-01 00:00:00', date_2 = '2017-01-01 00:00:00', date_3 = '2018-01-01 00:00:00', n_features = 1):

        '''
        Run every inherited data-preparation step in a single call.

        The method loads the input data, splits it into train/validation/test
        sets, scales them and reshapes them for the forecasting model. See the
        parent class for the details of each individual step.

        Inputs:
        filepath - path to the input data (string, mandatory)

        weeks - number of weeks used as the training window (integer, default 2)

        date_1 - date up to which the training split runs (string, default '2016-01-01 00:00:00')

        date_2 - date up to which the validation split runs (string, default '2017-01-01 00:00:00')

        date_3 - date up to which the test split runs (string, default '2018-01-01 00:00:00')

        n_features - number of input features (integer, default 1 i.e. a single input feature)

        Outputs:
        X_train, y_train, X_val, y_val, X_test, y_test

        Side effect: remembers X_train.shape[1:] so that conv1D_bilstm_v2 can
        build a matching input layer without relying on notebook globals.
        '''

        # Loading csv - returns the training duration used by the later steps
        training_duration = self.load_csv(filepath, weeks)

        # Splitting along the supplied dates
        train, val, test = self.train_val_test_split(training_duration, date_1, date_2, date_3)

        # Scaling
        train_scaled, val_scaled, test_scaled = self.train_val_test_scaling(train, val, test)

        # Reshaping. The order of this list is the one the inherited reshaping method is given.
        prepared_data = [train_scaled, training_duration, val_scaled, test_scaled]
        reshaped = self.reshaping(prepared_data, n_features)

        # The first returned item is X_train; keep its per-sample shape for the model builder.
        self._model_input_shape = tuple(reshaped[0].shape[1:])

        return reshaped

    def conv1D_bilstm_v2(self, con_1D_filters, con_1D_kernel_size, biLSTM_units, biLSTM_dropout_rate, printing = True, save_image=True, image_filename="model_architecture.png", input_shape=None):

        '''Build the parallel Conv1D / biLSTM model and return it.

        Input:
        con_1D_filters, con_1D_kernel_size - settings of the convolution branch
        biLSTM_units, biLSTM_dropout_rate - settings of the bidirectional LSTM branch
        printing - when True, print the pooled layer shape and the model summary
        save_image - when True, save the architecture diagram to image_filename
        image_filename - target file for the architecture diagram
        input_shape - (timesteps, features) of one sample. When omitted, the shape
            recorded by the last prepare_data call on this instance is used.

        Output - the (uncompiled) keras model.

        Raises ValueError when no input shape is available.
        '''

        # Resolve the input shape explicitly rather than reading a global X_train.
        if input_shape is None:
            input_shape = getattr(self, "_model_input_shape", None)
        if input_shape is None:
            raise ValueError("Input shape unknown: call prepare_data() on this instance first or pass input_shape=(timesteps, features).")

        # Create the input node. Batch size is omitted with the functional API.
        inputs = keras.Input(shape = tuple(input_shape))

        # Convolution branch
        con_1D = keras.layers.Conv1D(filters=con_1D_filters, kernel_size=con_1D_kernel_size)(inputs)

        # Parallel bilstm branch
        biLSTM = Bidirectional(LSTM(biLSTM_units,return_sequences=True))(inputs)
        biLSTM = Dropout(biLSTM_dropout_rate)(biLSTM)

        # PAD THE CONVOLUTION OUTPUT SO BOTH BRANCHES HAVE THE SAME LENGTH BEFORE CONCATENATION
        pad_difference = (biLSTM.shape[1] - con_1D.shape[1])

        # Split the difference over both ends; an odd difference puts the extra step at the end.
        pad_1 = pad_difference // 2
        pad_2 = pad_difference - pad_1

        padding_layer = keras.layers.ZeroPadding1D(padding=(pad_1, pad_2))(con_1D)

        # Merge the two branches along the feature axis
        merged = keras.layers.Concatenate()([padding_layer,biLSTM])

        # Reshape so that the number of steps is the last dimension.
        # NOTE(review): Reshape reinterprets the values in the new shape, it does not
        # swap axes. If a true transpose is intended, keras.layers.Permute((2, 1))
        # would be needed - confirm before changing, as it alters the model.
        reshaped = Reshape((merged.shape[2], merged.shape[1])) (merged)

        # Global max pooling
        max_pool_1D = GlobalMaxPooling1D()(reshaped)
        if printing:
            print('max_pool_1D layer shape:', max_pool_1D.shape)

        # Final output layer
        output_layer = Dense(48)(max_pool_1D)

        # Pull everything together to build the final model
        model = keras.models.Model(inputs=inputs, outputs=output_layer)
        if printing:
            # summary() prints by itself; wrapping it in print() would add a stray "None".
            model.summary()
        if save_image:
            # Save model architecture as a PNG image
            plot_model(model, to_file=image_filename, show_shapes=True)
        return model

    def model_training(self, model, X_train, y_train, X_val, y_val, lr_schedule):
        '''Compile and fit the model, returning the keras History object.

        The model is compiled with Adam (using lr_schedule as learning rate), an
        RMSE loss and MSE as additional metric. Training runs for at most 5000
        epochs with early stopping on the validation loss.
        '''

        # Root mean squared error (RMSE) used as the training loss
        def root_mean_squared_error(y_true, y_pred):
            return tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))

        # Epochs and batches: an eighth of the training samples per batch,
        # but never less than one sample (fewer than 8 samples would give 0).
        epochs = 5000
        batch_size = max(1, X_train.shape[0] // 8)
        print('Batch size:', batch_size)

        # Early stopping on the validation loss
        early_stop = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', min_delta= 1e-13, patience = 300)

        # Compiling the model
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule), loss = root_mean_squared_error, metrics = ['mse'])

        # Fitting the model to the data
        history = model.fit(
            X_train,
            y_train,
            batch_size=batch_size,
            epochs=epochs,
            callbacks=[early_stop],
            validation_data = (X_val, y_val)
        )
        return history

    def optimize_hyperparameters(self, filepath, weeks = 2, date_1 = '2016-01-01 00:00:00', date_2 = '2017-01-01 00:00:00', date_3 = '2018-01-01 00:00:00', n_features = 1):

        '''Search the model and learning-rate hyperparameters with Bayesian optimisation.

        The data is prepared with prepare_data (same arguments), then every trial
        builds a model, trains it and logs its results to self.results_df and to
        the results CSV. Returns the parameter dictionary of the best trial, i.e.
        the one with the lowest validation loss reached during training.

        The search bounds are defined below. Eventually, these should be moved to
        a separate dictionary that the user can easily input data to.
        '''
        pbounds = {
            'con_1D_filters': (32, 128),
            'con_1D_kernel_size': (2, 5),
            'biLSTM_units': (50, 150),
            'biLSTM_dropout_rate': (0.2, 0.5),
            'learn_rate': (0.0005, 0.005),
            'step1': (10, 500),
            'step2': (1000, 2000),
            'step3': (2000, 5000),
            'rate1': (0.0001, 0.001),
            'rate2': (0.00005, 0.0005),
            'rate3': (0.000005, 0.00005)
        }

        # Prepare data (the test split is not needed for the search)
        X_train, y_train, X_val, y_val,_,_ = self.prepare_data(filepath, weeks, date_1, date_2, date_3, n_features)

        def evaluate_hyperparameters(con_1D_filters, con_1D_kernel_size, biLSTM_units, biLSTM_dropout_rate, learn_rate,step1,rate1,step2,rate2, step3, rate3):
            '''Objective for one trial: build, train and log a model, return the negated best validation loss.'''

            # Piecewise-constant learning rate: learn_rate until step1, then rate1, rate2 and rate3.
            lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay([int(step1), int(step2), int(step3)], [learn_rate, rate1,rate2,rate3])

            # Start time of this trial
            start_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())

            # The optimiser proposes floats, so the integer-valued settings are truncated.
            model = self.conv1D_bilstm_v2(int(con_1D_filters), int(con_1D_kernel_size), int(biLSTM_units), biLSTM_dropout_rate, printing = False, save_image=False, input_shape=tuple(X_train.shape[1:]))

            # Train and evaluate the model using the prepared data
            history = self.model_training(model, X_train, y_train, X_val, y_val, lr_schedule)

            # Collect the results of this trial (loss/metric values are those of the last epoch)
            iteration_results = {
                "Iteration": len(self.results_df) + 1,
                "Start Time": start_time,
                "End Time": time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()),
                "con_1D_filters": int(con_1D_filters),
                'con_1D_kernel_size': int(con_1D_kernel_size),
                "biLSTM_units": int(biLSTM_units),
                "biLSTM_dropout_rate": biLSTM_dropout_rate,
                "learn_rate": learn_rate,
                "step1": int(step1),
                "step2": int(step2),
                "step3": int(step3),
                "rate1": rate1,
                "rate2": rate2,
                "rate3": rate3,
                "Train Loss": history.history["loss"][-1],
                "Val Loss": history.history["val_loss"][-1],
                "Train MSE": history.history["mse"][-1],
                "Val MSE": history.history["val_mse"][-1]
            }

            print('Results dataframe current length', iteration_results['Iteration'])

            # DataFrame.append no longer exists in pandas 2.x, so rebuild the table
            # from its records plus the new one. Missing keys become empty cells.
            records = self.results_df.to_dict("records")
            records.append(iteration_results)
            self.results_df = pd.DataFrame(records, columns=self._RESULT_COLUMNS)

            # Save results to CSV after each iteration
            self.results_df.to_csv(self._RESULTS_CSV, index=False)

            # The optimiser maximises, hence the negated loss.
            best_val_loss = min(history.history['val_loss'])

            return -best_val_loss

        optimizer = BayesianOptimization(
            f=evaluate_hyperparameters,
            pbounds=pbounds,
            random_state=None,  # Set the random state as needed
            verbose=2
        )

        # Run Bayesian Optimization
        optimizer.maximize(init_points=15, n_iter=15)

        # Return the best hyperparameters found
        best_hyperparameters = optimizer.max['params']
        return best_hyperparameters


CREATING AN INSTANCE OF THE NEW CLASS

In [15]:
# Create the Con_1D_BiLSTM_v2 object that the cells below operate on.
derived_instance = Con_1D_BiLSTM_v2()

Calling the prepare_data method to prepare the data for the model

In [16]:
# Prepare the 'Demand_data' input with a 2-week training window and unpack the six returned arrays.
X_train, y_train, X_val, y_val, X_test, y_test = derived_instance.prepare_data('Demand_data', 2);

DATA LOADING
Input data runs from  2015-01-01 00:00:00 to 2019-12-31 23:30:00 and with 87648 datapoints
Training duration is 672  timesteps, an equivalent of 2 weeks

SPLITTING
Split results
_____________
Set Length Datapoint_1
Train 17520 28.726
Val 18240 26.958
Test 18192 28.007

SCALING
Scaled results
_____________
Set Length Datapoint_1
Train 17520 0.31256682174126627
Val 18240 0.26147889155373194
Test 18192 0.34324056561289745

 RESHAPING
Reshaping results
_________________
X_train (351, 672, 1)
y_train (351, 48)
X_val (366, 672)
y_val (366, 48)
X_test (365, 672, 1)
y_test (365, 48)


Calling the conv1D_bilstm_v2 model builder with fixed parameters to check that it actually builds the model

In [17]:
# Build the model once with fixed settings: 32 filters, kernel size 2, 100 biLSTM units, dropout rate 0.5.
derived_instance.conv1D_bilstm_v2(32, 2, 100, 0.5);

max_pool_1D layer shape: (None, 672)
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 672, 1)]             0         []                            
                                                                                                  
 conv1d_2 (Conv1D)           (None, 671, 32)              96        ['input_3[0][0]']             
                                                                                                  
 bidirectional_2 (Bidirecti  (None, 672, 200)             81600     ['input_3[0][0]']             
 onal)                                                                                            
                                                                                                  
 zero_padding1d_2 (ZeroPadd  (None, 672, 32)           

Calling the optimiser method

In [18]:
# Run the Bayesian hyperparameter search on 'Demand_data' with the default training window and split dates.
derived_instance.optimize_hyperparameters('Demand_data')

DATA LOADING
Input data runs from  2015-01-01 00:00:00 to 2019-12-31 23:30:00 and with 87648 datapoints
Training duration is 672  timesteps, an equivalent of 2 weeks

SPLITTING
Split results
_____________
Set Length Datapoint_1
Train 17520 28.726
Val 18240 26.958
Test 18192 28.007

SCALING
Scaled results
_____________
Set Length Datapoint_1
Train 17520 0.31256682174126627
Val 18240 0.26147889155373194
Test 18192 0.34324056561289745

 RESHAPING
Reshaping results
_________________
X_train (351, 672, 1)
y_train (351, 48)
X_val (366, 672)
y_val (366, 48)
X_test (365, 672, 1)
y_test (365, 48)
|   iter    |  target   | biLSTM... | biLSTM... | con_1D... | con_1D... | learn_... |   rate1   |   rate2   |   rate3   |   step1   |   step2   |   step3   |
-------------------------------------------------------------------------------------------------------------------------------------------------------------
max_pool_1D layer shape: (None, 672)
Batch size: 43
Epoch 1/5000
Epoch 2/5000
Epoch 3/500