In [None]:
import numpy as np
import pandas as pd

In [None]:
# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# data_path: CSV file read with pd.read_csv below.
# output_path: folder prefix that the result-writing functions prepend to their file names.
data_path = '/content/drive/MyDrive/Predicting business cycle/Data/Financial_Indicator.csv'
output_path = '/content/drive/MyDrive/Predicting business cycle/Results/'

In [None]:
# Read the indicator table and give the 'Unnamed: 0' column its proper name, 'Date'.
data_original = (
    pd.read_csv(data_path)
    .rename(columns={'Unnamed: 0': 'Date'})
)

In [None]:
# TODO: also run the LSTM/GRU experiments with a one-month time step (the current runs use time_step = 3).

In [None]:
# Preview the first rows of the loaded table.
data_original.head()

Unnamed: 0,Date,INDPRO,GDP,SP500,UNRATE,FEDFUNDS,AWHMAN,UMCSENT,TB3SMFFM,T10YFFM,CPALTT01USM657N,UEMPMEAN,PERMIT,business_cycle
0,1960-01-01,24.1712,542.648,58.029,5.2,3.99,40.6,100.0,0.36,0.73,-0.340136,13.5,1092.0,1.0
1,1960-02-01,23.9561,542.648,55.775,4.8,3.97,40.3,100.0,-0.01,0.52,0.341297,13.1,1088.0,1.0
2,1960-03-01,23.741,542.648,55.015217,5.4,3.84,40.0,93.3,-0.53,0.41,0.0,13.0,955.0,1.0
3,1960-04-01,23.5528,541.08,55.7,5.2,3.92,40.0,93.3,-0.69,0.36,0.340136,12.6,1016.0,1.0
4,1960-05-01,23.5259,541.08,55.215238,5.1,3.85,40.1,93.3,-0.56,0.5,0.0,11.9,1052.0,0.0


In [None]:
# Modelling table: every column of data_original except 'Date'.
# drop() returns a new DataFrame, so data_original itself is left untouched.
data = data_original.drop(columns=['Date'])
data.head()

Unnamed: 0,INDPRO,GDP,SP500,UNRATE,FEDFUNDS,AWHMAN,UMCSENT,TB3SMFFM,T10YFFM,CPALTT01USM657N,UEMPMEAN,PERMIT,business_cycle
0,24.1712,542.648,58.029,5.2,3.99,40.6,100.0,0.36,0.73,-0.340136,13.5,1092.0,1.0
1,23.9561,542.648,55.775,4.8,3.97,40.3,100.0,-0.01,0.52,0.341297,13.1,1088.0,1.0
2,23.741,542.648,55.015217,5.4,3.84,40.0,93.3,-0.53,0.41,0.0,13.0,955.0,1.0
3,23.5528,541.08,55.7,5.2,3.92,40.0,93.3,-0.69,0.36,0.340136,12.6,1016.0,1.0
4,23.5259,541.08,55.215238,5.1,3.85,40.1,93.3,-0.56,0.5,0.0,11.9,1052.0,0.0


### **Sequential train/test split**

In [None]:
def sequential_split(data, test_prop=0.2):
    """Split `data` into a leading train part and a trailing test part, without shuffling.

    Parameters
    ----------
    data : pandas.DataFrame, pandas.Series or numpy.ndarray
        Either the feature table (X) or the target (y). Rows are kept in
        their existing order; arrays are sliced along their first axis.
    test_prop : float, default 0.2
        Proportion of rows assigned to the test part. The train part gets
        ``int(len(data) * (1 - test_prop))`` rows and the test part the rest.

    Returns
    -------
    (train_data, test_data)
        Two slices of `data`, of the same type as the input.

    Raises
    ------
    ValueError
        If `data` is not a DataFrame, Series or NumPy array.
    """
    # Validate the type first so unsupported inputs always get the same error.
    if not isinstance(data, (pd.DataFrame, pd.Series, np.ndarray)):
        raise ValueError("The input data must be a pandas DataFrame or a NumPy array.")

    train_length = int(len(data) * (1 - test_prop))

    if isinstance(data, (pd.DataFrame, pd.Series)):
        # Positional slicing, independent of the index labels.
        return data.iloc[:train_length], data.iloc[train_length:]

    # NumPy arrays of any dimensionality are sliced along the first axis.
    return data[:train_length], data[train_length:]

### **Input creation with given time step for train and test set**

This takes the scaled X data and the y data and builds the model inputs (sliding windows and their labels) for a given time step.

In [None]:

def DatasetCreation(X_data, y_data, time_step=1):
    '''
    Build sliding-window inputs and their labels for a given time step.

    Window i holds rows i .. i+time_step-1 of X_data and is paired with the
    label of the row that immediately follows it, y_data[i+time_step].

    X_data: either X_train or X_test (2-D array-like, one row per observation)
    y_data: either y_train or y_test (one label per row of X_data)
    time_step: number of consecutive rows in each window

    Returns (X, y) as NumPy arrays. When there is at least one window, X has
    shape (len(X_data) - time_step, time_step, n_columns) and y has shape
    (len(X_data) - time_step,). With no window, both are empty arrays.
    '''
    # np.asarray lets DataFrames be passed as well as arrays.
    X_array = np.asarray(X_data)
    y_array = np.asarray(y_data)

    # The last usable window starts at len - time_step - 1, because its label
    # sits at index len - 1. range() is empty when the data is too short.
    num_windows = len(X_array) - time_step

    X, y = [], []
    for start in range(num_windows):
        stop = start + time_step
        X.append(X_array[start:stop])
        y.append(y_array[stop])  # business cycle: 0 or 1
    return np.array(X), np.array(y)

In [None]:
def write_dic_to_file(dic_name, file_name):
  """Write the string form of `dic_name` to `file_name`, replacing any existing file.

  dic_name: object to save; it is serialised with str().
  file_name: path of the text file to create or overwrite.
  """
  # The context manager closes the handle even if the write raises.
  with open(file_name, 'w') as file:
    file.write(str(dic_name))

In [None]:
import tensorflow as tf
import time
from sklearn.preprocessing import StandardScaler

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras import optimizers

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

## **LSTM Model**

In [None]:
def Build_LSTM_Model(layers, time_step, num_features,
                     optimizer='Adam',
                     learning_rate=0.001,
                     verbose=1):
    """Build and compile a (stacked) LSTM binary classifier.

    Parameters
    ----------
    layers : sequence
        Number of units of each LSTM layer, in order. Each entry is cast with int().
    time_step : int
        Length of every input window (first entry of ``input_shape``).
    num_features : int
        Number of features per time step (second entry of ``input_shape``).
    optimizer : str, default 'Adam'
        One of 'Adam', 'Adagrad', 'Nadam', 'Adadelta', 'RMSprop'.
    learning_rate : float, default 0.001
        Learning rate handed to the optimizer.
    verbose : int, default 1
        When 1, the model summary is printed.

    Returns
    -------
    The compiled ``Sequential`` model: the LSTM layers followed by a single
    sigmoid unit, with binary cross-entropy loss and the accuracy metric.

    Raises
    ------
    ValueError
        If `optimizer` is not one of the supported names.
    """
    supported_optimizers = ('Adam', 'Adagrad', 'Nadam', 'Adadelta', 'RMSprop')
    # Fail fast: previously an unknown name only printed a message and then
    # crashed at compile() because no optimizer object had been created.
    if optimizer not in supported_optimizers:
        raise ValueError(
            "No optimizer found among: Adam, Adagrad, Nadam, Adadelta, RMSprop "
            "(got %r)" % (optimizer,))

    model = Sequential()

    last_index = len(layers) - 1
    for index, units in enumerate(layers):
        layer_options = {}
        if index == 0:
            # Only the first layer declares the input shape.
            layer_options['input_shape'] = (time_step, num_features)
        if index < last_index:
            # Every layer except the last passes its full sequence to the next LSTM.
            layer_options['return_sequences'] = True
        model.add(LSTM(int(units), **layer_options))

    model.add(Dense(1, activation='sigmoid'))

    # The supported names match the class names in keras.optimizers.
    opt = getattr(optimizers, optimizer)(learning_rate=learning_rate)

    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

    if verbose == 1:
        # summary() prints the table itself; wrapping it in print() adds a stray "None".
        model.summary()
    return model

In [None]:
# Smoke test: build one single-layer model and show its summary.
optimizers_names = ['Adam']
layers = [32]
time_step, num_features = 3, 12
learning_rate = 0.001
verbose = 1

Build_LSTM_Model(
    layers, time_step, num_features,
    optimizer=optimizers_names[0],
    learning_rate=learning_rate,
    verbose=verbose,
)

In [None]:
# Hyperparameter grid explored by the tuning runs.
optimizers_names = ['Adam', 'Nadam', 'Adagrad']
learning_rates = [0.01, 0.001, 0.005]
batch_sizes = [8, 16, 32]

# Settings shared by every run.
time_step = 3         # rows per input window
epochs = 30           # upper bound on training epochs
num_replicates = 10   # repeated fits per hyperparameter combination
test_prop = 0.3       # share of rows held out as the test part

In [None]:
# Features are every column except the label; the label is business_cycle.
X = data.drop(columns=['business_cycle'])
y = np.array(data['business_cycle'])

# Sequential (unshuffled) train/test split.
X_train0, X_test0 = sequential_split(X, test_prop)
y_train0, y_test0 = sequential_split(y, test_prop)

num_features = X_train0.shape[1]

# Standardise with statistics fitted on the train part only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train0).reshape(-1, num_features)
X_test_scaled = scaler.transform(X_test0).reshape(-1, num_features)

# Windowed model inputs for the train part.
X_train1, y_train1 = DatasetCreation(X_train_scaled, y_train0, time_step)

# 10% of the data equiv with 14% of train goes in validation
v = int(len(X_train1) * 0.86)
X_train, X_val = X_train1[:v], X_train1[v:]
y_train, y_val = y_train1[:v], y_train1[v:]

# Windowed model inputs for the test part.
X_test, y_test = DatasetCreation(X_test_scaled, y_test0, time_step)

In [None]:
# Sanity check: shapes of the windowed arrays, then the class balance
# (count of 1s and 0s) in the train, validation and test labels.
print(X_train1.shape)
print(y_train1.shape)
print(y_train.shape)
print(y_val.shape)
print(y_test.shape)
print('Number of 1s in y_train', np.sum(y_train==1))
print('Number of 0s in y_train', np.sum(y_train==0))
print('Number of 1s in y_val', np.sum(y_val==1))
print('Number of 0s in y_val', np.sum(y_val==0))
print('Number of 1s in y_test', np.sum(y_test==1))
print('Number of 0s in y_test', np.sum(y_test==0))

(533, 3, 12)
(533,)
(458,)
(75,)
(227,)
Number of 1s in y_train 391
Number of 0s in y_train 67
Number of 1s in y_val 67
Number of 0s in y_val 8
Number of 1s in y_test 207
Number of 0s in y_test 20


In [None]:
def LSTM_Hyper_Parameter_Tuning(layers, data, time_step, test_prop,
                                optimizers_names,learning_rates, batch_sizes,
                                epochs, num_replicates=2):
    """Grid-search optimizer, learning rate and batch size for one LSTM architecture.

    Every (optimizer, learning rate, batch size) combination is trained
    `num_replicates` times. Its score is the mean test accuracy of those
    replicates only; the combination with the highest mean is reported.

    Parameters
    ----------
    layers : sequence
        Units of each LSTM layer, forwarded to Build_LSTM_Model.
    data : pandas.DataFrame
        Feature columns plus the 'business_cycle' label column.
    time_step : int
        Number of rows per input window.
    test_prop : float
        Proportion of rows held out (sequentially) as the test part.
    optimizers_names, learning_rates, batch_sizes : sequences
        The values that make up the grid.
    epochs : int
        Maximum number of epochs per fit. Early stopping on 'val_loss'
        with patience 5 may end a fit sooner.
    num_replicates : int, default 2
        Number of independent fits per combination.

    Returns
    -------
    dict
        Keys 'model', 'optimizer', 'learning_rate', 'batch_size' and
        'best_avg_accuracy' for the best combination.

    Side effects
    ------------
    Writes the best combination and the full grid of mean accuracies to
    ``output_path + "lstm-<layers[0]>N-hyperparameter_tuning__results.txt"``.
    `output_path` is a module-level variable. Progress and the final results
    are printed.

    NOTE(review): combinations are ranked by accuracy on the test part, not on
    the validation part. Confirm this is intended, since the selected
    configuration is then tuned on the same data used to report performance.
    """
    X = data.drop(['business_cycle'], axis=1)
    y = np.array(data['business_cycle'])

    # Splitting train and test data
    X_train0, X_test0 = sequential_split(X, test_prop)
    y_train0, y_test0 = sequential_split(y, test_prop)

    num_features = X_train0.shape[1]

    # Normalizing the train and test input using StandardScaler
    # (statistics are fitted on the train part only).
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train0)
    X_test_scaled = scaler.transform(X_test0)

    # Creating input data
    X_train1, y_train1 = DatasetCreation(X_train_scaled, y_train0, time_step)
    # 10% of the data equiv with 14% of train goes in validation
    v = int(len(X_train1) * 0.86)
    X_train, X_val = X_train1[:v], X_train1[v:]
    y_train, y_val = y_train1[:v], y_train1[v:]

    X_test, y_test = DatasetCreation(X_test_scaled, y_test0, time_step)

    # Mean test accuracy of every grid cell, indexed [optimizer, learning rate, batch size].
    all_avg_accuracy = np.zeros((len(optimizers_names),
                                 len(learning_rates),
                                 len(batch_sizes)))

    best_avg_accuracy = 0
    best_hyper_parameters = {"model": layers,
                             "optimizer": None,
                             "learning_rate": None,
                             "batch_size": None,
                             "best_avg_accuracy": None}

    for opt, optimizer_name in enumerate(optimizers_names):
        for lr, learning_rate in enumerate(learning_rates):
            for bs, batch_size in enumerate(batch_sizes):

                # Start a fresh list for every combination. Accumulating across
                # combinations would turn each "average" into a running mean
                # over all earlier grid cells as well.
                replicate_accuracy = []

                for i in range(num_replicates):

                    print("Running for " + optimizer_name + " optimizer "
                          + str(learning_rate) + " learning_rate "
                          + str(batch_size) + " batch_size and "
                          + str(i) + " replicate " + "\n")

                    model = Build_LSTM_Model(layers,
                                             time_step,
                                             num_features,
                                             optimizer_name,
                                             learning_rate=learning_rate,
                                             verbose=0)

                    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                                patience=5)

                    model.fit(X_train, y_train,
                              batch_size=batch_size,
                              epochs=epochs,
                              validation_data=(X_val, y_val),
                              callbacks=[callback], verbose=1)

                    # Predicted probability of class 1, thresholded at 0.5.
                    test_pred_probs = model.predict(X_test)
                    test_pred = (test_pred_probs > 0.5).astype(int)

                    replicate_accuracy.append(accuracy_score(y_test, test_pred))

                avg_accuracy = np.mean(np.array(replicate_accuracy))
                all_avg_accuracy[opt][lr][bs] = avg_accuracy

                if avg_accuracy > best_avg_accuracy:
                    best_avg_accuracy = avg_accuracy
                    best_hyper_parameters = {"model": layers,
                                             "optimizer": optimizer_name,
                                             "learning_rate": learning_rate,
                                             "batch_size": batch_size,
                                             "best_avg_accuracy": best_avg_accuracy}

    output_dictionary = {
        "best_hyper_parameters": best_hyper_parameters,
        "all_avg_accuracy": all_avg_accuracy
    }

    # writing output dictionary in the file
    file_name = output_path + "lstm-" + str(layers[0]) + "N-hyperparameter_tuning__results" + ".txt"
    write_dic_to_file(output_dictionary, file_name)

    print("Best_hyper_parameters(LSTM): \n", output_dictionary['best_hyper_parameters'])
    print("all_avg_accuracy(LSTM): \n", output_dictionary['all_avg_accuracy'])

    return output_dictionary['best_hyper_parameters']

In [None]:
# Tune the single-layer LSTM with 4 units over the shared grid.
layers = [4]

lstm_N4_best_hyper_parameters = LSTM_Hyper_Parameter_Tuning(
    layers=layers,
    data=data,
    time_step=time_step,
    test_prop=test_prop,
    optimizers_names=optimizers_names,
    learning_rates=learning_rates,
    batch_sizes=batch_sizes,
    epochs=epochs,
    num_replicates=num_replicates,
)
lstm_N4_best_hyper_parameters

In [None]:
# {'model': [4],
#  'optimizer': 'Adam',
#  'learning_rate': 0.005,
#  'batch_size': 8,
#  'best_avg_accuracy': 0.8310887350534927}

In [None]:
# Tune the single-layer LSTM with 8 units over the shared grid.
layers = [8]

lstm_N8_best_hyper_parameters = LSTM_Hyper_Parameter_Tuning(
    layers=layers,
    data=data,
    time_step=time_step,
    test_prop=test_prop,
    optimizers_names=optimizers_names,
    learning_rates=learning_rates,
    batch_sizes=batch_sizes,
    epochs=epochs,
    num_replicates=num_replicates,
)
lstm_N8_best_hyper_parameters

In [None]:
# {'model': [8],
#  'optimizer': 'Nadam',
#  'learning_rate': 0.005,
#  'batch_size': 16,
#  'best_avg_accuracy': 0.8701995335579165}

In [None]:
# Tune the single-layer LSTM with 16 units over the shared grid.
layers = [16]

lstm_N16_best_hyper_parameters = LSTM_Hyper_Parameter_Tuning(
    layers=layers,
    data=data,
    time_step=time_step,
    test_prop=test_prop,
    optimizers_names=optimizers_names,
    learning_rates=learning_rates,
    batch_sizes=batch_sizes,
    epochs=epochs,
    num_replicates=num_replicates,
)
lstm_N16_best_hyper_parameters

In [None]:
# {'model': [16],
#  'optimizer': 'Adam',
#  'learning_rate': 0.001,
#  'batch_size': 32,
#  'best_avg_accuracy': 0.8914831130690161}

In [None]:
# Tune the single-layer LSTM with 32 units over the shared grid.
layers = [32]

lstm_N32_best_hyper_parameters = LSTM_Hyper_Parameter_Tuning(
    layers=layers,
    data=data,
    time_step=time_step,
    test_prop=test_prop,
    optimizers_names=optimizers_names,
    learning_rates=learning_rates,
    batch_sizes=batch_sizes,
    epochs=epochs,
    num_replicates=num_replicates,
)
lstm_N32_best_hyper_parameters

In [None]:
# {'model': [32],
#  'optimizer': 'Nadam',
#  'learning_rate': 0.005,
#  'batch_size': 32,
#  'best_avg_accuracy': 0.9012481644640237}

In [None]:
# Tune the single-layer LSTM with 64 units over the shared grid.
layers = [64]

lstm_N64_best_hyper_parameters = LSTM_Hyper_Parameter_Tuning(
    layers=layers,
    data=data,
    time_step=time_step,
    test_prop=test_prop,
    optimizers_names=optimizers_names,
    learning_rates=learning_rates,
    batch_sizes=batch_sizes,
    epochs=epochs,
    num_replicates=num_replicates,
)
lstm_N64_best_hyper_parameters

In [None]:
# {'model': [64],
#  'optimizer': 'Nadam',
#  'learning_rate': 0.005,
#  'batch_size': 32,
#  'best_avg_accuracy': 0.908516886930984}

In [None]:
# Tune the single-layer LSTM with 128 units over the shared grid.
layers = [128]

lstm_N128_best_hyper_parameters = LSTM_Hyper_Parameter_Tuning(
    layers=layers,
    data=data,
    time_step=time_step,
    test_prop=test_prop,
    optimizers_names=optimizers_names,
    learning_rates=learning_rates,
    batch_sizes=batch_sizes,
    epochs=epochs,
    num_replicates=num_replicates,
)
lstm_N128_best_hyper_parameters

In [None]:
# {'model': [128],
#  'optimizer': 'Nadam',
#  'learning_rate': 0.005,
#  'batch_size': 32,
#  'best_avg_accuracy': 0.9079295154185025}

In [None]:
def LSTM_Model(layers,
               hyper_parameters,
               data,
               time_step = 1,
               test_prop = 0.2,
               epochs = 5,
               num_replicates = 2):
    """Train one LSTM architecture `num_replicates` times and collect test metrics.

    Parameters
    ----------
    layers : sequence
        Units of each LSTM layer, forwarded to Build_LSTM_Model.
    hyper_parameters : sequence or dict
        Either ``[optimizer, learning_rate, batch_size]`` or a dict with the
        keys 'optimizer', 'learning_rate' and 'batch_size' (the layout
        returned by LSTM_Hyper_Parameter_Tuning).
    data : pandas.DataFrame
        Feature columns plus the 'business_cycle' label column.
    time_step : int, default 1
        Number of rows per input window.
    test_prop : float, default 0.2
        Proportion of rows held out (sequentially) as the test part.
    epochs : int, default 5
        Maximum number of epochs per fit. Early stopping on the training
        loss may end a fit sooner.
    num_replicates : int, default 2
        Number of independent fits.

    Returns
    -------
    dict with three entries:
        'hyper_parameters'    - the settings used for the run;
        'performance_metrics' - per-replicate 'scores' plus their 'minimums',
                                'avg_scores', 'stds' and 'maximums';
        'best_model'          - metrics, predictions and training loss of the
                                replicate with the highest test accuracy.
    """
    # Accept both the positional list and the dict produced by the tuning step.
    if isinstance(hyper_parameters, dict):
        optimizer_name = hyper_parameters['optimizer']
        learning_rate = hyper_parameters['learning_rate']
        batch_size = hyper_parameters['batch_size']
    else:
        optimizer_name = hyper_parameters[0]
        learning_rate = hyper_parameters[1]
        batch_size = hyper_parameters[2]

    X = data.drop(['business_cycle'], axis=1)
    y = np.array(data['business_cycle'])

    # Splitting train and test data
    X_train0, X_test0 = sequential_split(X, test_prop)
    y_train0, y_test0 = sequential_split(y, test_prop)

    num_features = X_train0.shape[1]

    # Normalizing the train and test input using StandardScaler
    # (statistics are fitted on the train part only).
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train0)
    X_test_scaled = scaler.transform(X_test0)

    # Creating input data
    X_train, y_train = DatasetCreation(X_train_scaled, y_train0, time_step)
    X_test, y_test = DatasetCreation(X_test_scaled, y_test0, time_step)

    # One array per metric, one slot per replicate.
    metric_names = ('accuracy', 'precision', 'recall', 'f1',
                    'sensitivity', 'specificity', 'elapsed_time')
    scores = {name: np.zeros(num_replicates) for name in metric_names}

    models_history = []
    train_predictions = []
    test_predictions = []

    for i in range(num_replicates):

        print("Program is running for %d replicate ----->\n" %i)

        model = Build_LSTM_Model(layers, time_step, num_features,
                                 optimizer = optimizer_name,
                                 learning_rate = learning_rate, verbose = 0)

        # Stop training when the training loss has not improved for five
        # consecutive epochs.
        callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience= 5)

        start = time.time()
        history = model.fit(X_train, y_train, batch_size = batch_size,
                            epochs= epochs, callbacks=[callback], verbose = 1)
        elapsed_time = time.time() - start

        models_history.append(history)

        # Predicted probability of class 1, thresholded at 0.5.
        train_pred = (model.predict(X_train) > 0.5).astype(int)
        test_pred = (model.predict(X_test) > 0.5).astype(int)

        # labels=[0, 1] forces a 2x2 matrix, so the unpacking also works when
        # only one class appears in the labels and predictions.
        tn, fp, fn, tp = confusion_matrix(y_test, test_pred, labels=[0, 1]).ravel()

        train_predictions.append(train_pred)
        test_predictions.append(test_pred)

        scores['accuracy'][i] = accuracy_score(y_test, test_pred)
        scores['precision'][i] = precision_score(y_test, test_pred)
        scores['recall'][i] = recall_score(y_test, test_pred)
        scores['f1'][i] = f1_score(y_test, test_pred)
        scores['sensitivity'][i] = tp / (tp + fn)
        scores['specificity'][i] = tn / (tn + fp)
        scores['elapsed_time'][i] = elapsed_time

    # Replicate with the highest test accuracy.
    max_index = scores['accuracy'].argmax()

    # Collecting important results
    performance_metrics = {
        'scores': scores,
        'minimums': {name: np.min(values) for name, values in scores.items()},
        'avg_scores': {name: np.mean(values) for name, values in scores.items()},
        'stds': {name: np.std(values) for name, values in scores.items()},
        'maximums': {name: np.max(values) for name, values in scores.items()},
    }

    model_with_best_accuracy = {'replicate': max_index}
    for name in metric_names:
        model_with_best_accuracy[name] = scores[name][max_index]
    model_with_best_accuracy['train_predictions'] = train_predictions[max_index]
    model_with_best_accuracy['test_predictions'] = test_predictions[max_index]
    model_with_best_accuracy['loss'] = models_history[max_index].history['loss']

    # Collecting hyperparameters. A separate name is used so the
    # `hyper_parameters` argument is not overwritten.
    run_settings = {'layers': layers,
                    'model_specific_hyper_parameters': hyper_parameters,
                    'epochs': epochs,
                    'time_step': time_step,
                    'num_replicates': num_replicates,
                    'test_prop': test_prop}

    #======= Collecting all the outputs together =============#
    output_dictionary = {'hyper_parameters': run_settings,
                         'performance_metrics': performance_metrics,
                         'best_model': model_with_best_accuracy}

    print("Progress: All works are done successfully, congratulations!!\n")
    return output_dictionary


In [None]:
# Quick trial run of LSTM_Model with a small configuration.
hyper_parameters = ['Adam', 0.001, 32]  # optimizer, learning rate, batch size
layers = [8]

lstm_output = LSTM_Model(
    layers,
    hyper_parameters,
    data,
    time_step=1,
    test_prop=0.3,
    epochs=5,
    num_replicates=2,
)
lstm_output

## **Executing Multiple LSTM Models**

In [None]:
def Multiple_LSTM_Models(hidden_layers,
                         hyper_parameters,
                         data,
                         time_step = 1,
                         test_prop = 0.2,
                         epochs = 5,
                         num_replicates = 2):

  num_models = len(hidden_layers)

  # collect all scores
  accuracy = []
  precision = []
  recall = []
  f1 = []
  sensitivity = []
  specificity = []
  elapsed_time = []

  # collect all avg scores
  avg_accuracy = []
  avg_precision = []
  avg_recall = []
  avg_f1 = []
  avg_sensitivity = []
  avg_specificity = []
  avg_elapsed_time = []

  # iteratively update the best rmse and the corresponding model
  best_accuracy = 0
  best_avg_accuracy = 0
  best_model_hidden_layers = None
  best_model_output = None

  for i in range(num_models):
    #print("Running model with hidden neurons: ", hidden_layers[i])

    #print("\n")

    #print("Best Hyper_parameters used: ", hyper_parameters[i])

    #print("\n")

    output = LSTM_Model(hidden_layers[i],
                        hyper_parameters[i],
                        data,
                        time_step,
                        test_prop,
                        epochs,
                        num_replicates)


    accuracy.append(output['performance_metrics']['scores']['accuracy'])
    precision.append(output['performance_metrics']['scores']['precision'])
    recall.append(output['performance_metrics']['scores']['recall'])
    f1.append(output['performance_metrics']['scores']['f1'])
    sensitivity.append(output['performance_metrics']['scores']['sensitivity'])
    specificity.append(output['performance_metrics']['scores']['specificity'])
    elapsed_time.append(output['performance_metrics']['scores']['elapsed_time'])

    avg_accuracy.append(output['performance_metrics']['avg_scores']['accuracy'])
    avg_precision.append(output['performance_metrics']['avg_scores']['precision'])
    avg_recall.append(output['performance_metrics']['avg_scores']['recall'])
    avg_f1.append(output['performance_metrics']['avg_scores']['f1'])
    avg_sensitivity.append(output['performance_metrics']['avg_scores']['sensitivity'])
    avg_specificity.append(output['performance_metrics']['avg_scores']['specificity'])
    avg_elapsed_time.append(output['performance_metrics']['avg_scores']['elapsed_time'])

    if avg_accuracy[i] > best_avg_accuracy:
      best_avg_accuracy = avg_accuracy[i]
      best_accuracy = output['best_model']['accuracy']
      best_model_hidden_layers = hidden_layers[i]
      best_model_output = output


  accuracy = np.array(accuracy)
  precision = np.array(precision)
  recall =  np.array(recall)
  f1 = np.array(f1)
  sensitivity = np.array(sensitivity)
  specificity =  np.array(specificity)

  # Collecting all scores

  performance_metrics = {

       'scores':  {'layers': hidden_layers,
                   'accuracy': accuracy,
                   'precision': precision,
                   'recall':recall,
                   'f1': f1,
                   'sensitivity': sensitivity,
                   'specificity':specificity,
                   'elapsed_time': elapsed_time },

       'avg_scores':  pd.DataFrame({'layers': hidden_layers,
                                    'accuracy': np.array(avg_accuracy),
                                    'precision': np.array(avg_precision),
                                    'recall':np.array(avg_recall),
                                    'f1': np.array(avg_f1),
                                    'sensitivity': np.array(avg_sensitivity),
                                    'specificity':np.array(avg_specificity),
                                    'elapsed_time':np.array(avg_elapsed_time)}),

       'stds':     pd.DataFrame({'layers': hidden_layers,
                                 'accuracy': np.std(accuracy, axis = 1),
                                 'precision': np.std(precision, axis = 1),
                                 'recall':  np.std(recall, axis = 1 ),
                                 'f1': np.std(f1, axis = 1),
                                 'sensitivity': np.std(sensitivity, axis = 1),
                                 'specificity':  np.std(specificity, axis = 1 ),
                                 'elapsed_time': np.std(elapsed_time, axis = 1 )}),

       'minimums': pd.DataFrame({'layers': hidden_layers,
                                'accuracy': np.min(accuracy, axis =1 ),
                                'precision': np.min(precision, axis= 1),
                                'recall': np.min(recall, axis =1),
                                'f1': np.min(f1, axis =1 ),
                                'sensitivity': np.min(sensitivity, axis= 1),
                                'specificity': np.min(specificity, axis =1),
                                'elapsed_time': np.min(elapsed_time, axis =1)}),

       'maximums': pd.DataFrame({'layers': hidden_layers,
                                'accuracy': np.max(accuracy, axis =1),
                                'precision': np.max(precision, axis =1),
                                'recall': np.max(recall, axis =1),
                                'f1': np.max(f1, axis =1),
                                'sensitivity': np.max(sensitivity, axis =1),
                                'specificity': np.max(specificity, axis =1),
                                'elapsed_time': np.max(elapsed_time,axis =1)})
    }


  output_dictionary = {
                     'hyper_parameters': hyper_parameters,

                      'best_avg_accuracy': best_avg_accuracy,
                      'best_accuracy': best_accuracy,
                      'best_model_hidden_layers': best_model_hidden_layers,
                      'best_model_output': best_model_output
                      }

  #Save all statistics:
  performance_metrics['avg_scores'].to_csv(output_path+'multiple_lstm_models_average_scores.csv')
  performance_metrics['stds'].to_csv(output_path+'multiple_lstm_models_stds.csv')
  performance_metrics['minimums'].to_csv(output_path+'multiple_lstm_models_minimums.csv')
  performance_metrics['maximums'].to_csv(output_path+'multiple_lstm_models_maximums.csv')


  #Save all scores in the file for future analysis
  pd.DataFrame(performance_metrics['scores']['accuracy']).to_csv(output_path+'multiple_lstm_models_all_accuracy.csv')
  pd.DataFrame(performance_metrics['scores']['precision']).to_csv(output_path+'multiple_lstm_models_all_precision.csv')
  pd.DataFrame(performance_metrics['scores']['recall']).to_csv(output_path+'multiple_lstm_models_all_recall.csv')
  pd.DataFrame(performance_metrics['scores']['f1']).to_csv(output_path+'multiple_lstm_models_all_f1.csv')
  pd.DataFrame(performance_metrics['scores']['sensitivity']).to_csv(output_path+'multiple_lstm_models_all_sensitivity.csv')
  pd.DataFrame(performance_metrics['scores']['specificity']).to_csv(output_path+'multiple_lstm_models_all_specificity.csv')

  #Save best model results
  pd.DataFrame(best_model_output['best_model']['loss']).to_csv(output_path+'best_lstm_model_loss.csv')
  pd.DataFrame(best_model_output['best_model']['train_predictions']).to_csv(output_path+'best_lstm_model_train_predictions.csv')
  pd.DataFrame(best_model_output['best_model']['test_predictions']).to_csv(output_path+'best_lstm_model_test_predictions.csv')
  pd.DataFrame(best_model_output['performance_metrics']['scores']['accuracy']).to_csv(output_path+'best_lstm_model_all_accuracy.csv')

  #writing all result in the file
  write_dic_to_file(output_dictionary,
                    output_path + "multiple_lstm_models_full_results.txt")

  #Display some key results in the screen
  print("\nBest model and its avg accuracy and maximum accuracy):\n", best_model_hidden_layers, best_avg_accuracy, best_accuracy)
  print("Hyper_parameters:\n", hyper_parameters)
  print('\nAverage scores:\n',  performance_metrics['avg_scores'])
  print('\nStandard_deviations:\n',  performance_metrics['stds'])
  print('\nMinimums:\n',  performance_metrics['minimums'])
  print('\nMaximums:\n',  performance_metrics['maximums'])
  print("Progress: All works are done successfully, congratulations!!\n")

### **Execution**

In [None]:
# Single-hidden-layer LSTM architectures to compare (4 to 128 units).
hidden_layers = [[units] for units in (4, 8, 16, 32, 64, 128)]

# One hyper-parameter triple per architecture, in the same order as
# `hidden_layers` (presumably [optimizer, learning_rate, batch_size] -- confirm
# against the LSTM model function).
best_hyper_parameters = [
    ['Adam', 0.005, 8],     # 4N model
    ['Nadam', 0.005, 16],   # 8N model
    ['Adam', 0.001, 32],    # 16N model
    ['Nadam', 0.005, 32],   # 32N model
    ['Nadam', 0.005, 32],   # 64N model
    ['Nadam', 0.005, 32],   # 128N model
]

Multiple_LSTM_Models(
    hidden_layers,
    best_hyper_parameters,
    data,
    time_step=3,
    test_prop=0.3,
    epochs=30,
    num_replicates=30,
)

In [None]:
# Average scores:
#    layers  accuracy  precision    recall        f1  sensitivity  specificity  \
# 0    [4]  0.595448   0.937682  0.595813  0.707133     0.595813     0.591667
# 1    [8]  0.688253   0.930411  0.710467  0.800391     0.710467     0.458333
# 2   [16]  0.828047   0.936098  0.870853  0.900946     0.870853     0.385000
# 3   [32]  0.729662   0.926630  0.763607  0.835916     0.763607     0.378333
# 4   [64]  0.739648   0.926998  0.775362  0.843826     0.775362     0.370000
# 5  [128]  0.726285   0.925235  0.761192  0.834416     0.761192     0.365000

#    elapsed_time
# 0     14.633699
# 1     11.351831
# 2      7.316849
# 3      8.135706
# 4      8.827169
# 5     11.728423

# Standard_deviations:
#    layers  accuracy  precision    recall        f1  sensitivity  specificity  \
# 0    [4]  0.173160   0.023077  0.198704  0.172141     0.198704     0.169353
# 1    [8]  0.104575   0.008789  0.118313  0.082138     0.118313     0.065933
# 2   [16]  0.058481   0.009001  0.066985  0.038267     0.066985     0.096738
# 3   [32]  0.059403   0.006538  0.065291  0.042495     0.065291     0.030777
# 4   [64]  0.040763   0.005133  0.044534  0.028579     0.044534     0.027689
# 5  [128]  0.044078   0.003996  0.049811  0.031729     0.049811     0.029297

#    elapsed_time
# 0      3.337256
# 1      1.747687
# 2      1.047211
# 3      0.872982
# 4      2.390827
# 5      2.323720

# Minimums:
#    layers  accuracy  precision    recall        f1  sensitivity  specificity  \
# 0    [4]  0.193833   0.896226  0.125604  0.221277     0.125604         0.30
# 1    [8]  0.462555   0.912000  0.449275  0.603896     0.449275         0.35
# 2   [16]  0.651982   0.902913  0.685990  0.782369     0.685990         0.00
# 3   [32]  0.585903   0.912409  0.603865  0.726744     0.603865         0.30
# 4   [64]  0.634361   0.913333  0.661836  0.767507     0.661836         0.30
# 5  [128]  0.603524   0.914894  0.623188  0.741379     0.623188         0.30

#    elapsed_time
# 0     11.887335
# 1      8.421127
# 2      5.773108
# 3      6.377988
# 4      6.493367
# 5      7.768668

# Maximums:
#    layers  accuracy  precision    recall        f1  sensitivity  specificity  \
# 0    [4]  0.889868   1.000000  0.932367  0.939173     0.932367         1.00
# 1    [8]  0.867841   0.949239  0.903382  0.925743     0.903382         0.60
# 2   [16]  0.938326   0.948571  0.995169  0.967136     0.995169         0.55
# 3   [32]  0.828194   0.942105  0.874396  0.902743     0.874396         0.45
# 4   [64]  0.797357   0.934426  0.840580  0.883249     0.840580         0.40
# 5  [128]  0.797357   0.933702  0.845411  0.883838     0.845411         0.40

#    elapsed_time
# 0     23.468831
# 1     14.484608
# 2     10.707877
# 3     10.502277
# 4     18.378201
# 5     16.303485
# Progress: All works are done successfully, congratulations!!

## **GRU Model**

In [None]:
from tensorflow.keras.layers import GRU

In [None]:
def Build_GRU_Model(layers, time_step, num_features,
                     optimizer='Adam',
                     learning_rate=0.001, verbose=1):
    """Build and compile a (possibly stacked) GRU binary classifier.

    Args:
        layers: sequence of hidden sizes, one GRU layer per entry; each entry
            is cast with int().
        time_step: length of each input sequence.
        num_features: number of features per time step.
        optimizer: one of 'Adam', 'Adagrad', 'Nadam', 'Adadelta', 'RMSprop'.
        learning_rate: learning rate handed to the chosen optimizer.
        verbose: when 1, print the model summary.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, accuracy
        metric, single sigmoid output unit).

    Raises:
        ValueError: if `optimizer` is not one of the supported names.
    """
    optimizer_classes = {
        'Adam': optimizers.Adam,
        'Adagrad': optimizers.Adagrad,
        'Nadam': optimizers.Nadam,
        'Adadelta': optimizers.Adadelta,
        'RMSprop': optimizers.RMSprop,
    }
    # Fail fast with a clear error instead of printing a message and then
    # crashing on an unbound optimizer variable at compile time.
    if optimizer not in optimizer_classes:
        raise ValueError(
            "No optimizer found among [Adam, Adagrad, Nadam, Adadelta,RMSprop]"
            " (got %r)" % (optimizer,))
    opt = optimizer_classes[optimizer](learning_rate=learning_rate)

    model = Sequential()

    last = len(layers) - 1
    for i, units in enumerate(layers):
        layer_kwargs = {}
        if i == 0:
            # Only the first layer declares the input shape.
            layer_kwargs['input_shape'] = (time_step, num_features)
        if i < last:
            # Every layer except the last must emit the full sequence so the
            # next GRU layer receives 3-D input.
            layer_kwargs['return_sequences'] = True
        model.add(GRU(int(units), **layer_kwargs))

    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer=opt, metrics = ['accuracy'])

    if verbose == 1:
        # summary() prints by itself and returns None, so it is not wrapped
        # in print() (which would emit a stray "None").
        model.summary()
    return model

In [None]:
# Quick check of the builder: a two-layer GRU (50 -> 20 units) on
# one-step sequences of 12 features, with the summary printed.
optimizers_names = ['Adam']
time_step = 1
num_features = 12
learning_rate = 0.001
verbose = 1
layers = [50, 20]

Build_GRU_Model(
    layers,
    time_step,
    num_features,
    optimizer=optimizers_names[0],
    learning_rate=learning_rate,
    verbose=verbose,
)

In [None]:
def GRU_Hyper_Parameter_Tuning(layers, data, time_step, test_prop,
                                optimizers_names,learning_rates, batch_sizes,
                                epochs, num_replicates=2):
    """Grid-search optimizer, learning rate and batch size for one GRU architecture.

    For every (optimizer, learning rate, batch size) combination the model is
    trained `num_replicates` times and the mean test accuracy of those
    replicates is recorded. The results are also written to a text file under
    the notebook-level `output_path`.

    Args:
        layers: hidden-layer sizes passed to Build_GRU_Model.
        data: DataFrame containing the feature columns and a
            'business_cycle' target column.
        time_step: sequence length passed to DatasetCreation and the model.
        test_prop: proportion handed to sequential_split for the test split.
        optimizers_names: optimizer names to try.
        learning_rates: learning rates to try.
        batch_sizes: batch sizes to try.
        epochs: maximum number of training epochs per fit.
        num_replicates: number of trainings per combination.

    Returns:
        dict with keys 'model', 'optimizer', 'learning_rate', 'batch_size'
        and 'best_avg_accuracy' describing the best combination found.
    """
    X = data.drop(['business_cycle'], axis=1)
    y = np.array(data['business_cycle'])

    # Splitting train and test data
    X_train0, X_test0 = sequential_split(X, test_prop)
    y_train0, y_test0 = sequential_split(y, test_prop)

    num_features = X_train0.shape[1]

    # Normalizing the train and test input using StandardScaler
    # (fitted on the training split only, then applied to the test split).
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train0).reshape(X_train0.shape[0], num_features)
    X_test_scaled = scaler.transform(X_test0).reshape(X_test0.shape[0], num_features)

    # Creating input data
    X_train1, y_train1 = DatasetCreation(X_train_scaled, y_train0, time_step)
    # 10% of the data equiv with 14% of train goes in validation
    v = int(len(X_train1) * 0.86)
    X_train, X_val = X_train1[0:v], X_train1[v:]
    y_train, y_val = y_train1[0:v], y_train1[v:]

    X_test, y_test = DatasetCreation(X_test_scaled, y_test0, time_step)

    best_avg_accuracy = 0

    all_avg_accuracy = np.zeros((len(optimizers_names),
                                 len(learning_rates),
                                 len(batch_sizes)))

    best_hyper_parameters = {"model": layers,
                             "optimizer": None,
                             "learning_rate": None,
                             "batch_size": None,
                             "best_avg_accuracy": None}

    for opt_idx, opt_name in enumerate(optimizers_names):
        for lr_idx, lr_value in enumerate(learning_rates):
            for bs_idx, bs_value in enumerate(batch_sizes):

                # Start a fresh list for every combination; otherwise the
                # "average" is a running mean over all previous combinations.
                collect_accuracy = []

                for i in range(num_replicates):

                    print("Running for " + opt_name + " optimizer "
                          + str(lr_value) + " learning_rate "
                          + str(bs_value) + " batch_size and "
                          + str(i) + " replicate " + "\n")

                    model = Build_GRU_Model(layers,
                                            time_step,
                                            num_features,
                                            opt_name,
                                            learning_rate=lr_value,
                                            verbose=0)

                    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                                patience=5)

                    # Early stopping watches the held-out validation slice,
                    # not the test split, so the test data does not steer
                    # training.
                    history = model.fit(X_train, y_train,
                                        batch_size=bs_value,
                                        epochs=epochs,
                                        validation_data=(X_val, y_val),
                                        callbacks=[callback], verbose=1)

                    # Predict probabilities, then threshold at 0.5 to get
                    # class labels.
                    test_pred_probs = model.predict(X_test)
                    test_pred = (test_pred_probs > 0.5).astype(int)

                    # NOTE(review): combinations are still ranked by test
                    # accuracy, so the reported best accuracy is optimistic.
                    collect_accuracy.append(accuracy_score(y_test, test_pred))

                avg_accuracy = np.mean(np.array(collect_accuracy))
                all_avg_accuracy[opt_idx, lr_idx, bs_idx] = avg_accuracy

                if avg_accuracy > best_avg_accuracy:
                    best_avg_accuracy = avg_accuracy
                    best_hyper_parameters = {"model": layers,
                                             "optimizer": opt_name,
                                             "learning_rate": lr_value,
                                             "batch_size": bs_value,
                                             "best_avg_accuracy": best_avg_accuracy
                                             }

    output_dictionary = {
        "best_hyper_parameters": best_hyper_parameters,
        "all_avg_accuracy": all_avg_accuracy
    }

    # writing output dictionary in the file
    file_name = output_path + "gru-" + str(layers[0]) + "N-hyperparameter_tuning__results" + ".txt"
    write_dic_to_file(output_dictionary, file_name)

    print("Best_hyper_parameters(GRU): \n", output_dictionary['best_hyper_parameters'])
    print("all_avg_accuracy(GRU): \n", output_dictionary['all_avg_accuracy'])

    return output_dictionary['best_hyper_parameters']

In [None]:
# Shared settings for the GRU hyper-parameter searches in the cells below.
time_step = 3
test_prop = 0.3
epochs = 30
num_replicates = 10

# Search grid: every optimizer x learning-rate x batch-size combination.
optimizers_names = ['Adam', 'Nadam', 'Adagrad']
learning_rates = [0.01, 0.005, 0.001]
batch_sizes = [8, 16, 32]

In [None]:
layers = [4]

# Grid search for the 4-unit GRU, using the notebook-level search settings.
gru_N4_best_hyper_parameters = GRU_Hyper_Parameter_Tuning(
    layers, data, time_step, test_prop,
    optimizers_names, learning_rates, batch_sizes,
    epochs=epochs, num_replicates=num_replicates,
)
gru_N4_best_hyper_parameters

In [None]:
# {'model': [4],
#  'optimizer': 'Adagrad',
#  'learning_rate': 0.01,
#  'batch_size': 16,
#  'best_avg_accuracy': 0.798898678414097}

In [None]:
layers = [8]

# Grid search for the 8-unit GRU, using the notebook-level search settings.
gru_N8_best_hyper_parameters = GRU_Hyper_Parameter_Tuning(
    layers, data, time_step, test_prop,
    optimizers_names, learning_rates, batch_sizes,
    epochs=epochs, num_replicates=num_replicates,
)
gru_N8_best_hyper_parameters

In [None]:
# {'model': [8],
#  'optimizer': 'Nadam',
#  'learning_rate': 0.001,
#  'batch_size': 16,
#  'best_avg_accuracy': 0.8187613371339724}

In [None]:
layers = [16]

# Grid search for the 16-unit GRU, using the notebook-level search settings.
gru_N16_best_hyper_parameters = GRU_Hyper_Parameter_Tuning(
    layers, data, time_step, test_prop,
    optimizers_names, learning_rates, batch_sizes,
    epochs=epochs, num_replicates=num_replicates,
)
gru_N16_best_hyper_parameters

In [None]:
# {'model': [16],
#  'optimizer': 'Adam',
#  'learning_rate': 0.001,
#  'batch_size': 16,
#  'best_avg_accuracy': 0.8414096916299559}

In [None]:
layers = [32]

# Grid search for the 32-unit GRU, using the notebook-level search settings.
gru_N32_best_hyper_parameters = GRU_Hyper_Parameter_Tuning(
    layers, data, time_step, test_prop,
    optimizers_names, learning_rates, batch_sizes,
    epochs=epochs, num_replicates=num_replicates,
)
gru_N32_best_hyper_parameters

In [None]:
# {'model': [32],
#  'optimizer': 'Nadam',
#  'learning_rate': 0.001,
#  'batch_size': 32,
#  'best_avg_accuracy': 0.8633137542829173}

In [None]:
layers = [64]

# Grid search for the 64-unit GRU, using the notebook-level search settings.
gru_N64_best_hyper_parameters = GRU_Hyper_Parameter_Tuning(
    layers, data, time_step, test_prop,
    optimizers_names, learning_rates, batch_sizes,
    epochs=epochs, num_replicates=num_replicates,
)
gru_N64_best_hyper_parameters

In [None]:
# {'model': [64],
#  'optimizer': 'Adam',
#  'learning_rate': 0.01,
#  'batch_size': 8,
#  'best_avg_accuracy': 0.879295154185022}

In [None]:
layers = [128]

# Grid search for the 128-unit GRU, using the notebook-level search settings.
gru_N128_best_hyper_parameters = GRU_Hyper_Parameter_Tuning(
    layers, data, time_step, test_prop,
    optimizers_names, learning_rates, batch_sizes,
    epochs=epochs, num_replicates=num_replicates,
)
gru_N128_best_hyper_parameters

In [None]:
# {'model': [128],
#  'optimizer': 'Nadam',
#  'learning_rate': 0.005,
#  'batch_size': 16,
#  'best_avg_accuracy': 0.8869100062932663}

In [None]:
def GRU_Model(layers,
               hyper_parameters,
               data,
               time_step = 1,
               test_prop = 0.2,
               epochs = 5,
               num_replicates = 2):
    """Train one GRU architecture several times and summarise its test scores.

    Args:
        layers: hidden-layer sizes passed to Build_GRU_Model.
        hyper_parameters: indexable as [optimizer, learning_rate, batch_size].
        data: DataFrame with the feature columns and a 'business_cycle'
            target column.
        time_step: sequence length passed to DatasetCreation and the model.
        test_prop: proportion handed to sequential_split for the test split.
        epochs: maximum number of training epochs per replicate.
        num_replicates: number of independent trainings.

    Returns:
        dict with three entries:
          'hyper_parameters'    - the settings used for this run,
          'performance_metrics' - per-replicate 'scores' plus 'minimums',
                                  'avg_scores', 'stds' and 'maximums',
          'best_model'          - scores, predictions and training loss of the
                                  replicate with the highest test accuracy.
    """
    features = data.drop(['business_cycle'], axis=1)
    target = np.array(data['business_cycle'])

    # Train/test split of features and target
    features_train, features_test = sequential_split(features, test_prop)
    target_train, target_test = sequential_split(target, test_prop)

    num_features = features_train.shape[1]

    # Standardise with statistics fitted on the training split only
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(features_train).reshape(
        features_train.shape[0], num_features)
    test_scaled = scaler.transform(features_test).reshape(
        features_test.shape[0], num_features)

    # Model inputs for the requested time_step
    X_train, y_train = DatasetCreation(train_scaled, target_train, time_step)
    X_test, y_test = DatasetCreation(test_scaled, target_test, time_step)

    # One array of length num_replicates per metric, keyed by metric name
    metric_names = ['accuracy', 'precision', 'recall', 'f1',
                    'sensitivity', 'specificity', 'elapsed_time']
    scores = {name: np.zeros(num_replicates) for name in metric_names}

    models_history = []
    train_predictions = []
    test_predictions = []

    for i in range(num_replicates):

        print("Program is running for %d replicate ----->\n" %i)

        model = Build_GRU_Model(layers, time_step, num_features,
                                optimizer=hyper_parameters[0],
                                learning_rate=hyper_parameters[1],
                                verbose=0)

        # Stop training once the training loss has not improved for
        # five consecutive epochs.
        callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience= 5)

        start = time.time()
        history = model.fit(X_train, y_train, batch_size=hyper_parameters[2],
                            epochs=epochs, callbacks=[callback], verbose=1)
        end = time.time()
        elapsed_time = end - start

        models_history.append(history)

        # Predicted probabilities thresholded at 0.5 give the class labels
        train_pred = (model.predict(X_train) > 0.5).astype(int)
        test_pred = (model.predict(X_test) > 0.5).astype(int)

        # Test-set evaluation metrics
        accuracy = accuracy_score(y_test, test_pred)
        precision = precision_score(y_test, test_pred)
        recall = recall_score(y_test, test_pred)
        f1 = f1_score(y_test, test_pred)
        tn, fp, fn, tp = confusion_matrix(y_test, test_pred).ravel()
        sensitivity = tp / (tp + fn)
        specificity = tn / (tn + fp)

        train_predictions.append(train_pred)
        test_predictions.append(test_pred)

        replicate_values = (accuracy, precision, recall, f1,
                            sensitivity, specificity, elapsed_time)
        for name, value in zip(metric_names, replicate_values):
            scores[name][i] = value

    # Replicate with the highest test accuracy
    max_index = scores['accuracy'].argmax()

    def summarize(reducer):
        # Apply one reducer (min/mean/std/max) to every metric array.
        return {name: reducer(scores[name]) for name in metric_names}

    # Collecting important results
    performance_metrics = {
        'scores': scores,
        'minimums': summarize(np.min),
        'avg_scores': summarize(np.mean),
        'stds': summarize(np.std),
        'maximums': summarize(np.max),
    }

    model_with_best_accuracy = {'replicate': max_index}
    for name in metric_names:
        model_with_best_accuracy[name] = scores[name][max_index]
    model_with_best_accuracy['train_predictions'] = train_predictions[max_index]
    model_with_best_accuracy['test_predictions'] = test_predictions[max_index]
    # No validation data is passed to fit(), so only the training loss exists
    model_with_best_accuracy['loss'] = models_history[max_index].history['loss']

    # Collecting hyperparameters
    run_settings = {'layers': layers,
                    'model_specific_hyper_parameters': hyper_parameters,
                    'epochs': epochs,
                    'time_step': time_step,
                    'num_replicates': num_replicates,
                    'test_prop': test_prop}

    #======= Collecting all the outputs together =============#
    output_dictionary = {'hyper_parameters': run_settings,
                         'performance_metrics': performance_metrics,
                         'best_model': model_with_best_accuracy}

    print("Progress: All works are done successfully, congratulations!!\n")
    return output_dictionary

In [None]:
# Short trial run of GRU_Model: an 8-unit GRU, 2 replicates of 5 epochs each.
hyper_parameters = ['Adam', 0.001, 32]
layers = [8]
gru_output = GRU_Model(
    layers,
    hyper_parameters,
    data,
    time_step=1,
    test_prop=0.3,
    epochs=5,
    num_replicates=2,
)
gru_output

## **Executing Multiple GRU Models**

In [None]:
def Multiple_GRU_Models(hidden_layers,
                         hyper_parameters,
                         data,
                         time_step = 1,
                         test_prop = 0.2,
                         epochs = 5,
                         num_replicates = 2):
  """Run GRU_Model once per architecture, then summarise and save the results.

  For every index i, GRU_Model is called with hidden_layers[i] and
  hyper_parameters[i]; the remaining arguments are forwarded unchanged.
  The per-replicate scores and the average scores reported by each run are
  collected, and the architecture with the highest average accuracy is kept
  as the best model (ties keep the earliest architecture).

  Parameters
  ----------
  hidden_layers : sequence
      One architecture description per model, passed to GRU_Model as is.
  hyper_parameters : sequence
      One hyper-parameter set per architecture (same order as
      hidden_layers), passed to GRU_Model as is.
  data :
      Data set forwarded to GRU_Model.
  time_step, test_prop, epochs, num_replicates :
      Forwarded to GRU_Model.

  Returns
  -------
  dict
      The keys written to the text report ('hyper_parameters',
      'best_avg_accuracy', 'best_accuracy', 'best_model_hidden_layers',
      'best_model_output') plus 'performance_metrics', which holds the raw
      'scores' and the 'avg_scores', 'stds', 'minimums' and 'maximums'
      tables (one row per architecture).

  Raises
  ------
  ValueError
      If hidden_layers is empty, or if hyper_parameters has fewer entries
      than hidden_layers.

  Side effects
  ------------
  Writes CSV files prefixed 'multiple_gru_models_' / 'best_gru_model_' and
  the text report 'multiple_gru_models_full_results.txt' under the
  module-level output_path, and prints a summary.
  """
  num_models = len(hidden_layers)

  # Fail before any (expensive) training instead of part-way through it.
  if num_models == 0:
    raise ValueError("hidden_layers is empty: there is no model to train.")
  if len(hyper_parameters) < num_models:
    raise ValueError(
        "hyper_parameters has %d entries but hidden_layers has %d; "
        "every architecture needs its own hyper-parameter set."
        % (len(hyper_parameters), num_models))

  # Metrics read from each GRU_Model output; the order fixes the column order
  # of every summary table.
  quality_metrics = ('accuracy', 'precision', 'recall', 'f1',
                     'sensitivity', 'specificity')
  metric_names = quality_metrics + ('elapsed_time',)

  # all_scores[name][i]     -> per-replicate values of model i
  # all_avg_scores[name][i] -> average value of model i
  all_scores = {name: [] for name in metric_names}
  all_avg_scores = {name: [] for name in metric_names}

  # Iteratively updated best average accuracy and the corresponding model.
  best_accuracy = None
  best_avg_accuracy = None
  best_model_hidden_layers = None
  best_model_output = None

  for i in range(num_models):
    output = GRU_Model(hidden_layers[i],
                       hyper_parameters[i],
                       data,
                       time_step,
                       test_prop,
                       epochs,
                       num_replicates)

    model_metrics = output['performance_metrics']
    for name in metric_names:
      all_scores[name].append(model_metrics['scores'][name])
      all_avg_scores[name].append(model_metrics['avg_scores'][name])

    # The first model is always the initial best, so a best model exists even
    # when no average accuracy is above zero; later models must be strictly
    # better to replace it.
    model_avg_accuracy = all_avg_scores['accuracy'][-1]
    if best_model_output is None or model_avg_accuracy > best_avg_accuracy:
      best_avg_accuracy = model_avg_accuracy
      best_accuracy = output['best_model']['accuracy']
      best_model_hidden_layers = hidden_layers[i]
      best_model_output = output

  # One row per model, one column per replicate.
  all_scores = {name: np.array(values) for name, values in all_scores.items()}

  def summary_table(columns):
    # Table with a leading 'layers' column followed by the metric columns.
    table = {'layers': hidden_layers}
    table.update(columns)
    return pd.DataFrame(table)

  def reduced_table(reducer):
    # Apply reducer across the replicates (axis 1) of every metric.
    return summary_table({name: reducer(all_scores[name], axis = 1)
                          for name in metric_names})

  scores = {'layers': hidden_layers}
  scores.update(all_scores)

  performance_metrics = {
      'scores': scores,
      'avg_scores': summary_table({name: np.array(all_avg_scores[name])
                                   for name in metric_names}),
      'stds': reduced_table(np.std),
      'minimums': reduced_table(np.min),
      'maximums': reduced_table(np.max),
  }

  output_dictionary = {
      'hyper_parameters': hyper_parameters,
      'best_avg_accuracy': best_avg_accuracy,
      'best_accuracy': best_accuracy,
      'best_model_hidden_layers': best_model_hidden_layers,
      'best_model_output': best_model_output,
  }

  # File names carry a 'gru' prefix, consistent with the text report below,
  # so they cannot collide with LSTM result files stored in output_path.
  multi_prefix = output_path + 'multiple_gru_models_'
  best_prefix = output_path + 'best_gru_model_'

  # Save all statistics
  performance_metrics['avg_scores'].to_csv(multi_prefix + 'average_scores.csv')
  for table_name in ('stds', 'minimums', 'maximums'):
    performance_metrics[table_name].to_csv(multi_prefix + table_name + '.csv')

  # Save all per-replicate scores for future analysis
  for name in quality_metrics:
    pd.DataFrame(scores[name]).to_csv(multi_prefix + 'all_' + name + '.csv')

  # Save best model results
  best_model = best_model_output['best_model']
  for item in ('loss', 'train_predictions', 'test_predictions'):
    pd.DataFrame(best_model[item]).to_csv(best_prefix + item + '.csv')
  pd.DataFrame(best_model_output['performance_metrics']['scores']['accuracy']).to_csv(
      best_prefix + 'all_accuracy.csv')

  # Write the full result dictionary to a text file
  write_dic_to_file(output_dictionary,
                    output_path + "multiple_gru_models_full_results.txt")

  # Display some key results on the screen
  print("\nBest model and its avg accuracy and maximum accuracy):\n", best_model_hidden_layers, best_avg_accuracy, best_accuracy)
  print("Hyper_parameters:\n", hyper_parameters)
  print('\nAverage scores:\n',  performance_metrics['avg_scores'])
  print('\nStandard_deviations:\n',  performance_metrics['stds'])
  print('\nMinimums:\n',  performance_metrics['minimums'])
  print('\nMaximums:\n',  performance_metrics['maximums'])
  print("Progress: All works are done successfully, congratulations!!\n")

  # Hand the results back as well, so they are usable without re-reading the
  # files; the text report above keeps its original set of keys.
  results = dict(output_dictionary)
  results['performance_metrics'] = performance_metrics
  return results

### **Execution**

In [None]:
# Architectures to compare and the hyper-parameter set chosen for each one.
# The two lists are paired by position.
hidden_layers = [[4], [8], [16], [32], [64], [128]]
best_hyper_parameters   = [
                            ['Adagrad', 0.01, 16], #4N model
                            ['Nadam', 0.001, 16],  #8N model
                            ['Adam', 0.001, 16], #16N model
                            ['Nadam', 0.001, 32], #32N model
                            ['Adam', 0.01, 8], #64N model
                            ['Nadam', 0.005, 16] #128N model
                        ]

# Catch a mismatched pairing before starting the long training run.
assert len(hidden_layers) == len(best_hyper_parameters), \
    "each entry of hidden_layers needs exactly one hyper-parameter set"

# Bind the result to a name so the cell does not echo a return value.
multiple_gru_output = Multiple_GRU_Models(hidden_layers,
                                          best_hyper_parameters,
                                          data,
                                          time_step = 3,
                                          test_prop = 0.3,
                                          epochs = 30,
                                          num_replicates = 30)

In [None]:
# Average scores:
#    layers  accuracy  precision    recall        f1  sensitivity  specificity  \
# 0    [4]  0.875184   0.940483  0.923833  0.929582     0.923833     0.371667
# 1    [8]  0.767988   0.975184  0.766506  0.853753     0.766506     0.783333
# 2   [16]  0.760206   0.954501  0.774235  0.851333     0.774235     0.615000
# 3   [32]  0.765345   0.943106  0.790499  0.858299     0.790499     0.505000
# 4   [64]  0.604405   0.942125  0.604026  0.732626     0.604026     0.608333
# 5  [128]  0.586050   0.922387  0.595974  0.721866     0.595974     0.483333

#    elapsed_time
# 0      9.637760
# 1     11.699339
# 2     11.220967
# 3      8.512129
# 4     16.009488
# 5     12.927092

# Standard_deviations:
#    layers  accuracy  precision    recall        f1  sensitivity  specificity  \
# 0    [4]  0.051151   0.025895  0.074775  0.032903     0.074775     0.303503
# 1    [8]  0.087312   0.024501  0.103584  0.064410     0.103584     0.219975
# 2   [16]  0.086809   0.020980  0.098286  0.063870     0.098286     0.178022
# 3   [32]  0.060583   0.008765  0.071246  0.042052     0.071246     0.094296
# 4   [64]  0.068685   0.021430  0.081484  0.059494     0.081484     0.159208
# 5  [128]  0.060717   0.017243  0.068654  0.054826     0.068654     0.113529

#    elapsed_time
# 0      2.223549
# 1      1.835493
# 2      1.945417
# 3      1.946306
# 4      5.717080
# 5      3.333227

# Minimums:
#    layers  accuracy  precision    recall        f1  sensitivity  specificity  \
# 0    [4]  0.726872   0.905660  0.700483  0.823864     0.700483         0.00
# 1    [8]  0.533040   0.923611  0.487923  0.655844     0.487923         0.25
# 2   [16]  0.537445   0.911111  0.502415  0.664537     0.502415         0.40
# 3   [32]  0.607930   0.929936  0.608696  0.739003     0.608696         0.35
# 4   [64]  0.431718   0.913386  0.391304  0.556701     0.391304         0.45
# 5  [128]  0.387665   0.886364  0.376812  0.528814     0.376812         0.35

#    elapsed_time
# 0      6.834773
# 1      8.557289
# 2      8.331985
# 3      6.382083
# 4      7.701796
# 5      6.795640

# Maximums:
#    layers  accuracy  precision    recall        f1  sensitivity  specificity  \
# 0    [4]  0.955947   1.000000  1.000000  0.976190     1.000000         1.00
# 1    [8]  0.933921   1.000000  1.000000  0.965035     1.000000         1.00
# 2   [16]  0.885463   0.993711  0.932367  0.936893     0.932367         0.95
# 3   [32]  0.872247   0.967949  0.917874  0.929095     0.917874         0.75
# 4   [64]  0.814978   1.000000  0.850242  0.893401     0.850242         1.00
# 5  [128]  0.726872   0.991803  0.763285  0.835979     0.763285         0.95

#    elapsed_time
# 0     13.103123
# 1     13.695225
# 2     13.808754
# 3     18.143008
# 4     23.755467
# 5     23.762189
# Progress: All works are done successfully, congratulations!!