# SETTING UP

### Import libraries

In [65]:
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
plt.rc('font', size=16) 
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import MinMaxScaler #to scale data 
import warnings
import logging

tfk = tf.keras
tfkl = tf.keras.layers
print(tf.__version__)

2.6.4


### Set seed for reproducibility

In [66]:
# Random seed for reproducibility
seed = 42

random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

### Suppress warnings

In [67]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)
tf.get_logger().setLevel('INFO')
tf.autograph.set_verbosity(0)

tf.get_logger().setLevel(logging.ERROR)
tf.get_logger().setLevel('ERROR')
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

### Importing data

In [68]:
dataset_dir = '/kaggle/input/training-datasethomework2/training_dataset_homework2'

In [69]:
X_train = np.load(os.path.join(dataset_dir,'x_train.npy'))
Y_train = np.load(os.path.join(dataset_dir,'y_train.npy'))

In [70]:
classes_dict = {0: "Wish",
          1: "Another",
          2: "Comfortably",
          3: "Money",
          4: "Breathe",
          5: "Time",
          6: "Brain",
          7: "Echoes",
          8: "Wearing",
          9: "Sorrow",
          10: "Hey", 
          11: "Shine"
          }

classes_dict.values()

classes_labels = list(classes_dict.values())

Y_train_labels=[]

for i in Y_train:
    Y_train_labels.append(classes_labels[Y_train[i]])

In [71]:
feat_dict = {0: 'FEAT_1',
               1: 'FEAT_2',
               2: 'FEAT_3',
               3: 'FEAT_4',
               4: 'FEAT_5',
               5: 'FEAT_6'}

feat_labels = list(feat_dict.values())

### Splitting data

In [72]:
from sklearn.model_selection import train_test_split

x_train, x_val, y_train, y_val = train_test_split(X_train, Y_train, 
                                                  test_size = 0.2, 
                                                  random_state = seed,
                                                  stratify = Y_train)

y_train = tfk.utils.to_categorical(y_train)
y_val = tfk.utils.to_categorical(y_val)

print('Shapes:')
print('x_train: ', x_train.shape)
print('x_val: ', x_val.shape)
print('y_train: ', y_train.shape)
print('y_val: ', y_val.shape)

Shapes:
x_train:  (1943, 36, 6)
x_val:  (486, 36, 6)
y_train:  (1943, 12)
y_val:  (486, 12)


# DATA PREPROCESSING

Insert a preprocessing routine from the notebook HW2_preprocessing.ipynb 

In [75]:
#scaling along features
from sklearn.preprocessing import MinMaxScaler,StandardScaler,RobustScaler

scale_columns = feat_labels
scaler = RobustScaler() #could be replaced by MinMaxScaler


x_train_new = scaler.fit_transform(x_train.reshape(-1, x_train.shape[-1])).reshape(x_train.shape)
x_val_new = scaler.transform(x_val.reshape(-1, x_val.shape[-1])).reshape(x_val.shape)

print('Output shape:')
x_train_new.shape, x_val_new.shape

Output shape:


((1943, 36, 6), (486, 36, 6))

In [76]:
x_train = x_train_new
x_val = x_val_new

# BUILD AND TRAIN 

### Keras tuner
doc: https://keras.io/guides/keras_tuner/getting_started/

In [77]:
input_shape = x_train.shape[1:]
classes = y_train.shape[-1]
batch_size = 128
epochs = 500

In [78]:
import keras_tuner

In [79]:
def build_BiLSTM_classifier(hp):
    # Build the neural network layer by layer
    input_layer = tfkl.Input(shape=input_shape, name='Input')

    # Feature extractor
    units_lstm_1 = hp.Choice("units_lstm_1", values=[512])
    x = tfkl.Bidirectional(tfkl.LSTM(units_lstm_1, kernel_regularizer = tfk.regularizers.L2(l2=1e-2), return_sequences=True))(input_layer)
    
    drop_rate_1 = hp.Choice("drop_rate_1", values=[0.5])
    x = tfkl.Dropout(drop_rate_1, seed=seed)(x)
    
    units_lstm_2 = hp.Choice("units_lstm_2", values=[512])
    x = tfkl.Bidirectional(tfkl.LSTM(units_lstm_2, kernel_regularizer = tfk.regularizers.L2(l2=1e-2)))(x)
    
    drop_rate_2 = hp.Choice("drop_rate_2", values=[0.5])
    x = tfkl.Dropout(drop_rate_2, seed=seed)(x)

    # Classifier
    x = tfkl.Dense(units=hp.Int(f"units_{i}", min_value=32, max_value=64, step=32), 
                   activation='leaky_relu', 
                   kernel_initializer = tfk.initializers.HeUniform(seed),
                   kernel_regularizer = tfk.regularizers.L2(l2=1e-2))(x)
    
    drop_rate_3 = hp.Choice("drop_rate_3", values=[0.0])
    x = tfkl.Dropout(drop_rate_3, seed=seed)(x)
    
    output_layer = tfkl.Dense(classes, 
                              activation='softmax',
                              kernel_initializer = tfk.initializers.GlorotUniform(seed),
                              kernel_regularizer = tfk.regularizers.L2(l2=1e-2))(x)

    # Connect input and output through the Model class
    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='model')

    learning_rate = hp.Float("lr", min_value=5*1e-5, max_value=1e-3, sampling="log")
    # Compile the model
    model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.Adam(learning_rate), metrics='accuracy')

    # Return the model
    return model

In [80]:
build_BiLSTM_classifier(keras_tuner.HyperParameters())

<keras.engine.functional.Functional at 0x7fb4c008cd90>

In [81]:
output_dir = '/kaggle/working/'

In [82]:
tuner = keras_tuner.RandomSearch(
    hypermodel=build_BiLSTM_classifier,
    objective="val_accuracy",
    max_trials=7,
    executions_per_trial=1,
    overwrite=True,
    directory= output_dir,
    project_name="BiLSTM",
)

tuner.search_space_summary()

Search space summary
Default search space size: 7
units_lstm_1 (Choice)
{'default': 512, 'conditions': [], 'values': [512], 'ordered': True}
drop_rate_1 (Choice)
{'default': 0.5, 'conditions': [], 'values': [0.5], 'ordered': True}
units_lstm_2 (Choice)
{'default': 512, 'conditions': [], 'values': [512], 'ordered': True}
drop_rate_2 (Choice)
{'default': 0.5, 'conditions': [], 'values': [0.5], 'ordered': True}
units_11 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 64, 'step': 32, 'sampling': None}
drop_rate_3 (Choice)
{'default': 0.0, 'conditions': [], 'values': [0.0], 'ordered': True}
lr (Float)
{'default': 5e-05, 'conditions': [], 'min_value': 5e-05, 'max_value': 0.001, 'step': None, 'sampling': 'log'}


In [83]:
tuner.search(x_train, y_train, epochs=40, validation_data=(x_val, y_val))

Trial 7 Complete [00h 01m 41s]
val_accuracy: 0.6563786268234253

Best val_accuracy So Far: 0.6913580298423767
Total elapsed time: 00h 14m 04s


In [84]:
tuner.results_summary()

Results summary
Results in /kaggle/working/BiLSTM
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x7fb4c21a2910>
Trial summary
Hyperparameters:
units_lstm_1: 512
drop_rate_1: 0.5
units_lstm_2: 512
drop_rate_2: 0.5
units_11: 32
drop_rate_3: 0.0
lr: 0.0002248851200803347
Score: 0.6913580298423767
Trial summary
Hyperparameters:
units_lstm_1: 512
drop_rate_1: 0.5
units_lstm_2: 512
drop_rate_2: 0.5
units_11: 32
drop_rate_3: 0.0
lr: 8.331157447910707e-05
Score: 0.6687242984771729
Trial summary
Hyperparameters:
units_lstm_1: 512
drop_rate_1: 0.5
units_lstm_2: 512
drop_rate_2: 0.5
units_11: 64
drop_rate_3: 0.0
lr: 0.00045597049261032974
Score: 0.6666666865348816
Trial summary
Hyperparameters:
units_lstm_1: 512
drop_rate_1: 0.5
units_lstm_2: 512
drop_rate_2: 0.5
units_11: 32
drop_rate_3: 0.0
lr: 0.00028774062434606615
Score: 0.6625514626502991
Trial summary
Hyperparameters:
units_lstm_1: 512
drop_rate_1: 0.5
units_lstm_2: 512
drop_rate_2: 0.5
units_11: 64
drop_rate_3: 

### Final choice

In [87]:
best_hps = tuner.get_best_hyperparameters(5)
model = build_BiLSTM_classifier(best_hps[0])
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           [(None, 36, 6)]           0         
_________________________________________________________________
bidirectional_4 (Bidirection (None, 36, 1024)          2125824   
_________________________________________________________________
dropout_6 (Dropout)          (None, 36, 1024)          0         
_________________________________________________________________
bidirectional_5 (Bidirection (None, 1024)              6295552   
_________________________________________________________________
dropout_7 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 32)                32800     
_________________________________________________________________
dropout_8 (Dropout)          (None, 32)                0     

In [88]:
# Train the model
history = model.fit(
    x = x_train,
    y = y_train,
    validation_data = (x_val, y_val),
    #class_weight = class_weights,
    batch_size = batch_size,
    epochs = epochs,
    callbacks = [tfk.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=80, restore_best_weights=True)]
).history

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

KeyboardInterrupt: 

### Performance

In [None]:
#plot the LOSS
best_epoch = np.argmax(history['val_accuracy'])
plt.figure(figsize=(17,4))
plt.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
plt.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
plt.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
plt.title('Categorical Crossentropy')
plt.legend()
plt.grid(alpha=.3)
plt.show()

#plot the ACCURACY
plt.figure(figsize=(17,4))
plt.plot(history['accuracy'], label='Training accuracy', alpha=.8, color='#ff7f0e')
plt.plot(history['val_accuracy'], label='Validation accuracy', alpha=.9, color='#5a9aa5')
plt.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
plt.title('Accuracy')
plt.legend()
plt.grid(alpha=.3)
plt.show()


In [None]:
# Predict the test set with the LSTM
predictions = model.predict(x_val)
predictions.shape

In [None]:
# Compute the confusion matrix
cm = confusion_matrix(np.argmax(y_val, axis=-1), np.argmax(predictions, axis=-1))

# Compute the classification metrics
accuracy = accuracy_score(np.argmax(y_val, axis=-1), np.argmax(predictions, axis=-1))
precision = precision_score(np.argmax(y_val, axis=-1), np.argmax(predictions, axis=-1), average='macro')
recall = recall_score(np.argmax(y_val, axis=-1), np.argmax(predictions, axis=-1), average='macro')
f1 = f1_score(np.argmax(y_val, axis=-1), np.argmax(predictions, axis=-1), average='macro')
print('Accuracy:',accuracy.round(4))
print('Precision:',precision.round(4))
print('Recall:',recall.round(4))
print('F1:',f1.round(4))

# Plot the confusion matrix
plt.figure(figsize=(10,8))
sns.heatmap(cm.T, cmap='Blues', xticklabels=list(classes_dict.values()), yticklabels=list(classes_dict.values()), annot = True)
plt.xlabel('True labels')
plt.ylabel('Predicted labels')
plt.show()

#Print the classification report
print(classification_report(np.argmax(y_val, axis=-1), np.argmax(predictions, axis=-1), target_names=classes_dict.values()))