# SETTING UP

### Import libraries

In [89]:
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
plt.rc('font', size=16) 
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import MinMaxScaler #to scale data 
import warnings
import logging

tfk = tf.keras
tfkl = tf.keras.layers
print(tf.__version__)

2.6.4


### Set seed for reproducibility

In [90]:
# Random seed for reproducibility
seed = 42

random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

### Suppress warnings

In [91]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)
tf.get_logger().setLevel('INFO')
tf.autograph.set_verbosity(0)

tf.get_logger().setLevel(logging.ERROR)
tf.get_logger().setLevel('ERROR')
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

### Importing data

In [92]:
dataset_dir = '/kaggle/input/training-datasethomework2/training_dataset_homework2'

In [93]:
X_train = np.load(os.path.join(dataset_dir,'x_train.npy'))
Y_train = np.load(os.path.join(dataset_dir,'y_train.npy'))

In [94]:
classes_dict = {0: "Wish",
          1: "Another",
          2: "Comfortably",
          3: "Money",
          4: "Breathe",
          5: "Time",
          6: "Brain",
          7: "Echoes",
          8: "Wearing",
          9: "Sorrow",
          10: "Hey", 
          11: "Shine"
          }

classes_dict.values()

classes_labels = list(classes_dict.values())

Y_train_labels=[]

for i in Y_train:
    Y_train_labels.append(classes_labels[Y_train[i]])

In [95]:
feat_dict = {0: 'FEAT_1',
               1: 'FEAT_2',
               2: 'FEAT_3',
               3: 'FEAT_4',
               4: 'FEAT_5',
               5: 'FEAT_6'}

feat_labels = list(feat_dict.values())

### Splitting data

In [96]:
from sklearn.model_selection import train_test_split

x_train, x_val, y_train, y_val = train_test_split(X_train, Y_train, 
                                                  test_size = 0.2, 
                                                  random_state = seed,
                                                  stratify = Y_train)

#y_train = tfk.utils.to_categorical(y_train)
#y_val = tfk.utils.to_categorical(y_val)

print('Shapes:')
print('x_train: ', x_train.shape)
print('x_val: ', x_val.shape)
print('y_train: ', y_train.shape)
print('y_val: ', y_val.shape)

Shapes:
x_train:  (1943, 36, 6)
x_val:  (486, 36, 6)
y_train:  (1943,)
y_val:  (486,)


# DATA PREPROCESSING

- Insert a preprocessing routine from the notebook HW2_preprocessing.ipynb 
- In this case the preferred standardization seems to be the one along timesteps

In [97]:
#scaling along features
from sklearn.preprocessing import MinMaxScaler,StandardScaler,RobustScaler

scaler = RobustScaler() #could be replaced

x_train_new = scaler.fit_transform(x_train.reshape(-1, x_train.shape[-1])).reshape(x_train.shape)
x_val_new = scaler.transform(x_val.reshape(-1, x_val.shape[-1])).reshape(x_val.shape)

print('Output shape:')
x_train_new.shape, x_val_new.shape

Output shape:


((1943, 36, 6), (486, 36, 6))

In [98]:
x_train = x_train_new
x_val = x_val_new

# DATA AUGMENTATION
Insert augmentation routine from HW2_augmentation&oversampling.ipynb

**!!!   implementation is slightly modified to manage integer encoded labels   !!!**

In [99]:
!pip install tsaug

[0m

In [100]:
#sample a random fraction of data
np.random.seed(seed)

#implementation is slightly changed to manage integer encoded labels
def random_sample(X,Y, rate):
    indexes = np.random.choice(X.shape[0], size=int(np.floor(rate*X.shape[0])), replace=False)
    return X[indexes,:,:], Y[indexes]

x_sampled, y_sampled = random_sample(x_train, y_train, 0.8)

print('Sampled shapes:')
print(x_sampled.shape)
print(y_sampled.shape)

Sampled shapes:
(1554, 36, 6)
(1554,)


In [101]:
#feeding the correct shape of y to the augmenter
y_sampled = tfk.utils.to_categorical(y_sampled)

y_sampled_new = np.reshape(y_sampled, newshape = (y_sampled.shape[0],1,12))
y_sampled_new = np.repeat(y_sampled_new,36,axis=1)

y_sampled_new = np.argmax(y_sampled_new,axis=-1)
y_sampled_new.shape

(1554, 36)

In [102]:
#augmenter...
from tsaug import AddNoise, Dropout, Convolve, Pool

augmenter = (
    AddNoise(scale=0.01)
    + Dropout(p=0.05)
    + Convolve(window="flattop", size=10)
    + Pool(size = 1)
)

#...augment data
x_aug, y_aug = augmenter.augment(x_sampled, y_sampled_new)

In [103]:
#going back to the original shape of y to be fed to model.fit()
y_aug_new = np.reshape(np.unique(y_aug,axis=-1) , newshape = (y_aug.shape[0]))

print('Shapes:')
print(x_aug.shape)
print(y_aug_new.shape)

Shapes:
(1554, 36, 6)
(1554,)


In [104]:
#add the augmented data to the training set
x_train = np.concatenate((x_train, x_aug))
y_train = np.concatenate((y_train, y_aug_new))

print('Shapes:')
print(x_train.shape)
print(y_train.shape)

Shapes:
(3497, 36, 6)
(3497,)


# BALANCE OUR PREDICTION

Choose a method to deal with class_imbalance from HW2_augmentation&oversampling.ipynb
- no deal (often better)
- set class_weight when fitting the model
- oversampling (over_under_sampling to be implemented)

In [105]:
from sklearn.utils import class_weight
 
class_weights = class_weight.compute_class_weight(
                'balanced',
                classes = np.unique(Y_train), 
                y = Y_train)

class_weights = dict(enumerate(class_weights))  

# BUILD AND TRAIN 

In [106]:
input_shape = x_train.shape[1:]
classes = 12
batch_size = 128
epochs = 500

### Transformers
docs: 
- https://keras.io/examples/timeseries/timeseries_classification_transformer/
- https://www.google.com/url?client=internal-element-cse&cx=005305742766141300566:jc9gtvxee8o&q=https://keras.io/api/layers/attention_layers/multi_head_attention&sa=U&ved=2ahUKEwjp1OiU1vT7AhW__7sIHdrlAjIQFnoECAEQAQ&usg=AOvVaw2ekNi0W91dvAvvbbXfyH2e
- https://arxiv.org/pdf/1706.03762.pdf

### Tuning the model

In [107]:
import keras_tuner

In [108]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Attention and Normalization
    x = tfkl.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(inputs, inputs)
    x = tfkl.Dropout(dropout)(x)
    x = tfkl.LayerNormalization(epsilon=1e-5)(x)
    
    res = x + inputs

    # Feed Forward Part
    x = tfkl.Conv1D(filters=ff_dim, 
                    kernel_size=3, 
                    padding = 'same',
                    kernel_regularizer = tfk.regularizers.L2(l2=1e-2),
                    activation="relu")(res)
    x = tfkl.Dropout(dropout)(x)
    x = tfkl.Conv1D(filters=ff_dim, 
                    kernel_size=3,
                    padding = 'same',
                    kernel_regularizer = tfk.regularizers.L2(l2=1e-2),
                    activation="relu")(x)
    x = tfkl.Dropout(dropout)(x)
    x = tfkl.Conv1D(filters=ff_dim, 
                    kernel_size=3, 
                    padding = 'same',
                    kernel_regularizer = tfk.regularizers.L2(l2=1e-2),
                    activation="relu")(res)
    x = tfkl.Dropout(dropout)(x)
    
    x = tfkl.Conv1D(filters=inputs.shape[-1], kernel_size=1,activation="relu")(x)
    x = tfkl.LayerNormalization(epsilon=1e-6)(x)
    
    return x + res

In [109]:
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    mlp_dropout=0,
    dropout=0
):
    inputs = tfk.Input(shape=input_shape)
    x = inputs
    
    
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = tfkl.GlobalAveragePooling1D(data_format='channels_last')(x)
    
    for dim in mlp_units:
        x = tfkl.Dense(dim, activation="leaky_relu",
                       kernel_initializer = tfk.initializers.HeUniform(seed),
                       kernel_regularizer = tfk.regularizers.L2(l2=1e-2))(x)
        x = tfkl.Dropout(mlp_dropout)(x)
    
    outputs = tfkl.Dense(classes, 
                         activation="softmax", 
                         kernel_initializer = tfk.initializers.GlorotUniform(seed),
                         kernel_regularizer = tfk.regularizers.L2(l2=1e-2),
                         name = 'output_layer')(x)
    return tfk.Model(inputs, outputs)

In [110]:
def final_build(hp):

    num_heads=hp.Choice("num_head", values = [3,6,12])
    
    model = build_model(
        input_shape,
        head_size=int(36/num_heads),
        num_heads=num_heads,
        ff_dim=hp.Choice("ff_dim", values=[64,128]),
        num_transformer_blocks=hp.Choice("num_tr_blocks", values=[6,12]),
        mlp_units=[32],
        mlp_dropout=0,
        dropout=hp.Choice("dropout_feat_extr", values=[0.25, 0.5])
    )

    learning_rate = hp.Float("lr", min_value=5e-4, max_value=1e-2, sampling="log")
    
    model.compile(loss=tfk.losses.SparseCategoricalCrossentropy(), optimizer=tfk.optimizers.Adam(learning_rate), metrics='sparse_categorical_accuracy')
    
    return model

In [111]:
final_build(keras_tuner.HyperParameters())

<keras.engine.functional.Functional at 0x7f18e8c28bd0>

In [112]:
output_dir = '/kaggle/working/'

In [113]:
tuner = keras_tuner.RandomSearch(
    hypermodel=final_build,
    objective="val_sparse_categorical_accuracy",
    max_trials=10,
    executions_per_trial=1,
    overwrite=True,
    directory= output_dir,
    project_name="Transformer",
)

tuner.search_space_summary()

Search space summary
Default search space size: 5
num_head (Choice)
{'default': 3, 'conditions': [], 'values': [3, 6, 12], 'ordered': True}
ff_dim (Choice)
{'default': 64, 'conditions': [], 'values': [64, 128], 'ordered': True}
num_tr_blocks (Choice)
{'default': 6, 'conditions': [], 'values': [6, 12], 'ordered': True}
dropout_feat_extr (Choice)
{'default': 0.25, 'conditions': [], 'values': [0.25, 0.5], 'ordered': True}
lr (Float)
{'default': 0.0005, 'conditions': [], 'min_value': 0.0005, 'max_value': 0.01, 'step': None, 'sampling': 'log'}


In [None]:
tuner.search(x_train, y_train, 
             epochs=40, 
             validation_data=(x_val, y_val), 
             #class_weight = class_weights
            )

Trial 4 Complete [00h 02m 28s]
val_sparse_categorical_accuracy: 0.5843621492385864

Best val_sparse_categorical_accuracy So Far: 0.6625514626502991
Total elapsed time: 00h 11m 15s

Search: Running Trial #5

Value             |Best Value So Far |Hyperparameter
3                 |3                 |num_head
128               |64                |ff_dim
6                 |12                |num_tr_blocks
0.5               |0.25              |dropout_feat_extr
0.0011917         |0.0014423         |lr

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
 11/110 [==>...........................] - ETA: 2s - loss: 1.7736 - sparse_categorical_

In [None]:
tuner.results_summary()

### Final choice

In [None]:
best_hps = tuner.get_best_hyperparameters(5)
model = final_build(best_hps[0])
model.summary()

In [None]:
# Train the model
history = model.fit(
    x = x_train,
    y = y_train,
    validation_data = (x_val, y_val),
    #class_weight = class_weights,
    batch_size = batch_size,
    epochs = epochs,
    callbacks = [tfk.callbacks.EarlyStopping(monitor='val_sparse_categorical_accuracy', mode='max', patience=80, restore_best_weights=True)]
).history

#### Performance

In [None]:
#plot the LOSS
best_epoch = np.argmax(history['val_sparse_categorical_accuracy'])
plt.figure(figsize=(17,4))
plt.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
plt.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
plt.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
plt.title('Categorical Crossentropy')
plt.legend()
plt.grid(alpha=.3)
plt.show()

#plot the ACCURACY
plt.figure(figsize=(17,4))
plt.plot(history['sparse_categorical_accuracy'], label='Training accuracy', alpha=.8, color='#ff7f0e')
plt.plot(history['val_sparse_categorical_accuracy'], label='Validation accuracy', alpha=.9, color='#5a9aa5')
plt.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
plt.title('Accuracy')
plt.legend()
plt.grid(alpha=.3)
plt.show()

In [None]:
# Predict the test set with the LSTM
predictions = model.predict(x_val)
predictions.shape

In [None]:
# Compute the confusion matrix
cm = confusion_matrix(y_val, np.argmax(predictions, axis=-1))

# Compute the classification metrics
accuracy = accuracy_score(y_val, np.argmax(predictions, axis=-1))
precision = precision_score(y_val, np.argmax(predictions, axis=-1), average='macro')
recall = recall_score(y_val, np.argmax(predictions, axis=-1), average='macro')
f1 = f1_score(y_val, np.argmax(predictions, axis=-1), average='macro')
print('Accuracy:',accuracy.round(4))
print('Precision:',precision.round(4))
print('Recall:',recall.round(4))
print('F1:',f1.round(4))

# Plot the confusion matrix
plt.figure(figsize=(10,8))
sns.heatmap(cm.T, cmap='Blues', xticklabels=list(classes_dict.values()), yticklabels=list(classes_dict.values()), annot = True)
plt.xlabel('True labels')
plt.ylabel('Predicted labels')
plt.show()

#Print the classification report
print(classification_report(y_val, np.argmax(predictions, axis=-1), target_names=classes_dict.values()))