# Recurrent Neural Network
This notebook trains a recurrent neural network on the mel-spectrogram features.

In [1]:
import json
import yaml
from pathlib import Path
from lib_util import utils, plot


import numpy as np
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix

from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping 

from keras.layers import (
    Input,
    GlobalAvgPool1D,
    Dense,
    Bidirectional,
    GRU,
    Dropout,
)
from keras.models import Model, load_model
from keras.optimizers import Adam

from tensorflow.python.ops import math_ops
from tensorflow.python.framework import ops
from tensorflow.python.keras import backend as K
from tensorflow.python.ops import clip_ops

%load_ext autoreload
%autoreload 2
%matplotlib inline

2023-01-12 23:27:20.838999: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Define crossentropy and accuracy metric for the training routine

For the metric we use binary accuracy; for the loss we use a binary cross-entropy in which the positive-class term is weighted by a factor of 4.

In [2]:
def custom_binary_accuracy(y_true, y_pred, threshold=0.5):
    """Fraction of outputs whose thresholded prediction matches the thresholded label.

    Both ``y_true`` and ``y_pred`` are binarised with ``> threshold`` and compared
    element-wise. The matches are averaged over the last axis, so every value of
    the result lies between 0 and 1.

    Args:
        y_true: Ground-truth labels.
        y_pred: Model outputs that are compared against ``threshold``.
        threshold: Decision boundary applied to both inputs (default 0.5).

    Returns:
        Tensor with the last axis reduced, holding the mean agreement.
    """
    y_pred = ops.convert_to_tensor(y_pred)
    # Bring the labels to the prediction dtype first, the same way
    # custom_binary_crossentropy does, so that labels of another dtype can be
    # compared against the threshold and against the binarised predictions.
    y_true = math_ops.cast(y_true, y_pred.dtype)
    threshold = math_ops.cast(threshold, y_pred.dtype)

    pred_is_positive = math_ops.cast(y_pred > threshold, y_pred.dtype)
    true_is_positive = math_ops.cast(y_true > threshold, y_pred.dtype)

    return K.mean(math_ops.equal(true_is_positive, pred_is_positive), axis=-1)


def custom_binary_crossentropy(y_true, y_pred, pos_weight=4.0):
    """Positive-weighted binary cross-entropy computed from probabilities.

    For every output the contribution is
    ``-(pos_weight * y * log(p) + (1 - y) * log(1 - p))`` and the contributions
    are summed over the last axis. Because of the sum and the weighting the
    result is NOT confined to the interval [0, 1]; it grows with the number of
    outputs and with how wrong the predictions are.

    Args:
        y_true: Ground-truth labels; cast to the dtype of ``y_pred``.
        y_pred: Predicted probabilities.
        pos_weight: Multiplier for the positive-class term. The default of 4
            reproduces the weighting this notebook has always used.

    Returns:
        Tensor with the last axis reduced, holding the summed loss.
    """
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    epsilon_ = K._constant_to_tensor(K.epsilon(), y_pred.dtype.base_dtype)
    # Keep the probabilities inside [eps, 1 - eps] so the logarithms stay finite.
    output = clip_ops.clip_by_value(y_pred, epsilon_, 1.0 - epsilon_)

    # Compute cross entropy from probabilities. Epsilon is added once more inside
    # the logs; this is redundant after clipping but kept so that loss values
    # stay comparable with earlier runs.
    positive_term = pos_weight * y_true * math_ops.log(output + K.epsilon())
    negative_term = (1 - y_true) * math_ops.log(1 - output + K.epsilon())
    return K.sum(-(positive_term + negative_term), axis=-1)

# Define the model

Define the RNN model structure.

In [3]:
def rnn_model(model_config, n_classes):
    """Build, compile and summarise the bidirectional-GRU classifier.

    Args:
        model_config: Mapping providing ``d_model``, ``n_layers``,
            ``init_learning_rate``, ``dropout_rate`` and ``activations``
            (index 0 is used for the hidden dense layer, index 1 for the
            output layer).
        n_classes: Number of output units; the hidden dense layer has
            ``4 * n_classes`` units.

    Returns:
        The compiled Keras ``Model``. Its summary is printed as a side effect.
    """
    hidden_units = model_config['d_model']
    total_layers = model_config['n_layers']
    learning_rate = model_config['init_learning_rate']
    drop_prob = model_config['dropout_rate']
    act_names = model_config['activations']

    # Sequences of arbitrary length whose feature size equals d_model.
    sequence_in = Input((None, hidden_units))

    # A bidirectional layer reads the sequence forwards and backwards.
    hidden = Bidirectional(GRU(hidden_units, return_sequences=True))(sequence_in)

    # Additional recurrent layers are only stacked when there is more than one
    # class; their count is n_layers - 3.
    extra_recurrent = total_layers - 3 if n_classes > 1 else 0
    for _ in range(extra_recurrent):
        hidden = Bidirectional(GRU(hidden_units, return_sequences=True))(hidden)

    hidden = Dropout(drop_prob)(hidden)
    pooled = GlobalAvgPool1D()(hidden)
    dense_hidden = Dense(4 * n_classes, activation=act_names[0])(pooled)
    class_scores = Dense(n_classes, activation=act_names[1])(dense_hidden)

    classifier = Model(inputs=sequence_in, outputs=class_scores)
    classifier.compile(
        optimizer=Adam(learning_rate),
        loss=custom_binary_crossentropy,
        metrics=[custom_binary_accuracy],
    )
    classifier.summary()

    return classifier

# Main routine

Load the configuration for this model

In [6]:
# Settings for the 'rnn' model
config = utils.get_config('rnn')

# Pull out the values that later cells use directly
model_name, batch_size, epochs = (
    config[key] for key in ('model_name', 'batch_size', 'epochs')
)
data_dir = Path(config['data_dir'])

# Load preprocessed data

Load the NumPy arrays and the label-class mapping, then split the data into train, validation and test sets.

In [8]:
melspec_data = utils.load_sliced_numpy_array('melspec_features', data_dir=data_dir)
labels = np.load(data_dir/'labels.npy')

labels_to_id = utils.get_class_mapping()

# First split: carve off the training set; the remainder is shared between
# validation and test.
mel_train, mel_test_val, lab_train, lab_test_val = train_test_split(
    melspec_data,
    labels,
    train_size=config['train_set_size'],
    random_state=config['random_state'],
)

# Second split: the first pair returned by train_test_split is the validation
# set here, so the validation share has to be passed as `train_size`. Passing
# it as `test_size` would hand the validation share to the test set instead.
# The share is expressed relative to what is left after removing the train set.
val_share = config['val_set_size'] / (1 - config['train_set_size'])
mel_val, mel_test, lab_val, lab_test = train_test_split(
    mel_test_val,
    lab_test_val,
    train_size=val_share,
    shuffle=False,
)

# Check the shapes of the split sets: features and labels must pair up ...
assert mel_train.shape[0] == lab_train.shape[0], "train features/labels differ in length"
assert mel_val.shape[0] == lab_val.shape[0], "validation features/labels differ in length"
assert mel_test.shape[0] == lab_test.shape[0], "test features/labels differ in length"
# ... and all splits must agree on the second dimension.
assert mel_train.shape[1] == mel_test.shape[1] == mel_val.shape[1], "feature dimension differs between splits"
assert lab_train.shape[1] == lab_test.shape[1] == lab_val.shape[1], "label dimension differs between splits"

Loaded 12 files:
    melspec_features_001.npy
    melspec_features_002.npy
    melspec_features_003.npy
    melspec_features_007.npy
    melspec_features_012.npy
    melspec_features_006.npy
    melspec_features_010.npy
    melspec_features_004.npy
    melspec_features_005.npy
    melspec_features_011.npy
    melspec_features_008.npy
    melspec_features_009.npy


# Set up the model callbacks

For the parameter values, refer to the config file. As callbacks we use a regular backup of the model as checkpoints, an early-stopping mechanism to prevent overfitting on the training data, and a learning-rate reducer. The learning-rate reducer shrinks the update step when the monitored validation metric no longer improves.

In [9]:
train_config = config['training']

# Save the model to `model_name` during training; the monitored quantity, its
# direction and whether only the best state is kept come from the config.
checkpoint = ModelCheckpoint(
    model_name,
    monitor=train_config['monitor'],
    mode=train_config['monitor_mode'],
    save_best_only=train_config['save_best_weights'],
    save_weights_only=False,
    verbose=1,
)

# Stop when val_loss has stopped decreasing (patience of 10 epochs, monitoring
# starts from epoch 5) and restore the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    start_from_epoch=5,
    restore_best_weights=True,
    verbose=1,
)

# Reduce the learning rate when the monitored quantity stops improving
lr_reduce_config = train_config['lr_reducing']
lr_reducing_on_platteau = ReduceLROnPlateau(
    monitor=lr_reduce_config['monitor'],
    mode=lr_reduce_config['mode'],
    patience=lr_reduce_config['patience'],
    min_lr=lr_reduce_config['min_lr'],
)

# Train routine

In [None]:
# The number of mel bands is taken from the preprocessing config
pp_config = utils.get_config('preprocess')
n_mels = pp_config['melspectogram']['n_mels']

# Bring every split into the layout (samples, -1, n_mels). The test split is
# reshaped here as well because the evaluation cell feeds it to the same model
# as the train and validation splits.
mel_train = mel_train.reshape(mel_train.shape[0], -1, n_mels)
mel_val   = mel_val.reshape(mel_val.shape[0], -1, n_mels)
mel_test  = mel_test.reshape(mel_test.shape[0], -1, n_mels)

model = rnn_model(config['model_structure'], n_classes=len(labels_to_id))

# `use_multiprocessing` is not passed: it only concerns generator / Sequence
# inputs, and the data here are in-memory arrays.
history = model.fit(
    x=mel_train,
    y=lab_train,
    validation_data=(mel_val, lab_val),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[checkpoint, lr_reducing_on_platteau, early_stopping],
    verbose=2,
)


In [None]:
# Bundle the trained model, its fit history and the configuration that
# produced them, then hand the bundle to the project's save helper.
to_dump = dict(model=model, history=history, config=config)

utils.save_training(to_dump, model_name)

In [None]:
history = utils.load_history(model_name)

# Plots of accuracy and loss for the train and validation data per epoch.
# The figures are saved under this model's own name so they cannot overwrite
# the plots of another model.
# NOTE(review): the model is compiled with `custom_binary_accuracy`; confirm
# that the stored history really exposes 'accuracy' / 'val_accuracy' keys.
plot.plot_hist(
    history,
    ('accuracy', 'val_accuracy'),
    legends=('train', 'validation'),
    title='Accuracy',
    y_label='accuracy ->',
    x_label='epochs ->',
    save_to=f'Plots/{model_name}_{epochs}_accuracy',
)
plot.plot_hist(
    history,
    ('loss', 'val_loss'),
    legends=('train', 'validation'),
    title='Loss',
    y_label='loss ->',
    x_label='epochs ->',
    save_to=f'Plots/{model_name}_{epochs}_loss',
)

In [None]:
def predict(model, data):
    """Return, per sample, the index of the largest model output along the last axis."""
    return np.argmax(model.predict(data), axis=-1)


def report_split(model, features, label_matrix, split_name):
    """Predict one data split and print its accuracy, macro-F1 and micro-F1.

    Args:
        model: Object whose ``predict`` method is applied to ``features``.
        features: Inputs of the split.
        label_matrix: Labels of the split; reduced to class indices with an
            argmax over the last axis.
        split_name: Upper-case name used in the printed lines (e.g. ``'TRAIN'``).

    Returns:
        Tuple ``(y_true, y_pred)`` of class-index arrays.
    """
    y_pred = predict(model, features)
    y_true = np.argmax(label_matrix, axis=-1)
    print(f"ACCURACY FOR {split_name} SET {accuracy_score(y_true, y_pred)*100:.4f} %")
    for average in ('macro', 'micro'):
        score = f1_score(y_true, y_pred, average=average)
        print(f"{average.upper()} F1 SCORE FOR {split_name} SET {score*100:.4f} %")
    return y_true, y_pred


# Loading in the model
model = utils.load_model(model_name)

# Training prediction
y_true_train, y_pred_train = report_split(model, mel_train, lab_train, 'TRAIN')

# Validation prediction
y_true_val, y_pred_val = report_split(model, mel_val, lab_val, 'VAL')

# Test prediction
y_true_test, y_pred_test = report_split(model, mel_test, lab_test, 'TEST')

In [None]:
class_names = utils.get_class_names()

# Confusion matrix of the predicted labels versus the true labels on the test
# set; normalize='true' scales every row (true class) to sum to 1.
conf_mat = confusion_matrix(y_true_test, y_pred_test, normalize='true')
conf_mat = np.round(conf_mat, 2)

conf_mat_df = pd.DataFrame(conf_mat, columns=class_names, index=class_names)

# The notebook only imports the `plot` module, so the helper has to be reached
# through it; a bare `plot_conf_mat` is undefined on a fresh kernel.
plot.plot_conf_mat(conf_mat_df, save_to=f"Plots/{model_name}{epochs}_test_conf_mat.png")