In [None]:
import os
from glob import glob
import random

import pandas as pd
from tqdm.notebook import tqdm
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
import joblib

import librosa

import warnings
warnings.filterwarnings('ignore') # to silence librosa warnings

from sklearn.model_selection import train_test_split

import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Only the first visible GPU is configured: it is exposed as a single
    # virtual device created with memory_limit=2048.
    first_gpu = gpus[0]
    try:
        memory_cap = tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)
        tf.config.experimental.set_virtual_device_configuration(first_gpu, [memory_cap])
    except RuntimeError as err:
        # Best effort: report the problem and keep going with TensorFlow's defaults.
        print(err)

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, Flatten, Dense, Lambda, Dropout, GlobalAveragePooling1D, GlobalMaxPooling1D, Concatenate, LeakyReLU
from tensorflow.keras.optimizers.legacy import Adam, RMSprop
from tensorflow.keras import backend as K
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.utils import plot_model

tqdm.pandas()

import seaborn as sns

from triplet_dataset import TripletDataset

%load_ext dotenv
%dotenv

## Dataset

In [None]:
def plot_spec(spec: np.ndarray):
    """Draw a single spectrogram with a colorbar and show the figure.

    Args:
        spec: 2-D array handed unchanged to ``librosa.display.specshow``; the
            y-axis is drawn with librosa's ``'mel'`` axis type.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    # `import librosa` alone does not expose the `display` submodule on older
    # librosa releases, so import it explicitly before using it.
    import librosa.display

    plt.figure(figsize=(10, 2))

    librosa.display.specshow(spec, y_axis='mel')
    plt.colorbar()

    plt.tight_layout()
    plt.show()

In [None]:
# Load the (X, y) arrays written by the training-data generation script.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load artifacts that come from a trusted source.
X, y = joblib.load("data_n2.joblib")

print(f"X.shape: {X.shape}")
print(f"y.shape: {y.shape}")

In [None]:
# Sanity check: for two samples, print the label and plot both spectrograms of the pair
for sample_idx in (0, -7):
    print(f"Label: {y[sample_idx]}")
    plot_spec(X[sample_idx][0])
    plot_spec(X[sample_idx][1])

## Build model

In [None]:
# Shape of one network input: X is indexed as X[sample][pair member], so the
# first two axes are dropped and the remaining axes describe a single spectrogram.
input_shape = X.shape[2:]
input_shape

In [None]:
class GlobalL2Pooling1D(tf.keras.layers.Layer):
    """Global pooling layer that reduces axis 1 to its L2 norm.

    For every remaining position the layer returns
    ``sqrt(sum(inputs ** 2))`` taken along axis 1, so that axis disappears
    from the output.
    """

    def call(self, inputs):
        """Return the L2 norm of ``inputs`` along axis 1."""
        squared = tf.square(inputs)
        energy = tf.reduce_sum(squared, axis=1)
        # NOTE(review): unlike euclidean_distance, no epsilon clamp is applied
        # before the square root; the gradient of sqrt is unbounded at 0.
        return tf.sqrt(energy)

# Single LeakyReLU instance (alpha=0.01) reused as the activation of the
# hidden Conv1D and Dense layers created in build_base_network.
leaky_relu_layer = LeakyReLU(alpha=0.01)

def euclidean_distance(embeddings):
    """Euclidean distance between two batches of embeddings.

    Args:
        embeddings: a pair ``(left, right)`` of tensors with matching shapes.

    Returns:
        The distance along axis 1, with that axis kept (``keepdims=True``).
    """
    left, right = embeddings
    squared_diff = K.square(left - right)
    squared_norm = K.sum(squared_diff, axis=1, keepdims=True)
    # Clamp to epsilon so the square root is never evaluated at exactly zero.
    clamped = K.maximum(squared_norm, K.epsilon())
    return K.sqrt(clamped)

def eucl_dist_output_shape(shapes):
    """Output shape of the distance layer.

    Args:
        shapes: a pair of input shapes; only the first one is read.

    Returns:
        ``(batch, 1)`` where ``batch`` is the leading entry of the first shape.
    """
    first_shape, _unused = shapes
    batch_dim = first_shape[0]
    return (batch_dim, 1)

def contrastive_loss_with_margin(margin):
    """Build a contrastive loss function closed over ``margin``.

    Args:
        margin: distance beyond which a pair with label 0 adds no loss.

    Returns:
        A ``contrastive_loss(y_true, y_pred)`` callable, where ``y_pred`` is a
        predicted distance. The result is element-wise (no reduction here).
    """
    def contrastive_loss(y_true, y_pred):
        # label 1: penalise the squared distance
        similar_term = y_true * K.square(y_pred)
        # label 0: penalise only the part of the distance that falls short of the margin
        shortfall = K.maximum(margin - y_pred, 0)
        dissimilar_term = (1 - y_true) * K.square(shortfall)
        return (similar_term + dissimilar_term)
    return contrastive_loss

# Base network is from:
# (1) Recommending music on Spotify with deep learning. Sander Dieleman. https://sander.ai/2014/08/05/spotify-cnns.html (accessed 2024-03-23).
def build_base_network(input_shape):
    """Build the embedding network applied to each input of a pair.

    Architecture: three Conv1D stages (256, 256 and 512 filters, kernel size 4),
    with max pooling by 4 and by 2 after the first two; mean, max and L2
    pooling over axis 1, concatenated; two Dense(2048) layers; and a final
    Dense(40) layer without activation. All hidden layers use the shared
    ``leaky_relu_layer`` activation.

    Args:
        input_shape: shape of a single input, without the batch axis.

    Returns:
        A Keras ``Model`` mapping one input to a 40-dimensional embedding.
    """
    inputs = Input(shape=input_shape)

    # (filters, pool size applied after the convolution); the last stage is not pooled
    conv_stages = ((256, 4), (256, 2), (512, None))
    features = inputs
    for n_filters, pool_size in conv_stages:
        features = Conv1D(filters=n_filters, kernel_size=4, activation=leaky_relu_layer)(features)
        if pool_size is not None:
            features = MaxPooling1D(pool_size=pool_size)(features)

    # global temporal pooling: three summaries of the same feature map
    pooling_classes = (GlobalAveragePooling1D, GlobalMaxPooling1D, GlobalL2Pooling1D)
    pooled_branches = [pooling_cls()(features) for pooling_cls in pooling_classes]
    pooled_features = Concatenate()(pooled_branches)

    hidden = pooled_features
    for _ in range(2):
        hidden = Dense(2048, activation=leaky_relu_layer)(hidden)

    # linear projection to the embedding space
    embedding = Dense(40)(hidden)

    return Model(inputs, embedding)

def build_siamese_network(base_network, input_shape):
    """Wrap ``base_network`` into a two-input model that outputs a distance.

    The same ``base_network`` object is called on both inputs, so both branches
    use the same weights. The embeddings are compared by a Lambda layer named
    ``output_layer`` that applies ``euclidean_distance``.

    Args:
        base_network: model turning one input into an embedding.
        input_shape: shape of a single input, without the batch axis.

    Returns:
        A Keras ``Model`` with inputs ``[input_left, input_right]`` and the
        distance between their embeddings as output.
    """
    left_in = Input(shape=input_shape, name="input_left")
    right_in = Input(shape=input_shape, name="input_right")

    # left branch first, then right, both through the shared network
    twin_embeddings = [base_network(branch) for branch in (left_in, right_in)]

    distance_layer = Lambda(
        euclidean_distance,
        name='output_layer',
        output_shape=eucl_dist_output_shape,
    )
    distance = distance_layer(twin_embeddings)

    return Model(inputs=[left_in, right_in], outputs=distance)

# The siamese model calls this one base network on both inputs, so the two
# branches share its weights.
base_network = build_base_network(input_shape)
model = build_siamese_network(base_network, input_shape)

# Contrastive loss with margin 1.0, optimised by Adam with a learning rate of 1e-5.
model.compile(optimizer=Adam(learning_rate=0.00001), loss=contrastive_loss_with_margin(margin=1.0))

model.summary()

In [None]:
# Layer-by-layer summary of the shared embedding network.
base_network.summary()

## Train

In [None]:
# Hold out 20% of the pairs (shuffled with a fixed seed) for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

# free up memory
# NOTE: X and y no longer exist after this cell; cells that read them
# cannot be re-run without reloading the data first.
del X, y

In [None]:
# Display the per-input shape again before training.
input_shape

In [None]:
# Train on the pairs: column 0 of X_train feeds the left input, column 1 the
# right input. The held-out split is passed as validation data, so the
# 'val_loss' curve is computed on X_test / y_test.
history = model.fit(
    [X_train[:,0], X_train[:,1]],
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=([X_test[:,0], X_test[:,1]], y_test),
)

In [None]:
# Training vs. held-out loss per epoch
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(loc='upper left')
plt.show()

In [None]:
# The model output is the distance between the two embeddings of each pair.
train_dists = model.predict([X_train[:,0], X_train[:,1]])
test_dists = model.predict([X_test[:,0], X_test[:,1]])

In [None]:
# Histogram of predicted pair distances: train on top, test below
panels = (
    ("Predicted distances Train", train_dists),
    ("Predicted distances Test", test_dists),
)
for panel_idx, (panel_title, panel_dists) in enumerate(panels, start=1):
    plt.subplot(2, 1, panel_idx)
    plt.title(panel_title)
    sns.histplot(panel_dists.ravel())
    plt.xlim(0.0, 2.0)

plt.tight_layout()
plt.show()

In [None]:
def compute_accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal their labels.

    Both inputs are flattened before they are compared. Without this, a
    column vector of shape (n, 1) compared with a flat vector of shape (n,)
    is broadcast into an (n, n) matrix and the mean of that matrix is a
    silently wrong accuracy.

    Args:
        y_true: array-like of ground-truth labels.
        y_pred: array-like of predicted labels with the same number of elements.

    Returns:
        The mean of the element-wise equality, a value between 0 and 1.
    """
    labels = np.ravel(y_true)
    predictions = np.ravel(y_pred)
    return np.mean(predictions == labels)

# A pair is predicted as label 1 when its distance is below the threshold
# (the contrastive loss pulls label-1 pairs together).
# The threshold is the mean distance on the *training* pairs and is shared by
# both splits: deriving it from test_dists would tune the decision rule on the
# very data it is then evaluated on.
distance_threshold = np.mean(train_dists)

y_pred_train = (train_dists < distance_threshold).ravel().astype(int)
train_accuracy = compute_accuracy(y_train, y_pred_train)

y_pred_val = (test_dists < distance_threshold).ravel().astype(int)
val_accuracy = compute_accuracy(y_test, y_pred_val)

print("Train Accuracy = {} Val accuracy = {}".format(train_accuracy, val_accuracy))

In [None]:
from sklearn.metrics import roc_curve, confusion_matrix, ConfusionMatrixDisplay

# ROC curve
# The model outputs distances (small = label 1), while roc_curve expects a
# score that grows with the positive class, hence the `1 - test_dists` flip.
fpr, tpr, thresholds = roc_curve(y_test, 1 - test_dists)

plt.figure()
plt.plot(fpr, tpr, color='blue', lw=2, label='ROC curve')
# dashed diagonal from (0, 0) to (1, 1) as a visual reference
plt.plot([0, 1], [0, 1], color='gray', lw=2, linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

In [None]:
# Confusion Matrix
# Built from y_pred_val, i.e. the thresholded test distances computed in the
# accuracy cell above, so it depends on the threshold chosen there.
cm = confusion_matrix(y_test, y_pred_val)

disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Persist the trained siamese model in HDF5 format.
# NOTE(review): reloading presumably needs custom_objects for GlobalL2Pooling1D,
# euclidean_distance and the contrastive loss - confirm before relying on this file.
model.save('siamese_model_n2.h5')