#Imports and Configurations

In [14]:
from google.colab import drive
drive.mount('/content/drive')
%cd drive/MyDrive/Audio_Final_Project/Siamese_Network

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[Errno 2] No such file or directory: 'drive/MyDrive/Audio_Final_Project/Siamese_Network'
/content/drive/MyDrive/Audio_Final_Project/Siamese_Network


In [15]:
import pickle
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization,\
    Dropout, Activation, Lambda, concatenate
from tensorflow.keras.models import Model, Sequential

#Siamese Architecture

In [16]:
def build_conv_branch(name, input_shape):
        model = Sequential(name=name)
        # Layer 1
        model.add(Conv2D(64, (3, 3), padding='same', kernel_initializer='he_uniform', input_shape=input_shape))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        # Layer 2
        model.add(Conv2D(256, (3, 3), padding='same', kernel_initializer='he_uniform'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        # Layer 3
        model.add(Conv2D(128, (3, 3), padding='same', kernel_initializer='he_uniform'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

        return model

def flatten_conv(x, dropout_rate):
    x = Flatten()(x)
    x = Dense(256, kernel_initializer='he_uniform')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dropout_rate)(x)

    x = Dense(256, kernel_initializer='he_uniform')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dropout_rate)(x)

    x = Dense(128, kernel_initializer='he_uniform')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dropout_rate)(x)

    return x

def siamese_network(input_shape, loss_method, dropout_rate=0.2):
    if loss_method == 'triplet':
        input_anchor = Input(input_shape)
        input_positive = Input(input_shape)
        input_negative = Input(input_shape)

        model_inputs = [input_anchor, input_positive, input_negative]

        processed_anchor = build_conv_branch(name="triplet_branch", input_shape=input_shape)
        processed_positive = build_conv_branch(name="", input_shape=input_shape)
        processed_negative = build_conv_branch(name="", input_shape=input_shape)

        output_anchor = flatten_conv(processed_anchor(input_anchor), dropout_rate=dropout_rate)
        output_positive = flatten_conv(processed_positive(input_positive), dropout_rate=dropout_rate)
        output_negative = flatten_conv(processed_negative(input_negative), dropout_rate=dropout_rate)

        model_output = concatenate([output_anchor, output_positive, output_negative])

    else:
        input_a = Input(shape=input_shape)
        input_b = Input(shape=input_shape)
        model_inputs = [input_a, input_b]

        processed_a = build_conv_branch(name="contrast_branch", input_shape=input_shape)
        processed_b = build_conv_branch(name="", input_shape=input_shape)

        output_a = flatten_conv(processed_a(input_a), dropout_rate=dropout_rate)
        output_b = flatten_conv(processed_b(input_b), dropout_rate=dropout_rate)

        model_output = Lambda(lambda embeddings: K.abs(embeddings[0] - embeddings[1]))([output_a, output_b])

    model = Model(inputs=model_inputs, outputs=model_output)

    return model

#Loss Metrics

In [39]:
@tf.function
def contrastive_loss(y_true, y_pred, margin=1):
    square_pred = tf.square(y_pred)
    margin_square = tf.square(tf.maximum(margin - y_pred, 0))
    return tf.reduce_mean(y_true * square_pred + (1 - y_true) * margin_square)

@tf.function
def triplet_loss(y_true, y_pred, alpha=0.001):
    length = y_pred.shape[1]
    divided_size = int(length / 3)

    anchor = y_pred[:, :divided_size]
    positive = y_pred[:, divided_size:divided_size * 2]
    negative = y_pred[:, divided_size * 2:]
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=1)

    basic_loss = pos_dist - neg_dist + alpha
    loss = tf.maximum(basic_loss, 0.0)

    return loss

#Models Fit Function

In [42]:
def fit_and_save_model(datasets, input_shape, batch_size, loss_method, epochs, dtype):
    assert dtype in ['30_sec', '3_sec'], "dtype must be '30_sec' or '3_sec'"

    loss_metric = triplet_loss if loss_method == 'triplet' else contrastive_loss
    X_train, X_test, y_train, y_test = datasets
    model = siamese_network(input_shape, loss_method)
    print("Model is created, starting train process ...")
    opt = tf.compat.v1.train.AdamOptimizer(learning_rate=0.00001)
    model.compile(optimizer=opt, loss=loss_metric)
    early_stopping = EarlyStopping(monitor='val_loss', patience=20, verbose=1, mode='min', restore_best_weights=True)
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        batch_size=batch_size, epochs=epochs, verbose=1,
                        callbacks=[early_stopping])

    anchor_branch_layer = model.get_layer(f'{loss_method}_branch')
    weights_path = f'{dtype}/{loss_method}_branch.h5'
    anchor_branch_layer.save_weights(weights_path)

    pickle.dump(history.history, open(f'{dtype}/{loss_method}_history.pkl', 'wb'))

    return model

#Data Preprocessing

In [19]:
def load_dataset(loss_method, dataset_paths, is_train_process=True):
    if loss_method == 'triplet':
        if is_train_process:
            train_anchors = np.load(dataset_paths['train_anchors'])
            train_positives = np.load(dataset_paths['train_positives'])
            train_negatives = np.load(dataset_paths['train_negatives'])

            val_anchors = np.load(dataset_paths['val_anchors'])
            val_positives = np.load(dataset_paths['val_positives'])
            val_negatives = np.load(dataset_paths['val_negatives'])

            input_shape = train_anchors.shape[1:]
            y_train = np.zeros((train_anchors.shape[0],))
            y_val = np.zeros((val_anchors.shape[0],))

            X_train = [train_anchors, train_positives, train_negatives]
            X_val = [val_anchors, val_positives, val_negatives]

        else:
            val_anchors = np.load(dataset_paths['test_anchors'])
            val_positives = np.load(dataset_paths['test_positives'])
            val_negatives = np.load(dataset_paths['test_negatives'])

            input_shape = val_anchors.shape[1:]
            y_val = np.zeros((val_anchors.shape[0],))
            X_val = [val_anchors, val_positives, val_negatives]
            X_train, y_train = [], []

    else:
        if is_train_process:
            X_train = np.load(dataset_paths['X_train'])
            y_train = np.load(dataset_paths['y_train'])
            X_val = np.load(dataset_paths['X_val'])
            y_val = np.load(dataset_paths['y_val'])

            input_shape = X_train.shape[2:]
            X_train = [X_train[:, 0], X_train[:, 1]]
            X_val = [X_val[:, 0], X_val[:, 1]]

        else:
            X_val = np.load(dataset_paths['X_test'])
            y_val = np.load(dataset_paths['y_test'])

            input_shape = X_val.shape[2:]
            X_val = [X_val[:, 0], X_val[:, 1]]
            X_train, y_train = [], []

    return X_train, X_val, y_train, y_val, input_shape


#30 Seconds Files

In [20]:
triplets_path = '../datasets/30_sec_datasets/triplets_dataset'
dataset_paths_triplet = {f"{dataset}_{type}": f"{triplets_path}/{type}_{dataset}.npy"
                            for dataset in ['train', 'val', 'test'] for type in ['anchors', 'positives', 'negatives']}

pairs_path = '../datasets/30_sec_datasets/pairs_dataset/'
dataset_paths_pairs = {f"{d_type}_{dataset}": f"{pairs_path}{d_type}_{dataset}_pairs.npy" for dataset in ['train', 'val', 'test'] for d_type in ['X', 'y']}

##Triplet Loss Model

In [24]:
X_train, X_test, y_train, y_test, input_shape = load_dataset('triplet', dataset_paths_triplet, is_train_process=True)
datasets = [X_train, X_test, y_train, y_test]
print("Datasets are loaded")

Datasets are loaded


In [32]:
train_indices = np.arange(X_train[0].shape[0])
np.random.shuffle(train_indices)
X_train_shuffled = [X_train[i][train_indices] for i in range(len(X_train))]
y_train_shuffled = y_train[train_indices]

validation_indices = np.arange(X_test[0].shape[0])
np.random.shuffle(validation_indices)
X_validation_shuffled = [X_test[i][validation_indices] for i in range(len(X_test))]
y_validation_shuffled = y_test[validation_indices]
datasets = [X_train_shuffled, X_validation_shuffled, y_train_shuffled, y_validation_shuffled]

In [44]:
triplet_model = fit_and_save_model(datasets, input_shape, batch_size=4, loss_method='triplet', epochs=100, dtype='30_sec')

Model is created, starting train process ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Ep

In [None]:
triplet_model = fit_and_save_model(datasets, input_shape, batch_size=64, loss_method='triplet', epochs=100, dtype='30_sec')

Model is created, starting train process ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 22: early stopping


In [None]:
_, X_test, _, y_test, input_shape = load_dataset('triplet', dataset_paths_triplet, is_train_process=False)
predictions = triplet_model.predict(X_test)

##Contrastive Loss Model

In [21]:
X_train, X_test, y_train, y_test, input_shape = load_dataset('contrast', dataset_paths_pairs, is_train_process=True)
datasets = [X_train, X_test, y_train, y_test]
print("Datasets are loaded", input_shape)

Datasets are loaded (231, 232, 3)


In [22]:
contrast_model = fit_and_save_model(datasets, input_shape, batch_size=32, loss_method='contrast', epochs=100, dtype='30_sec')

Model is created, starting train process ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Ep

In [23]:
_, X_test, _, y_test, input_shape = load_dataset('contrast', dataset_paths_pairs, is_train_process=False)
predictions = contrast_model.predict(X_test)



#3 Seconds Files

In [None]:
triplets_path = '../datasets/3_sec_datasets/triplets_dataset'
dataset_paths_triplet = {f"{dataset}_{type}": f"{triplets_path}/{type}_{dataset}.npy"
                            for dataset in ['train', 'val', 'test'] for type in ['anchors', 'positives', 'negatives']}

pairs_path = '../datasets/3_sec_datasets/pairs_dataset/'
dataset_paths_pairs = {f"{d_type}_{dataset}": f"{pairs_path}{d_type}_{dataset}_pairs.npy" for dataset in ['train', 'val', 'test'] for d_type in ['X', 'y']}

##Triplet Loss Model

In [None]:
X_train, X_test, y_train, y_test, input_shape = load_dataset('triplet', dataset_paths_triplet, is_train_process=True)
datasets = [X_train, X_test, y_train, y_test]
print("Datasets are loaded", input_shape)

Datasets are loaded (6387, 231, 232, 3)


In [None]:
triplet_model = fit_and_save_model(datasets, input_shape, batch_size=64, loss_method='triplet', epochs=100, dtype='3_sec')

Model is created, starting train process ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 22: early stopping


In [None]:
_, X_test, _, y_test, input_shape = load_dataset('triplet', dataset_paths_triplet, is_train_process=False)
predictions = triplet_model.predict(X_test)

In [None]:
X_test.shape

##Contrastive Loss Model

In [None]:
X_train, X_test, y_train, y_test, input_shape = load_dataset('contrast', dataset_paths_pairs, is_train_process=True)
datasets = [X_train, X_test, y_train, y_test]
print("Datasets are loaded", input_shape)

Datasets are loaded (231, 232, 3)


In [None]:
contrast_model = fit_and_save_model(datasets, input_shape, batch_size=64, loss_method='contrast', epochs=100, dtype='3_sec')

Model is created, starting train process ...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 40: early stopping


In [None]:
X_train[0].shape

(6387, 231, 232, 3)

In [None]:
_, X_test, _, y_test, input_shape = load_dataset('contrast', dataset_paths_pairs, is_train_process=False)
predictions = contrast_model.predict(X_test)

