## Speaker Verification using Siamese Network

In [None]:
# Load the pickled training and test utterance collections.
import pickle
import numpy as np

# NOTE: pickle.load can execute arbitrary code; only open files you trust.
with open('trs.pkl', 'rb') as train_handle:
    training_data = pickle.load(train_handle)

with open('tes.pkl', 'rb') as test_handle:
    testing_data = pickle.load(test_handle)

In [None]:
import itertools
import random

import librosa
import tensorflow as tf

In [None]:
#Data Processing
# Presumably mirrors the `L=45` default of generate_pairs_from_training_data;
# the call cells rely on that default rather than passing this global.
L = 45
# Keep the shape as the cell's last expression so the notebook displays it
# (as a non-final statement its value was silently discarded).
training_data[1].shape

In [None]:
def apply_stft(pair):
  """Return the transposed STFT magnitude of every utterance in ``pair``.

  Each utterance is transformed with ``librosa.stft`` (``n_fft=1024``,
  ``hop_length=512``), its absolute value is taken, and the result is
  transposed with ``tf.transpose``.

  Args:
    pair: iterable of utterances (presumably 1-D audio sample arrays --
      TODO confirm against the pickled data).

  Returns:
    A list holding one transposed magnitude tensor per utterance, in the
    same order as the input.
  """
  return [
      tf.transpose(tf.abs(librosa.stft(signal, n_fft=1024, hop_length=512)))
      for signal in pair
  ]

In [None]:
def generate_pairs_from_training_data(training_data, L=45, utterances_per_speaker=10, seed=None):
    """Build labelled same-speaker / different-speaker STFT pairs.

    ``training_data`` is treated as consecutive groups of
    ``utterances_per_speaker`` utterances, one group per speaker. For every
    speaker, ``L`` positive pairs (two distinct utterances of that speaker)
    and ``L`` negative pairs (one utterance of that speaker plus one utterance
    stored outside that speaker's group) are sampled without replacement.
    Every selected pair is then converted with ``apply_stft``.

    Args:
        training_data: sliceable, index-able collection of utterances ordered
            by speaker. Trailing utterances that do not fill a complete group
            never act as a speaker, but can still be drawn as the "other"
            side of a negative pair.
        L: number of positive pairs and number of negative pairs per speaker.
        utterances_per_speaker: size of each speaker's group (default 10).
        seed: optional seed for a private ``random.Random``. With ``None`` the
            module-level ``random`` generator is used.

    Returns:
        ``(pairs, labels)`` where ``pairs`` is ``np.array`` of the STFT pairs
        (all positives first, then all negatives) and ``labels`` is a matching
        array of 1s (positive) and 0s (negative).

    Raises:
        ValueError: if ``utterances_per_speaker`` is not positive, or if ``L``
            is negative or exceeds the number of distinct positive or negative
            pairs available for a speaker.
    """
    if utterances_per_speaker < 1:
        raise ValueError("utterances_per_speaker must be a positive integer")

    # A private generator keeps seeded runs reproducible without touching the
    # global random state; seed=None keeps using the shared module generator.
    rng = random if seed is None else random.Random(seed)

    total_utterances = len(training_data)
    num_speakers = total_utterances // utterances_per_speaker

    all_positive_pairs = []
    all_negative_pairs = []

    for speaker_index in range(num_speakers):
        start = speaker_index * utterances_per_speaker
        stop = start + utterances_per_speaker
        speaker_utterances = training_data[start:stop]

        positive_candidates = list(itertools.combinations(speaker_utterances, 2))
        if not 0 <= L <= len(positive_candidates):
            raise ValueError(
                f"L={L} is outside the range 0..{len(positive_candidates)} of distinct "
                f"positive pairs available from {utterances_per_speaker} utterances per speaker"
            )
        all_positive_pairs.extend(rng.sample(positive_candidates, L))

        # Everything stored outside this speaker's slice counts as "other".
        other_speakers_utterances = [
            training_data[i] for i in range(total_utterances) if not start <= i < stop
        ]
        negative_candidates = list(
            itertools.product(speaker_utterances, other_speakers_utterances)
        )
        if L > len(negative_candidates):
            raise ValueError(
                f"L={L} exceeds the {len(negative_candidates)} negative pairs available "
                f"for speaker {speaker_index}; at least two speakers are required"
            )
        all_negative_pairs.extend(rng.sample(negative_candidates, L))

    all_positive_pairs_stft = [apply_stft(pair) for pair in all_positive_pairs]
    all_negative_pairs_stft = [apply_stft(pair) for pair in all_negative_pairs]

    combined_pairs_stft = all_positive_pairs_stft + all_negative_pairs_stft
    labels = np.array([1] * len(all_positive_pairs_stft) + [0] * len(all_negative_pairs_stft))

    return np.array(combined_pairs_stft), labels

In [None]:
# Build the training pairs; labels are 1 for same-speaker pairs and 0 otherwise.
# NOTE: later cells refer to this array as `X_trian` (sic), so the spelling is kept.
X_trian, y_train=generate_pairs_from_training_data(training_data)

In [None]:
# Inspect the pair array's shape: number of pairs, 2 utterances per pair,
# followed by the axes of each transposed STFT magnitude.
X_trian.shape

(4500, 2, 32, 513)

In [None]:
# Model hyper-parameters.
# Size of the GRU encoding of each utterance.
gru_units = 128
# Shape of one utterance fed to the model: `None` leaves the first (time-step)
# dimension unspecified; 513 matches the last axis of the pair array
# (513 = 1024 // 2 + 1 for the n_fft=1024 used in apply_stft).
input_shape = (None, 513)

In [None]:
def create_siamese_network_with_gru(input_shape, gru_units=128):
    """Build a two-input Siamese GRU model that scores a pair of sequences.

    Both inputs are encoded by the *same* GRU layer, so the two branches share
    weights. The two encodings are compared with a normalized dot product
    (cosine similarity), and a single sigmoid unit turns that similarity into
    a probability.

    Args:
        input_shape: shape of one input sequence, excluding the batch axis.
        gru_units: number of units in the shared GRU encoder.

    Returns:
        An uncompiled ``tf.keras.Model`` that takes ``[input1, input2]`` and
        outputs one sigmoid value per pair.
    """
    input1 = tf.keras.Input(shape=input_shape)
    input2 = tf.keras.Input(shape=input_shape)

    # A single encoder instance applied to both inputs: creating two separate
    # GRU layers would train two independent encoders, which is not Siamese.
    # The layer size now honours `gru_units` instead of a hard-coded 128.
    shared_encoder = tf.keras.layers.GRU(gru_units)
    output_1 = shared_encoder(input1)
    output_2 = shared_encoder(input2)

    # normalize=True L2-normalizes both vectors, giving cosine similarity.
    similarity = tf.keras.layers.Dot(axes=-1, normalize=True)([output_1, output_2])

    # Output layer with sigmoid activation for binary classification
    output = tf.keras.layers.Dense(1, activation="sigmoid")(similarity)

    siamese_model = tf.keras.Model(inputs=[input1, input2], outputs=output)
    return siamese_model

In [None]:
# Pass the configured `gru_units` instead of repeating the literal 128, so the
# hyper-parameter cell stays the single place where the size is set.
model_1 = create_siamese_network_with_gru(input_shape, gru_units)

In [None]:
# Binary cross-entropy on the sigmoid output; labels are 1 (same speaker) or 0.
model_1.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
# X_trian is already an ndarray, so it is indexed directly: wrapping it in
# np.array() made two full copies of the whole pair array before slicing.
model_1.fit(x=[X_trian[:, 0], X_trian[:, 1]],
          y=y_train, batch_size=64, epochs=100)

Epoch 1/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.5117 - loss: 0.6947
Epoch 2/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6633 - loss: 0.6465
Epoch 3/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.7159 - loss: 0.5762
Epoch 4/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.7996 - loss: 0.5118
Epoch 5/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.8233 - loss: 0.4758
Epoch 6/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.8538 - loss: 0.4368
Epoch 7/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.8740 - loss: 0.4158
Epoch 8/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.8844 - loss: 0.3930
Epoch 9/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7cc5fa1757e0>

In [None]:
# Build the test set: positive (label 1) and negative (label 0) pairs drawn
# from the test utterances with the same pairing routine used for training.
x_test, y_test=generate_pairs_from_training_data(testing_data)

In [None]:
# x_test is already an ndarray; index it directly instead of copying the whole
# array twice with np.array() before slicing out each side of the pair.
res = model_1.evaluate([x_test[:, 0], x_test[:, 1]], y_test)

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6624 - loss: 0.7851


In [None]:
# Second entry of the evaluate() result, scaled to a percentage
# (presumably the 'accuracy' metric requested at compile time).
accuracy_percent = res[1] * 100
print("Test accuracy:", accuracy_percent, "%")

Test accuracy: 74.8888909816742 %


The test accuracy is well over 70% (about 74.9%) when using the Siamese network created above.