# Speaker Verification 

Import libraries

In [5]:
import pickle
import librosa
import numpy as np
from itertools import combinations, product
import random

Load the data

In [2]:
with open('hw4_trs.pkl', 'rb') as f:
    trs_data = pickle.load(f)
print(trs_data.shape)
with open('hw4_tes.pkl', 'rb') as f:
    tes_data = pickle.load(f)
print(tes_data.shape)

(500, 16180)
(200, 22631)


Process STFT

In [3]:
#train
trs_stft = []
#test
tes_stft = []

for i in range(500):
    X=librosa.stft(trs_data[i], n_fft=1024, hop_length=512)
    X=np.pad(X,((0,0),(0,45-X.shape[1])),'constant')
    trs_stft.append(np.abs(np.transpose(X)))
for i in range(200):
    T=librosa.stft(tes_data[i], n_fft=1024, hop_length=512)
    tes_stft.append(np.abs(np.transpose(T)))

Define number of pairs to consider for training

In [5]:
num_pairs = 20

Computer True pairs

In [6]:
def get_true_pairs(data):
    final_pairs = []
    for i in range(0,len(data),10):
        comb = list(combinations(data[i:i+10], 2))
        final_pairs.append(random.choices(comb, k = num_pairs))
    return final_pairs

In [7]:
trs_true_pairs = get_true_pairs(trs_stft)

In [8]:
tes_true_pairs = get_true_pairs(tes_stft)

Computer False pairs

In [9]:
def get_false_pairs(data):
    final_false = []
    for i in range(0,len(data),10):
        current = list(range(i,i+10))
        first = list(range(0,i))
        last = list(range(i+10,len(data)))
        final = first+ last
        prod = list(product(current, final))
        false_comb = (random.choices(prod, k = num_pairs))
        comb_list= []
        for comb in false_comb:
            comb_list.append((data[comb[0]],data[comb[1]]))
        final_false.append(comb_list)
        
    return final_false

In [10]:
trs_false_pairs = get_false_pairs(trs_stft)

In [11]:
tes_false_pairs = get_false_pairs(tes_stft)

Make data

In [12]:
def generate_data(true_pairs, false_pairs):
    final_true_pairs = []
    final_false_pairs = []
    true_labels = []
    false_labels = []
    for i in true_pairs:
        for j in i:
            final_true_pairs.append(j)
            true_labels.append(1)

    for i in false_pairs:
        for j in i:
            final_false_pairs.append(j)
            false_labels.append(0)
    final_labels = true_labels + false_labels
    final_data = final_true_pairs + final_false_pairs
    return final_data, final_labels

In [13]:
train_data, train_labels = generate_data(trs_true_pairs, trs_false_pairs)

In [14]:
test_data, test_labels = generate_data(tes_true_pairs, tes_false_pairs)

In [15]:
np.array(train_data).shape

(2000, 2, 45, 513)

In [16]:
from keras import models
from keras import layers
from keras.layers import Input, Dense, Lambda, MaxPool2D, Conv2D, Flatten
import keras.backend as K
from keras.models import Model,Sequential
from keras.regularizers import l2
from keras import Input
from keras import optimizers

Using TensorFlow backend.


In [17]:
baseinput1 = Input(shape=(45,513,1))
baseinput2 = Input(shape=(45,513,1))

model = Sequential()
model.add(Conv2D(filters=64, kernel_size=(3,9),activation='relu',strides=2, padding='same',input_shape=(45,513,1)))
model.add(MaxPool2D(pool_size=(2,2),strides=1, padding='valid'))
model.add(Conv2D(filters=64, kernel_size=(3,5),activation='relu',strides=2, padding='same',input_shape=(45,513,1)))
model.add(MaxPool2D(pool_size=(2,2),strides=1, padding='valid'))
model.add(Conv2D(filters=64, kernel_size=(1,3),activation='relu',strides=2, padding='same',input_shape=(45,513,1)))
model.add(MaxPool2D(pool_size=(2,2),strides=1, padding='valid'))
model.add(Conv2D(filters=64, kernel_size=(1,1),activation='relu',strides=2, padding='same',input_shape=(45,513,1)))
model.add(MaxPool2D(pool_size=(2,2),strides=1, padding='valid'))
model.add(Flatten())
model.add(Dense(1024, activation='relu'))

baseoutput1= model(baseinput1)
baseoutput2 = model(baseinput2)

print("output1",baseoutput1)
print("output2",baseoutput2)
# distance=Lambda(lambda tensors:K.abs(tensors[0]-tensors[1]))([output1,output2])
# print(distance.shape)



output1 Tensor("sequential_1/dense_1/Relu:0", shape=(?, 1024), dtype=float32)
output2 Tensor("sequential_1_1/dense_1/Relu:0", shape=(?, 1024), dtype=float32)


In [18]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 23, 257, 64)       1792      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 22, 256, 64)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 11, 128, 64)       61504     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 10, 127, 64)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 5, 64, 64)         12352     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 4, 63, 64)         0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 2, 32, 64)        

In [19]:
def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))


def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)

distance = Lambda(euclidean_distance,
                  output_shape=eucl_dist_output_shape)([baseoutput1, baseoutput2])

Siamese_model = Model([baseinput1, baseinput2], distance)


def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))

def acc(a, b):
    return K.mean(K.equal(a, K.cast(b<0.5, a.dtype)))
    
Siamese_model.compile(loss=contrastive_loss,optimizer=optimizers.Adam(0.0001), metrics = [acc])

In [20]:
#final_layer=Dense(1,activation='sigmoid')#(distance)
#Siamese_model=Model(inputs=[first,second],outputs=final_layer)
# Siamese_model.compile(loss='binary_crossentropy',optimizer=optimizers.Adam(0.0001), metrics = ['accuracy'])

In [21]:
Siamese_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 45, 513, 1)   0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 45, 513, 1)   0                                            
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 1024)         2112448     input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 1)            0           sequential_1[1][0]         

In [22]:
Siamese_model.fit([np.expand_dims(np.array(train_data)[:,0],-1),np.expand_dims(np.array(train_data)[:,1],-1)],np.array(train_labels),\
                  batch_size=10,epochs=20,\
                  validation_data = ([np.expand_dims(np.array(test_data)[:,0],-1),np.expand_dims(np.array(test_data)[:,1],-1)],\
                 np.array(test_labels)))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Train on 2000 samples, validate on 800 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x219916d1dd8>

In [23]:
predictions = Siamese_model.predict([np.expand_dims(np.array(test_data)[:,0],-1),np.expand_dims(np.array(test_data)[:,1],-1)])

In [26]:
len(predictions)
count = 0
for x,y in zip(predictions,test_labels):
    temp = 0
    print(x)
    if x < 0.5:
        temp=1
    if temp==y:
        count+=1
        

[0.40129486]
[0.22594629]
[0.3529852]
[0.17945689]
[0.23034655]
[0.4260963]
[0.39400625]
[0.18588144]
[0.25167125]
[0.3243593]
[0.41715217]
[0.3349954]
[0.5077506]
[0.41287428]
[0.3673569]
[0.18099289]
[0.22594629]
[0.5454541]
[0.5077506]
[0.40129486]
[0.20886666]
[0.67007023]
[0.19873625]
[0.22744939]
[0.35943824]
[0.55953217]
[0.29373392]
[0.5921633]
[0.32292774]
[0.33193043]
[0.43245068]
[0.198351]
[0.7841445]
[0.61699903]
[0.22744939]
[0.73344487]
[0.7841445]
[0.7290576]
[0.7290576]
[0.1971048]
[0.45370954]
[0.45370954]
[0.24057868]
[0.56011426]
[1.0946196]
[1.0231754]
[0.44992316]
[0.4165552]
[0.58732975]
[0.86318344]
[1.328255]
[0.67197114]
[0.7233881]
[0.8191278]
[0.33402947]
[0.29884386]
[0.20754388]
[0.9160494]
[0.5604217]
[1.0231754]
[0.39592913]
[0.28678387]
[0.3524798]
[0.27409324]
[0.29375604]
[0.38393608]
[0.22755039]
[0.38393608]
[0.41156653]
[0.41156653]
[0.39451241]
[0.28004685]
[0.24317922]
[0.41156653]
[0.2710624]
[0.29375604]
[0.2731707]
[0.2710624]
[0.35394278]
[0.

In [25]:
count/len(test_labels)

0.68375