[View in Colaboratory](https://colab.research.google.com/github/selimelawwa/Speaker_Verification/blob/master/Testing_f1_equal_samples.ipynb)

In [0]:
#@title
# Install google-drive-ocamlfuse (a FUSE client for Google Drive) from the
# alessandro-strada PPA, together with the packages it needs.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
# NOTE: with `2>&1 > /dev/null` stdout is discarded, but stderr still reaches the
# cell output because the 2>&1 duplication happens before stdout is redirected.
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# Authenticate this Colab session and fetch the application-default credentials;
# their client id/secret are passed to google-drive-ocamlfuse below.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# First pass: run headless with no stdin and print only the output lines that
# contain "URL" (the authorization link to open in a browser).
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Read the verification code without echoing it in the notebook.
vcode = getpass.getpass()
# Second pass: feed the verification code to google-drive-ocamlfuse on stdin.
# NOTE(review): {vcode} is interpolated into a shell command line - acceptable
# for a one-off code, but do not reuse this pattern for long-lived secrets.
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

In [0]:
# Create the local mount point (no error if it already exists) and mount Google
# Drive on it; the "drive/..." paths used later in the notebook rely on this.
!mkdir -p drive
!google-drive-ocamlfuse drive

In [0]:
import tensorflow as tf
# Sanity check: the cell output is the GPU device name TensorFlow reports for
# this runtime, which shows whether a GPU is attached before evaluating.
tf.test.gpu_device_name()

In [0]:
# Extra packages installed into the Colab runtime.
# NOTE(review): versions are unpinned, so a rerun may pick up different releases;
# none of these packages is imported by the cells visible in this notebook view -
# confirm they are still needed.
!pip install tqdm
!pip install librosa
!pip install imbalanced-learn
!pip install pydub
!pip install python_speech_features

In [0]:
import numpy as np
import os
from sklearn.metrics.pairwise import cosine_similarity
from keras.models import load_model,Model
import numpy as np
# Input feature layout of one sample (per the original notes: time x frequency).
# NOTE(review): these three values are not read by any cell visible here.
feature_dim_1 = 80  # size of the time axis
feature_dim_2 = 40  # size of the frequency axis
channel = 1

# Locations of the pre-computed numpy arrays on the mounted Drive. The trailing
# slash matters wherever a file name is appended by plain string concatenation.
numpy_files_path = "drive/big-numpy-data/"
numpy_test_path = "drive/big-numpy-test/"

def get_labels(path):
    """Return the names of all entries directly under ``path``.

    Each entry name serves as a speaker label. The names come back in whatever
    order ``os.listdir`` yields them, i.e. no particular order is guaranteed.
    """
    return os.listdir(path)

In [0]:
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def _embed_sample(model, sample):
    """Run one 2-D sample through ``model`` as a batch of one with one channel."""
    return model.predict(sample.reshape(1, sample.shape[0], sample.shape[1], 1))


def test_model(model, test_numpy_files_path, threshold, enrollment_count=5, seed=None):
    """Evaluate speaker verification with an equal number of genuine and impostor trials.

    For every speaker file found under ``test_numpy_files_path``:

    1. Enrollment: the embeddings of the first ``enrollment_count`` samples are
       averaged into a single reference embedding.
    2. Genuine trials: every remaining sample of the same speaker is compared
       with the reference; a cosine similarity ``>= threshold`` is an accept
       (true positive), anything else a false negative.
    3. Impostor trials: the same number of trials is run against randomly chosen
       samples of other speakers (speakers are visited in shuffled order and
       wrap around if there are fewer speakers than trials); a similarity
       ``< threshold`` is a correct reject (true negative), anything else a
       false positive.

    Per-speaker precision, recall, F1 and the two accuracies are then averaged
    over all evaluated speakers. Ratios with a zero denominator (for example
    precision when nothing was accepted) count as 0.0 instead of raising.

    Args:
        model: object whose ``predict`` maps an array of shape
            ``(1, rows, cols, 1)`` to an embedding accepted by
            ``cosine_similarity``.
        test_numpy_files_path: directory containing one ``np.load``-able file
            per speaker; each file is indexed as ``[sample][rows][cols]``.
        threshold: minimum cosine similarity for a trial to be accepted.
        enrollment_count: number of leading samples per speaker used for
            enrollment (default 5).
        seed: optional seed for the impostor selection. ``None`` (default) draws
            from NumPy's global random state; an integer makes the evaluation
            repeatable.

    Returns:
        Tuple ``(precision, recall, f1, accuracy_with_true_data,
        accuracy_with_fake_data)``, each averaged over the evaluated speakers.

    Raises:
        ValueError: if ``enrollment_count`` is below 1, if fewer than two
            speakers are available, or if no speaker has enough samples for
            enrollment plus at least one genuine trial.
    """
    import warnings  # local import so this cell does not depend on the import cell

    if enrollment_count < 1:
        raise ValueError("enrollment_count must be at least 1")

    # Sorted so that a fixed seed always produces the same speaker pairings.
    labels = sorted(get_labels(test_numpy_files_path))
    if len(labels) < 2:
        raise ValueError("need at least two speakers to run impostor trials")

    rng = np.random if seed is None else np.random.RandomState(seed)

    sum_precision = 0.0
    sum_recall = 0.0
    sum_f1 = 0.0
    sum_accuracy_true = 0.0  # accuracy on the speaker's own (genuine) samples
    sum_accuracy_fake = 0.0  # accuracy on other speakers' (impostor) samples
    evaluated = 0

    for label in labels:
        user_files = np.load(os.path.join(test_numpy_files_path, label))
        trial_count = user_files.shape[0] - enrollment_count
        if trial_count < 1:
            # Not enough data to enroll and still test; skip rather than crash.
            warnings.warn(
                "skipping {!r}: {} sample(s) is not enough for {} enrollment "
                "sample(s) plus one trial".format(
                    label, user_files.shape[0], enrollment_count))
            continue

        # Enrollment: mean embedding of the first `enrollment_count` samples.
        user_embs = _embed_sample(model, user_files[0])
        for i in range(1, enrollment_count):
            user_embs = user_embs + _embed_sample(model, user_files[i])
        user_embs = user_embs / enrollment_count

        # Genuine trials: similarity should be high, so the trial is accepted.
        true_positive = 0
        false_negative = 0
        for i in range(enrollment_count, user_files.shape[0]):
            trial_embs = _embed_sample(model, user_files[i])
            similarity = cosine_similarity(user_embs, trial_embs)[0][0]
            if similarity >= threshold:
                true_positive += 1
            else:
                false_negative += 1

        # Impostor trials: one random sample from a different speaker per trial.
        impostor_labels = [other for other in labels if other != label]
        impostor_order = np.arange(len(impostor_labels))
        rng.shuffle(impostor_order)

        true_negative = 0
        false_positive = 0
        for i in range(trial_count):
            # Wrap around when there are more trials than impostor speakers.
            impostor = impostor_labels[impostor_order[i % len(impostor_order)]]
            impostor_files = np.load(os.path.join(test_numpy_files_path, impostor))
            sample_index = rng.randint(low=0, high=impostor_files.shape[0])
            trial_embs = _embed_sample(model, impostor_files[sample_index])
            similarity = cosine_similarity(user_embs, trial_embs)[0][0]
            if similarity < threshold:
                true_negative += 1
            else:
                false_positive += 1

        precision = _safe_ratio(true_positive, true_positive + false_positive)
        recall = _safe_ratio(true_positive, true_positive + false_negative)
        f1 = _safe_ratio(2 * (precision * recall), precision + recall)

        sum_precision += precision
        sum_recall += recall
        sum_f1 += f1
        sum_accuracy_true += true_positive / trial_count
        sum_accuracy_fake += true_negative / trial_count
        evaluated += 1

    if evaluated == 0:
        raise ValueError(
            "no speaker has more than {} sample(s); nothing to evaluate".format(
                enrollment_count))

    return (sum_precision / evaluated,
            sum_recall / evaluated,
            sum_f1 / evaluated,
            sum_accuracy_true / evaluated,
            sum_accuracy_fake / evaluated)



In [0]:
# Load the pre-trained model from the mounted Drive.
loaded_model = load_model('drive/model_21June.h5')
# Build an embedding extractor from it: keep the original input and take the
# output of the third-from-last layer, i.e. the final two layers are dropped.
inp = loaded_model.input
out = loaded_model.layers[-3].output
# New model = old model truncated at that layer; its predictions are the
# embeddings compared during evaluation.
model = Model(inp, out)

In [24]:
# Evaluate the embedding model on the test set at a similarity threshold of 0.7.
(overall_precision,
 overall_recall,
 overall_f1,
 overall_accuracy_with_true_data,
 overall_accuracy_with_fake_data) = test_model(
    model=model,
    test_numpy_files_path=numpy_test_path,
    threshold=0.7,
)
print("Testing with threshold: 0.7")
print("overall_accuracy_with_true_data", overall_accuracy_with_true_data,
      "overall_accuracy_with_fake_data", overall_accuracy_with_fake_data)
print("overall_precision; ", overall_precision,
      " overall_recall: ", overall_recall)
print("overall_f1:", overall_f1)

Testing with threshold: 0.7
overall_accuracy_with_true_data 0.7054980252252031 overall_accuracy_with_fake_data 0.9808118430143105
overall_precision;  0.9715896167670776  overall_recall:  0.7054980252252031
overall_f1: 0.7922697463077286


In [25]:
# Evaluate the embedding model on the test set at a similarity threshold of 0.65.
(overall_precision,
 overall_recall,
 overall_f1,
 overall_accuracy_with_true_data,
 overall_accuracy_with_fake_data) = test_model(
    model=model,
    test_numpy_files_path=numpy_test_path,
    threshold=0.65,
)
print("Testing with threshold: 0.65")
print("overall_accuracy_with_true_data", overall_accuracy_with_true_data,
      "overall_accuracy_with_fake_data", overall_accuracy_with_fake_data)
print("overall_precision; ", overall_precision,
      " overall_recall: ", overall_recall)
print("overall_f1:", overall_f1)

Testing with threshold: 0.65
overall_accuracy_with_true_data 0.7992825653326177 overall_accuracy_with_fake_data 0.9596852796698138
overall_precision;  0.9515833054305678  overall_recall:  0.7992825653326177
overall_f1: 0.8542308780849049


In [26]:
# Evaluate the embedding model on the test set at a similarity threshold of 0.6.
(overall_precision,
 overall_recall,
 overall_f1,
 overall_accuracy_with_true_data,
 overall_accuracy_with_fake_data) = test_model(
    model=model,
    test_numpy_files_path=numpy_test_path,
    threshold=0.6,
)
print("Testing with threshold: 0.6")
print("overall_accuracy_with_true_data", overall_accuracy_with_true_data,
      "overall_accuracy_with_fake_data", overall_accuracy_with_fake_data)
print("overall_precision; ", overall_precision,
      " overall_recall: ", overall_recall)
print("overall_f1:", overall_f1)

Testing with threshold: 0.6
overall_accuracy_with_true_data 0.8667682438024137 overall_accuracy_with_fake_data 0.931732157530077
overall_precision;  0.9266325268978536  overall_recall:  0.8667682438024137
overall_f1: 0.8885180944478699
