# Benchmarking ASVs

In [1]:
from IPython.display import display, HTML
from sklearn.metrics import roc_curve
import pandas as pd
import numpy as np
import pickle
import os

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib

In [2]:
# Render floats in displayed tables with a thousands separator and four decimals.
pd.options.display.float_format = '{:,.4f}'.format

### Parameters

- nets ['arch/vxxx']: comma-separated list of ASVs to test
- tars [None, 1.0, 0.1]: list of target false acceptance rates, in percent, at which the decision threshold is set (None stands for the EER operating point)
- pols ['avg', 'any']: comma-separated list of verification policies to test
- thrs_types [None, 'avg', 'any']: list of score-grouping modes used when measuring verification error (None stands for the raw, ungrouped trial scores)

In [15]:
# Networks to benchmark, given as paths relative to ../data/pt_models.
nets = ['xvector/v000']

In [16]:
# Operating points: None selects the EER threshold; a number is a target FAR in percent.
tars = [None, 1.0]

In [17]:
# Verification policies: 'any' accepts when at least one score reaches the threshold,
# 'avg' compares the mean score against it.
pols = ['avg', 'any']

In [18]:
# Score-grouping modes for the verification tables; None keeps the raw, ungrouped trials.
thrs_types = [None]

### Verification Performance

In [19]:
def tuneThreshold(scores, labels, target_fa=None):
    """Select a decision threshold from the ROC curve of a set of trials.

    Parameters
    ----------
    scores : array-like
        Similarity score of each trial.
    labels : array-like
        Ground-truth label of each trial; 1 is treated as the positive class.
    target_fa : float or None
        Target false acceptance rate in percent. When None, the threshold at
        which FAR and FRR are closest (the EER operating point) is returned.

    Returns
    -------
    tuple
        (threshold, far, frr) at the selected operating point, with both
        rates expressed in percent.
    """
    far, tpr, thresholds = roc_curve(labels, scores, pos_label=1)
    # Work in percent so that target_fa can be compared directly.
    frr = (1 - tpr) * 100
    far = far * 100
    # Compare against None explicitly: a target of 0 is a valid request and
    # must not fall through to the EER branch.
    if target_fa is not None:
        idx = np.nanargmin(np.absolute(target_fa - far))
    else:
        idx = np.nanargmin(np.absolute(frr - far))
    return thresholds[idx], far[idx], frr[idx]

In [20]:
def count_far(targets, similarities, thr):
    """Return the false acceptance rate, in percent, at threshold ``thr``.

    Only trials whose target equals 0 are considered; such a trial counts as
    a false acceptance when its similarity is greater than or equal to
    ``thr``. Raises ZeroDivisionError when there is no trial with target 0.
    """
    impostor_scores = [s for t, s in zip(targets, similarities) if t == 0]
    accepted = sum(1 for s in impostor_scores if s >= thr)
    return accepted / len(impostor_scores) * 100

In [21]:
def count_frr(targets, similarities, thr):
    """Return the false rejection rate, in percent, at threshold ``thr``.

    Only trials whose target equals 1 are considered; such a trial counts as
    a false rejection when its similarity is strictly below ``thr``. Raises
    ZeroDivisionError when there is no trial with target 1.
    """
    genuine_scores = [s for t, s in zip(targets, similarities) if t == 1]
    rejected = sum(1 for s in genuine_scores if s < thr)
    return rejected / len(genuine_scores) * 100

In [22]:
# Load the per-trial verification results of every network, drop the
# auto-generated 'Unnamed' columns and name the two remaining columns.
vox1_test_results = {}
for net in nets:
    trials = pd.read_csv(os.path.join('../data/pt_models', net, 'test_vox1_sv_test.csv'))
    keep = ~trials.columns.str.contains('^Unnamed')
    trials = trials.loc[:, keep]
    trials.columns = ['label', 'score']
    vox1_test_results[net] = trials

In [23]:
# Load the training history of every network, dropping the auto-generated
# 'Unnamed' columns.
train_history_results = {}
for net in nets:
    history = pd.read_csv(os.path.join('../data/pt_models', net, 'history.csv'))
    unnamed = history.columns.str.contains('^Unnamed')
    train_history_results[net] = history.loc[:, ~unnamed]

In [24]:
def groupScores(scores, labels, thrs_type, size=8):
    """Collapse consecutive trial scores into grouped scores.

    When ``thrs_type`` is None the inputs are returned untouched. Otherwise
    the scores are cut into consecutive windows of ``size`` entries and each
    window yields two grouped trials, in this order:

    * the odd-position scores of the window, labelled 0;
    * the even-position scores of the window, labelled 1.

    Each subset is reduced with ``np.mean`` when ``thrs_type`` is 'avg' and
    with ``np.max`` for any other value. ``labels`` is not consulted when
    grouping: the output labels come from position parity alone.

    Returns a ``(grouped_scores, grouped_labels)`` pair of lists.
    """
    if thrs_type is None:
        return scores, labels

    if thrs_type == 'avg':
        reduce = np.mean
    else:
        reduce = np.max

    grp_scores = []
    grp_labels = []
    for start in range(0, len(scores), size):
        window = scores[start:start + size]
        # (offset into the window, label assigned to that subset)
        for offset, label in ((1, 0), (0, 1)):
            grp_scores.append(reduce(window[offset::2]))
            grp_labels.append(label)
    return grp_scores, grp_labels

In [25]:
# Tabulate verification error per grouping mode, operating point and network.
#   ress[thrs_type][tar][net] -> [mean(far, frr), far, frr, thr, number of trials, last loss, last acc]
#   thrs[None][tar][net]      -> threshold tuned on the raw (ungrouped) trials
ress = {}
thrs = {}
# The raw mode (None) is always processed, and processed first: the grouped
# modes below and later cells read thrs[None], so it must exist regardless of
# the content or ordering of thrs_types.
for thrs_type in [None] + [t for t in thrs_types if t is not None]:
    ress[thrs_type] = {}
    thrs[thrs_type] = {}
    for tar in tars:
        ress[thrs_type][tar] = {}
        thrs[thrs_type][tar] = {}
        for net in nets:
            trials = vox1_test_results[net]
            # Last recorded values of the training history.
            loss = train_history_results[net]['loss'].values[-1]
            acc = train_history_results[net]['acc'].values[-1]
            if thrs_type is None:
                # Tune the threshold on the raw trials and remember it.
                thr, far, frr = tuneThreshold(trials['score'].values, trials['label'].values, tar)
                thrs[thrs_type][tar][net] = thr
            else:
                # Re-measure the error on grouped trials at the raw threshold.
                grp_scores, grp_labels = groupScores(trials['score'].values, trials['label'].values, thrs_type)
                thr = thrs[None][tar][net]
                far = count_far(grp_labels, grp_scores, thr)
                frr = count_frr(grp_labels, grp_scores, thr)
            ress[thrs_type][tar][net] = [np.mean([far, frr]), far, frr, thr, len(trials.index), loss, acc]

In [26]:
# Show one table per (grouping mode, operating point), with one row per network.
# Note: the 'eer' column holds mean(far, frr), which only coincides with the
# equal error rate at the EER operating point.
metrics = ['eer', 'far', 'frr', 'thr', 'no-trials', 'loss', 'acc']
for thrs_type in thrs_types:
    for tar in tars:
        tar_label = (thrs_type if thrs_type is not None else 'raw') + '  ' + ('FAR@'+str(tar) if tar is not None else 'EER')
        df = pd.DataFrame.from_dict(ress[thrs_type][tar], orient='index', columns=metrics)
        # Put the operating-point label above the metric names.
        df.columns = pd.MultiIndex.from_tuples([(tar_label, metric) for metric in metrics])
        df = df.sort_index()
        # Rendered through DataFrame.to_html(), so pd.options.display.float_format applies.
        display(HTML(df.to_html()))
        print()

Unnamed: 0_level_0,raw EER,raw EER,raw EER,raw EER,raw EER,raw EER,raw EER
Unnamed: 0_level_1,eer,far,frr,thr,no-trials,loss,acc
xvector/v000,12.5133,12.5133,12.5133,0.4682,37720,0.1421,0.9923





Unnamed: 0_level_0,raw FAR@1.0,raw FAR@1.0,raw FAR@1.0,raw FAR@1.0,raw FAR@1.0,raw FAR@1.0,raw FAR@1.0
Unnamed: 0_level_1,eer,far,frr,thr,no-trials,loss,acc
xvector/v000,21.4767,1.0021,41.9512,0.6128,37720,0.1421,0.9923





### Benchmark MV

In [27]:
def computeImpersonation(fp, thr, pol, size=10):
    """Count the users impersonated by the scores stored in a CSV file.

    The file must provide 'gender' and 'score' columns. Rows are processed in
    consecutive blocks of ``size`` rows; each block is treated as one user
    whose id is the block index and whose gender is read from the first row
    of the block.

    Parameters
    ----------
    fp : str
        Path of the CSV file.
    thr : float
        Decision threshold; a score is accepted when it is >= ``thr``.
    pol : str
        'any' marks a user as impersonated when at least one score of the
        block is accepted; any other value compares the mean score of the
        block against the threshold.
    size : int
        Number of consecutive rows that belong to one user.

    Returns
    -------
    tuple
        (imp_m, imp_f, user_ids_m, user_ids_f, tot_m, tot_f): number of
        impersonated male / female users, their ids, and the total number of
        male / female users found in the file.
    """
    df = pd.read_csv(fp)
    imp_m, tot_m = 0, 0
    imp_f, tot_f = 0, 0
    user_ids_f, user_ids_m = [], []
    for start in range(0, len(df), size):
        user_id = start // size
        gender = df.loc[start, 'gender']
        # .loc slicing is label based and inclusive on both ends.
        scores = df.loc[start:start + size - 1, 'score']
        if pol == 'any':
            impersonated = bool((scores >= thr).any())
        else:
            # Same acceptance rule (>=) as the 'any' policy, for consistency.
            impersonated = bool(np.mean(scores) >= thr)
        if gender == 'f':
            tot_f += 1
            if impersonated:
                imp_f += 1
                user_ids_f.append(user_id)
        elif gender == 'm':
            tot_m += 1
            if impersonated:
                imp_m += 1
                user_ids_m.append(user_id)
    # Sanity check written without a division so that files containing a
    # single gender do not raise ZeroDivisionError.
    assert imp_m <= tot_m and imp_f <= tot_f
    return imp_m, imp_f, user_ids_m, user_ids_f, tot_m, tot_f

In [32]:
# Score every master-voice sample file against each network's raw threshold.
#   mv_test_results[pol][tar][net][mvset][mvsam] -> tuple returned by computeImpersonation
mv_test_results = {}
for i1, pol in enumerate(pols):
    mv_test_results[pol] = {}
    for i2, tar in enumerate(tars):
        mv_test_results[pol][tar] = {}
        for i3, net in enumerate(nets):
            mv_test_results[pol][tar][net] = {}
            dp = os.path.join('../data/pt_models', net, 'mvcmp_any')
            # List the sets once per network instead of once per progress line.
            mvsets = os.listdir(dp)
            for i4, mvset in enumerate(mvsets):
                for mvsam in os.listdir(os.path.join(dp, mvset, 'v000')):
                    # Keep only files whose trailing '_<number>' suffix is at most 5.
                    if int(mvsam.split('.')[0].split('_')[-1]) <=5:
                        mv_test_results[pol][tar][net].setdefault(mvset, {})[mvsam] = computeImpersonation(os.path.join(dp, mvset, 'v000', mvsam), thrs[None][tar][net], pol)
                        # Single-line progress indicator, rewritten in place via '\r'.
                        print('>\r', pol, '(' + str(i1+1) + '/' + str(len(pols)) + ')', tar, '(' + str(i2+1) + '/' + str(len(tars)) + ')',
                                     net, '(' + str(i3+1) + '/' + str(len(nets)) + ')',  mvset, '('+str(i4+1)+'/'+str(len(mvsets)) +')', end='')

 any (2/2) 1.0 (2/2) xvector/v000 (1/1) real_m-m_sv (9/9)

In [33]:
def arrangeData(data, ress, no_trials=1):
    """Append male / female impersonation rates, in percent, to ``ress``.

    ``data`` maps a set name to a dict of per-sample result tuples laid out as
    (imp_m, imp_f, user_ids_m, user_ids_f, tot_m, tot_f).

    * ``no_trials <= 1``: the rate of each sample is imp / tot and the rates
      of all samples in the set are averaged.
    * ``no_trials > 1``: the first ``no_trials`` samples (by sorted key) are
      pooled; the rate is the number of distinct impersonated user ids
      divided by the total taken from the last pooled sample.

    For every set, two values rounded to two decimals (male, female) are
    appended to ``ress[set]``, creating the entry if needed. ``ress`` is
    modified in place and also returned.
    """
    for mvset, mvsamps in data.items():
        if no_trials <= 1:
            per_sample = [(res[0] / res[4], res[1] / res[5]) for res in mvsamps.values()]
            rates_m = [pair[0] for pair in per_sample]
            rates_f = [pair[1] for pair in per_sample]
        else:
            hit_m, hit_f = set(), set()
            tot_m = tot_f = 0
            for key in sorted(mvsamps.keys())[:no_trials]:
                res = mvsamps[key]
                hit_m.update(res[2])
                hit_f.update(res[3])
                tot_m, tot_f = res[4], res[5]
            rates_m = [len(hit_m) / tot_m]
            rates_f = [len(hit_f) / tot_f]

        row = [round(np.mean(rates_m)*100,2), round(np.mean(rates_f)*100,2)]
        if mvset in ress:
            ress[mvset] += row
        else:
            ress[mvset] = row
    return ress

In [34]:
# Networks to include in the tables below; each must already be present in mv_test_results.
nets = ['xvector/v000']

In [35]:
# Show one impersonation table per (policy, operating point): one row per
# master-voice set, one male / female column pair per network.
# NOTE: this rebinds the notebook-level name `ress` used by the verification cells.
for pol in pols:
    for tar in tars:
        ress = {}
        cols = []
        for net in nets:
            ress = arrangeData(mv_test_results[pol][tar][net], ress, no_trials=1)
            cols += [net + '-m', net + '-f']
        tar_label = pol.upper() + '  ' + ('FAR@'+str(tar) if tar is not None else 'EER')
        df = pd.DataFrame.from_dict(ress, orient='index', columns=cols)
        # Show exact zeros as '-' to make the non-zero rates stand out.
        df = df.mask(df==0).fillna('-')
        df = df.sort_index()
        # Split on the last '-' only, so network names containing a hyphen stay intact.
        df.columns = pd.MultiIndex.from_tuples([(tar_label,) + tuple(col.rsplit('-', 1)) for col in cols])
        display(HTML(df.to_html()))
        print()

Unnamed: 0_level_0,AVG EER,AVG EER
Unnamed: 0_level_1,xvector/v000,xvector/v000
Unnamed: 0_level_2,m,f
gan_f-f_sv,1.5,0.0700
gan_m-m_mv,2.9,-
gan_m-m_sv,2.67,-
gan_mf-f_sv,2.57,-
gan_mf-m_mv,3.43,0.1000
gan_mf-m_sv,2.8,0.3700
real_f-f_sv,1.8,21.8700
real_m-m_sv,17.73,0.8000
vggvox-v003_real_u-f_sv,9.4,15.4000





Unnamed: 0_level_0,AVG FAR@1.0,AVG FAR@1.0
Unnamed: 0_level_1,xvector/v000,xvector/v000
Unnamed: 0_level_2,m,f
gan_f-f_sv,-,-
gan_m-m_mv,-,-
gan_m-m_sv,-,-
gan_mf-f_sv,-,-
gan_mf-m_mv,-,-
gan_mf-m_sv,-,-
real_f-f_sv,0.0300,0.4000
real_m-m_sv,0.2000,-
vggvox-v003_real_u-f_sv,0.2000,0.4000





Unnamed: 0_level_0,ANY EER,ANY EER
Unnamed: 0_level_1,xvector/v000,xvector/v000
Unnamed: 0_level_2,m,f
gan_f-f_sv,44.5,37.2
gan_m-m_mv,46.6,13.53
gan_m-m_sv,45.9,13.0
gan_mf-f_sv,47.87,16.33
gan_mf-m_mv,52.83,27.7
gan_mf-m_sv,47.0,33.53
real_f-f_sv,28.2,80.27
real_m-m_sv,71.2,26.47
vggvox-v003_real_u-f_sv,49.0,70.6





Unnamed: 0_level_0,ANY FAR@1.0,ANY FAR@1.0
Unnamed: 0_level_1,xvector/v000,xvector/v000
Unnamed: 0_level_2,m,f
gan_f-f_sv,6.5,2.23
gan_m-m_mv,7.6,0.23
gan_m-m_sv,7.4,0.17
gan_mf-f_sv,6.6,0.6
gan_mf-m_mv,9.27,2.03
gan_mf-m_sv,7.0,2.97
real_f-f_sv,1.63,20.8
real_m-m_sv,11.73,0.47
vggvox-v003_real_u-f_sv,11.4,19.4



