In [1]:
def save_obj(obj, name):
    print('Saving object', name)
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    print('Loading object', name)
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [4]:
def findThresholdAtFAR(far, value):
    return np.argmin(np.abs(value - far))

In [5]:
def getStats(identical, scores):
    far, tpr, thresholds = roc_curve(np.array(identical), np.array(scores))
    frr = 1 - tpr
    idx_eer = np.argmin(np.abs(far - frr))
    idx_far1 = findThresholdAtFAR(far, 0.01)
    return {'eer': {'thr': thresholds[idx_eer], 'far': far[idx_eer], 'frr': frr[idx_eer]}, 
            'far1': {'thr': thresholds[idx_far1], 'far': far[idx_far1], 'frr': frr[idx_far1]}}

In [6]:
def getStatsThr(identical, scores,thr):
    far, tpr, thresholds = roc_curve(np.array(identical), np.array(scores))
    frr = 1 - tpr
    idx_thr = findThresholdAtFAR(thresholds, thr)
    return {thr: {'thr': thresholds[idx_thr], 'far': far[idx_thr], 'frr': frr[idx_thr]}}

In [10]:
model = load_model('/beegfs/mm10572/master-voices/dl_master_voices/models/vggvox/ks_model/vggvox.h5')

In [9]:
flatten_layer = Flatten()(model.output)
contr_model = Model(model.inputs, flatten_layer)

In [11]:
train_paths = load_obj('/beegfs/mm10572/master-voices/dl_master_voices/data/vox2_mv/train_vox2_abspaths_1000_users')
train_users = np.unique([p.split('/')[5] for p in train_paths])
print('Found mv train users', len(train_users))

Loading object /beegfs/mm10572/master-voices/dl_master_voices/data/vox2_mv/train_vox2_abspaths_1000_users
Found mv train users 1000


In [12]:
samples = {}
for path in train_paths:
    user = path.split('/')[5]
    if not user in samples:
        samples[user] = []
    samples[user].append(path)

In [24]:
setups = [(1, np.mean), (10, np.max), (10, np.mean)]
statistics = []
n_comp = 10

for n_trials, scores_func in setups:
    identical = []
    scores = []
    for index in range(n_comp):
        user_1 = np.random.choice(list(samples.keys()))
        user_2 = np.random.choice(list(set(list(samples.keys())) - set([user_1])))
        path_1_1 = np.random.choice(samples[user_1])
        sp_1_1 = get_fft_spectrum(path_1_1)
        emb_1_1 = contr_model.predict(sp_1_1.reshape(1, *sp_1_1.shape, 1))[0]
        paths_1_2 = np.random.choice(list(set(samples[user_1]) - set([path_1_1])), n_trials)
        partial_scores_1_2 = []
        for path in paths_1_2:
            sp = get_fft_spectrum(path)
            emb = contr_model.predict(sp.reshape(1, *sp.shape, 1))[0]
            partial_scores_1_2.append(1 - spatial.distance.cosine(emb_1_1, emb))
        paths_2 = np.random.choice(samples[user_2], n_trials)
        partial_scores_2 = []
        for path in paths_2:
            sp = get_fft_spectrum(path)
            emb = contr_model.predict(sp.reshape(1, *sp.shape, 1))[0]
            partial_scores_2.append(1 - spatial.distance.cosine(emb_1_1, emb))
        identical.append(1)
        scores.append(scores_func(partial_scores_1_2))
        identical.append(0)
        scores.append(scores_func(partial_scores_2))
        print('\r' + str(index+1) + ' of ' + str(n_comp), '(', n_trials, str(scores_func).split(' ')[1], '):', getStats(identical, scores), end='')
    statistics.append(getStats(identical, scores))
    print()

10 of 10 ( 1 mean ): {'far1': {'far': 0.0, 'thr': 1.8923906087875366, 'frr': 1.0}, 'eer': {'far': 0.0, 'thr': 0.6226051449775696, 'frr': 0.0}}
10 of 10 ( 10 amax ): {'far1': {'far': 0.0, 'thr': 1.946548044681549, 'frr': 1.0}, 'eer': {'far': 0.0, 'thr': 0.7413321137428284, 'frr': 0.0}}
10 of 10 ( 10 mean ): {'far1': {'far': 0.0, 'thr': 1.8351682424545288, 'frr': 1.0}, 'eer': {'far': 0.2, 'thr': 0.5025533527135849, 'frr': 0.19999999999999996}}


In [25]:
len(statistics)

3

In [27]:
for i, setup_1 in enumerate(setups):
    for j, setup_2 in enumerate(setups):
        if i != j:
            for thr_type in ['eer', 'far1']:
                n_trials, scores_func = setup_1
                n_trials_cmp, scores_func_cmp = setup_2
                identical = []
                scores = []
                for index in range(n_comp):
                    user_1 = np.random.choice(list(samples.keys()))
                    user_2 = np.random.choice(list(set(list(samples.keys())) - set([user_1])))
                    path_1_1 = np.random.choice(samples[user_1])
                    sp_1_1 = get_fft_spectrum(path_1_1)
                    emb_1_1 = contr_model.predict(sp_1_1.reshape(1, *sp_1_1.shape, 1))[0]
                    paths_1_2 = np.random.choice(list(set(samples[user_1]) - set([path_1_1])), n_trials)
                    partial_scores_1_2 = []
                    for path in paths_1_2:
                        sp = get_fft_spectrum(path)
                        emb = contr_model.predict(sp.reshape(1, *sp.shape, 1))[0]
                        partial_scores_1_2.append(1 - spatial.distance.cosine(emb_1_1, emb))
                    paths_2 = np.random.choice(samples[user_2], n_trials)
                    partial_scores_2 = []
                    for path in paths_2:
                        sp = get_fft_spectrum(path)
                        emb = contr_model.predict(sp.reshape(1, *sp.shape, 1))[0]
                        partial_scores_2.append(1 - spatial.distance.cosine(emb_1_1, emb))
                    identical.append(1)
                    scores.append(scores_func(partial_scores_1_2))
                    identical.append(0)
                    scores.append(scores_func(partial_scores_2))
                    print('\r' + str(index+1) + ' of ' + str(n_comp), '(', n_trials, str(scores_func).split(' ')[1], thr_type, statistics[j][thr_type]['thr'], n_trials_cmp, str(scores_func_cmp).split(' ')[1], '):', getStatsThr(identical, scores, statistics[j][thr_type]['thr']), end='')
                print()

10 of 10 ( 1 mean eer 0.7413321137428284 10 amax ): {0.7413321137428284: {'far': 0.0, 'thr': 0.9222062230110168, 'frr': 0.9}}
10 of 10 ( 1 mean far1 1.946548044681549 10 amax ): {1.946548044681549: {'far': 0.0, 'thr': 1.9046813249588013, 'frr': 1.0}}
10 of 10 ( 1 mean eer 0.5025533527135849 10 mean ): {0.5025533527135849: {'far': 0.1, 'thr': 0.5123698711395264, 'frr': 0.0}}
10 of 10 ( 1 mean far1 1.8351682424545288 10 mean ): {1.8351682424545288: {'far': 0.0, 'thr': 1.845118761062622, 'frr': 1.0}}
10 of 10 ( 10 amax eer 0.6226051449775696 1 mean ): {0.6226051449775696: {'far': 0.0, 'thr': 0.8278138637542725, 'frr': 0.0}}
10 of 10 ( 10 amax far1 1.8923906087875366 1 mean ): {1.8923906087875366: {'far': 0.0, 'thr': 1.9309051632881165, 'frr': 1.0}}
10 of 10 ( 10 amax eer 0.5025533527135849 10 mean ): {0.5025533527135849: {'far': 0.3, 'thr': 0.6345411539077759, 'frr': 0.0}}
10 of 10 ( 10 amax far1 1.8351682424545288 10 mean ): {1.8351682424545288: {'far': 0.0, 'thr': 1.9697477221488953, 'f