In [1]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import os
import sys
import pandas as pd
import pickle
import numpy as np

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [2]:
from batch_sv_system import get_embeds, cosine_sim, compute_plda_score
from utils import key2df, df2dict, compute_eer, get_id2idx

In [3]:
def compare_value(val_a, val_b, mask=None, verbose=True):
    if mask is not None:
        val_a = val_a[mask]
        val_b = val_b[mask]
    assert len(val_a) == len(val_b)
    n = len(val_a)
    r_inc = np.count_nonzero(val_a < val_b) / n
    r_equal = np.count_nonzero(val_a == val_b) / n
    r_dec = np.count_nonzero(val_a > val_b) / n
    if verbose:
        print("inc:{:.2f}, equal:{:.2f}, dec:{:.2f}".format(r_inc, r_equal, r_dec))
    return r_inc, r_equal, r_dec

In [4]:
embed_dir = "embeddings/voxc2_fbank64_voxc2untied_embeds"
sv_embeds = np.load(embed_dir + "/sv_embeds.npy")
keys = pickle.load(open(embed_dir + "/sv_keys.pkl", "rb"))
id2idx = get_id2idx(keys)

plda_embed_dir = "embeddings/voxc2_fbank64_voxc2untied_xvector/"
plda_model_dir = plda_embed_dir + "plda_train/"
plda_sv_embeds = np.load(plda_embed_dir + "/sv_embeds.npy")
plda_keys = pickle.load(open(plda_embed_dir + "/sv_keys.pkl", "rb"))
plda_id2idx = get_id2idx(plda_keys)

In [5]:
voxc1_df = pd.read_csv("/dataset/SV_sets/voxceleb1/dataframes/voxc1.csv")
spk_uttr_stat = voxc1_df.spk.value_counts()
voxc1_meta = pd.read_pickle("/dataset/SV_sets/voxceleb1/dataframes/voxc1_meta.pkl")
spk2gender = voxc1_meta.Gender.to_dict()

In [6]:
dev_spks = spk_uttr_stat[(spk_uttr_stat < 150)].index.tolist()
dev_uttrs = voxc1_df[voxc1_df.spk.isin(dev_spks)][['id', 'spk', 'gender', 'session']]
eval_spks = spk_uttr_stat[spk_uttr_stat >= 150].index.tolist()
eval_uttrs = voxc1_df[voxc1_df.spk.isin(eval_spks)][['id', 'spk', 'gender', 'session']]

In [23]:
from ioffe_plda.verifier import Verifier
py_plda_model = Verifier()

## Eval 

In [7]:
eval_cohort_uttrs = dev_uttrs.groupby("spk").apply(lambda x: x.sample(n=10))
eval_cohort_embeds = get_embeds(eval_cohort_uttrs.id, plda_sv_embeds, plda_id2idx, norm=False)

In [8]:
eval_spk = eval_spks[44]
eval_target_uttrs = eval_uttrs[eval_uttrs.spk == eval_spk].sample(frac=1.0) # for random init enrollments

In [9]:
# target
n_enr = 3
len_adapt = 100
enr_session = np.random.choice(eval_target_uttrs.session.unique(), size=1)[0]
print(enr_session)
enr_uttrs = eval_target_uttrs[eval_target_uttrs.session==enr_session].sample(n=n_enr, replace=True)
eval_init_enr_embeds = get_embeds(enr_uttrs.id, plda_sv_embeds, plda_id2idx, norm=False)

eval_target_uttrs = eval_target_uttrs.drop(index=enr_uttrs.index)
eval_target_embeds = get_embeds(eval_target_uttrs.id, plda_sv_embeds, plda_id2idx, norm=False)
eval_target_adapt_embeds = eval_target_embeds[:len_adapt]
eval_target_test_embeds = eval_target_embeds[len_adapt:]

VTAyGLEk_FE


In [10]:
# nontarget
eval_nonTarget_uttrs = eval_uttrs[eval_uttrs.spk != eval_spk]
nontarget_spks = np.random.choice(eval_nonTarget_uttrs.spk.unique(), 100, replace=False)
eval_nonTarget_uttrs = eval_nonTarget_uttrs[eval_nonTarget_uttrs.spk.isin(nontarget_spks)]
eval_nonTarget_embeds = get_embeds(eval_nonTarget_uttrs.id, plda_sv_embeds, id2idx, norm=False)
eval_nonTarget_scores = compute_plda_score(eval_init_enr_embeds, eval_nonTarget_embeds, plda_model_dir)
eval_nonTarget_sorted_idx = np.argsort(eval_nonTarget_scores.mean(0), axis=0)
eval_hard_utter_idx = eval_nonTarget_sorted_idx[-len(eval_target_test_embeds)*9:] 
eval_hard_scores = eval_nonTarget_scores[:, eval_hard_utter_idx]
eval_hard_nonTarget_uttrs = eval_nonTarget_uttrs.iloc[eval_hard_utter_idx]
eval_hard_nonTarget_embeds = get_embeds(eval_hard_nonTarget_uttrs.id, plda_sv_embeds, plda_id2idx, norm=False)  

In [11]:
# real adaptation
eT = 5

eval_adapt_scores = compute_plda_score(eval_init_enr_embeds, eval_target_adapt_embeds, plda_model_dir)
eval_true_adapted_embeds = eval_target_adapt_embeds[eval_adapt_scores.mean(0) > eT]
true_adapt_norm_scores = np.empty(0)
if len(eval_true_adapted_embeds) > 0:
    adapt_cohort_scores = compute_plda_score(eval_true_adapted_embeds, eval_cohort_embeds, plda_model_dir)
    adapt_cohort_mu = adapt_cohort_scores.mean(1)
    adapt_cohort_std = adapt_cohort_scores.std(1)
    true_adapt_norm_scores = (eval_adapt_scores.mean(0)[eval_adapt_scores.mean(0) > eT] - adapt_cohort_mu)/(adapt_cohort_std)

In [12]:
eval_false_adapted_embeds = eval_nonTarget_embeds[eval_nonTarget_scores.mean(0) > eT]
false_adapt_norm_scores = np.empty(0)
if len(eval_false_adapted_embeds) > 0:
    adapt_cohort_scores = compute_plda_score(eval_false_adapted_embeds, eval_cohort_embeds, plda_model_dir)
    adapt_cohort_mu = adapt_cohort_scores.mean(1)
    adapt_cohort_std = adapt_cohort_scores.std(1)
    false_adapt_norm_scores = (eval_nonTarget_scores.mean(0)[eval_nonTarget_scores.mean(0) > eT] - adapt_cohort_mu)/(adapt_cohort_std)

In [13]:
if len(eval_false_adapted_embeds) > 0:
    eval_total_enr_embeds = np.concatenate([eval_init_enr_embeds, eval_true_adapted_embeds, eval_false_adapted_embeds])
else:
    eval_total_enr_embeds = np.concatenate([eval_init_enr_embeds, eval_true_adapted_embeds])
adapt_sorted_idx = np.array([0,1,2] + (np.flip(np.argsort(np.concatenate([true_adapt_norm_scores, false_adapt_norm_scores])))+3).tolist())
adapt_labels = np.concatenate([np.ones(len(eval_true_adapted_embeds)+3), np.zeros(len(eval_false_adapted_embeds))])

In [14]:
print(eval_true_adapted_embeds.shape)
print(eval_false_adapted_embeds.shape)
print(eval_target_test_embeds.shape)

(99, 512)
(1, 512)
(188, 512)


In [15]:
score_sorted_idx = np.array([0,1,2] + (np.flip(
    np.argsort(np.concatenate([eval_adapt_scores.mean(0)[eval_adapt_scores.mean(0)>eT],
    eval_nonTarget_scores.mean(0)[eval_nonTarget_scores.mean(0)>eT]])))+3).tolist())
adapt_labels[score_sorted_idx]

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       0.])

### eval test 

In [16]:
# eval_random_nonTarget_embeds = get_embeds(eval_nonTarget_uttrs.sample(n=5000).id, plda_sv_embeds, plda_id2idx, norm=False)  
# eval_test_embeds = np.concatenate([eval_target_test_embeds, eval_random_nonTarget_embeds])
# eval_test_labels = np.concatenate([np.ones(len(eval_target_test_embeds)), 
#                                            np.zeros(len(eval_random_nonTarget_embeds))])

eval_test_embeds = np.concatenate([eval_target_test_embeds, eval_hard_nonTarget_embeds])
eval_test_labels = np.concatenate([np.ones(len(eval_target_test_embeds)), 
                                           np.zeros(len(eval_hard_nonTarget_embeds))])

# eval_test_embeds = np.concatenate([eval_target_test_embeds, eval_nonTarget_embeds])
# eval_test_labels = np.concatenate([np.ones(len(eval_target_test_embeds)), 
#                                            np.zeros(len(eval_nonTarget_embeds))])

In [17]:
eval_test_scores = compute_plda_score(eval_total_enr_embeds, eval_test_embeds, plda_model_dir)
eval_cent_scores = compute_plda_score(eval_total_enr_embeds, eval_test_embeds, plda_model_dir, mean=True)

# eval_adapt_scores = compute_plda_score(eval_init_enr_embeds, eval_total_enr_embeds[n_enr:], plda_model_dir)
# eval_adapt_scores_ = cosine_sim(eval_init_enr_embeds_, eval_true_adapted_embeds_)
# eval_test_scores = np.concatenate([eval_test_scores[:n_enr], 
#                                    eval_test_scores[n_enr:][np.argsort(eval_adapt_scores.mean(0))]])

print("init")
print(compute_eer(eval_test_scores[:3].mean(0), eval_test_labels))
print("score fusion")
print(compute_eer(eval_test_scores.mean(0), eval_test_labels))
print("feat fusion")
print(compute_eer(eval_cent_scores.mean(0), eval_test_labels))
print("score fusion, max")
print(compute_eer(eval_test_scores.mean(0)+eval_test_scores.max(0), eval_test_labels))
print("score fusion, std")
print(compute_eer(eval_test_scores.mean(0)+eval_test_scores.std(0), eval_test_labels))

init
(0.005319148936170248, 0.000591016548463357, 0.005319148936170248, 4.595991000000001)
score fusion
(0.005319148936170248, 0.0, 0.005319148936170248, 6.816882456310678)
feat fusion
(0.005319148936170248, 0.0, 0.005319148936170248, 1.300376)
score fusion, max
(0.010638297872340385, 0.01004728132387707, 0.010638297872340385, 28.469936278737862)
score fusion, std
(0.005319148936170248, 0.0, 0.005319148936170248, 11.1119173124317)


In [18]:
proxy_test_embeds = np.concatenate([eval_total_enr_embeds[3:], eval_cohort_embeds], axis=0)
eval_proxy_labels = np.concatenate([np.ones(len(eval_total_enr_embeds[3:])), np.zeros(len(eval_cohort_embeds))])
eval_proxy_scores = compute_plda_score(eval_total_enr_embeds, proxy_test_embeds, plda_model_dir)
proxy_eers = []
for idx in range(0, len(eval_proxy_scores)):
    proxy_eers.append(compute_eer(eval_proxy_scores[idx], eval_proxy_labels)[0])
proxy_eer_sorted = np.argsort(proxy_eers)

In [19]:
n_adapt = 30
eval_test_proxy_scores = eval_test_scores[proxy_eer_sorted[:n_adapt]]
eval_test_proxy1_scores = eval_test_scores[adapt_sorted_idx[:n_adapt]]
print("score fusion true")
if len(eval_false_adapted_embeds) > 0:
    print(compute_eer(eval_test_scores[:-len(eval_false_adapted_embeds)].mean(0), eval_test_labels))
else:
    print(compute_eer(eval_test_scores.mean(0), eval_test_labels))
print("score fusion true, budget (random)")
print(compute_eer(eval_test_scores[[0,1,2] + 
                  np.random.randint(3, 3+len(eval_true_adapted_embeds), n_adapt-3).tolist()].mean(0), 
                  eval_test_labels))
print("score fusion total (total)")
print(compute_eer(eval_test_scores.mean(0), eval_test_labels))
# print("score fusion total budget (worst)")
# print(compute_eer(eval_test_scores[[0,1,2] + np.arange(-n_adapt, 0).tolist()].mean(0), eval_test_labels))
print()
print("score fusion proxy")
print(compute_eer(eval_test_proxy_scores.mean(0), eval_test_labels))
print("score fusion proxy, max")
print(compute_eer(eval_test_proxy_scores.mean(0)+eval_test_proxy_scores.max(0), eval_test_labels))
print("score fusion proxy, std")
print(compute_eer(eval_test_proxy_scores.mean(0)+eval_test_proxy_scores.std(0), eval_test_labels))

print()
print("score fusion proxy1")
print(compute_eer(eval_test_proxy1_scores.mean(0), eval_test_labels))
print("score fusion proxy1, max")
print(compute_eer(eval_test_proxy1_scores.mean(0)+eval_test_proxy1_scores.max(0), eval_test_labels))
print("score fusion proxy1, std")
print(compute_eer(eval_test_proxy1_scores.mean(0)+eval_test_proxy1_scores.std(0), eval_test_labels))

score fusion true
(0.005319148936170248, 0.0, 0.005319148936170248, 6.835607921568626)
score fusion true, budget (random)
(0.005319148936170248, 0.0, 0.005319148936170248, 6.804656989999999)
score fusion total (total)
(0.005319148936170248, 0.0, 0.005319148936170248, 6.816882456310678)

score fusion proxy
(0.005319148936170248, 0.0, 0.005319148936170248, 6.524759706666669)
score fusion proxy, max
(0.005319148936170248, 0.0, 0.005319148936170248, 22.51899970666667)
score fusion proxy, std
(0.005319148936170248, 0.0, 0.005319148936170248, 11.119754329694093)

score fusion proxy1
(0.005319148936170248, 0.0, 0.005319148936170248, 6.5057529)
score fusion proxy1, max
(0.005319148936170248, 0.0017730496453900709, 0.005319148936170248, 20.651582899999998)
score fusion proxy1, std
(0.005319148936170248, 0.000591016548463357, 0.005319148936170248, 10.253739150957358)


In [20]:
eval_test_proxy1_scores = compute_plda_score(eval_total_enr_embeds[adapt_sorted_idx[:n_adapt]], eval_test_embeds, plda_model_dir, mean=True)
print("score fusion proxy1")
print(compute_eer(eval_test_proxy1_scores.mean(0), eval_test_labels))
print("score fusion proxy1, max")
print(compute_eer(eval_test_proxy1_scores.mean(0)+eval_test_proxy1_scores.max(0), eval_test_labels))
print("score fusion proxy1, std")
print(compute_eer(eval_test_proxy1_scores.mean(0)+eval_test_proxy1_scores.std(0), eval_test_labels))

score fusion proxy1
(0.005319148936170248, 0.0, 0.005319148936170248, -2.2094139999999998)
score fusion proxy1, max
(0.005319148936170248, 0.0, 0.005319148936170248, -4.4188279999999995)
score fusion proxy1, std
(0.005319148936170248, 0.0, 0.005319148936170248, -2.2094139999999998)


In [None]:
for i in range(1, len(adapt_sorted_idx)):
    print(compute_eer(eval_test_scores[proxy_eer_sorted[:i]].mean(0), eval_test_labels)[0])
    print(compute_eer(eval_test_scores[adapt_sorted_idx[:i]].mean(0), eval_test_labels)[0])
    print()

### score normalization 

In [None]:
# Z-norm
enr_cohort_scores = compute_plda_score(eval_total_enr_embeds, eval_cohort_embeds, plda_model_dir)
enr_cohort_mu = enr_cohort_scores.mean(1, keepdims=True)
enr_cohort_std = enr_cohort_scores.std(1, keepdims=True)
eval_test_norm_scores = (eval_test_scores - enr_cohort_mu) / enr_cohort_std
print(compute_eer(eval_test_norm_scores.mean(0), eval_test_labels))

In [None]:
# F-norm
a = 1
client_scores = compute_plda_score(eval_total_enr_embeds, eval_total_enr_embeds, plda_model_dir)
client_mean = np.triu(client_scores, 1).mean()
imp_scores = compute_plda_score(eval_total_enr_embeds, eval_cohort_embeds, plda_model_dir)
imp_mean = imp_score.mean()
f_score = (eval_test_scores-imp_mean)*(2*a/(client_mean - imp_mean)) + a
print(compute_eer(f_score.mean(0), eval_test_labels))

### max effect 

In [None]:
max_test_scores = eval_test_scores.mean(0)+eval_test_scores.max(0)

In [None]:
np.bincount(np.argmax(eval_test_scores, axis=0), minlength=len(eval_test_scores))

In [None]:
adapt_thres = compute_eer(adapt_test_scores, eval_test_labels)[-1]
adapt_fn_idx = np.nonzero((adapt_test_scores < adapt_thres) & (eval_test_labels == 1))
adapt_fp_idx = np.nonzero((adapt_test_scores > adapt_thres) & (eval_test_labels == 0))
max_thres = compute_eer(max_test_scores, eval_test_labels)[-1]
max_fn_idx = np.nonzero((max_test_scores < max_thres) & (eval_test_labels == 1))
max_fp_idx = np.nonzero((max_test_scores > max_thres) & (eval_test_labels == 0))

In [None]:
print(adapt_fp_idx)
print(adapt_test_scores[adapt_fp_idx])
print(max_fp_idx)
print(max_test_scores[max_fp_idx])

In [None]:
print(adapt_fn_idx)
print(adapt_test_scores[adapt_fn_idx])
print(max_fn_idx)
print(max_test_scores[max_fn_idx])

### std effect 

In [None]:
std_test_scores = eval_test_scores.mean(0)+eval_test_scores.std(0)

In [None]:
adapt_thres = compute_eer(adapt_test_scores, eval_test_labels)[-1]
adapt_fn_idx = np.nonzero((adapt_test_scores < adapt_thres) & (eval_test_labels == 1))
adapt_fp_idx = np.nonzero((adapt_test_scores > adapt_thres) & (eval_test_labels == 0))
std_thres = compute_eer(std_test_scores, eval_test_labels)[-1]
std_fn_idx = np.nonzero((std_test_scores < std_thres) & (eval_test_labels == 1))
std_fp_idx = np.nonzero((std_test_scores > std_thres) & (eval_test_labels == 0))

In [None]:
print(adapt_fp_idx)
print(adapt_test_scores[adapt_fp_idx])
print(std_fp_idx)
print(std_test_scores[std_fp_idx])

In [None]:
print(adapt_fn_idx)
print(adapt_test_scores[adapt_fn_idx])
print(std_fn_idx)
print(std_test_scores[std_fn_idx])

### adapt effect

In [None]:
init_test_scores = eval_test_scores[:3].mean(0)
adapt_test_scores = eval_test_scores.mean(0)
# eval_avg_test_scores = compute_plda_score(eval_high_avg_enr_embeds, eval_test_embeds, plda_model_dir)
# adapt_test_scores = eval_avg_test_scores.mean(0)

In [None]:
step = 2
for idx in range(3, len(eval_test_scores), step):
    print("{} to {}".format(idx, idx+step))
    compare_value(init_test_scores, eval_test_scores[idx:idx+step].mean(0))
#     compare_value(init_test_scores, eval_test_scores[idx:idx+step].mean(0), eval_test_labels==1)
#     compare_value(init_test_scores, eval_test_scores[idx:idx+step].mean(0), eval_test_labels==0)
    print(compute_eer(eval_test_scores[idx:idx+step].mean(0), eval_test_labels)[0])
    print()

In [None]:
# cumulative
for idx in range(3, len(eval_test_scores), 3):
    compare_value(init_test_scores, eval_test_scores[3:idx+3].mean(0), eval_test_labels==1)
    compare_value(init_test_scores, eval_test_scores[3:idx+3].mean(0), eval_test_labels==0)
    print(compute_eer(eval_test_scores[3:idx+3].mean(0), eval_test_labels)[0])
    print()

In [None]:
scores = compute_plda_score(eval_total_enr_embeds[[9]], eval_test_embeds, plda_model_dir)

In [None]:
init_thres = compute_eer(init_test_scores, eval_test_labels)[-1]
init_fn_idx = np.nonzero((init_test_scores < init_thres) & (eval_test_labels == 1))
init_fp_idx = np.nonzero((init_test_scores > init_thres) & (eval_test_labels == 0))
adapt_thres = compute_eer(adapt_test_scores, eval_test_labels)[-1]
adapt_fn_idx = np.nonzero((adapt_test_scores < adapt_thres) & (eval_test_labels == 1))
adapt_fp_idx = np.nonzero((adapt_test_scores > adapt_thres) & (eval_test_labels == 0))

In [None]:
print(init_fp_idx)
print(init_test_scores[init_fp_idx])
print(adapt_fp_idx)
print(adapt_test_scores[adapt_fp_idx])

In [None]:
print(init_fn_idx)
print(init_test_scores[init_fn_idx])
print(adapt_fn_idx)
print(adapt_test_scores[adapt_fn_idx])

### threshold 

In [None]:
from batch_sv_system_utils import compute_error
compute_error(init_test_scores > init_thres, eval_test_labels)

In [None]:
from batch_sv_system_utils import compute_error
compute_error(adapt_test_scores > init_thres*1.2, eval_test_labels)

### avg embeds 

In [None]:
n_adapt = 10

In [None]:
eval_test_scores = compute_plda_score(eval_total_enr_embeds, eval_test_embeds, plda_model_dir)
for idx in range(3, len(eval_test_scores)-n_adapt+3, 3):
    idx = [0,1,2] + np.arange(idx, idx+n_adapt-3).tolist()
    print(compute_eer(eval_test_scores[idx].mean(0), eval_test_labels)[0])

In [None]:
eval_adapt_scores = compute_plda_score(eval_init_enr_embeds, eval_total_enr_embeds[n_enr:], plda_model_dir)
eval_adapt_sorted_embeds = eval_total_enr_embeds[n_enr:][np.argsort(eval_adapt_scores.mean(0))]

# confid based avg
eval_high_avg_enr_embeds = np.concatenate([eval_init_enr_embeds, eval_adapt_sorted_embeds[:n_adapt], 
                                           eval_adapt_sorted_embeds[n_adapt:].mean(0, keepdims=True)])
eval_low_avg_enr_embeds = np.concatenate([eval_init_enr_embeds, eval_adapt_sorted_embeds[-n_adapt:], 
                                          eval_adapt_sorted_embeds[:-n_adapt].mean(0, keepdims=True)])
eval_mid_avg_enr_embeds = np.concatenate([eval_init_enr_embeds, eval_adapt_sorted_embeds[:n_adapt//2], 
                                          eval_adapt_sorted_embeds[n_adapt//2:-n_adapt//2].mean(0, keepdims=True), 
                                          eval_adapt_sorted_embeds[-n_adapt//2:], ])
eval_hist_avg_enr_embeds = []
prev_edge = 0
for edge in np.cumsum(np.histogram(eval_adapt_scores.mean(0), bins=n_adapt)[0]):
    if prev_edge == edge: 
        continue
    eval_hist_avg_enr_embeds.append(eval_adapt_sorted_embeds[prev_edge:edge].mean(0))
    prev_edge = edge
eval_hist_avg_enr_embeds = np.stack(eval_hist_avg_enr_embeds)

# clustering based
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=n_adapt).fit(eval_adapt_sorted_embeds)
eval_kmeans_avg_enr_embeds= kmeans.cluster_centers_

eval_test_scores = compute_plda_score(eval_high_avg_enr_embeds, eval_test_embeds, plda_model_dir)
print("high avg")
print(compute_eer(eval_test_scores.mean(0), eval_test_labels))

eval_test_scores = compute_plda_score(eval_mid_avg_enr_embeds, eval_test_embeds, plda_model_dir)
print("mid avg")
print(compute_eer(eval_test_scores.mean(0), eval_test_labels))

eval_test_scores = compute_plda_score(eval_low_avg_enr_embeds, eval_test_embeds, plda_model_dir)
print("low avg")
print(compute_eer(eval_test_scores.mean(0), eval_test_labels))

eval_test_scores = compute_plda_score(eval_hist_avg_enr_embeds, eval_test_embeds, plda_model_dir)
print("hist avg")
print(compute_eer(eval_test_scores.mean(0), eval_test_labels))

eval_test_scores = compute_plda_score(eval_kmeans_avg_enr_embeds, eval_test_embeds, plda_model_dir)
print("kmeans avg")
print(compute_eer(eval_test_scores.mean(0), eval_test_labels))

In [None]:
eval_adapt_scores = cosine_sim(eval_init_enr_embeds_, eval_true_adapted_embeds_)
eval_adapt_sorted_embeds = eval_total_enr_embeds[n_enr:][np.argsort(eval_adapt_scores.mean(0))]

# confid based avg
eval_high_avg_enr_embeds = np.concatenate([eval_adapt_sorted_embeds[:n_adapt], 
                                           eval_adapt_sorted_embeds[n_adapt:].mean(0, keepdims=True)])
eval_low_avg_enr_embeds = np.concatenate([eval_adapt_sorted_embeds[-n_adapt:], 
                                          eval_adapt_sorted_embeds[:-n_adapt].mean(0, keepdims=True)])
eval_mid_avg_enr_embeds = np.concatenate([eval_adapt_sorted_embeds[:n_adapt//2], 
                                          eval_adapt_sorted_embeds[n_adapt//2:-n_adapt//2].mean(0, keepdims=True), 
                                          eval_adapt_sorted_embeds[-n_adapt//2:], ])

eval_test_scores = compute_plda_score(eval_high_avg_enr_embeds, eval_test_embeds, plda_model_dir)
print("high avg")
print(compute_eer(eval_test_scores.mean(0), eval_test_labels))

eval_test_scores = compute_plda_score(eval_mid_avg_enr_embeds, eval_test_embeds, plda_model_dir)
print("mid avg")
print(compute_eer(eval_test_scores.mean(0), eval_test_labels))

eval_test_scores = compute_plda_score(eval_low_avg_enr_embeds, eval_test_embeds, plda_model_dir)
print("low avg")
print(compute_eer(eval_test_scores.mean(0), eval_test_labels))

## Cumulative adaptation 

In [None]:
fig, axes = plt.subplots(2, 1, sharex=True, sharey=True)
ascend_eers = []
for idx in range(0, len(eval_test_scores), 1):
    idx = [0,1,2] + np.arange(n_enr, min(n_enr+idx, len(eval_test_scores))).tolist()
    mean_eer = compute_eer(eval_test_scores[idx].mean(0), eval_test_labels)[0]
    mean_max_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].max(0), eval_test_labels)[0]
    mean_std_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].std(0), eval_test_labels)[0]
    mean_max_std_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].max(0)+eval_test_scores[idx].std(0),
                                   eval_test_labels)[0]
    ascend_eers.append((mean_eer, mean_max_eer, mean_std_eer, mean_max_std_eer))
#     print("{:.4f}, {:.4f}, {:.4f}, {:.4f}".format(mean_eer, mean_max_eer, mean_std_eer, mean_max_std_eer))

eer_stat = pd.DataFrame(list(zip(*ascend_eers))).T
eer_stat.columns = ['mean', 'mean_max', 'mean_std', 'mean_max_std']
eer_stat.plot(figsize=(20,10), title="hard trial, ascending confidence", ax=axes[0])

descend_eers = []
for idx in range(0, len(eval_test_scores)-n_enr, 1):
    idx = [0,1,2] + np.arange(len(eval_test_scores)-idx, len(eval_test_scores)).tolist()
    mean_eer = compute_eer(eval_test_scores[idx].mean(0), eval_test_labels)[0]
    mean_max_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].max(0), eval_test_labels)[0]
    mean_std_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].std(0), eval_test_labels)[0]
    mean_max_std_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].max(0)+eval_test_scores[idx].std(0),
                                   eval_test_labels)[0]
    descend_eers.append((mean_eer, mean_max_eer, mean_std_eer, mean_max_std_eer))
#     print("{:.4f}, {:.4f}, {:.4f}, {:.4f}".format(mean_eer, mean_max_eer, mean_std_eer, mean_max_std_eer))

eer_stat = pd.DataFrame(list(zip(*descend_eers))).T
eer_stat.columns = ['mean', 'mean_max', 'mean_std', 'mean_max_std']
eer_stat.plot(figsize=(20,10), title="hard trial, descending confidence", ax=axes[1])

In [None]:
fig, axes = plt.subplots(2, 1, sharex=True, sharey=True)
ascend_eers = []
for idx in range(0, len(eval_test_scores), 1):
    idx = [0,1,2] + np.arange(n_enr, min(n_enr+idx, len(eval_test_scores))).tolist()
    mean_eer = compute_eer(eval_test_scores[idx].mean(0), eval_test_labels)[0]
    mean_max_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].max(0), eval_test_labels)[0]
    mean_std_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].std(0), eval_test_labels)[0]
    mean_max_std_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].max(0)+eval_test_scores[idx].std(0),
                                   eval_test_labels)[0]
    ascend_eers.append((mean_eer, mean_max_eer, mean_std_eer, mean_max_std_eer))
#     print("{:.4f}, {:.4f}, {:.4f}, {:.4f}".format(mean_eer, mean_max_eer, mean_std_eer, mean_max_std_eer))

eer_stat = pd.DataFrame(list(zip(*ascend_eers))).T
eer_stat.columns = ['mean', 'mean_max', 'mean_std', 'mean_max_std']
eer_stat.plot(figsize=(20,10), title="hard trial, ascending confidence", ax=axes[0])

descend_eers = []
for idx in range(0, len(eval_test_scores)-n_enr, 1):
    idx = [0,1,2] + np.arange(len(eval_test_scores)-idx, len(eval_test_scores)).tolist()
    mean_eer = compute_eer(eval_test_scores[idx].mean(0), eval_test_labels)[0]
    mean_max_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].max(0), eval_test_labels)[0]
    mean_std_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].std(0), eval_test_labels)[0]
    mean_max_std_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].max(0)+eval_test_scores[idx].std(0),
                                   eval_test_labels)[0]
    descend_eers.append((mean_eer, mean_max_eer, mean_std_eer, mean_max_std_eer))
#     print("{:.4f}, {:.4f}, {:.4f}, {:.4f}".format(mean_eer, mean_max_eer, mean_std_eer, mean_max_std_eer))

eer_stat = pd.DataFrame(list(zip(*descend_eers))).T
eer_stat.columns = ['mean', 'mean_max', 'mean_std', 'mean_max_std']
eer_stat.plot(figsize=(20,10), title="hard trial, descending confidence", ax=axes[1])

In [None]:
print("non-cumulative")
print("mean_eer, mean_max_eer, mean_std_eer, mean_max_std_eer")
non_cum_eers = []
step = 5
for idx in range(0, len(eval_test_scores), step):
    idx = np.arange(0, n_enr).tolist() + np.arange(idx, min(idx+step, len(eval_test_scores))).tolist()
    mean_eer = compute_eer(eval_test_scores[idx].mean(0), eval_test_labels)[0]
    mean_max_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].max(0), eval_test_labels)[0]
    mean_std_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].std(0), eval_test_labels)[0]
    mean_max_std_eer = compute_eer(eval_test_scores[idx].mean(0)+eval_test_scores[idx].max(0)+eval_test_scores[idx].std(0),
                                   eval_test_labels)[0]
    non_cum_eers.append((mean_eer, mean_max_eer, mean_std_eer, mean_max_std_eer))
#     print("{:.4f}, {:.4f}, {:.4f}, {:.4f}".format(mean_eer, mean_max_eer, mean_std_eer, mean_max_std_eer))

eer_stat = pd.DataFrame(list(zip(*non_cum_eers))).T
eer_stat.columns = ['mean', 'mean_max', 'mean_std', 'mean_max_std']
eer_stat.plot(figsize=(20,10), title="non-cumulative confidence")