In [None]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import os
import sys
import pandas as pd
import pickle
import numpy as np

In [None]:
from batch_sv_system_utils import get_embeds, cosine_sim, compute_error
from batch_sv_system_utils import compute_eer
from utils import key2df, df2dict

In [None]:
def get_id2idx(keys):
    """Return the first of the two lookups that `df2dict` builds for `keys`.

    `keys` is converted with `key2df` and the result is passed to `df2dict`,
    which must return exactly two values; only the first one is kept.
    Presumably this maps an utterance id to its row index in the embedding
    matrix -- confirm against `utils.df2dict`.
    """
    id2idx, _unused_idx2id = df2dict(key2df(keys))
    return id2idx

In [None]:
def get_features(x, n_feat=4):
    """Summarise each column of `x` with `n_feat` statistics taken over axis 0.

    Parameters
    ----------
    x : np.ndarray
        Array whose axis 0 is reduced. For a 2-D input of shape (rows, cols)
        the result has shape (cols, n_feat).
    n_feat : int, default 4
        Which statistics to emit, in this column order:
        2 -> (avg, std)
        3 -> (max, avg, std)
        4 -> (min, max, avg, std)
        5 -> (min, max, median, avg, std)

    Returns
    -------
    np.ndarray
        The selected statistics stacked along axis 0 and then transposed.

    Raises
    ------
    ValueError
        If `n_feat` is not one of 2, 3, 4, 5. (Previously the function fell
        through and returned None, which only failed later in the caller.)
    """
    stat_fns = {
        "min": lambda a: a.min(0),
        "max": lambda a: a.max(0),
        "median": lambda a: np.median(a, axis=0),
        "avg": lambda a: a.mean(0),
        "std": lambda a: a.std(0),
    }
    layouts = {
        2: ("avg", "std"),
        3: ("max", "avg", "std"),
        4: ("min", "max", "avg", "std"),
        5: ("min", "max", "median", "avg", "std"),
    }
    if n_feat not in layouts:
        raise ValueError(
            "unsupported n_feat={!r}; expected one of {}".format(n_feat, sorted(layouts))
        )

    # Only the statistics that are actually requested are computed.
    return np.stack([stat_fns[name](x) for name in layouts[n_feat]], axis=0).T

## Unsupervised Adaptation and Test

In [None]:
# --- Embeddings + keys read into sv_embeds / id2idx --------------------------
embed_dir = "embeddings/voxc2_fbank64_voxc2untied_embeds/"
# embed_dir = "embeddings/voxc2_fbank64_voxc2untied_300f_embeds/"
sv_embeds = np.load(os.path.join(embed_dir, "sv_embeds.npy"))
# NOTE: pickle executes arbitrary code on load -- only point this at trusted files.
# `with` makes sure the file handle is closed (the bare open() used to leak it).
with open(os.path.join(embed_dir, "sv_keys.pkl"), "rb") as key_file:
    keys = pickle.load(key_file)
id2idx = get_id2idx(keys)

# --- Embeddings + keys read into plda_sv_embeds / plda_id2idx ----------------
plda_embed_dir = "embeddings/voxc2_fbank64_voxc2untied_xvector/"
# plda_embed_dir = "embeddings/voxc2_fbank64_voxc2untied_300f_xvector/"
plda_sv_embeds = np.load(os.path.join(plda_embed_dir, "sv_embeds.npy"))
# Kept as plain concatenation: this string is handed to run_trial(plda_dir=...) as-is.
plda_model_dir = plda_embed_dir + "plda_train/"
with open(os.path.join(plda_embed_dir, "sv_keys.pkl"), "rb") as key_file:
    plda_keys = pickle.load(key_file)
plda_id2idx = get_id2idx(plda_keys)

## Cosine + Normalization

In [None]:
# Online enrollment adaptation with cosine scoring + cohort score normalization
from batch_sv_system_utils import run_trial, plot_score


def _cohort_stats(embeds, cohort):
    """Mean and std over axis 1 (kept as a column) of cosine scores against `cohort`.

    Assumes cosine_sim returns a 2-D matrix with one row per row of `embeds`
    -- TODO confirm against batch_sv_system_utils.cosine_sim.
    """
    cohort_scores = cosine_sim(embeds, cohort)
    return cohort_scores.mean(1, keepdims=True), cohort_scores.std(1, keepdims=True)


def _normalize_scores(scores, enr_stats, test_stats):
    """Average of the enrollment-side and test-side normalized scores.

    `enr_stats` / `test_stats` are (mean, std) column pairs from `_cohort_stats`;
    the test-side pair is transposed so that it broadcasts along the columns.
    """
    enr_mean, enr_std = enr_stats
    test_mean, test_std = test_stats
    return ((scores - enr_mean) / enr_std + (scores - test_mean.T) / test_std.T) / 2


# NOTE: pickle executes arbitrary code on load -- only use trusted trial files.
with open("trials/dev317_eval934/eval_random_enr10_tar50_n4950_gender/trials.pkl", "rb") as trial_file:
    trial = pickle.load(trial_file)
cohort_embeds = np.load("trials/dev317_eval934/cohort_embeds.npy")

eval_score_list = []
eval_labels = []
eval_adapt_labels = []
eval_preds = []

# Adaptation threshold on the mean normalized score (loop-invariant, so set once).
# NOTE(review): the same value 15 is used for the PLDA scores in the PLDA cell;
# normalized cosine scores are on a different scale -- confirm this is intended.
eT = 15.0

for t_idx in range(len(trial)):
    enr_spk, enr_ids, test_trial_df = trial[t_idx]

    # Balance the trial: keep every target and sample as many non-targets, then shuffle.
    # The sampling is unseeded, so results vary between runs.
    n_target = test_trial_df.label.value_counts()[1]
    n_nonTarget = n_target
    test_trial_df = pd.concat([test_trial_df[test_trial_df.label == 1],
                               test_trial_df[test_trial_df.label == 0].sample(n=n_nonTarget)])
    test_trial_df = test_trial_df.sample(frac=1.0)
    test_trial = (np.array(test_trial_df.id), np.array(test_trial_df.label))

    ### get embeds
    init_enr_embeds = get_embeds(enr_ids[:10], sv_embeds, id2idx, norm=True)
    test_embeds = get_embeds(test_trial[0], sv_embeds, id2idx, norm=True)

    # The test-side cohort statistics do not depend on the enrollment set,
    # so they are computed once and shared by the adapt and test passes.
    test_stats = _cohort_stats(test_embeds, cohort_embeds)

    ### adapt trial
    adapt_scores = cosine_sim(init_enr_embeds, test_embeds)
    adapt_norm_scores = _normalize_scores(
        adapt_scores, _cohort_stats(init_enr_embeds, cohort_embeds), test_stats)

    adapt_enr_idx = np.nonzero(adapt_norm_scores.mean(0) > eT)[0]
    # Guard the empty case: mean() of an empty slice is nan plus a RuntimeWarning.
    if len(adapt_enr_idx) > 0:
        adapt_enr_acc = np.array(test_trial[1])[adapt_enr_idx].mean()
    else:
        adapt_enr_acc = float("nan")
    print("[eT:{}] n_adapted:{}(of {}), adapt_acc:{:.3f}".format(eT, len(adapt_enr_idx), test_trial[1].sum(),
                                                                        adapt_enr_acc))
    enr_embeds = np.concatenate([init_enr_embeds, test_embeds[adapt_enr_idx]], axis=0)

    ### test trial
    test_scores = cosine_sim(enr_embeds, test_embeds)
    test_norm_scores = _normalize_scores(
        test_scores, _cohort_stats(enr_embeds, cohort_embeds), test_stats)

    ### reduce to online sv scores
    # Split the test axis at every adaptation index; segment k keeps the first k+1 score rows.
    # NOTE(review): enr_embeds starts with all rows of init_enr_embeds (enr_ids[:10]),
    # yet `[:n_enr+1]` keeps a single row before the first adaptation. If score rows
    # follow enr_embeds order this probably should be `len(init_enr_embeds) + n_enr`
    # -- confirm before changing, since it alters the reported numbers.
    online_scores = []
    prev_t = 0
    adapt_times = np.append(adapt_enr_idx, len(test_trial[1]))
    for n_enr, adapt_t in enumerate(adapt_times):
        online_scores.append(test_norm_scores[:n_enr+1, prev_t:adapt_t+1])
        prev_t = adapt_t+1

    online_score_fusion = np.concatenate([x.mean(0) for x in online_scores], axis=0)
    print("adapt eer:{:.4f}".format(compute_eer(online_score_fusion, test_trial[1])[0]))

    eval_score_list.append(online_scores)
    eval_labels.append(test_trial[1])
print("done")

In [None]:
# Evaluate pickled LR / SVM classifiers on the cosine trial scores (nothing is trained here)
from sklearn.linear_model import LogisticRegressionCV

# NOTE: reduce_scores is defined in a later cell of this notebook; that cell
# has to be executed before this one.
test_adapt_scores = np.concatenate([reduce_scores(x) for x in eval_score_list], axis=0)
# get_features must be passed as feat_fn=...; as the 2nd positional argument it
# binds to fusion_type and reduce_scores raises NotImplementedError.
test_feat = np.concatenate([reduce_scores(x, feat_fn=get_features) for x in eval_score_list], axis=0)
test_labels = np.concatenate(eval_labels)

# NOTE: pickle executes arbitrary code on load -- only use trusted classifier files.
with open("trials/dev317_eval934/dev_random_enr20_spk10_gender/plda_nf4_bal_lr_clf.pkl", "rb") as clf_file:
    lr_clf = pickle.load(clf_file)
with open("trials/dev317_eval934/dev_random_enr20_spk10_gender/plda_nf4_bal_svm_clf.pkl", "rb") as clf_file:
    svm_clf = pickle.load(clf_file)

# compute_eer results for: plain average fusion, LR on features, SVM on features.
print(compute_eer(test_adapt_scores, test_labels))
print(compute_eer(lr_clf.decision_function(test_feat), test_labels))
print(compute_eer(svm_clf.decision_function(test_feat), test_labels))

In [None]:
# Per-trial comparison on the cosine trials: average score fusion vs. SVM on score features.
# This cell previously iterated eval_plda_score_list / eval_plda_labels, which are
# only created in the PLDA section further down; it now uses the lists filled by
# the cosine loop, matching the pooled evaluation cell of this section.
score_fusion_eers = []
feat_eers = []
for scores, label in zip(eval_score_list, eval_labels):
    score_fusion = reduce_scores(scores)
    # feat_fn must be given by keyword; positionally it would bind to fusion_type.
    feat_score = svm_clf.decision_function(reduce_scores(scores, feat_fn=get_features))
    score_fusion_eers.append(compute_eer(score_fusion, label)[0])
    feat_eers.append(compute_eer(feat_score, label)[0])

# Negative entries: the feature classifier reached a lower EER than score fusion on that trial.
eer_diff = np.array(feat_eers) - np.array(score_fusion_eers)

n_better = np.count_nonzero(eer_diff < 0)
n_worse = np.count_nonzero(eer_diff > 0)
n_equal = np.count_nonzero(eer_diff == 0)
print("n_better:{}, n_worse:{}, n_equal:{}".format(n_better, n_worse,  n_equal))

## PLDA

In [30]:
# Online PLDA: adapt the enrollment set on a single trial and score it
from batch_sv_system_utils import run_trial, plot_score

# NOTE: pickle executes arbitrary code on load -- only use trusted trial files.
with open("trials/dev317_eval934/eval_random_enr10_tar50_spk10_gender/trials.pkl", "rb") as trial_file:
    trial = pickle.load(trial_file)


t_idx = 10
enr_spk, enr_ids, test_trial_df = trial[t_idx]

# Balance the trial: keep every target and sample as many non-targets, then shuffle.
# The sampling is unseeded, so the printed numbers vary between runs.
n_target = test_trial_df.label.value_counts()[1]
n_nonTarget = n_target
test_trial_df = pd.concat([test_trial_df[test_trial_df.label == 1],
                           test_trial_df[test_trial_df.label == 0].sample(n=n_nonTarget)])
test_trial_df = test_trial_df.sample(frac=1.0)
test_trial = (np.array(test_trial_df.id), np.array(test_trial_df.label))

### get embeds
plda_init_enr_embeds = get_embeds(enr_ids[:3], plda_sv_embeds, plda_id2idx, norm=False)
plda_test_embeds = get_embeds(test_trial[0], plda_sv_embeds, plda_id2idx, norm=False)

### adapt trial
plda_adapt_fusion, plda_adapt_scores = run_trial(plda_init_enr_embeds, plda_test_embeds, test_trial[1],
                         plda_dir=plda_model_dir,
                         plot=False, title="adapt_score_fusion")
# adapt threshold
eT = 15

adapt_enr_idx = np.nonzero(plda_adapt_fusion > eT)[0]
# Guard the empty case: mean() of an empty slice is nan plus a RuntimeWarning.
if len(adapt_enr_idx) > 0:
    adapt_enr_acc = np.array(test_trial[1])[adapt_enr_idx].mean()
else:
    adapt_enr_acc = float("nan")
print("[eT:{}] n_adapted:{}(of {}), adapt_acc:{:.3f}".format(eT, len(adapt_enr_idx), test_trial[1].sum(),
                                                                    adapt_enr_acc))

### test trial with the enlarged enrollment set
plda_enr_embeds = np.concatenate([plda_init_enr_embeds, plda_test_embeds[adapt_enr_idx]], axis=0)
plda_test_score_fusion, plda_test_scores = run_trial(plda_enr_embeds, plda_test_embeds, test_trial[1],
                                       plda_dir=plda_model_dir, neg_embeds=None,
                                       plot=False, title="score_fusion(plda, adapt)",
                                       verbose=False)

### reduce to online sv scores
# Split the test axis at every adaptation index; segment k keeps the first k+1 score rows.
# NOTE(review): plda_enr_embeds starts with all rows of plda_init_enr_embeds
# (enr_ids[:3]), yet `[:n_enr+1]` keeps a single row before the first adaptation.
# If run_trial's score rows follow plda_enr_embeds order this is probably off by
# len(plda_init_enr_embeds) - 1 -- confirm before changing.
plda_online_scores = []
prev_t = 0
adapt_times = np.append(adapt_enr_idx, len(test_trial[1]))
for n_enr, adapt_t in enumerate(adapt_times):
    plda_online_scores.append(plda_test_scores[:n_enr+1, prev_t:adapt_t+1])
    prev_t = adapt_t+1

plda_online_score_fusion = np.concatenate([x.mean(0) for x in plda_online_scores], axis=0)
print("adapt eer:{:.4f}".format(compute_eer(plda_online_score_fusion, test_trial[1])[0]))

[eT:15] n_adapted:46(of 60), adapt_acc:1.000
adapt eer:0.0083


In [31]:
# Mean adapt-pass score (over axis 0) of each test item that was picked for adaptation ...
mean_adapt_scores = plda_adapt_scores.mean(0)
adapt_confids = mean_adapt_scores[adapt_enr_idx]
# ... and the ordering of those items from lowest to highest mean score.
sorted_adapt_confid_idx = np.argsort(adapt_confids)

In [33]:
# EER of the mean score over a growing set of score rows, chosen in ascending
# adapt-confidence order.
# NOTE(review): sorted_adapt_confid_idx holds positions within adapt_confids, but
# it is used here to pick rows of plda_test_scores; if those rows are ordered as
# [initial enrollments, adapted items] the selection may be shifted by the number
# of initial enrollments -- confirm.
step = 3
for n_start in range(3, len(sorted_adapt_confid_idx), step):
    chosen_rows = sorted_adapt_confid_idx[:n_start+3]
    fused_scores = plda_test_scores[chosen_rows].mean(0)
    print(compute_eer(fused_scores, test_trial[1])[0])

0.00833333333333336
0.00833333333333336
0.00833333333333336
0.00833333333333336
0.00833333333333336
0.00833333333333336
0.00833333333333336
0.00833333333333336
0.00833333333333336
0.00833333333333336
0.00833333333333336
0.00833333333333336
0.00833333333333336
0.00833333333333336
0.00833333333333336


In [None]:
# Load saved per-trial outputs. Each record is indexed below as
# record[0] -> scores and record[1] -> labels.
# NOTE: pickle executes arbitrary code on load -- only use trusted files.
with open("tmp/eT15_enr10_score.pkl", "rb") as output_file:
    eval_plda_trial_output = pickle.load(output_file)
eval_plda_score_list = [x[0] for x in eval_plda_trial_output]
eval_plda_labels = [x[1] for x in eval_plda_trial_output]

In [None]:
def reduce_scores(score_list, fusion_type="avg", feat_fn=None):
    """Reduce every score array in `score_list` and concatenate the results.

    Parameters
    ----------
    score_list : sequence of np.ndarray
        Score arrays; each one is reduced along axis 0.
    fusion_type : str, default "avg"
        Used only when `feat_fn` is None:
        "avg"     -> mean over axis 0
        "avg_std" -> mean + std over axis 0
        "avg_max" -> mean + max over axis 0
    feat_fn : callable, optional
        When given, `feat_fn(score)` replaces the built-in fusion and
        `fusion_type` is ignored. It must be passed by keyword (or as the third
        positional argument); the second positional slot is `fusion_type`.

    Returns
    -------
    np.ndarray
        The per-array results concatenated along axis 0.

    Raises
    ------
    NotImplementedError
        If `feat_fn` is None and `fusion_type` is not a supported name. The
        message names the offending value and, for a callable, points at the
        likely mistake of passing a feature function positionally.
    """
    if feat_fn is not None:
        return np.concatenate([feat_fn(score) for score in score_list], axis=0)

    fusion_fns = {
        "avg": lambda score: score.mean(0),
        "avg_std": lambda score: score.mean(0) + score.std(0),
        "avg_max": lambda score: score.mean(0) + score.max(0),
    }
    # isinstance guard keeps unhashable values on the NotImplementedError path.
    fuse = fusion_fns.get(fusion_type) if isinstance(fusion_type, str) else None
    if fuse is None:
        hint = ""
        if callable(fusion_type):
            hint = "; a callable was given -- pass it as feat_fn=<callable>"
        raise NotImplementedError(
            "unsupported fusion_type {!r} (expected one of {}){}".format(
                fusion_type, sorted(fusion_fns), hint)
        )
    return np.concatenate([fuse(score) for score in score_list], axis=0)

In [None]:
# Pooled compute_eer result over all saved trials using the "avg_max" fusion.
fused_per_trial = [
    reduce_scores(trial_scores, fusion_type="avg_max")
    for trial_scores in eval_plda_score_list
]
test_adapt_scores = np.concatenate(fused_per_trial, axis=0)
test_labels = np.concatenate(eval_plda_labels)
print(compute_eer(test_adapt_scores, test_labels))

In [None]:
# Evaluate pickled LR / SVM classifiers on the saved PLDA trial scores (nothing is trained here)
from sklearn.linear_model import LogisticRegressionCV

test_adapt_scores = np.concatenate([reduce_scores(x) for x in eval_plda_score_list], axis=0)
test_feat = np.concatenate([reduce_scores(x, feat_fn=get_features) for x in eval_plda_score_list], axis=0)
test_labels = np.concatenate(eval_plda_labels)

# NOTE: pickle executes arbitrary code on load -- only use trusted classifier files.
# `with` closes the file handles that the bare open() calls used to leak.
with open("trials/dev317_eval934/dev_random_enr20_spk10_gender/plda_nf4_bal_lr_clf.pkl", "rb") as clf_file:
    lr_clf = pickle.load(clf_file)
with open("trials/dev317_eval934/dev_random_enr20_spk10_gender/plda_nf4_bal_svm_clf.pkl", "rb") as clf_file:
    svm_clf = pickle.load(clf_file)

# compute_eer results for: plain average fusion, LR on features, SVM on features.
print(compute_eer(test_adapt_scores, test_labels))
print(compute_eer(lr_clf.decision_function(test_feat), test_labels))
print(compute_eer(svm_clf.decision_function(test_feat), test_labels))

In [None]:
# Per-trial comparison on the saved PLDA trials: average score fusion vs. LR on score features.
score_fusion_eers = []
feat_eers = []
for scores, label in zip(eval_plda_score_list, eval_plda_labels):
    score_fusion = reduce_scores(scores)
    # feat_fn must be given by keyword: as the 2nd positional argument get_features
    # binds to fusion_type and reduce_scores raises NotImplementedError.
    feat_score = lr_clf.decision_function(reduce_scores(scores, feat_fn=get_features))
    score_fusion_eers.append(compute_eer(score_fusion, label)[0])
    feat_eers.append(compute_eer(feat_score, label)[0])

# Negative entries: the feature classifier reached a lower EER than score fusion on that trial.
eer_diff = np.array(feat_eers) - np.array(score_fusion_eers)
avg_feat_eer = np.mean(feat_eers)
avg_fusion_eer = np.mean(score_fusion_eers)

n_better = np.count_nonzero(eer_diff < 0)
n_worse = np.count_nonzero(eer_diff > 0)
n_equal = np.count_nonzero(eer_diff == 0)
print("n_better:{}, n_worse:{}, n_equal:{}".format(n_better, n_worse,  n_equal))
print("avg_fusion_eer:{:.5f}, avg_feat_eer:{:.5f}".format(avg_fusion_eer, avg_feat_eer))

In [None]:
# Trials where the feature classifier had a lower EER than score fusion (eer_diff < 0).
better_trial_idx = np.nonzero(eer_diff < 0)[0]

better_n_adapt = []
better_adapt_time = []
for idx in better_trial_idx:
    # Last element of the saved trial record; presumably the adaptation
    # indices/times of that trial -- TODO confirm against the code that wrote the pickle.
    adapt_rec = eval_plda_trial_output[idx][-1]
    n_adapt = len(adapt_rec)
    better_n_adapt.append(n_adapt)
    # Take the mean only for non-empty records; it used to be evaluated before
    # this guard, i.e. also on empty records.
    if n_adapt > 0:
        better_adapt_time.append(adapt_rec.mean())

In [None]:
# Mean adapt-record length over the trials with eer_diff < 0.
np.mean(better_n_adapt)

In [None]:
# Mean of the per-trial adapt-record means over the trials with eer_diff < 0
# (trials with an empty adapt record are not included).
np.mean(better_adapt_time)

In [None]:
# Trials where the feature classifier had a higher EER than score fusion (eer_diff > 0).
worse_trial_idx = np.nonzero(eer_diff > 0)[0]

worse_n_adapt = []
worse_adapt_time = []
for idx in worse_trial_idx:
    # Last element of the saved trial record; presumably the adaptation
    # indices/times of that trial -- TODO confirm against the code that wrote the pickle.
    adapt_rec = eval_plda_trial_output[idx][-1]
    n_adapt = len(adapt_rec)
    worse_n_adapt.append(n_adapt)
    # Take the mean only for non-empty records; it used to be evaluated before
    # this guard, i.e. also on empty records.
    if n_adapt > 0:
        worse_adapt_time.append(adapt_rec.mean())

In [None]:
# Mean adapt-record length over the trials with eer_diff > 0.
np.mean(worse_n_adapt)

In [None]:
# Mean of the per-trial adapt-record means over the trials with eer_diff > 0
# (trials with an empty adapt record are not included).
np.mean(worse_adapt_time)