score_calibration
------------------------

Gender dependent embedding
------------------------

각 gender 정보만 가지고 학습된 모델의 embedding이 각 gender에 대해 더 좋은 성능을 내는지 보자.

 


## Environment

In [1]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import pandas as pd
import pickle
import numpy as np
import sys
import os

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [2]:
sys.path.append('/host/projects/sv_experiments/sv_system/')
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="3"

In [3]:
def key2df(keys, delimeter="-"):
    key_df = pd.DataFrame(keys, columns=['key'])
    key_df['spk'] = key_df.key.apply(lambda x: x.split(delimeter)[0])
    key_df['label'] = key_df.groupby('spk').ngroup()
    key_df['origin'] = key_df.spk.apply(lambda x: 'voxc2' if x.startswith('id') else 'voxc1')
    
    return key_df

### SCORES

In [None]:
def read_score(score_path, trial):
    score = pd.read_csv(score_path, 
                    sep=' ', header=None)
    score.columns = ['enroll', 'test', 'score']
    score['label'] = trial.label
    
    return score

In [None]:
trial = pd.read_pickle("/dataset/SV_sets/voxceleb12/dataframes/voxc12_f_sv_test_dataframe.pkl")
voxc1_meta = pd.read_pickle("/dataset/SV_sets/voxceleb12/dataframes/voxc1_meta.pkl")

In [None]:
trial['enroll_gender'] = trial.enroll_spk.apply(lambda x: voxc1_meta.loc[x].Gender)

In [None]:
trial['test_gender'] = trial.test_spk.apply(lambda x: voxc1_meta.loc[x].Gender)

In [None]:
equal_gender_trial = trial[trial.apply(lambda x: x.enroll_gender == x.test_gender, axis=1)]

In [None]:
ff_trial = trial[(trial.enroll_gender == 'f') & (trial.test_gender == 'f')]
# mm_trial = trial[(trial.enroll_gender == 'm') & (trial.test_gender == 'm')]

In [None]:
cosine_score = read_score("/host/projects/sv_experiments/sv_system/voxc1_mfcc30_best/scores/cosine_scores",
                            trial)
lda_score = read_score("/host/projects/sv_experiments/sv_system/voxc1_mfcc30_best/scores/lda_scores",
                        trial)
plda_score = read_score("/host/projects/sv_experiments/sv_system/voxc1_mfcc30_best/scores/plda_scores",
                         trial)

In [None]:
m_cosine_score = read_score("/host/projects/sv_experiments/sv_system/voxc1_m_mfcc30_best/scores/cosine_scores",
                            trial)
m_lda_score = read_score("/host/projects/sv_experiments/sv_system/voxc1_m_mfcc30_best/scores/lda_scores",
                        trial)
m_plda_score = read_score("/host/projects/sv_experiments/sv_system/voxc1_m_mfcc30_best/scores/plda_scores",
                         trial)

In [None]:
f_cosine_score = read_score("/host/projects/sv_experiments/sv_system/voxc1_f_mfcc30_best/scores/cosine_scores",
                           trial)
f_lda_score = read_score("/host/projects/sv_experiments/sv_system/voxc1_f_mfcc30_best/scores/lda_scores",
                        trial)
f_plda_score = read_score("/host/projects/sv_experiments/sv_system/voxc1_f_mfcc30_best/scores/plda_scores",
                         trial)

In [None]:
fig, axes = plt.subplots(3, 3, sharex=True, sharey=True, figsize=(20, 15))
fig.suptitle("gender_independet-trained")
# fig.tight_layout()

axes[0,0].set_title("cosine_entire")
axes[1,0].set_title("cosine_mm")
axes[2,0].set_title("cosine_ff")

cosine_score.groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[0,0])
cosine_score.iloc[mm_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[1,0])
cosine_score.iloc[ff_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[2,0])

axes[0,1].set_title("lda_entire")
axes[1,1].set_title("lda_mm")
axes[2,1].set_title("lda_ff")

lda_score.groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[0,1])
lda_score.iloc[mm_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[1,1])
lda_score.iloc[ff_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[2,1])

axes[0,2].set_title("plda_entire")
axes[1,2].set_title("plda_mm")
axes[2,2].set_title("plda_ff")

plda_score.groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[0,2])
plda_score.iloc[mm_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[1,2])
plda_score.iloc[ff_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[2,2])


plt.show()

In [None]:
fig, axes = plt.subplots(3, 3, sharex=True, sharey=True, figsize=(20, 15))
fig.suptitle("male-trained")
# fig.tight_layout()

axes[0,0].set_title("cosine_entire")
axes[1,0].set_title("cosine_mm")
axes[2,0].set_title("cosine_ff")

m_cosine_score.groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[0,0])
m_cosine_score.iloc[mm_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[1,0])
m_cosine_score.iloc[ff_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[2,0])

axes[0,1].set_title("lda_entire")
axes[1,1].set_title("lda_mm")
axes[2,1].set_title("lda_ff")

m_lda_score.groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[0,1])
m_lda_score.iloc[mm_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[1,1])
m_lda_score.iloc[ff_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[2,1])

axes[0,2].set_title("plda_entire")
axes[1,2].set_title("plda_mm")
axes[2,2].set_title("plda_ff")

m_plda_score.groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[0,2])
m_plda_score.iloc[mm_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[1,2])
m_plda_score.iloc[ff_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[2,2])


plt.show()

In [None]:
fig, axes = plt.subplots(3, 3, sharex=True, sharey=True, figsize=(20, 15))
fig.suptitle("female-trained")

axes[0,0].set_title("cosine_entire")
axes[1,0].set_title("cosine_mm")
axes[2,0].set_title("cosine_ff")

f_cosine_score.groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[0,0])
f_cosine_score.iloc[mm_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[1,0])
f_cosine_score.iloc[ff_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[2,0])

axes[0,1].set_title("lda_entire")
axes[1,1].set_title("lda_mm")
axes[2,1].set_title("lda_ff")

f_lda_score.groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[0,1])
f_lda_score.iloc[mm_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[1,1])
f_lda_score.iloc[ff_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[2,1])

axes[0,2].set_title("plda_entire")
axes[1,2].set_title("plda_mm")
axes[2,2].set_title("plda_ff")

f_plda_score.groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[0,2])
f_plda_score.iloc[mm_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[1,2])
f_plda_score.iloc[ff_trial.index].groupby('label').score.hist(bins=100, alpha=0.5, ax=axes[2,2])

plt.show()

## Gender classification

In [4]:
import torch
from torch.nn.functional import cosine_similarity

def euc_dist(a, b):
    return ((a * a) + (b * b) - b * a).sum(1)

def cos_dist(a, b):
    a = a / np.linalg.norm(a, axis=1, keepdims=True)
    b = b / np.linalg.norm(b, axis=1, keepdims=True)
    return -(a*b).sum(1)

def cos_sim_batch(a, b, axis=2):
    a = torch.from_numpy(a)
    b = torch.from_numpy(b)
    
    return cosine_similarity(a, b, dim=axis).numpy()

In [249]:
voxc1_train_df = pd.read_pickle("/dataset/SV_sets/voxceleb12/dataframes/voxc1_si_train_dataframe.pkl")
voxc1_test_df = pd.read_pickle("/dataset/SV_sets/voxceleb12/dataframes/voxc12_sv_test_dataframe.pkl")

In [259]:
si_keys = pickle.load(open("/host/projects/sv_experiments/sv_system/embeddings/voxc1/voxc1_mfcc30_best/si_keys.pkl", "rb"))
si_embeds = np.load("/host/projects/sv_experiments/sv_system//embeddings/voxc1/voxc1_mfcc30_best/si_embeds.npy")

sv_keys = pickle.load(open("/host/projects/sv_experiments/sv_system//embeddings/voxc1/voxc1_mfcc30_best/sv_keys.pkl", "rb"))
sv_embeds = np.load("/host/projects/sv_experiments/sv_system//embeddings/voxc1/voxc1_mfcc30_best/sv_embeds.npy")

In [260]:
si_key_df = key2df(si_keys)
sv_key_df = key2df(sv_keys)

In [261]:
trial = pd.read_pickle("/dataset/SV_sets/voxceleb12/dataframes/voxc12_test_trial.pkl")
voxc1_meta = pd.read_pickle("/dataset/SV_sets/voxceleb12/dataframes/voxc1_meta.pkl")

# trial['enroll_gender'] = trial.enroll_spk.apply(lambda x: voxc1_meta.loc[x].Gender)
# trial['test_gender'] = trial.test_spk.apply(lambda x: voxc1_meta.loc[x].Gender)
# trial.enrolment_id = trial.enroll_idx.apply(lambda x: sv_keys.index(x))
# trial.test_id = trial.test_idx.apply(lambda x: sv_keys.index(x))

In [227]:
# trial.to_pickle("/dataset/SV_sets/voxceleb12/dataframes/voxc12_sv_test_dataframe.pkl")

In [228]:
si_spks = si_key_df.spk.unique().tolist()
si_spks_ar = np.array(si_spks)
sv_spks = sv_key_df.spk.unique().tolist()
sv_spks_ar = np.array(sv_spks)

In [229]:
m_si_embeds = si_embeds[np.nonzero(voxc1_train_df.gender == 'm')]
f_si_embeds = si_embeds[np.nonzero(voxc1_train_df.gender == 'f')]

In [230]:
m_sv_embeds = sv_embeds[np.nonzero(voxc1_test_df.gender == 'm')]
f_sv_embeds = sv_embeds[np.nonzero(voxc1_test_df.gender == 'f')]

### Gender-independet and dependent models

In [231]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

si_embeds_mean = si_embeds.mean(0)
global_clf = LDA(solver='svd', n_components=200)
global_clf.fit(si_embeds - si_embeds_mean, si_key_df.label)

LinearDiscriminantAnalysis(n_components=200, priors=None, shrinkage=None,
              solver='svd', store_covariance=False, tol=0.0001)

In [232]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

m_si_embeds_mean = m_si_embeds.mean(0)
male_clf = LDA(solver='svd', n_components=200)
male_clf.fit(m_si_embeds-m_si_embeds_mean, si_key_df.iloc[np.nonzero(voxc1_train_df.gender == 'm')].label)

LinearDiscriminantAnalysis(n_components=200, priors=None, shrinkage=None,
              solver='svd', store_covariance=False, tol=0.0001)

In [233]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

f_si_embeds_mean = f_si_embeds.mean(0)
female_clf = LDA(solver='svd', n_components=200)
female_clf.fit(f_si_embeds-f_si_embeds_mean, si_key_df.iloc[np.nonzero(voxc1_train_df.gender == 'f')].label)



LinearDiscriminantAnalysis(n_components=200, priors=None, shrinkage=None,
              solver='svd', store_covariance=False, tol=0.0001)

### Gender Calssification

In [234]:
# embed_mean for each speaker
si_spk_embed_means = []
for spk in si_spks:
    spk_embed =  si_embeds[si_key_df[si_key_df.spk == spk].index]
    spk_embed_mean  = spk_embed.mean(0)
    si_spk_embed_means.append(spk_embed_mean)

si_spk_embed_means = np.array(si_spk_embed_means)

In [235]:
sv_si_sim = cos_sim_batch(np.expand_dims(sv_embeds, 1), np.expand_dims(si_spk_embed_means, 0), axis=2)
sorted_close_spks = np.argsort(sv_si_sim, axis=1)

In [236]:
gender_output = []
k_near = 10
# for i in range(len(sorted_close_spks)):
#     gender_label = voxc1_meta.loc[si_spks_ar[sorted_close_spks[i,-k_near:]]].Gender
#     n_male = np.count_nonzero(gender_label == 'm')
#     n_female = np.count_nonzero(gender_label == 'f')
# #     print(f"male:{n_male}, female:{n_female}")
#     if n_male > k_near-1:
#         gender_output.append('f')
#     elif n_female > k_near-1:
#         gender_output.append('m')
#     else:
#         gender_output.append('g')
        
gender_output = []
for i in range(len(sorted_close_spks)):
    gender_label = voxc1_meta.loc[si_spks_ar[sorted_close_spks[i,-k_near:]]].Gender
    n_male = np.count_nonzero(gender_label == 'm')
    n_female = np.count_nonzero(gender_label == 'f')
#     print(f"male:{n_male}, female:{n_female}")
    if n_male > n_female:
        gender_output.append('m')        
    else:
        gender_output.append('f')
        
gender_clf_output = np.array(gender_output)        

### Split trials

In [237]:
label_m_trial = trial[(trial.enroll_gender == 'm') & (trial.test_gender == 'm')]
label_f_trial = trial[(trial.enroll_gender == 'f') & (trial.test_gender == 'f')]

In [238]:
trial['clf_enroll_gender'] = trial.enroll_idx.apply(lambda x: gender_clf_output[sv_keys.index(x)])
trial['clf_test_gender'] = trial.test_idx.apply(lambda x: gender_clf_output[sv_keys.index(x)])

clf_m_trial = trial[(trial.enroll_gender == 'm') & (trial.clf_test_gender == 'm')]
clf_f_trial = trial[(trial.enroll_gender == 'f') & (trial.clf_test_gender == 'f')]
clf_g_trial = trial.drop(index=clf_m_trial.index.tolist() + clf_f_trial.index.tolist())

### Apply LDA model

In [243]:
# gender-independent 

global_lda_embeds = global_clf.transform(sv_embeds - si_embeds_mean)

from eval.score_utils import compute_eer
scores = cos_sim_batch(global_lda_embeds[trial.enrolment_id], 
                       global_lda_embeds[trial.test_id], axis=1)

m_scores = scores[label_m_trial.index]
f_scores = scores[label_f_trial.index]

print("global(gender-independent)")
print("=================================")
compute_eer(scores[trial.label == 1], scores[trial.label == 0])
print("<gender-dependent thresholed>")
m_eer, _ = compute_eer(m_scores[label_m_trial.label == 1], m_scores[label_m_trial.label == 0])
f_eer, _ = compute_eer(f_scores[label_f_trial.label == 1], f_scores[label_f_trial.label == 0])

global(gender-independent)
eer:14.180% at threshold 0.1765
<gender-dependent thresholed>
eer:11.552% at threshold 0.1485
eer:9.009% at threshold 0.2290


In [242]:
# gender-dependent based on label

m_output_idx = np.nonzero(voxc1_test_df.gender == 'm')
m_embeds = male_clf.transform(sv_embeds[m_output_idx] - m_si_embeds_mean)

f_output_idx = np.nonzero(voxc1_test_df.gender == 'f')
f_embeds = female_clf.transform(sv_embeds[f_output_idx] - f_si_embeds_mean)

label_lda_embeds = global_lda_embeds.copy()
label_lda_embeds[m_output_idx] = m_embeds
label_lda_embeds[f_output_idx] = f_embeds

from eval.score_utils import compute_eer
scores = cos_sim_batch(label_lda_embeds[trial.enrolment_id], 
                       label_lda_embeds[trial.test_id], axis=1)

m_scores = scores[label_m_trial.index]
f_scores = scores[label_f_trial.index]

print("optimal gender-dependent")
print("=================================")
print("<unified thresholed>")
compute_eer(scores[trial.label == 1], scores[trial.label == 0])
print("<gender-dependent thresholed>")
m_eer, _ = compute_eer(m_scores[label_m_trial.label == 1], m_scores[label_m_trial.label == 0])
f_eer, _ = compute_eer(f_scores[label_f_trial.label == 1], f_scores[label_f_trial.label == 0])

optimal gender-dependent
<unified thresholed>
eer:10.970% at threshold 0.1041
<gender-dependent thresholed>
eer:11.040% at threshold 0.0988
eer:9.099% at threshold 0.1187


In [244]:
# gender-dependent based on classification

m_output_idx = np.nonzero(gender_clf_output == 'm')
m_embeds = male_clf.transform(sv_embeds[m_output_idx] - m_si_embeds_mean)

f_output_idx = np.nonzero(gender_clf_output == 'f')
f_embeds = male_clf.transform(sv_embeds[f_output_idx] - f_si_embeds_mean)

# g_output_idx = np.nonzero(gender_clf_output == 'g')
# g_embeds = global_clf.transform(sv_embeds[g_output_idx] - si_embeds_mean)

clf_lda_embeds = global_lda_embeds.copy() # for 'g' label we set global value
# clf_lda_embeds = np.zeros((len(sv_embeds), 200))
clf_lda_embeds[m_output_idx] = m_embeds 
clf_lda_embeds[f_output_idx] = f_embeds
# clf_lda_embeds[g_output_idx] = g_embeds

from eval.score_utils import compute_eer
scores = cos_sim_batch(clf_lda_embeds[trial.enrolment_id], 
                       clf_lda_embeds[trial.test_id], axis=1)

m_scores = scores[clf_m_trial.index]
f_scores = scores[clf_f_trial.index]
g_scores = scores[clf_g_trial.index]

print("pratical gender-dependent")
print("=================================")
print("<unified thresholed>")
compute_eer(scores[trial.label == 1], scores[trial.label == 0])
print("<gender-dependent thresholed>")
m_eer, _ = compute_eer(m_scores[clf_m_trial.label == 1], m_scores[clf_m_trial.label == 0])
f_eer, _ = compute_eer(f_scores[clf_f_trial.label == 1], f_scores[clf_f_trial.label == 0])
# g_eer, _ = compute_eer(g_scores[clf_g_trial.label == 1], g_scores[clf_g_trial.label == 0])

pratical gender-dependent
<unified thresholed>
eer:13.490% at threshold 0.1184
<gender-dependent thresholed>
eer:11.049% at threshold 0.0984
eer:11.580% at threshold 0.1596


# Baseline - average enrolled embeddings

In [None]:
enrolled_spks_df = sv_key_df.groupby('spk').apply(lambda x: x.sample(n=20))

In [None]:
enrolled_uttrs = dict()
enrolled_aveg_embeds = []
for spk in sv_spks:
    enrolled_uttrs[spk] = enrolled_spks_df.loc[spk].key.tolist()
    enrolled_aveg_embeds.append(lda_embeds[sv_key_df[sv_key_df.key.isin(enrolled_uttrs[spk])].index].mean(0))
enrolled_aveg_embeds = np.array(enrolled_aveg_embeds)

In [None]:
all_enrolled_uttrs = np.array([v for v in enrolled_uttrs.values()]).flatten()

In [None]:
filtered_trial = trial[~trial.test_idx.isin(all_enrolled_uttrs)]

In [None]:
filtered_trial['enrolment_id'] = filtered_trial.enroll_spk.apply(lambda x: sv_spks.index(x))

In [None]:
from eval.score_utils import compute_eer
scores = cos_sim_batch(enrolled_aveg_embeds[filtered_trial.enrolment_id], 
                       lda_embeds[filtered_trial.test_id], axis=1)
compute_eer(scores[filtered_trial.label == 1], scores[filtered_trial.label == 0])

In [None]:
from eval.score_utils import compute_eer
scores = cos_sim_batch(lda_embeds[trial.enrolment_id], lda_embeds[trial.test_id], axis=1)
compute_eer(scores[trial.label == 1], scores[trial.label == 0])