score_calibration
------------------------

화자의 추가 정보를 이용해서 score를 조정하여 특정 화자에 대해 성능이 좋아지게하려고 한다.

Private_LDA
------------------------


 


## Environment

In [3]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import pandas as pd
import pickle
import numpy as np
import sys
import os

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [4]:
sys.path.append('/host/projects/sv_experiments/sv_system/')
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="3"

In [5]:
def key2df(keys, delimeter="-"):
    key_df = pd.DataFrame(keys, columns=['key'])
    key_df['spk'] = key_df.key.apply(lambda x: x.split(delimeter)[0])
    key_df['label'] = key_df.groupby('spk').ngroup()
    key_df['origin'] = key_df.spk.apply(lambda x: 'voxc2' if x.startswith('id') else 'voxc1')
    
    return key_df

## Local LDA  implementaion

In [6]:
import torch
from torch.nn.functional import cosine_similarity

def euc_dist(a, b):
    return ((a * a) + (b * b) - b * a).sum(1)

def cos_dist(a, b):
    a = a / np.linalg.norm(a, axis=1, keepdims=True)
    b = b / np.linalg.norm(b, axis=1, keepdims=True)
    return -(a*b).sum(1)

def cos_sim_batch(a, b, axis=2):
    a = torch.from_numpy(a)
    b = torch.from_numpy(b)
    
    return cosine_similarity(a, b, dim=axis).numpy()

In [7]:
## centering embeds

def train_lda_model(key_df, embeds, lda_dim=200):
    spks = sorted(key_df.spk.unique().tolist())
    overall_mean = embeds.mean(0)
#     embeds_centered = embeds - overall_mean
    embeds_centered = embeds

    mean_vectors = []
    for spk in spks:
        mean_vectors.append(embeds_centered[key_df[key_df.spk == spk].index].mean(0))

    # within-class scatter matrix Sw
    S_W = np.zeros([512, 512])

    for spk, cl_mean in zip(spks, mean_vectors):
        cl_embeds = embeds_centered[key_df[key_df.spk == spk].index]
        x_m = cl_embeds - cl_mean.reshape(1, 512)
        x_m = np.expand_dims(x_m, -1)
        cl_sc = np.matmul(x_m, x_m.transpose([0,2,1])).sum(0)
        S_W += cl_sc

    # between-class scatter matrix SB
    S_B = np.zeros([512, 512])

    for spk, cl_mean in zip(spks, mean_vectors):
        ovm_m = np.expand_dims(cl_mean, -1) - np.expand_dims(overall_mean, -1)
        ns = len(key_df[key_df.spk == spk])
        S_B += ns*ovm_m.dot(ovm_m.T)
    
    eig_vals, eig_vecs = np.linalg.eig(np.linalg.inv(S_W).dot(S_B))
    # Make a list of (eigenvalue, eigenvector) tuples
    eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:,i]) for i in range(len(eig_vals))]
    # Sort the (eigenvalue, eigenvector) tuples from high to low
    eig_pairs = sorted(eig_pairs, key=lambda k: k[0], reverse=True)
    W = np.hstack([eig_pairs[i][1].reshape(512,1) for i in range(lda_dim)])

    return W

In [8]:
# # local LDA
# # modify the bewenn-class scatter matrix SB --> PSB

# imposter_means = dict()

# for spk, cl_mean in zip(si_spks, mean_vectors):
#     cl_embeds = si_embeds_centered[si_key_df[si_key_df.spk == spk].index]
#     out_embeds = si_embeds_centered[si_key_df[si_key_df.spk != spk].index]
#     out_distances = cos_dist(cl_mean.reshape(1,512), out_embeds)
#     max_dist_in_samples = cos_dist(cl_mean.reshape(1,512), cl_embeds).max()
#     out_embeds = out_embeds[out_distances < max_dist_in_samples]
#     if len(out_embeds) > 1000:
#         imposter_mean = out_embeds.mean(0)
#     else:
#         imposter_mean = si_overall_mean
#     imposter_means[spk] = imposter_mean

In [9]:
# PS_B = np.zeros([512, 512])
# for spk, cl_mean in zip(si_spks, mean_vectors):    
#     ovm_m = cl_mean.reshape(512, 1) - imposter_means[spk].reshape(512, 1)
#     ns = len(si_key_df[si_key_df.spk == spk])
#     PS_B += ns*ovm_m.dot(ovm_m.T)

## Embeddings

In [10]:
def search_embeds(key_df, spks, embeds):
    key_df_for_spks = key_df[key_df.spk.isin(spks)]
    embeds_for_spks = embeds[key_df_for_spks.index]
    
    return key_df_for_spks.reset_index(), embeds_for_spks

In [57]:
si_keys = pickle.load(open("/host/projects/sv_experiments/sv_system/embeddings/voxc1/voxc1_mfcc30_best/si_keys.pkl", "rb"))
si_embeds = np.load("/host/projects/sv_experiments/sv_system/embeddings/voxc1/voxc1_mfcc30_best/si_embeds.npy")

sv_keys = pickle.load(open("/host/projects/sv_experiments/sv_system/embeddings/voxc1/voxc1_mfcc30_best/sv_keys.pkl", "rb"))
sv_embeds = np.load("/host/projects/sv_experiments/sv_system/embeddings/voxc1//voxc1_mfcc30_best/sv_embeds.npy")
 
trial = pd.read_pickle("/dataset/SV_sets/voxceleb12/dataframes/voxc12_test_trial.pkl")

In [58]:
si_key_df = key2df(si_keys)
sv_key_df = key2df(sv_keys)

si_spks = sorted(si_key_df.spk.unique().tolist())
sv_spks = sorted(sv_key_df.spk.unique().tolist())

si_spks_array = np.array(si_spks)

In [14]:
# embed_mean for each speaker
si_spk_embed_means = []
for spk in si_spks:
    spk_embed =  si_embeds[si_key_df[si_key_df.spk == spk].index]
    spk_embed_mean  = spk_embed.mean(0)
    si_spk_embed_means.append(spk_embed_mean)

si_spk_embed_means = np.array(si_spk_embed_means)

In [15]:
# embed_mean for each speaker
sv_spk_embed_means = []
for spk in sv_spks:
    spk_embed =  sv_embeds[sv_key_df[sv_key_df.spk == spk].index]
    spk_embed_mean  = spk_embed.mean(0)
    sv_spk_embed_means.append(spk_embed_mean)

sv_spk_embed_means = np.array(sv_spk_embed_means)

In [16]:
sv_si_sim = cos_sim_batch(np.expand_dims(sv_spk_embed_means, 1), np.expand_dims(si_spk_embed_means, 0), axis=2)
sorted_close_spks = np.argsort(sv_si_sim, axis=1)

In [18]:
sv_inner_lowest_sim = []

for spk in sv_spks:
    spk_embed = sv_embeds[sv_key_df[sv_key_df.spk == spk].index]
    spk_embed_mean = spk_embed.mean(0, keepdims=True)
    inner_sim = cos_sim_batch(spk_embed_mean, 
                              spk_embed, axis=1)
    sv_inner_lowest_sim.append(inner_sim.min())

In [19]:
sv_close_si_spks = dict()

for i, spk in enumerate(sv_spks):
    close_spks = si_spks_array[sv_si_sim[i] > sv_inner_lowest_sim[i]]
    sv_close_si_spks[spk] = close_spks.tolist()    

In [94]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

clf = LDA(solver='svd', n_components=200)
clf.fit(si_embeds, si_key_df.label)

LinearDiscriminantAnalysis(n_components=200, priors=None, shrinkage=None,
              solver='svd', store_covariance=False, tol=0.0001)

In [95]:
overall_LDA = clf

In [96]:
lda_embeds = overall_LDA.transform(sv_embeds)

In [97]:
from eval.score_utils import compute_eer
scores = cos_sim_batch(lda_embeds[trial.enrolment_id], lda_embeds[trial.test_id], axis=1)
compute_eer(scores[trial.label == 1], scores[trial.label == 0])

eer:8.282% at threshold 0.1341


(0.08282078472958643, 0.1340561357874964)

### Baseline - average enrolled embeddings

In [124]:
enrolled_spks_df = sv_key_df.groupby('spk').apply(lambda x: x.sample(n=20))

In [125]:
enrolled_uttrs = dict()
enrolled_aveg_embeds = []
for spk in sv_spks:
    enrolled_uttrs[spk] = enrolled_spks_df.loc[spk].key.tolist()
    enrolled_aveg_embeds.append(lda_embeds[sv_key_df[sv_key_df.key.isin(enrolled_uttrs[spk])].index].mean(0))
enrolled_aveg_embeds = np.array(enrolled_aveg_embeds)

In [126]:
all_enrolled_uttrs = np.array([v for v in enrolled_uttrs.values()]).flatten()

In [127]:
filtered_trial = trial[~trial.test_idx.isin(all_enrolled_uttrs)]

In [128]:
filtered_trial['enrolment_id'] = filtered_trial.enroll_spk.apply(lambda x: sv_spks.index(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [129]:
from eval.score_utils import compute_eer
scores = cos_sim_batch(enrolled_aveg_embeds[filtered_trial.enrolment_id], 
                       lda_embeds[filtered_trial.test_id], axis=1)
compute_eer(scores[filtered_trial.label == 1], scores[filtered_trial.label == 0])

eer:1.618% at threshold 0.2766


(0.016183283619978422, 0.2765653736591283)

In [130]:
from eval.score_utils import compute_eer
scores = cos_sim_batch(lda_embeds[trial.enrolment_id], lda_embeds[trial.test_id], axis=1)
compute_eer(scores[trial.label == 1], scores[trial.label == 0])

eer:8.282% at threshold 0.1341


(0.08282078472958643, 0.1340561357874964)