# known_speaker_information

SI network에는 학습에 사용되었던 speaker들의 정보가 녹아 있다.

특히 마지막 linear layer는 각 speaker에 대한 agent들로 이루어져 있다.

과연 새로운 unknown speaker가 나타났을 때 기존 speaker들과 어떤 연관이 있을 수 있을까?

------

기존 speaker와의 연관성은 그냥 logit을 구하면 나온다. 

logit의 값들이 학습에 사용된 스피커와의 연관성이라고 생각하면 된다.  

*ResNet34_v4_softmax_best.pth.tar* 기준으로 embedding layer의 출력으로 구한 EER은 5.35%이고,

마지막 logit으로 구했을 때는 5.72%로 나빠진다.

-----

sv_set에 있는 speaker들은 logit을 구했을 때 max 값을 가지는 speaker들의 수가 많다.

![img](https://trello-attachments.s3.amazonaws.com/5bac5ccd23298141e2fcbedc/5bab882710e5cc1022669618/dca15a2b2de87efd907a20546bf606b3/image.png)

반대로 si_set의 validation set에 대해서는 적다.

![img2](https://trello-attachments.s3.amazonaws.com/5bac5ccd23298141e2fcbedc/5bab882710e5cc1022669618/0c6f83113718cbef5c29d58dbeed1a60/image.png)

그리고 sv embedding들을 last_weight(마지막 fc layer)와 cosine similarity를 구했을 때
대단히 낮은 값들이 나왔다.

즉, closed-set(학습에 사용된) 스피커들은 sv 스피커에 대한 hard negative sample이 될 수 없다.

![image.png](https://trello-attachments.s3.amazonaws.com/5bac5ccd23298141e2fcbedc/5bab882710e5cc1022669618/995fade5b9af23ec605e0d609e236714/image.png)

## Environment

In [1]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import pandas as pd
import pickle
import numpy as np
import sys
import os

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [2]:
sys.path.append('../')
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"

### Configuration

In [200]:
from sv_system.utils.parser import set_train_config
import easydict

# datasets
# voxc1_fbank_xvector
# gcommand_fbank_xvector

args = easydict.EasyDict(dict(dataset="voxc1_fbank_xvector",
                              input_frames=800, splice_frames=[300, 800], stride_frames=1, input_format='fbank',
                              cuda=True,
                              lrs=[0.1, 0.01], lr_schedule=[20], seed=1337,
                              no_eer=False,
                              batch_size=128,
                              arch="ResNet34_v4", loss="softmax",
                              n_epochs=50
                             ))
config = set_train_config(args)

### Dataset and Dataloader

In [201]:
from sv_system.data.data_utils import find_dataset, find_trial

_, datasets = find_dataset(config, basedir='../')
trial = find_trial(config, basedir='../')

In [202]:
from sv_system.data.dataloader import init_loaders

dataloaders = init_loaders(config, datasets)

### Model Load

In [203]:
from sv_system.model.model_utils import find_model
config['n_labels'] = 1260
model = find_model(config)

In [204]:
if not config['no_cuda']:
    model = model.cuda()

In [205]:
import torch
saved_model = torch.load("../best_models/voxc1/ResNet34_v4_softmax/ResNet34_v4_softmax_best.pth.tar")

In [164]:
saved_model["best_metric"]

0.04956873602385262

In [206]:
model.load_state_dict(saved_model['state_dict'])

In [188]:
last_w = model.output.weight.cpu().detach().t()

### Test Embeddings

In [175]:
from sv_system.train.train_utils import set_seed, find_optimizer
from torch.optim.lr_scheduler import ReduceLROnPlateau

criterion, optimizer = find_optimizer(config, model)

In [176]:
if not config['no_eer']:
    train_loader, val_loader, test_loader, sv_loader = dataloaders
else:
    train_loader, val_loader, test_loader = dataloaders

In [210]:
import torch.nn.functional as F

def embeds_utterance(config, val_dataloader, model):
    """Embed every batch of a loader and return the stacked results.

    The model is switched to evaluation mode and all forward passes run
    with gradient tracking disabled.

    Args:
        config: mapping read for the ``'no_cuda'`` flag; when the flag is
            falsy each input batch is moved to the GPU before embedding.
        val_dataloader: iterable yielding ``(X, y)`` pairs, where ``y``
            supports ``.numpy()``.
        model: object exposing ``eval()`` and ``embed(X)``.

    Returns:
        A ``(embeddings, labels)`` pair: the per-batch embeddings
        concatenated into one CPU tensor, and the per-batch labels
        stacked into one NumPy array.
    """
    model.eval()
    batch_embeds, batch_labels = [], []

    with torch.no_grad():
        for X, y in val_dataloader:
            inputs = X if config['no_cuda'] else X.cuda()
            # Keep results on the CPU so GPU memory is not held per batch.
            batch_embeds.append(model.embed(inputs).cpu().detach())
            batch_labels.append(y.numpy())
        return torch.cat(batch_embeds), np.hstack(batch_labels)

In [57]:
val_embeddings, labels = embeds_utterance(config, val_loader, model)
test_embeddings, labels = embeds_utterance(config, sv_loader, model)

In [24]:
# Pass the path directly so torch.save opens and closes the file itself;
# the previous open(..., "wb") handle was never closed.
# NOTE(review): this writes under ResNet34_v3_angular although the checkpoint
# loaded above is ResNet34_v4_softmax — confirm the intended directory.
torch.save(test_embeddings, "../best_models/voxc1/ResNet34_v3_angular/test_embeddings.pkl")

In [148]:
# last_w is already the transposed output-layer weight (see its definition:
# model.output.weight...t()), so it can be right-multiplied as-is to get one
# score per (utterance, training speaker) pair. Transposing it again would
# make the inner dimensions disagree.
# NOTE(review): assumes model.output is an nn.Linear, i.e. weight is
# (n_speakers, embed_dim) — confirm.
lastw_sim = torch.matmul(test_embeddings, last_w).numpy()
# Cosine-similarity variant:
# lastw_sim = F.cosine_similarity(test_embeddings.unsqueeze(1), last_w.t().unsqueeze(0), dim=2).numpy()

In [149]:
sim_df = pd.DataFrame(lastw_sim)

In [150]:
sv_df = pd.read_pickle("../dataset/dataframes/voxc1/sv_voxc_dataframe.pkl")

In [151]:
sim_df['spk'] = sv_df['spk'].tolist()

In [214]:
sim_df.groupby("spk").idxmax(axis=1).groupby('spk').unique()
# sim_df.groupby("spk").idxmax(axis=1).groupby("spk").apply(lambda x: x.value_counts())

spk
Eartha_Kitt          [568, 746, 805, 1088, 276, 594, 167, 637, 303,...
Ed_Westwick          [321, 990, 442, 893, 1141, 313, 1129, 314, 284...
Eddie_Griffin        [904, 1089, 1060, 910, 936, 913, 863, 356, 965...
Eddie_Izzard         [767, 936, 951, 866, 139, 346, 3, 1089, 1140, ...
Eddie_Kaye_Thomas    [1027, 202, 989, 760, 345, 406, 923, 710, 520,...
Eddie_McClintock     [413, 453, 523, 1132, 1141, 745, 528, 955, 893...
Edgar_Wright         [1056, 139, 425, 635, 1111, 1142, 961, 953, 71...
Eduardo_Noriega      [293, 482, 1171, 119, 894, 1125, 938, 462, 300...
Edward_Asner         [1081, 95, 107, 711, 304, 115, 1014, 520, 479,...
Efren_Ramirez        [893, 590, 255, 523, 635, 719, 494, 287, 1034,...
Elaine_Cassidy       [36, 846, 645, 1209, 575, 292, 568, 832, 851, ...
Elaine_Hendrix       [146, 316, 393, 975, 779, 31, 1111, 998, 333, ...
Eleanor_Tomlinson    [562, 815, 779, 645, 846, 586, 110, 542, 339, ...
Eli_Roth             [1189, 52, 595, 678, 1056, 635, 980, 495, 363,...
El

### Compute EER

In [211]:
from sklearn.metrics import roc_curve
def sv_test(config, sv_loader, model, trial):
    """Score the verification trials and compute the equal error rate.

    Each trial is scored with the cosine similarity between the embedding
    of its enrolment utterance and the embedding of its test utterance.
    Only the listed trial pairs are scored; the full utterance-by-utterance
    similarity matrix is never materialised.

    Args:
        config: configuration mapping, forwarded to ``embeds_utterance``.
        sv_loader: loader over the verification utterances.
        model: the network, optionally wrapped in ``torch.nn.DataParallel``.
        trial: table exposing ``enrolment_id``, ``test_id`` and ``label``
            columns. The ids are used as row indices into the embedding
            matrix (assumes the loader yields utterances in that same
            order — TODO confirm against the trial file).

    Returns:
        ``(eer, label_vector, score_vector)``: the false-positive rate at
        the ROC point where it is closest to the false-negative rate, the
        trial labels as a NumPy array, and the per-trial cosine scores.
    """
    # DataParallel hides the wrapped module's own methods (e.g. embed).
    if isinstance(model, torch.nn.DataParallel):
        model_t = model.module
    else:
        model_t = model

    embeddings, _ = embeds_utterance(config, sv_loader, model_t)

    # Explicit index tensors instead of indexing with a list of lists,
    # which depends on legacy "sequence as tuple" indexing behaviour.
    enrol_idx = torch.as_tensor(trial.enrolment_id.tolist(), dtype=torch.long)
    test_idx = torch.as_tensor(trial.test_id.tolist(), dtype=torch.long)

    score_vector = F.cosine_similarity(
        embeddings[enrol_idx], embeddings[test_idx], dim=1).numpy()
    label_vector = np.array(trial.label)

    fpr, tpr, _ = roc_curve(label_vector, score_vector, pos_label=1)
    # EER: operating point where FPR and FNR (= 1 - TPR) are closest.
    eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]

    return eer, label_vector, score_vector

In [212]:
from sv_system.data.data_utils import find_trial
trial = find_trial(config, basedir="../")

In [213]:
sv_test(config, sv_loader, model, trial)

(0.053561920988180176,
 array([1, 0, 1, ..., 0, 1, 0]),
 array([0.8261922, 0.5786556, 0.8692004, ..., 0.5851219, 0.8255653,
        0.6215607], dtype=float32))