In [78]:
%load_ext autoreload
%autoreload 2
%pylab inline
import pandas as pd
import os
import pickle

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Populating the interactive namespace from numpy and matplotlib


In [79]:
from sv_system.data.dataloader import init_default_loader
from sv_system.utils.parser import get_sv_parser
from sv_system.train.si_train import set_seed
from tqdm import tqdm_notebook as tqdm
import torch.nn.functional as F

In [80]:
options = get_sv_parser().parse_args(args=[])
options.n_dct_filters = 40
options.n_mels = 40
options.timeshift_ms = 100
options.data_folder = "/home/muncok/DL/dataset/SV_sets"
options.window_size= 0.025
options.window_stride= 0.010
options.cache_size = 32768

In [81]:
from sv_system.utils import secToFrames, secToSample
options.input_format = "fbank"
options.input_clip = True
options.input_length = secToSample(3) # if input_clip is false, it doesn't affect anything
options.input_frames = secToFrames(3)
options.splice_frames = secToFrames(0.1)  # it is for extractor

In [82]:
options.data_folder = "/home/muncok/DL/dataset/SV_sets/reddots_r2015q4_v1/wav/"

### SI_Model

In [119]:
from sv_system.model.TDNN import TdnnModel
from sv_system.model.SpeechModel import SpeechResModel
import pickle
# model = TdnnModel(vars(options), 70, embed_mode=True)
# model.load("models/reddots/si_train/si_reddots_tdnnfc_3s_0.1s_mean.pt")
model = SpeechResModel("res15", 1260)
model.load("models/voxc/si_train/full_train/si_voxc_res15_0.1s_full_fbank.pt")
model.cuda()
# lda = pickle.load(open("models/lda/si_reddots_0.2s_random_2_lda.pkl", "rb"))
lda = None

loaded from models/voxc/si_train/full_train/si_voxc_res15_0.1s_full_fbank.pt


###  Reddots Trial

In [133]:
import torch
from torch.autograd import Variable
from tqdm import tqdm_notebook
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

def lda_on_tensor(tensor, lda):
    return torch.from_numpy(lda.transform(tensor.numpy()).astype(np.float32))

def embeds_utterance(opt, val_dataloader, model, lda=None):
    val_iter = iter(val_dataloader)
    model.eval()
    splice_dim = opt.input_frames
    embeddings = []
    labels = []
    for batch in (val_iter):
        x, y = batch
        time_dim = x.size(2)
        split_points = range(0, time_dim-splice_dim+1, splice_dim//2)
        model_outputs = []
        for point in split_points:
            x_in = Variable(x.narrow(2, point, splice_dim))
            if opt.cuda:
                x_in = x_in.cuda()
            model_outputs.append(model.embed(x_in).cpu().data)
#         print(len(model_outputs))
        model_output = torch.stack(model_outputs, dim=0)
        model_output = model_output.mean(0)
        if lda is not None:
            model_output = torch.from_numpy(lda.transform(model_output.numpy()).astype(np.float32))
        embeddings.append(model_output)
        labels.append(y.numpy())
    embeddings = torch.cat(embeddings)
    labels = np.hstack(labels)
    return embeddings, labels

In [104]:
def embeds_one(opt, val_dataloader, model, lda=None):
    val_iter = iter(val_dataloader)
    model.eval()
    model
    embeddings = []
    labels = []
    for batch in (val_iter):
        x, y = batch
        if opt.cuda:
            x = x.cuda()
        model_output = model(x)
        embeddings.append(model_output.cpu().detach())
        if lda is not None:
            model_output = torch.from_numpy(lda.transform(model_output.numpy()).astype(np.float32))
        labels.append(y.numpy())
    embeddings = torch.cat(embeddings)
    labels = np.hstack(labels)
    return embeddings, labels

In [105]:
ndx = pd.read_pickle("dataset/dataframes/Reddots/m_part1/m_part1_ndx.pkl")
trn = pd.read_pickle('dataset/dataframes/Reddots/m_part1/m_part1_trn.pkl')
# m_part4_ndx = pd.read_pickle("dataset/dataframes/Reddots/m_part4_tp/m_part4_tp_ndx.pkl")
# m_part4_trn = pd.read_pickle("dataset/dataframes/Reddots/m_part4_tp/m_part4_tp_trn.pkl")

In [106]:
err_type = {0:'TC', 1:'TW', 2:'IC', 3:'IW'}

###  Enrollment (trn)

In [125]:
from sv_system.data.dataset import SpeechDataset
dataset = SpeechDataset.read_df(vars(options), trn, "test")

In [126]:
options.batch_size = 64
options.num_workers = 16

In [135]:
val_dataloader = init_default_loader(vars(options), dataset, shuffle=False)
trn_embeddings, _ = embeds_utterance(options, val_dataloader, model, lda)

embed_dim = trn_embeddings.shape[-1]
trn_id = list(trn.id.unique())
spk_model_dict = {}
for id in trn_id:
    index = np.nonzero(trn.id == id)
    spk_model_dict[id] = trn_embeddings[index].mean(0, True)

In [128]:
spk_model_dict['m0001_31'].shape

torch.Size([1, 1260])

###  SV Scoring (ndx)

In [129]:
ndx_file =pd.DataFrame(ndx.file.unique().tolist(), columns=['file'])

In [130]:
dataset = SpeechDataset.read_df(vars(options), ndx_file, "test")

In [136]:
val_dataloader = init_default_loader(vars(options), dataset, shuffle=False) 
ndx_embeddings, _ = embeds_utterance(options, val_dataloader, model, lda)
# torch.save(ndx_embeddings, 'trials/reddots/m_part1/{}_embeds.pkl'.format(model_name))
# ndx_embeddings = torch.load('trials/reddots/m_part1/{}_embeds.pkl')

In [137]:
all_trials = ndx.id.unique().tolist()
scores = dict()
for t in range(4):
    scores[t] = []    

for trial_id in tqdm(all_trials):
    trial_ndx = ndx[(ndx.id == trial_id)].reset_index()
    trial_embed_idx = np.nonzero(ndx_file.file.isin(trial_ndx.file))
    trial_embeds = ndx_embeddings[trial_embed_idx]
    sim = F.cosine_similarity(trial_embeds, spk_model_dict[trial_id])
    for t in range(4):
        trial_type_idx = trial_ndx[trial_ndx.trial_type == t].index.tolist()
        scores[t].append(sim[trial_type_idx])
        
# [TC, TW, IC, IW]
for t in range(4):
    scores[t] = torch.cat(scores[t])

HBox(children=(IntProgress(value=0, max=320), HTML(value='')))




In [139]:
for t in range(4):
     print("{} mean:{:.2f}, std:{:.3f}".format(err_type[t], scores[t].mean(), scores[t].std()))

TC mean:0.92, std:0.081
TW mean:0.92, std:0.081
IC mean:0.92, std:0.078
IW mean:0.92, std:0.079


TD EERs

In [116]:
from sklearn.metrics import roc_curve


for t in range(1,4):
    score_vector = np.concatenate((scores[0], scores[t]))
    label_vector = np.concatenate((np.ones(len(scores[0])), 
                               np.zeros(len(scores[t]))))
    fpr, tpr, thres = roc_curve(label_vector, score_vector, pos_label=1)
    eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]
    thres = thres[np.nanargmin(np.abs(fpr - (1 - tpr)))]
    print("[{}] eer: {:.2f}, thres: {:.5f}".format(err_type[t], eer, thres))

[TW] eer: 0.41, thres: 0.88745
[IC] eer: 0.11, thres: 0.79204
[IW] eer: 0.10, thres: 0.78526


TI EERs

In [117]:
from sklearn.metrics import roc_curve

score_vector = np.concatenate((scores[0], scores[1],
                              scores[2], scores[3]))
label_vector = np.concatenate((np.ones(len(scores[0]) + len(scores[1])), 
                           np.zeros(len(scores[2]) + len(scores[3]))))
fpr, tpr, thres = roc_curve(label_vector, score_vector, pos_label=1)
eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]
print("[TI] eer: {:.2f}".format(eer))

[TI] eer: 0.14
