In [22]:
%load_ext autoreload
%autoreload 2
%pylab inline
import pandas as pd
import os
import sys
import pickle

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Populating the interactive namespace from numpy and matplotlib


In [23]:
from dnn.si_train import set_seed
from dnn.train.model import init_speechnet
from dnn.data.dataloader import init_sv_loaders

In [24]:
def secToSample(sec, sample_rate=16000):
    """Convert a duration in seconds to a whole number of audio samples.

    Args:
        sec: Duration in seconds (int or float).
        sample_rate: Samples per second. Defaults to 16000, the value that
            was previously hard-coded, so existing one-argument calls are
            unaffected.

    Returns:
        int: ``sample_rate * sec`` truncated toward zero.
    """
    return int(sample_rate * sec)

In [25]:
from sklearn.metrics import roc_curve
def compute_eer(pos_scores, neg_scores):
    """Print and return the equal error rate (EER) for target/imposter scores.

    The ROC curve is computed with positives labelled 1 and negatives 0. The
    operating point is the ROC index where |FPR - FNR| is smallest, and the
    reported EER is the smaller of FPR and FNR at that index.

    Args:
        pos_scores: 1-D array-like of scores for target (same-speaker) trials.
        neg_scores: 1-D array-like of scores for imposter trials.

    Returns:
        tuple: ``(eer, thres)`` where ``eer`` is a fraction (not a percentage)
        and ``thres`` is the score threshold at the chosen operating point.
    """
    score_vector = np.concatenate([pos_scores, neg_scores])
    label_vector = np.concatenate([np.ones(len(pos_scores)), np.zeros(len(neg_scores))])
    fpr, tpr, thres = roc_curve(label_vector, score_vector, pos_label=1)
    fnr = 1 - tpr
    # Locate the crossing point once instead of recomputing it per use.
    eer_idx = np.nanargmin(np.abs(fpr - fnr))
    eer = np.min([fpr[eer_idx], fnr[eer_idx]])
    eer_thres = thres[eer_idx]
    print("eer:{:.2f}, thres:{:.4f}".format(eer*100, eer_thres))
    return eer, eer_thres

In [26]:
from dnn.parser import get_sv_parser
# Start from the parser's defaults (empty argument list, so nothing is read
# from the command line) and override the fields this notebook needs.
options = get_sv_parser().parse_args(args=[])
options.n_dct_filters = 40
options.n_mels = 40
options.timeshift_ms = 100
# NOTE: data_folder is reassigned in a later cell before any loader is built.
options.data_folder = "/home/muncok/DL/dataset/SV_sets"
# Presumably in seconds (25 ms window, 10 ms stride) — TODO confirm against the parser.
options.window_size= 0.025
options.window_stride= 0.010
options.cache_size = 32768

In [27]:
options.input_format = 'fbank'
options.input_clip = False
# options.input_length = secToSample(1)
# secToSample(0.1) is 1600 samples, so this evaluates to 1600 // 160 + 1 = 11 frames.
# The 160 looks like the hop length in samples (0.010 s stride at 16 kHz) — TODO confirm.
options.splice_frames = secToSample(0.1)//160+1

In [28]:
import torch.nn.functional as F
from dnn.data.dataloader import init_embed_loaders
# Overrides the data_folder set in the base configuration cell; the loaders
# built below read from this speech_commands directory.
options.data_folder= "/home/muncok/DL/dataset/SV_sets/speech_commands/"

### SI_Model

In [29]:
# Load the res15 checkpoint. The later cells in this section reassign both
# `model` and `lda`, so whichever model cell ran last is the one evaluated.
options.input = "models/voxc/si_train/full_train/si_voxc_res15_0.1s_full_fbank.pt"
options.model = "res15"
model = init_speechnet(options)
lda = None  # None means LDA is not applied to the embeddings

models/voxc/si_train/full_train/si_voxc_res15_0.1s_full_fbank.pt is loaded


In [30]:
# Load the SimpleCNN checkpoint together with its fitted LDA transform.
options.input = "models/commands/equal_num_102spk_dot1.pt"
options.model = "SimpleCNN"
model = init_speechnet(options)
# Use a context manager so the pickle file handle is closed after loading.
# NOTE: pickle.load can execute arbitrary code; only load LDA files you trust.
with open("models/lda/equal_num_102spk_dot1_splice_lda.pkl", "rb") as lda_file:
    lda = pickle.load(lda_file)
# lda = None

models/commands/equal_num_102spk_dot1.pt is loaded


In [31]:
options.input = "models/reddots/simplecnn/si_reddots_0.2s_random_2.pt"
# Set the architecture explicitly. In the recorded run order options.model was
# already "SimpleCNN" (left over from the preceding cell); stating it here
# removes that hidden-state dependency when cells are run selectively.
options.model = "SimpleCNN"
model = init_speechnet(options)
# lda = pickle.load(open("models/lda/word_aligned_lda.pkl", "rb"))
lda = None  # no LDA for this model

models/reddots/simplecnn/si_reddots_0.2s_random_2.pt is loaded


## Command Trial

In [32]:
import torch
from torch.autograd import Variable

def embeds(opt, val_dataloader, model, lda=None):
    """Compute one averaged embedding per utterance in ``val_dataloader``.

    Each batch ``x`` is cut along dimension 2 into consecutive,
    non-overlapping windows of ``opt.splice_frames`` frames. Every window is
    passed through ``model.embed`` and the window embeddings are averaged.
    Trailing frames that do not fill a whole window are dropped.

    Args:
        opt: Options object; ``opt.splice_frames`` (window length) and
            ``opt.cuda`` (move inputs to the GPU) are read.
        val_dataloader: Iterable yielding ``(x, y)`` batches, where ``x`` is a
            tensor with at least three dimensions and ``y`` is a tensor of labels.
        model: Object exposing ``eval()`` and ``embed(x)``.
        lda: Optional fitted transform exposing ``transform(ndarray)``; when
            given it is applied to the averaged embeddings of every batch.

    Returns:
        tuple: ``(embeddings, labels)`` where ``embeddings`` is a torch tensor
        with all batches concatenated along dimension 0 and ``labels`` is a
        numpy array produced by ``np.hstack`` over the batch labels.

    Raises:
        ValueError: If a batch has fewer than ``opt.splice_frames`` frames
            along dimension 2, so no window can be extracted.
    """
    model.eval()
    splice_dim = opt.splice_frames
    embeddings = []
    labels = []
    if lda is not None:
        print("LDA is loaded")
    for x, y in val_dataloader:
        time_dim = x.size(2)
        if time_dim < splice_dim:
            # Without this guard torch.stack would fail on an empty list with
            # an error that does not mention the real cause.
            raise ValueError(
                "input has {} frames along dim 2 but splice_frames is {}".format(
                    time_dim, splice_dim))
        split_points = range(0, time_dim-splice_dim+1, splice_dim)
        model_outputs = []
        for point in split_points:
            x_in = Variable(x.narrow(2, point, splice_dim))
            if opt.cuda:
                x_in = x_in.cuda()
            # Move to CPU and detach so results can be stacked and passed to numpy.
            model_outputs.append(model.embed(x_in).cpu().data)
        # Shape (n_windows, batch, ...) -> mean over the window axis.
        model_output = torch.stack(model_outputs, dim=0).mean(0)
        if lda is not None:
            model_output = torch.from_numpy(lda.transform(model_output.numpy()).astype(np.float32))
        embeddings.append(model_output)
        labels.append(y.numpy())
    embeddings = torch.cat(embeddings)
    labels = np.hstack(labels)
    return embeddings, labels

In [33]:
# Trial tables for the speech-commands evaluation: enrollment utterances,
# positive test utterances and negative test utterances. The cells below read
# their `spk`, `sent` and `file` columns.
# NOTE: read_pickle unpickles arbitrary objects; only load trusted files.
enroll_df = pd.read_pickle('trials/commands/final/equal_num_102spk_enroll.pkl')
pos_test_df = pd.read_pickle('trials/commands/final/equal_num_102spk_pos_test.pkl')
neg_test_df = pd.read_pickle('trials/commands/final/equal_num_102spk_neg_test.pkl')

In [34]:
# Distinct values of the `sent` column in the enrollment table, in order of
# first appearance; displayed as the cell output.
common_words = enroll_df.sent.unique().tolist()
common_words

['down',
 'eight',
 'five',
 'four',
 'go',
 'left',
 'nine',
 'no',
 'off',
 'on',
 'one',
 'right',
 'seven',
 'six',
 'stop',
 'three',
 'two',
 'up',
 'yes',
 'zero']

In [35]:
# Words that occur in the positive test trials but not in enrollment.
# Sorted so the order is reproducible: plain set iteration order for strings
# changes between kernel restarts because of hash randomization.
aux_words = sorted(set(pos_test_df.sent.unique().tolist()) - set(common_words))
aux_words

['marvin',
 'happy',
 'sheila',
 'cat',
 'wow',
 'bird',
 'tree',
 'bed',
 'house',
 'dog']

In [36]:
# Distinct speaker ids in the enrollment table; one speaker model is built per id.
enroll_spks = enroll_df.spk.unique().tolist()

In [37]:
# Rewrite each table's `file` column as "<sent>/<file>".
# CAUTION: this mutates the frames in place and is not idempotent; running the
# cell twice prefixes the word directory twice. Reload the tables before re-running.
for trial_df in (enroll_df, pos_test_df, neg_test_df):
    trial_df['file'] = trial_df.apply(lambda row: os.path.join(row.sent, row.file), axis=1)

### Embedding

In [39]:
# Embed every enrollment utterance with the currently loaded `model` (and
# `lda`, if set). The labels returned by embeds() are discarded.
val_dataloader = init_embed_loaders(options, enroll_df)
enroll_embeddings, _ = embeds(options, val_dataloader, model, lda)

In [40]:
# Embed every positive test utterance; `val_dataloader` is rebound to the new
# loader and the labels returned by embeds() are discarded.
val_dataloader = init_embed_loaders(options, pos_test_df)
pos_embedding, _ = embeds(options, val_dataloader, model, lda)

In [41]:
# Embed every negative (imposter) test utterance; `val_dataloader` is rebound
# to the new loader and the labels returned by embeds() are discarded.
val_dataloader = init_embed_loaders(options, neg_test_df)
imposter_embeddings, _ = embeds(options, val_dataloader, model, lda)

### Concat

In [42]:
# NOTE(review): neither value is referenced by the visible cells that follow
# (the commented-out enrollment selection hard-codes 40) — confirm whether
# they are still needed.
n_average = 1
n_enroll_uttrs = 40

In [43]:
# Build one speaker model per enrolled speaker: the mean of that speaker's
# enrollment embeddings, kept with a leading dimension of size 1.
spk_models = dict()
for spk in enroll_spks:
    enroll_idx = enroll_df[enroll_df.spk == spk].index
#     enroll_idx = enroll_df[(enroll_df.spk == spk)].iloc[[i for i in range(0, 40, 1)]].index
    # get_indexer_for maps index labels to row positions, which are then used
    # to select rows of the embedding tensor. This assumes enroll_embeddings is
    # in the same row order as enroll_df (loader not shuffled) — TODO confirm.
    spk_models[spk] = enroll_embeddings[enroll_df.index.get_indexer_for(enroll_idx),].mean(0, keepdim=True)

In [44]:
# Score positive and negative trials against every speaker model and report
# the pooled EER. range(1,2) means only n_words_in_uttr == 1 is evaluated;
# widen the range to evaluate the concatenated-word trial files as well.
for n_words_in_uttr in range(1,2):
    if n_words_in_uttr > 1:
        pos_concat_df = pd.read_pickle('trials/commands/final/equal_num_102spk_concat{}_pos.pkl'.format(n_words_in_uttr))
        neg_concat_df = pd.read_pickle('trials/commands/final/equal_num_102spk_concat{}_neg.pkl'.format(n_words_in_uttr))
    else:
        # Single-word case: the trial list is the plain positive/negative test tables.
        pos_concat_df = pd.read_pickle('trials/commands/final/equal_num_102spk_pos_test.pkl')
        neg_concat_df = pd.read_pickle('trials/commands/final/equal_num_102spk_neg_test.pkl')

    # Positive trials grouped by enrolled speaker.
    pos_concat_dict = dict()

    for spk in enroll_spks:
        pos_concat_dict[spk] = pos_concat_df[pos_concat_df.spk == spk]

    # For each speaker, a list with one tensor of word embeddings per positive trial.
    pos_embs_per_uttr = dict()

    for spk in enroll_spks:
        pos_concat_spk_df = pos_concat_dict[spk]
  
        pos_embs_per_uttr[spk] = []

        for uniqID, _ in pos_concat_spk_df.iterrows():
            # The trial's index label is split on '_'; presumably it is the
            # '_'-joined ids of the single-word utterances forming the trial
            # — TODO confirm against the trial files.
            idxs = uniqID.split('_')
            pos_test_idx = pos_test_df[pos_test_df.index.isin(idxs)].index
            # Labels -> row positions -> rows of the positive embedding tensor.
            pos_embs_per_uttr[spk].append(pos_embedding[pos_test_df.index.get_indexer_for(pos_test_idx),])

    # Preload negative (imposter) embeddings: one tensor of word embeddings per trial.

    neg_embs_per_uttr = []

    for uniqID, _ in neg_concat_df.iterrows():
        idxs = uniqID.split('_')
        neg_test_idx = neg_test_df[neg_test_df.index.isin(idxs)].index
        neg_embs_per_uttr.append(imposter_embeddings[neg_test_df.index.get_indexer_for(neg_test_idx),])

    # One embedding per negative trial: the mean over its word embeddings.
    neg_embs_per_uttr_emb = torch.stack([torch.mean(x, dim=0) for x in neg_embs_per_uttr])

    # Un-averaged stack of the per-trial embeddings; not used in the visible part of this cell.
    neg_embs_per_uttr_scr = torch.stack(neg_embs_per_uttr)

    # Average the embeddings within each trial, then score against the speaker model.

    pos_scores = dict()
    neg_scores = dict()

    for spk in enroll_spks:

        pos_embs_per_uttr_emb = torch.stack([torch.mean(x, dim=0) for x in pos_embs_per_uttr[spk]])

        # Cosine similarity between each trial embedding and the speaker model.
        pos_scores[spk] = F.cosine_similarity(pos_embs_per_uttr_emb, spk_models[spk])

        # The same set of negative trials is scored against every speaker model.
        neg_scores[spk] = F.cosine_similarity(neg_embs_per_uttr_emb, spk_models[spk])

    print("n_words_in_uttr: {}".format(n_words_in_uttr))
    # Pool the scores of all speakers before computing a single EER.
    uni_pos_scores = np.concatenate([v for v in pos_scores.values()])
    uni_neg_scores = np.concatenate([v for v in neg_scores.values()])
    compute_eer(uni_pos_scores, uni_neg_scores)

n_words_in_uttr: 1
eer:21.10, thres:0.9954
