In [1]:
%load_ext autoreload
%autoreload 2
%pylab inline
import pandas as pd
import os

Populating the interactive namespace from numpy and matplotlib


In [2]:
from dnn.data.dataloader import init_embed_loaders
from dnn.parser import get_sv_parser
from dnn.train.model import init_seed
from dnn.train.model import init_speechnet
from tqdm import tqdm_notebook as tqdm
import torch.nn.functional as F

In [31]:
from dnn.data.dataset import embedDataset
from dnn.data.dataloader import _collate_fn
def init_reddots_loaders(opt, dataframe):
    
    '''
    Initialize the datasets, samplers and dataloaders for embeding
    '''
    val_dataset = embedDataset.read_reddots_token_df(opt, dataframe)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
            batch_size=64,
            num_workers=8,
            collate_fn=_collate_fn)
    return val_dataloader

In [5]:
import resource
rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))

In [6]:
def secToSample(sec):
    return int(16000 * sec)

In [7]:
options = get_sv_parser().parse_args(args=[])
options.n_dct_filters = 40
options.n_mels = 40
options.timeshift_ms = 100
options.data_folder = "/home/muncok/DL/dataset/SV_sets"
options.window_size= 0.025
options.window_stride= 0.010
options.cache_size = 32768

In [19]:
options.input_format = "mfcc"
options.input_clip = True
options.input_length = secToSample(3) # if input_clip is false, it doesn't affect anything
options.splice_frames = secToSample(0.1)//160+1

In [9]:
options.data_folder = "/home/muncok/DL/dataset/SV_sets/reddots_r2015q4_v1/wav/"

###  SI_Model

In [15]:
options.input = "models/voxc/si_train/full_train/si_voxc_res15_0.1s_full_fbank.pt"
options.model = "res15"
model = init_speechnet(options)
lda = None# None means not using lda

models/commands/word_aligned.pt is loaded


In [10]:
options.input = "models/reddots/simplecnn/si_reddots_0.2s_random_2.pt"
options.model = "SimpleCNN"
model =  init_speechnet(options)
lda = None

models/reddots/simplecnn/si_reddots_0.2s_random_2.pt is loaded


###  Reddots Trial

In [24]:
import torch
from torch.autograd import Variable
from tqdm import tqdm_notebook
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

def lda_on_tensor(tensor, lda):
    return torch.from_numpy(lda.transform(tensor.numpy()).astype(np.float32))

def embeds(opt, val_dataloader, model, lda=None):
    val_iter = iter(val_dataloader)
    model.eval()
    splice_dim = opt.splice_frames
    embeddings = []
    labels = []
    for batch in (val_iter):
        x, y = batch
        time_dim = x.size(2)
        split_points = range(0, time_dim-splice_dim+1, splice_dim)
        model_outputs = []
        for point in split_points:
            x_in = Variable(x.narrow(2, point, splice_dim))
            if opt.cuda:
                x_in = x_in.cuda()
            model_outputs.append(model.embed(x_in).cpu().data)
        model_output = torch.stack(model_outputs, dim=0)
        model_output = model_output.mean(0)
        if lda is not None:
            model_output = torch.from_numpy(lda.transform(model_output.numpy()).astype(np.float32))
        embeddings.append(model_output)
        labels.append(y.numpy())
    embeddings = torch.cat(embeddings)
    labels = np.hstack(labels)
    return embeddings, labels

In [15]:
m_part1_ndx = pd.read_pickle("trials/reddots/m_part1/m_part1_ndx.pkl")
m_part1_trn = pd.read_pickle('trials/reddots/m_part1/m_part1_trn.pkl')
# m_part4_ndx = pd.read_pickle("manifests/reddots/trial/m_part4/m_part4_ndx.pkl")
# m_part4_trn = pd.read_pickle("manifests/reddots/trial/m_part4/m_part4_trn.pkl")

In [16]:
trn = m_part1_trn
ndx = m_part1_ndx

In [17]:
err_type = {0:'TC', 1:'TW', 2:'IC', 3:'IW'}

###  Enrollment (trn)

In [26]:
val_dataloader = init_embed_loaders(options, trn)
options.chunks = 1
trn_embeddings, _ = embeds(options, val_dataloader, model, lda)

embed_dim = trn_embeddings.shape[-1]
trn_id = list(trn.id.unique())
spk_model_dict = {}
for id in trn_id:
    index = np.nonzero(trn.id == id)
    spk_model_dict[id] = trn_embeddings[index].mean(0, True)

###  SV Scoring (ndx)

In [27]:
options.data_folder = "/home/muncok/DL/dataset/SV_sets/reddots_r2015q4_v1/token/"

In [28]:
m_part1_files = pd.read_pickle('trials/reddots/m_part1/m_part1_files.pkl')

In [29]:
m_part1_tokens = pd.read_pickle("trials/reddots/m_part1/m_part1_token_files.pkl")

In [32]:
val_dataloader = init_reddots_loaders(options, m_part1_tokens)
options.chunks = 1
ndx_embeddings, _ = embeds(options, val_dataloader, model, lda)
# torch.save(ndx_embeddings, 'trials/reddots/m_part1/{}_embeds.pkl'.format(model_name))
# ndx_embeddings = torch.load('trials/reddots/m_part1/{}_embeds.pkl')

In [34]:
ndx['utterance'] = ndx.file.apply(lambda x: x[6:-4])

In [35]:
lost_uttrs = set(ndx.utterance.unique().tolist()) - set(m_part1_tokens.utterance.unique().tolist())
lost_ndx = ndx[ndx.utterance.isin(lost_uttrs)]
ndx = ndx.drop(index=lost_ndx.index)

In [38]:
spk_model_dict[trial_id].shape

torch.Size([1, 320])

In [39]:
all_trials = ndx.id.unique().tolist()
import torch.nn.functional as F
scores = dict()
for t in range(4):
    scores[t] = []

for trial_id in tqdm(all_trials):
    trial_ndx = ndx[(ndx.id == trial_id)].reset_index()
    trial_embed_idx = np.nonzero(m_part1_tokens.utterance.isin(trial_ndx.utterance))
    trial_embeds = ndx_embeddings[trial_embed_idx]
    sim = F.cosine_similarity(trial_embeds, spk_model_dict[trial_id], dim=1).mean(-1).numpy()
    split_count = m_part1_tokens[m_part1_tokens.utterance.isin(trial_ndx.utterance)].utterance.value_counts(sort=False).tolist()
    sp_indx = [int(sum(split_count[:i])) for i in range(1,len(split_count))]
    sim_avg = np.array(list(map(mean, np.split(sim, sp_indx))))
    for t in range(4):
        trial_type_idx = trial_ndx[trial_ndx.trial_type == t].index.tolist()
        scores[t].append(sim_avg[trial_type_idx])
        

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)





In [42]:
# [TC, TW, IC, IW]
for t in range(4):
    scores[t] = np.concatenate(scores[t])

ValueError: zero-dimensional arrays cannot be concatenated

In [43]:
for t in range(4):
     print("{} mean:{:.4f}, std:{:.3f}".format(err_type[t], scores[t].mean(), scores[t].std()))

TC mean:nan, std:nan
TW mean:nan, std:nan
IC mean:nan, std:nan
IW mean:nan, std:nan


TD EERs

In [182]:
from sklearn.metrics import roc_curve


for t in range(1,4):
    score_vector = np.concatenate((scores[0], scores[t]))
    label_vector = np.concatenate((np.ones(len(scores[0])), 
                               np.zeros(len(scores[t]))))
    fpr, tpr, thres = roc_curve(label_vector, score_vector, pos_label=1)
    eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]
    thres = thres[np.nanargmin(np.abs(fpr - (1 - tpr)))]
    print("[{}] eer: {:.2f}, thres: {:.5f}".format(err_type[t], eer, thres))

[TW] eer: 0.50, thres: 0.69256
[IC] eer: 0.36, thres: 0.62052
[IW] eer: 0.35, thres: 0.62023


TI EERs

In [183]:
from sklearn.metrics import roc_curve

score_vector = np.concatenate((scores[0], scores[1],
                              scores[2], scores[3]))
label_vector = np.concatenate((np.ones(len(scores[0]) + len(scores[1])), 
                           np.zeros(len(scores[2]) + len(scores[3]))))
fpr, tpr, thres = roc_curve(label_vector, score_vector, pos_label=1)
eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]
print("[TI] eer: {:.2f}".format(eer))

[TI] eer: 0.35
