### Environment

In [1]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import pandas as pd
import pickle
import numpy as np
import sys
import os

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [2]:
sys.path.append('../')
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="2"

### Configuration

In [162]:
from sv_system.utils.parser import set_train_config
import easydict

args = easydict.EasyDict(dict(dataset="voxc1_fbank_xvector", 
                              data_folder="/dataset/SV_sets/voxceleb12/feats/fbank64_vad/",
                              input_frames=400, splice_frames=[200, 400], stride_frames=1, 
                              input_format='fbank', input_dim=65, random_clip=True,
                              n_epochs=200, lrs=[0.1, 0.01], lr_schedule=[20], seed=1337,
                              no_eer=False, batch_size=64,
                              gpu_no=[0], cuda=True, num_workers=4,
                              arch="tdnn_conv", loss="softmax",
                             ))
config = set_train_config(args)

### Dataset

In [163]:
dev_df = pd.read_csv("/dataset/SV_sets/voxceleb1/dataframes/voxc1_dev.csv")
# dev_df['label'] = dev_df.groupby("spk").ngroup()
dev_train_df = dev_df[dev_df.set == 'train']
dev_val_df = dev_df[dev_df.set == 'val']
eval_df = pd.read_csv("/dataset/SV_sets/voxceleb1/dataframes/voxc1_eval.csv") 

In [164]:
from sv_system.data.feat_dataset import FeatDataset

dev_train_dataset = FeatDataset.read_df(config, dev_train_df, 'train')
dev_val_dataset = FeatDataset.read_df(config, dev_val_df, 'test')
eval_dataset = FeatDataset.read_df(config, eval_df, 'test')

### Dataloader

In [157]:
from sv_system.data.dataloader import init_default_loader 
dev_train_dataloader = init_default_loader(config, dev_train_dataset, shuffle=True, var_len=False) 
dev_val_dataloader = init_default_loader(config, dev_val_dataset, shuffle=False, var_len=True) 
eval_dataloader = init_default_loader(config, eval_dataset, shuffle=False, var_len=True) 

### Model

In [220]:
from tdnn_models import tdnn_xvector_res_v0
import torch

model = tdnn_xvector_res_v0(config, 512, n_labels=len(dev_train_df.label.unique()))
saved_model = torch.load("trained_models/voxc1_dev_tdnn_res_v0.pt")
model.load_state_dict(saved_model)

if not config['no_cuda']:
    model = model.cuda()

In [None]:
#### import torch.nn as nn
from torch.optim import SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau, MultiStepLR
from sklearn.metrics import roc_curve
import torch.nn.functional as F
from tensorboardX import SummaryWriter

optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0001)
from fine_tune_utils import class_weight
# criterion = nn.CrossEntropyLoss(weight=class_weight(config, dev_train_df))
criterion = nn.CrossEntropyLoss()
plateau_scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=5)
# step_scheduler = MultiStepLR(optimizer, [30], 0.1)

writer = SummaryWriter("logs/tdnn_res_v0")
model_path = "voxc1_dev_tdnn_res_v0.pt"
start_epoch = 0

for epoch_idx in range(start_epoch, config['n_epochs']):
    print("-"*30)
    curr_lr = optimizer.state_dict()['param_groups'][0]['lr']
    print("curr_lr: {}".format(curr_lr))
    
# =============== train code #===============
    model.train()
    loss_sum = 0
    n_corrects = 0
    total = 0
    for batch_idx, (X, y) in enumerate(dev_train_dataloader):
        if not config['no_cuda']:
            X = X.cuda()
            y = y.cuda()

        optimizer.zero_grad()
        logit = model(X)
        loss = criterion(logit, y)
        loss.backward()
        optimizer.step()
                        
        loss_sum += loss.item()
        n_corrects += logit.max(1)[1].eq(y).sum().item()
        total += y.size(0)
#         if (batch_idx+1) % 1000 == 0:
#             print("Batch {}/{}\t Loss {:.6f}" \
#                   .format(batch_idx+1, len(si_loader), loss_sum / total))
    train_loss = loss_sum / total
    train_acc = n_corrects / total
    plateau_scheduler.step(train_loss)
    
    print("epoch #{}, train loss: {:.4f}, train acc: {:.4f}".format(epoch_idx, train_loss, train_acc))
    writer.add_scalar("train/loss", train_loss, epoch_idx+1)
    writer.add_scalar("train/acc", train_acc, epoch_idx+1)

#=============== dev_val code #===============
    model.eval()
    loss_sum = 0
    n_corrects = 0
    total = 0
    for batch_idx, (X, y) in enumerate(dev_val_dataloader):
        if not config['no_cuda']:
            X = X.cuda()
            y = y.cuda()

        logit = model(X)
        loss = criterion(logit, y)
        loss_sum += loss.item()
        n_corrects += logit.max(1)[1].eq(y).sum().item()
        total += y.size(0)
    val_loss = loss_sum / total
    val_acc = n_corrects / total
    
    print("epoch #{}, val loss: {:.4f}, val acc: {:.4f}".format(epoch_idx, val_loss, val_acc))
    writer.add_scalar("val/loss", val_loss, epoch_idx+1)
    writer.add_scalar("val/acc", val_acc, epoch_idx+1)
    torch.save(model.state_dict(), model_path)

In [221]:
# See, Fr features
fr_feats = []
model.eval()
total = 0
for batch_idx, (X, y) in enumerate(dev_val_dataloader):
    if not config['no_cuda']:
        X = X.cuda()
        y = y.cuda()

    fr_feat = model.fr_feat(X).cpu().detach()
    fr_feats.append(fr_feat)
    break
    

In [222]:
from torch.nn.functional import cosine_similarity as cosine
cosine(fr_feats[0][0,:,0:1], fr_feats[0][0,:,1:], dim=0)

tensor([ 0.9731,  0.9194,  0.8460,  0.7652,  0.6475,  0.5775,  0.5488,  0.5367,
         0.5829,  0.5654,  0.4195,  0.2916,  0.2122,  0.1197,  0.0676,  0.0322,
        -0.0061, -0.0410, -0.0531, -0.0531, -0.0590, -0.0614, -0.0328, -0.0150,
        -0.0346, -0.0619, -0.0721, -0.0648, -0.0658, -0.0635, -0.0456, -0.0339,
        -0.0354, -0.0256, -0.0266, -0.0331, -0.0207,  0.0241,  0.0486,  0.0436,
         0.0121, -0.0017, -0.0279, -0.0312, -0.0458, -0.0290, -0.0084,  0.0147,
         0.0084,  0.0343,  0.0646,  0.0471, -0.0223, -0.0798,  0.0046,  0.2005,
         0.3268,  0.3368,  0.2863,  0.2927,  0.3569,  0.3999,  0.4128,  0.3561,
         0.1756, -0.0672, -0.1856, -0.2118, -0.2193, -0.2026, -0.1163,  0.0897,
         0.2522,  0.3210,  0.3290,  0.2939,  0.2097,  0.0964,  0.0285,  0.0154,
         0.0167,  0.0372,  0.0647,  0.0906,  0.1198,  0.1151,  0.0794,  0.0454,
         0.0297,  0.0120, -0.0102, -0.0400, -0.0716, -0.1135, -0.1244, -0.1197,
        -0.1167, -0.0907,  0.0283,  0.23

In [223]:
fr_feats[0].shape

torch.Size([64, 1500, 396])

### Untrained Classes

In [123]:
model.eval()
loss_sum = 0
n_corrects = 0
total = 0
predicts = []
labels = []
for batch_idx, (seq_len, X, y) in enumerate(dev_val_dataloader):
    if not config['no_cuda']:
        X = X.cuda()
        y = y.cuda()

    batch_logits = []
    for i in range(len(X)):
        x_in = X[i:i+1,:,:seq_len[i]]
        out_ = model(x_in)
        batch_logits.append(out_)
    logit = torch.cat(batch_logits, dim=0)
    loss = criterion(logit, y)
    loss_sum += loss.item()
    predicts.append(logit.max(1)[1])
    labels.append(y)
    n_corrects += logit.max(1)[1].eq(y).sum().item()
    total += y.size(0)
val_loss = loss_sum / total
val_acc = n_corrects / total

print("epoch #{}, val loss: {:.4f}, val acc: {:.4f}".format(epoch_idx, val_loss, val_acc))
predicts = torch.cat(predicts).cpu()
labels = torch.cat(labels).cpu()

epoch #12, val loss: 0.0084, val acc: 0.7485


In [129]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

conf_mat = confusion_matrix(labels, predicts)
is_correct = predicts.eq(labels).numpy()
predicts = predicts.numpy()
labels = labels.numpy()

pred_result = pd.DataFrame([predicts, labels, is_correct]).T
pred_result.columns = ['pred', 'label', 'is_correct']
pred_class_acc = pred_result.groupby('label').is_correct.mean()

sns.distplot(pred_class_acc, norm_hist=True)

In [114]:
# the labels where acc is 0
dev_train_df[dev_train_df.label.isin(pred_class_acc.sort_values()[:6].index)].label.value_counts()

777    67
202    44
451    41
854    37
791    32
128    30
Name: label, dtype: int64

In [97]:
untrained_labels = pred_class_acc.sort_values()[:200].index

In [101]:
untrained_dev_train_df = dev_train_df[dev_train_df.label.isin(untrained_labels)]
untrained_dev_train_dataset = FeatDataset.read_df(config, untrained_dev_train_df, 'train')
untrained_dev_train_dataloader = init_default_loader(config, untrained_dev_train_dataset, shuffle=True)

### LDA on embedding

In [20]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

global_mean = si_embeds.mean(0)
clf = LDA(solver='svd', n_components=200)
clf.fit(si_embeds - global_mean, si_key_df.label)

si_embeds = clf.transform(si_embeds - global_mean).astype(np.float32)

sv_embeds = clf.transform(sv_embeds - global_mean).astype(np.float32)

si_dataset, embed_dim, n_labels = embedToDataset(si_embeds.reshape(-1,200), si_key_df)
sv_dataset, _, _ = embedToDataset(sv_embeds, sv_key_df)