### Environment

In [1]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import pandas as pd
import pickle
import numpy as np
import sys
import os

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [2]:
sys.path.append('../')
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="3"

### Configuration

In [3]:
from sv_system.utils.parser import set_train_config
import easydict

args = easydict.EasyDict(dict(dataset="voxc1_fbank_xvector", 
                              data_folder="/dataset/SV_sets/voxceleb12/feats/fbank64_vad/",
                              input_frames=800, splice_frames=[200, 800], stride_frames=1, 
                              input_format='fbank', input_dim=65, random_clip=True,
                              n_epochs=200, lrs=[0.1, 0.01], lr_schedule=[20], seed=1337,
                              no_eer=False, batch_size=128,
                              gpu_no=[0], cuda=True, num_workers=4,
                              arch="tdnn_conv", loss="softmax",
                             ))
config = set_train_config(args)

### Dataset

In [4]:
# voxceleb2
# dev_df = pd.read_csv("/dataset/SV_sets/voxceleb2/dataframes/voxc2_dev.csv")
# dev_train_df = dev_df[dev_df.set == 'train']
# dev_val_df = dev_df[dev_df.set == 'val']
# eval_df = pd.read_csv("/dataset/SV_sets/voxceleb2/dataframes/voxc2_eval.csv")

In [5]:
# voxceleb1
voxc1_si = pd.read_csv("/dataset/SV_sets/voxceleb1/dataframes/voxc1_si.csv")
dev_train_df = voxc1_si[voxc1_si.set == 'train']
dev_train_df = dev_train_df.sample(frac=1.0)
dev_val_df = voxc1_si[voxc1_si.set == 'val']
eval_df = pd.read_csv("/dataset/SV_sets/voxceleb1/dataframes/voxc1_sv.csv")

In [6]:
from sv_system.data.feat_dataset import FeatDataset

dev_train_dataset = FeatDataset.read_df(config, dev_train_df, 'train')
dev_val_dataset = FeatDataset.read_df(config, dev_val_df, 'test')
eval_dataset = FeatDataset.read_df(config, eval_df, 'test')

### Dataloader

In [7]:
from sv_system.data.dataloader import init_default_loader 
dev_train_dataloader = init_default_loader(config, dev_train_dataset, shuffle=True, var_len=False) 
dev_val_dataloader = init_default_loader(config, dev_val_dataset, shuffle=False, var_len=False) 
eval_dataloader = init_default_loader(config, eval_dataset, shuffle=False, var_len=True) 

### Model

In [8]:
from tdnn_models import tdnn_xvector
import torch

model = tdnn_xvector(config, 512, n_labels=len(dev_train_df.label.unique()))
saved_model = torch.load("trained_models/voxc1_tdnn_gauss.pt")
model.load_state_dict(saved_model)

if not config['no_cuda']:
    model = model.cuda()

In [None]:
import torch.nn as nn
from torch.optim import SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau, MultiStepLR
from sklearn.metrics import roc_curve
import torch.nn.functional as F
from tensorboardX import SummaryWriter
from tqdm import tqdm
from fine_tune_utils import class_weight

optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)

# criterion = nn.CrossEntropyLoss(weight=class_weight(config, dev_train_df))
criterion = nn.CrossEntropyLoss()
mse_criterion = nn.MSELoss()

# plateau_scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.2, patience=5)
step_scheduler = MultiStepLR(optimizer, [15, 20, 25, 30], 0.2)

writer = SummaryWriter("logs/voxc1_tdnn_gauss")
model_path = "trained_models/voxc1_tdnn_gauss.pt"

for epoch_idx in range(0, config['n_epochs']):
    step_scheduler.step()
    print("-"*30)
    curr_lr = optimizer.state_dict()['param_groups'][0]['lr']
    print("curr_lr: {}".format(curr_lr))
    
# =============== train code #===============
    model.train()
    loss_sum = 0
    n_corrects = 0
    total = 0
    for batch_idx, (X, y) in tqdm(enumerate(dev_train_dataloader), ascii=None, total=len(dev_train_dataloader)):
        if not config['no_cuda']:
            X = X.cuda()
            y = y.cuda()

        optimizer.zero_grad()
        embed, logit = model.embed_logit(X)
#         import ipdb
#         ipdb.set_trace()
        if epoch_idx < 3:
            loss = criterion(logit, y)
        else:
            ce_loss = criterion(logit, y)
            clients = model.class_clients(y)
            gauss_loss = torch.norm(clients-embed, p=2)
            loss = ce_loss + 0.05*gauss_loss
        loss.backward()
        optimizer.step()
                        
        loss_sum += loss.item()
        n_corrects += logit.max(1)[1].eq(y).sum().item()
        total += y.size(0)
        if (batch_idx+1) % 100 == 0:
            print("Batch {}/{}\t Loss {:.6f}" \
                  .format(batch_idx+1, len(dev_train_dataloader), loss_sum / total))
    train_loss = loss_sum / total
    train_acc = n_corrects / total
#     plateau_scheduler.step(train_loss)
    
    print("epoch #{}, train loss: {:.4f}, train acc: {:.4f}".format(epoch_idx, train_loss, train_acc))
    writer.add_scalar("train/loss", train_loss, epoch_idx+1)
    writer.add_scalar("train/acc", train_acc, epoch_idx+1)

#=============== dev_val code #===============
    model.eval()
    loss_sum = 0
    n_corrects = 0
    total = 0
    for batch_idx, (X, y) in enumerate(dev_val_dataloader):
        if not config['no_cuda']:
            X = X.cuda()
            y = y.cuda()

        logit = model(X)
        loss = criterion(logit, y)
        loss_sum += loss.item()
        n_corrects += logit.max(1)[1].eq(y).sum().item()
        total += y.size(0)
    val_loss = loss_sum / total
    val_acc = n_corrects / total
    
    print("epoch #{}, val loss: {:.4f}, val acc: {:.4f}".format(epoch_idx, val_loss, val_acc))
    writer.add_scalar("val/loss", val_loss, epoch_idx+1)
    writer.add_scalar("val/acc", val_acc, epoch_idx+1)
    
#=============== model save #===============
    torch.save(model.state_dict(), model_path)

### Evaludation (SV)

In [77]:
model.eval()
loss_sum = 0
n_corrects = 0
total = 0
predicts = []
labels = []
for batch_idx, (seq_len, X, y) in enumerate(dev_full_val_dataloader):
    if not config['no_cuda']:
        X = X.cuda()
        y = y.cuda()

    batch_logits = []
    for i in range(len(X)):
        x_in = X[i:i+1,:,:seq_len[i]]
        out_ = model(x_in)
        batch_logits.append(out_)
    logit = torch.cat(batch_logits, dim=0)
    loss = criterion(logit, y)
    loss_sum += loss.item()
    predicts.append(logit.max(1)[1])
    labels.append(y)
    n_corrects += logit.max(1)[1].eq(y).sum().item()
    total += y.size(0)
val_loss = loss_sum / total
val_acc = n_corrects / total

print("val loss: {:.4f}, val acc: {:.4f}".format(val_loss, val_acc))
predicts = torch.cat(predicts).cpu()
labels = torch.cat(labels).cpu()

val loss: 0.0169, val acc: 0.7974


In [11]:
dev_dataset = FeatDataset.read_df(config, voxc1_si, 'test')
dev_full_dataloader = init_default_loader(config, dev_dataset, shuffle=False, var_len=False) 

In [21]:
model.eval()
batch_embeds = []
labels = []
for batch_idx, (X, y) in enumerate(dev_full_dataloader):
    if not config['no_cuda']:
        X = X.cuda()
        embed, _ = model.embed_logit(X)
        batch_embeds.append(embed.cpu().detach())
si_embeds = torch.cat(batch_embeds, dim=0)

In [22]:
si_embeds.shape

torch.Size([148642, 512])

In [None]:
model.eval()
batch_embeds = []
labels = []
for batch_idx, (seq_len, X, y) in enumerate(eval_dataloader):
    if not config['no_cuda']:
        X = X.cuda()
        y = y.cuda()
    for i in range(len(X)):
        x_in = X[i:i+1,:,:seq_len[i]]
        embed, _ = model.embed_logit(x_in)
        batch_embeds.append(embed.cpu().detach())
    labels.append(y.cpu().detach())
sv_embeds = torch.cat(batch_embeds, dim=0)

In [80]:
torch.cat(labels)

tensor([30, 30, 30,  ..., 19, 19, 19])

In [61]:
voxc1_trial = pd.read_csv("/dataset/SV_sets/voxceleb1/dataframes/voxc1_sv_trial.csv")
eval_ids = eval_df.id.tolist()
voxc1_trial['enroll_idx'] = voxc1_trial.enroll_id.apply(lambda x: eval_ids.index(x))
voxc1_trial['test_idx'] = voxc1_trial.test_id.apply(lambda x: eval_ids.index(x))

In [98]:
from batch_sv_system_utils import compute_eer
from torch.nn.functional import cosine_similarity as cosine
scores = cosine(embeds[voxc1_trial.enroll_idx], embeds[voxc1_trial.test_idx], dim=1)
compute_eer(scores, voxc1_trial.label)

(0.1411983032873807, 0.14125132555673384, 0.1411452810180276, 0.29218227)

In [66]:
embeds[voxc1_trial.test_idx]

tensor([[0.3223, 0.2266, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0108],
        [0.2230, 0.0000, 0.0000,  ..., 0.2799, 0.0000, 0.0000],
        ...,
        [0.0193, 0.1974, 0.0000,  ..., 0.1602, 0.0515, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0791, 0.0000, 0.2717,  ..., 0.0744, 0.0000, 0.1562]])

### LDA on embedding

In [20]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

global_mean = si_embeds.mean(0)
clf = LDA(solver='svd', n_components=200)
clf.fit(si_embeds - global_mean, si_key_df.label)

# si_lda_embeds = clf.transform(si_embeds - global_mean).astype(np.float32)
sv_lda_embeds = clf.transform(sv_embeds - global_mean).astype(np.float32)