# DDA (Deep Discriminant Analysis)

기존의 DDA 학습은 classification base였다면 이번에는 Metric learning을 이용하여 DDA를 수행해보자

### Environment

In [1]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import pandas as pd
import pickle
import numpy as np
import sys
import os

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [2]:
sys.path.append('../')
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="3"

### Configuration

In [3]:
from sv_system.utils.parser import set_train_config
import easydict

# datasets
# voxc1_fbank_xvector
# gcommand_fbank_xvector

args = easydict.EasyDict(dict(dataset="voxc1_fbank_xvector",
                              input_frames=100, splice_frames=[50, 100], stride_frames=1, input_format='fbank',
                              cuda=True,
                              lrs=[0.01, 0.01], lr_schedule=[20], seed=1337,
                              no_eer=False,
                              batch_size=128,
                              arch="tdnn_conv", loss="softmax",
                              n_epochs=50
                             ))
config = set_train_config(args)

### Dataset

In [4]:
import torch.utils.data as data

class embedDataset(data.Dataset):
    def __init__(self, embeds, labels):
        super().__init__()
        self.embeds = embeds
        self.labels = labels
        
    def __getitem__(self, index):
        
        return self.embeds[index], self.labels[index]
    
    def __len__(self):
        
        return self.embeds.shape[0]
        

def embedToDataset(embed_dict):
    embeds = np.array([v for v in embed_dict.values()])
    spks = [k.split("/")[0] for k in embed_dict.keys()]
    spk2label = pd.Series(spks).unique().tolist()
    labels = np.array([spk2label.index(spk) for spk in spks])
    
    dataset = embedDataset(embeds, labels)
    
    return dataset, embeds.shape[1], len(spk2label)
    

In [5]:
trial = pd.read_pickle("../dataset/dataframes/voxc1/voxc_trial.pkl")

In [38]:
si_embeds = pickle.load(open("../best_models/voxc1/ResNet34_v4_softmax/voxc_train_dvectors.pkl", "rb"))
sv_embeds = pickle.load(open("../best_models/voxc1/ResNet34_v4_softmax/voxc_test_dvectors.pkl", "rb"))

In [7]:
si_dataset, embed_dim, n_labels = embedToDataset(si_embeds)
sv_dataset, _, _ = embedToDataset(sv_embeds)

### Batch Sampler

In [8]:
import math
import random
import itertools

def index_dataset(dataset):
    return {c : [example_idx for example_idx, (_, class_label_ind) in \
                 enumerate(zip(dataset.embeds, dataset.labels)) if class_label_ind == c] for c in set(dataset.labels)}

def sample_from_class(images_by_class, class_label_ind):
    return images_by_class[class_label_ind][random.randrange(len(images_by_class[class_label_ind]))]

def simple(batch_size, dataset, prob_other = 0.5):
    '''lazy sampling, not like in lifted_struct. they add to the pool all postiive combinations, then compute the average number of positive pairs per image, then sample for every image the same number of negative pairs'''
    images_by_class = index_dataset(dataset)
    for batch_idx in range(int(math.ceil(len(dataset) * 1.0 / batch_size))):
        example_indices = []
        for i in range(0, batch_size, 2):
            perm = random.sample(images_by_class.keys(), 2)
            example_indices += [sample_from_class(images_by_class, perm[0]), sample_from_class(images_by_class, perm[0 if i == 0 or random.random() > prob_other else 1])]
        yield example_indices[:batch_size]

def triplet(batch_size, dataset, class2img=None):
    if class2img is not None:
        images_by_class = class2img
    else:
        images_by_class = index_dataset(dataset)
    for batch_idx in range(int(math.ceil(len(dataset) * 1.0 / batch_size))):
        example_indices = []
        for i in range(0, batch_size, 3):
            perm = random.sample(images_by_class.keys(), 2)
            example_indices += [sample_from_class(images_by_class, perm[0]), sample_from_class(images_by_class, perm[0]), sample_from_class(images_by_class, perm[1])]
        yield example_indices[:batch_size]

def npairs(batch_size, dataset, K = 4):
    images_by_class = index_dataset(dataset)
    for batch_idx in range(int(math.ceil(len(dataset) * 1.0 / batch_size))):
        example_indices = [sample_from_class(images_by_class, class_label_ind) for k in range(int(math.ceil(batch_size * 1.0 / K))) for class_label_ind in [random.choice(images_by_class.keys())] for i in range(K)]
        yield example_indices[:batch_size]

In [9]:
adapt_sampler = lambda batch, dataset, sampler, **kwargs: \
type('', (torch.utils.data.sampler.Sampler,), 
     dict(__len__ = dataset.__len__, __iter__ = \
          lambda _: itertools.chain.from_iterable(sampler(batch, dataset, **kwargs))))(dataset)

In [10]:
train_class2idx = index_dataset(si_dataset)

### Dataloader

In [20]:
import torch
from torch.utils.data.dataloader import DataLoader

# si_loader = DataLoader(si_dataset, batch_size=128, num_workers=4, shuffle=True)
n_pairs_per_batch = 42
si_loader = torch.utils.data.DataLoader(si_dataset, 
                                       sampler = adapt_sampler(n_pairs_per_batch*3, si_dataset, triplet, class2img=train_class2idx), 
                                       num_workers = 4, batch_size = n_pairs_per_batch*3, 
                                       drop_last = True, pin_memory = True)
sv_loader = DataLoader(sv_dataset, batch_size=128, num_workers=4, shuffle=False)

### Model Define

In [21]:
import torch.nn as nn

class dda_model(nn.Module):
    def __init__(self, in_dims, n_labels):
        super().__init__()
        
        self.input_layer = nn.Sequential(
            nn.Linear(in_dims, 2*in_dims),
            nn.PReLU()
        )
        
        self.hidden_layer = nn.Sequential(
            nn.Linear(2*in_dims, 2*in_dims),
            nn.PReLU()
        )    
        self.hidden_batch = nn.BatchNorm1d(2*in_dims)
    
        self.embedding_layer = nn.Linear(2*in_dims, n_labels)
        
    def embed(self, x):
        x = self.input_layer(x)
        x = self.hidden_layer(x)
        x = self.hidden_batch(x)
    
        return x
    
    def forward(self, x):           
        x = self.embed(x)
        
        return x

In [22]:
model = dda_model(embed_dim, n_labels) 

In [23]:
model

dda_model(
  (input_layer): Sequential(
    (0): Linear(in_features=128, out_features=256, bias=True)
    (1): PReLU(num_parameters=1)
  )
  (hidden_layer): Sequential(
    (0): Linear(in_features=256, out_features=256, bias=True)
    (1): PReLU(num_parameters=1)
  )
  (hidden_batch): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (embedding_layer): Linear(in_features=256, out_features=1211, bias=True)
)

In [24]:
if not config['no_cuda']:
    model = model.cuda()

### Model Train

In [25]:
from sv_system.train.train_utils import set_seed, find_optimizer
from torch.optim.lr_scheduler import ReduceLROnPlateau, MultiStepLR

_, optimizer = find_optimizer(config, model)
criterion = nn.TripletMarginLoss(margin=1.0, p=2)
plateau_scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=5)
step_scheduler = MultiStepLR(optimizer, [30], 0.1)

In [29]:
import torch

def triplet_train(model, loader):
    model.train()
    loss_sum = 0
    n_corrects = 0
    total = 0
    for batch_idx, (X, y) in enumerate(loader):
        if not config['no_cuda']:
            X = X.cuda()
            y = y.cuda()

        optimizer.zero_grad()

        embeds = model(X)
        embeds = embeds / embeds.norm(dim=1,keepdim=True)
        anchor = embeds[0:63*2:3]
        pos_egs = embeds[1:63*2:3]
        neg_egs = embeds[2:63*2:3]
        loss = criterion(anchor, pos_egs, neg_egs)
        loss.backward()
        optimizer.step()
                        
        loss_sum += loss.item()
        total += y.size(0)
        if (batch_idx+1) % 100 == 0:
            print("Batch {}/{}\t Loss {:.6f}" \
                  .format(batch_idx+1, len(loader), loss_sum / total))
    return loss_sum 

In [30]:
import torch.nn.functional as F

def embeds_utterance(config, val_dataloader, model):
    val_iter = iter(val_dataloader)
    embeddings = []
    labels = []
    model.eval()

    with torch.no_grad():
        for batch in val_iter:
            X, y = batch
            if not config['no_cuda']:
                X = X.cuda()
                
            model_output = model.embed(X).cpu().data
            embeddings.append(model_output)
            labels.append(y.numpy())
        embeddings = torch.cat(embeddings)
        labels = np.hstack(labels)
    return embeddings, labels 

def sv_test(config, sv_loader, model, trial):
        embeddings, _ = embeds_utterance(config, sv_loader, model)
        sim_matrix = F.cosine_similarity(embeddings.unsqueeze(1), embeddings.unsqueeze(0), dim=2)
        cord = [trial.enrolment_id.tolist(), trial.test_id.tolist()]
        score_vector = sim_matrix[cord].numpy()
        label_vector = np.array(trial.label)
        fpr, tpr, thres = roc_curve(
                label_vector, score_vector, pos_label=1)
        eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]

        return eer, label_vector, score_vector
    
def sv_euc_test(config, sv_loader, model, trial):
        embeddings, _ = embeds_utterance(config, sv_loader, model)
#         embeddings /= embeddings.norm(dim=1,keepdim=True)
        a = embeddings.unsqueeze(1)
        b = embeddings.unsqueeze(0)
        dist = a - b
        sim_matrix = -dist.norm(dim=2)
        cord = [trial.enrolment_id.tolist(), trial.test_id.tolist()]
        score_vector = sim_matrix[cord].numpy()
        label_vector = np.array(trial.label)
        fpr, tpr, thres = roc_curve(
                label_vector, score_vector, pos_label=1)
        eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]

        return eer, label_vector, score_vector

In [31]:
from sv_system.train.si_train import val
from sklearn.metrics import roc_curve

for epoch_idx in range(0, config['n_epochs']):
    print("-"*30)
    curr_lr = optimizer.state_dict()['param_groups'][0]['lr']
    print("curr_lr: {}".format(curr_lr))

#     step_scheduler.step()    
    
#     train code
    train_loss = triplet_train(model, si_loader)
    print("epoch #{}, train loss: {}".format(epoch_idx, train_loss))

#     evaluate best_metric
    if not config['no_eer']:
        # eer validation code
        eer, label, score = sv_test(config, sv_loader, model, trial)
        print("epoch #{}, sv eer: {}".format(epoch_idx, eer))
    
    plateau_scheduler.step(train_loss)

------------------------------
curr_lr: 0.01
Batch 100/1174	 Loss 0.002708
Batch 200/1174	 Loss 0.002422
Batch 300/1174	 Loss 0.002304
Batch 400/1174	 Loss 0.002221
Batch 500/1174	 Loss 0.002174
Batch 600/1174	 Loss 0.002142
Batch 700/1174	 Loss 0.002104
Batch 800/1174	 Loss 0.002082
Batch 900/1174	 Loss 0.002075
Batch 1000/1174	 Loss 0.002055
Batch 1100/1174	 Loss 0.002040
epoch #0, train loss: 300.3295560851693
epoch #0, sv eer: 0.10057501863486316
------------------------------
curr_lr: 0.01
Batch 100/1174	 Loss 0.001867
Batch 200/1174	 Loss 0.001866
Batch 300/1174	 Loss 0.001846
Batch 400/1174	 Loss 0.001844
Batch 500/1174	 Loss 0.001852
Batch 600/1174	 Loss 0.001855
Batch 700/1174	 Loss 0.001847
Batch 800/1174	 Loss 0.001846
Batch 900/1174	 Loss 0.001838
Batch 1000/1174	 Loss 0.001841
Batch 1100/1174	 Loss 0.001839
epoch #1, train loss: 271.75236892700195
epoch #1, sv eer: 0.09791289532531147
------------------------------
curr_lr: 0.01
Batch 100/1174	 Loss 0.001844
Batch 200/1174

Batch 100/1174	 Loss 0.001525
Batch 200/1174	 Loss 0.001518
Batch 300/1174	 Loss 0.001525
Batch 400/1174	 Loss 0.001529
Batch 500/1174	 Loss 0.001525
Batch 600/1174	 Loss 0.001521
Batch 700/1174	 Loss 0.001522
Batch 800/1174	 Loss 0.001520
Batch 900/1174	 Loss 0.001513
Batch 1000/1174	 Loss 0.001511
Batch 1100/1174	 Loss 0.001511
epoch #18, train loss: 224.02292791381478
epoch #18, sv eer: 0.10765626663827069
------------------------------
curr_lr: 0.01
Batch 100/1174	 Loss 0.001542
Batch 200/1174	 Loss 0.001549
Batch 300/1174	 Loss 0.001545
Batch 400/1174	 Loss 0.001536
Batch 500/1174	 Loss 0.001533
Batch 600/1174	 Loss 0.001523
Batch 700/1174	 Loss 0.001520
Batch 800/1174	 Loss 0.001519
Batch 900/1174	 Loss 0.001521
Batch 1000/1174	 Loss 0.001520
Batch 1100/1174	 Loss 0.001517
epoch #19, train loss: 223.69455706328154
epoch #19, sv eer: 0.1067511447130231
------------------------------
curr_lr: 0.01
Batch 100/1174	 Loss 0.001538
Batch 200/1174	 Loss 0.001509
Batch 300/1174	 Loss 0.00

Batch 100/1174	 Loss 0.001442
Batch 200/1174	 Loss 0.001422
Batch 300/1174	 Loss 0.001421
Batch 400/1174	 Loss 0.001418
Batch 500/1174	 Loss 0.001413
Batch 600/1174	 Loss 0.001410
Batch 700/1174	 Loss 0.001408
Batch 800/1174	 Loss 0.001416
Batch 900/1174	 Loss 0.001415
Batch 1000/1174	 Loss 0.001414
Batch 1100/1174	 Loss 0.001419
epoch #36, train loss: 210.4752187281847
epoch #36, sv eer: 0.10951975295495688
------------------------------
curr_lr: 0.01
Batch 100/1174	 Loss 0.001328
Batch 200/1174	 Loss 0.001354
Batch 300/1174	 Loss 0.001368
Batch 400/1174	 Loss 0.001382
Batch 500/1174	 Loss 0.001390
Batch 600/1174	 Loss 0.001393
Batch 700/1174	 Loss 0.001406
Batch 800/1174	 Loss 0.001410
Batch 900/1174	 Loss 0.001414
Batch 1000/1174	 Loss 0.001413
Batch 1100/1174	 Loss 0.001412
epoch #37, train loss: 209.3656529188156
epoch #37, sv eer: 0.10872111596209136
------------------------------
curr_lr: 0.01
Batch 100/1174	 Loss 0.001440
Batch 200/1174	 Loss 0.001416
Batch 300/1174	 Loss 0.001

In [None]:
torch.save(dda_net.state_dict(), open("temp_dda_net.pt", "wb"))

### Extracting new embeddings

In [None]:
dda_net.load_state_dict(torch.load("temp_dda_net.pt"))

In [None]:
test_dataloader = DataLoader(test_dataset, batch_size=64, num_workers=1, shuffle=False)

In [None]:
new_embeds = []
for (X, y) in test_dataloader:
        if is_cuda:
            X = X.cuda()
        new_embed = dda_net.embed(X)
        new_embeds += [new_embed]

In [None]:
new_embed_tensor = torch.cat(new_embeds, dim=0)

In [None]:
import torch.nn.functional as F
sim_matrix = F.cosine_similarity(                                                                                                                                                                                                                                    
     new_embed_tensor.cpu().unsqueeze(1), new_embed_tensor.cpu().unsqueeze(0), dim=2)       

In [None]:
from sklearn.metrics import roc_curve

cord = [trial.enrolment_id.tolist(), trial.test_id.tolist()]                                                                                                                                                                                                         
score_vector = sim_matrix[cord].detach().numpy()                                                                                                                                                                                                                              
label_vector = np.array(trial.label)                                                                                                                                                                                                                                 
fpr, tpr, thres = roc_curve(                                                                                                                                                                                                                                         
     label_vector, score_vector, pos_label=1)                                                                                                                                                                                                                     
eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]

In [None]:
eer