# DDA (Deep Discriminant Analysis)

기존의 DDA 학습이 classification 기반이었다면, 이번에는 metric learning을 이용하여 DDA를 수행해 보자.

### Environment

In [4]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import pandas as pd
import pickle
import numpy as np
import sys
import os

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [5]:
# Put the parent directory on the import path (presumably so the
# `sv_system` imports further down resolve -- confirm against repo layout).
sys.path.append('../')
# Enumerate CUDA devices in PCI bus order (see issue #152) and expose only
# device "3" to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "3",
})

### Configuration

In [6]:
from sv_system.utils.parser import set_train_config
import easydict

# Dataset identifiers listed by the original author:
#   voxc1_fbank_xvector
#   gcommand_fbank_xvector

# Training options gathered in an attribute-accessible dict; `config` is
# whatever set_train_config derives from them.
args = easydict.EasyDict({
    "dataset": "voxc1_fbank_xvector",
    "input_frames": 100,
    "splice_frames": [50, 100],
    "stride_frames": 1,
    "input_format": 'fbank',
    "cuda": True,
    "lrs": [0.1, 0.01],
    "lr_schedule": [20],
    "seed": 1337,
    "no_eer": False,
    "batch_size": 128,
    "arch": "tdnn_conv",
    "loss": "softmax",
    "n_epochs": 50,
})
config = set_train_config(args)

### Dataset and Dataloader

In [7]:
import torch.utils.data as data

class embedDataset(data.Dataset):
    """Map-style dataset that pairs each embedding with its label.

    Args:
        embeds: indexable container exposing ``.shape``; ``shape[0]`` is
            reported as the dataset length.
        labels: indexable container queried with the same index as
            ``embeds``.
    """

    def __init__(self, embeds, labels):
        super().__init__()
        self.embeds, self.labels = embeds, labels

    def __len__(self):
        """Return the size of the first axis of ``embeds``."""
        return self.embeds.shape[0]

    def __getitem__(self, index):
        """Return the ``(embedding, label)`` pair stored at ``index``."""
        embed = self.embeds[index]
        label = self.labels[index]
        return embed, label
        

def embedToDataset(embed_dict):
    """Turn an utterance-key -> embedding mapping into a labelled dataset.

    The speaker of an utterance is the part of its key before the first
    "/". Speakers are numbered 0, 1, 2, ... in order of first appearance.

    Args:
        embed_dict: mapping with string keys; the values are stacked with
            ``np.array`` (assumed to be equal-length 1-D vectors -- TODO
            confirm against the pickles that feed this).

    Returns:
        ``(dataset, embed_dim, n_speakers)`` where ``dataset`` is an
        ``embedDataset``, ``embed_dim`` is ``embeds.shape[1]`` and
        ``n_speakers`` is the number of distinct speakers.
    """
    embeds = np.array(list(embed_dict.values()))
    spks = [k.split("/")[0] for k in embed_dict]

    # Assign ids through a dict: O(1) per utterance instead of a
    # ``list.index`` scan over all speakers, with the same
    # first-appearance numbering.
    spk2label = {}
    for spk in spks:
        spk2label.setdefault(spk, len(spk2label))
    labels = np.array([spk2label[spk] for spk in spks])

    dataset = embedDataset(embeds, labels)

    return dataset, embeds.shape[1], len(spk2label)
    

In [8]:
# Verification trial table; sv_test reads its enrolment_id, test_id and
# label columns.
trial = pd.read_pickle("../dataset/dataframes/voxc1/voxc_trial.pkl")

In [9]:
# NOTE(security): pickle.load can execute arbitrary code; only load files
# produced by this project.
# Context managers close the file handles, which the bare open() calls
# left dangling.
with open("../best_models/voxc1/ResNet34_v4_softmax/voxc_train_dvectors.pkl", "rb") as f:
    si_embeds = pickle.load(f)
with open("../best_models/voxc1/ResNet34_v4_softmax/voxc_test_dvectors.pkl", "rb") as f:
    sv_embeds = pickle.load(f)

### Dataset and Dataloader

In [10]:
# Training embeddings: besides the dataset, keep the embedding width and
# the number of speaker labels (both are passed to dda_model later).
si_dataset, embed_dim, n_labels = embedToDataset(si_embeds)
# Test embeddings: only the dataset itself is needed.
sv_dataset = embedToDataset(sv_embeds)[0]

In [11]:
from torch.utils.data.dataloader import DataLoader

# Training loader: reshuffled every epoch.
si_loader = DataLoader(
    si_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=4,
)
# Test loader: fixed order, since trial ids index into its output by
# position (see sv_test).
sv_loader = DataLoader(
    sv_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=4,
)

### Model Definition

In [15]:
import torch.nn as nn

class dda_model(nn.Module):
    """Two PReLU-activated linear layers, batch norm, then a linear output.

    ``embed`` maps an input of width ``in_dims`` to a representation of
    width ``2 * in_dims``; ``forward`` additionally projects that
    representation to ``n_labels`` outputs.
    """

    def __init__(self, in_dims, n_labels):
        super().__init__()
        hidden_dims = 2 * in_dims

        # Submodules are created in the order they are applied.
        self.input_layer = nn.Sequential(
            nn.Linear(in_dims, hidden_dims),
            nn.PReLU(),
        )
        self.hidden_layer = nn.Sequential(
            nn.Linear(hidden_dims, hidden_dims),
            nn.PReLU(),
        )
        self.hidden_batch = nn.BatchNorm1d(hidden_dims)
        # Despite its name, this layer produces one output per label.
        self.embedding_layer = nn.Linear(hidden_dims, n_labels)

    def embed(self, x):
        """Return the batch-normalised hidden representation of ``x``."""
        for stage in (self.input_layer, self.hidden_layer, self.hidden_batch):
            x = stage(x)
        return x

    def forward(self, x):
        """Return the ``n_labels``-wide output computed from ``embed(x)``."""
        return self.embedding_layer(self.embed(x))

In [16]:
# Input width and number of output labels both come from the training set.
model = dda_model(embed_dim, n_labels)

In [17]:
model

dda_model(
  (input_layer): Sequential(
    (0): Linear(in_features=128, out_features=256, bias=True)
    (1): PReLU(num_parameters=1)
  )
  (hidden_layer): Sequential(
    (0): Linear(in_features=256, out_features=256, bias=True)
    (1): PReLU(num_parameters=1)
  )
  (hidden_batch): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (embedding_layer): Linear(in_features=256, out_features=1211, bias=True)
)

In [18]:
# Keep the model where it is when config['no_cuda'] is set; otherwise
# move it to the GPU.
model = model if config['no_cuda'] else model.cuda()

### Model Training

In [19]:
from sv_system.train.train_utils import set_seed, find_optimizer
from torch.optim.lr_scheduler import ReduceLROnPlateau, MultiStepLR

# Loss and optimizer are chosen by the project helper from `config`.
criterion, optimizer = find_optimizer(config, model)
# Cuts the LR by 10x when the monitored value has not decreased for more
# than 5 consecutive epochs.
plateau_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)
# Fixed-milestone alternative (10x cut at epoch 10); created here but its
# step() call in the training loop is commented out.
step_scheduler = MultiStepLR(optimizer, milestones=[10], gamma=0.1)

In [20]:
import torch

def train(model):
    """Run one training epoch over the notebook-level ``si_loader``.

    Reads the notebook-level ``config``, ``optimizer`` and ``criterion``.
    Prints the running mean loss every 100 batches.

    Args:
        model: module called as ``model(X)``; its output is passed to
            ``criterion`` and arg-maxed over dim 1 for accuracy.

    Returns:
        ``(loss_sum, acc)``: the sum (not the mean) of the per-batch
        losses, and the fraction of samples whose arg-max prediction
        equals the label. ``acc`` is 0.0 if the loader yields no samples.
    """
    model.train()
    use_cuda = not config['no_cuda']
    n_batches = len(si_loader)

    loss_sum = 0
    n_corrects = 0
    total = 0
    for batch_idx, (X, y) in enumerate(si_loader, start=1):
        if use_cuda:
            X = X.cuda()
            y = y.cuda()

        optimizer.zero_grad()

        logit = model(X)
        loss = criterion(logit, y)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        n_corrects += torch.sum(torch.eq(torch.argmax(logit, dim=1), y)).item()
        total += y.size(0)

        if batch_idx % 100 == 0:
            print("Batch {}/{}\t Loss {:.6f}"
                  .format(batch_idx, n_batches, loss_sum / batch_idx))

    # Accuracy is computed once, after the loop; guard against an empty
    # loader instead of dividing by zero.
    acc = n_corrects / total if total else 0.0
    return loss_sum, acc

In [21]:
import torch.nn.functional as F

def embeds_utterance(config, val_dataloader, model):
    """Embed every batch of ``val_dataloader`` with ``model.embed``.

    The model is switched to eval mode and gradients are disabled.

    Args:
        config: mapping with a ``'no_cuda'`` entry; inputs are moved to the
            GPU when it is falsy.
        val_dataloader: iterable of ``(X, y)`` batches, ``y`` being a tensor.
        model: module exposing ``eval()`` and ``embed(X)``.

    Returns:
        ``(embeddings, labels)``: the per-batch embeddings concatenated
        into one CPU tensor, and the labels stacked into one numpy array,
        both in loader order.
    """
    model.eval()
    embed_chunks, label_chunks = [], []

    with torch.no_grad():
        for X, y in val_dataloader:
            if not config['no_cuda']:
                X = X.cuda()
            embed_chunks.append(model.embed(X).cpu().data)
            label_chunks.append(y.numpy())

    return torch.cat(embed_chunks), np.hstack(label_chunks)

def sv_test(config, sv_loader, model, trial):
    """Score verification trials by cosine similarity and compute the EER.

    Args:
        config: mapping with a ``'no_cuda'`` entry (forwarded to
            ``embeds_utterance``).
        sv_loader: loader over the test utterances.
        model: module exposing ``embed``.
        trial: table with ``enrolment_id``, ``test_id`` and ``label``
            columns. The two id columns are used as row indices into the
            embeddings (assumes they follow ``sv_loader``'s iteration
            order -- TODO confirm against how the trial file was built).

    Returns:
        ``(eer, label_vector, score_vector)``: the false-positive rate at
        the ROC point where it is closest to the false-negative rate, the
        trial labels, and the per-trial cosine scores.
    """
    # Imported here so the function does not depend on a later notebook
    # cell having been run first.
    from sklearn.metrics import roc_curve

    embeddings, _ = embeds_utterance(config, sv_loader, model)
    enrol_idx = trial.enrolment_id.tolist()
    test_idx = trial.test_id.tolist()

    # Score only the listed pairs. This avoids broadcasting a full
    # N x N x D similarity, and avoids indexing a tensor with a list of
    # lists (deprecated non-tuple multidimensional indexing).
    score_vector = F.cosine_similarity(
        embeddings[enrol_idx], embeddings[test_idx], dim=1).numpy()
    label_vector = np.array(trial.label)

    fpr, tpr, _ = roc_curve(label_vector, score_vector, pos_label=1)
    # EER approximation: the operating point where FPR and FNR (1 - TPR)
    # are closest.
    eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]

    return eer, label_vector, score_vector

In [22]:
from sv_system.train.si_train import val
from sklearn.metrics import roc_curve

for epoch_idx in range(config['n_epochs']):
    print("-" * 30)
    curr_lr = optimizer.param_groups[0]['lr']
    print(f"curr_lr: {curr_lr}")

    # step_scheduler is not stepped in this loop; only plateau_scheduler
    # adjusts the learning rate here.

    # One pass over the training set.
    train_loss, train_acc = train(model)
    print(f"epoch #{epoch_idx}, train accuracy: {train_acc}")

    # Verification EER on the trial list, unless disabled in the config.
    if not config['no_eer']:
        eer, label, score = sv_test(config, sv_loader, model, trial)
        print(f"epoch #{epoch_idx}, sv eer: {eer}")

    # The plateau scheduler monitors the summed training loss of the epoch.
    plateau_scheduler.step(train_loss)

------------------------------
curr_lr: 0.1
Batch 100/1156	 Loss 4.762459
Batch 200/1156	 Loss 3.151353
Batch 300/1156	 Loss 2.317050
Batch 400/1156	 Loss 1.846502
Batch 500/1156	 Loss 1.548298
Batch 600/1156	 Loss 1.337297
Batch 700/1156	 Loss 1.185067
Batch 800/1156	 Loss 1.068703
Batch 900/1156	 Loss 0.977740
Batch 1000/1156	 Loss 0.902433
Batch 1100/1156	 Loss 0.840091
epoch #0, train accuracy: 0.8565856626220976
epoch #0, sv eer: 0.06250665530827387
------------------------------
curr_lr: 0.1
Batch 100/1156	 Loss 0.146855
Batch 200/1156	 Loss 0.147702
Batch 300/1156	 Loss 0.149415
Batch 400/1156	 Loss 0.151442
Batch 500/1156	 Loss 0.153520
Batch 600/1156	 Loss 0.154226
Batch 700/1156	 Loss 0.154828
Batch 800/1156	 Loss 0.154224
Batch 900/1156	 Loss 0.154184
Batch 1000/1156	 Loss 0.154478
Batch 1100/1156	 Loss 0.154087
epoch #1, train accuracy: 0.9619224659478826
epoch #1, sv eer: 0.06208071557874561
------------------------------
curr_lr: 0.1
Batch 100/1156	 Loss 0.111301
Batch 20

epoch #17, sv eer: 0.0636247470982856
------------------------------
curr_lr: 0.1
Batch 100/1156	 Loss 0.036331
Batch 200/1156	 Loss 0.034068
Batch 300/1156	 Loss 0.032654
Batch 400/1156	 Loss 0.032094
Batch 500/1156	 Loss 0.031675
Batch 600/1156	 Loss 0.032129
Batch 700/1156	 Loss 0.032884
Batch 800/1156	 Loss 0.034004
Batch 900/1156	 Loss 0.035218
Batch 1000/1156	 Loss 0.036979
Batch 1100/1156	 Loss 0.038609
epoch #18, train accuracy: 0.9947882515969851
epoch #18, sv eer: 0.0637844744968587
------------------------------
curr_lr: 0.1
Batch 100/1156	 Loss 0.032299
Batch 200/1156	 Loss 0.031656
Batch 300/1156	 Loss 0.032287
Batch 400/1156	 Loss 0.032451
Batch 500/1156	 Loss 0.032247
Batch 600/1156	 Loss 0.032480
Batch 700/1156	 Loss 0.033131
Batch 800/1156	 Loss 0.034948
Batch 900/1156	 Loss 0.037015
Batch 1000/1156	 Loss 0.038859
Batch 1100/1156	 Loss 0.039773
epoch #19, train accuracy: 0.9946192584581066
epoch #19, sv eer: 0.06495580875306144
------------------------------
curr_lr: 0

KeyboardInterrupt: 

In [None]:
# `dda_net` is never defined in this notebook; the trained network is
# `model`. Passing a path lets torch.save open and close the file itself.
torch.save(model.state_dict(), "temp_dda_net.pt")

### Extracting new embeddings

In [None]:
# Restore the weights into `model` (`dda_net` is not defined in this
# notebook).
model.load_state_dict(torch.load("temp_dda_net.pt"))

In [None]:
# `test_dataset` is not defined in this notebook; the test-side dataset
# built earlier is `sv_dataset`. Order is kept fixed so rows line up with
# the trial ids.
test_dataloader = DataLoader(sv_dataset, batch_size=64, num_workers=1, shuffle=False)

In [None]:
new_embeds = []
# Eval mode makes BatchNorm use its running statistics instead of the
# statistics of each test batch; no_grad avoids keeping autograd graphs
# for every batch.
model.eval()
with torch.no_grad():
    for X, _ in test_dataloader:
        # Same device switch as the training code (`is_cuda` was undefined).
        if not config['no_cuda']:
            X = X.cuda()
        # Move each batch back to the CPU right away to free GPU memory.
        new_embeds.append(model.embed(X).cpu())

In [None]:
# Stack the per-batch embeddings along the batch axis, in loader order.
new_embed_tensor = torch.cat(new_embeds, dim=0)

In [None]:
import torch.nn.functional as F
# All-pairs cosine similarity: entry [i, j] compares embedding i with
# embedding j. Broadcasting (N, 1, D) against (1, N, D) reduces over the
# feature axis (dim=2).
sim_matrix = F.cosine_similarity(
    new_embed_tensor.cpu().unsqueeze(1),
    new_embed_tensor.cpu().unsqueeze(0),
    dim=2,
)

In [None]:
from sklearn.metrics import roc_curve

# (row, column) index lists selecting one similarity per trial. This must
# be a tuple: indexing with a list of lists is deprecated non-tuple
# multidimensional indexing.
cord = (trial.enrolment_id.tolist(), trial.test_id.tolist())
score_vector = sim_matrix[cord].detach().numpy()
label_vector = np.array(trial.label)
fpr, tpr, thres = roc_curve(label_vector, score_vector, pos_label=1)
# EER approximation: the ROC point where FPR and FNR (1 - TPR) are closest.
eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]

In [None]:
eer