# DDA (Deep Discriminant Analysis)

기존의 DDA 학습은 classification 기반이었다면, 이번에는 metric learning을 이용하여 DDA를 수행해 보자.

### Environment

In [1]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import pandas as pd
import pickle
import numpy as np
import sys
import os

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [2]:
# Extend the import search path; sv_system is imported in the following cells.
sys.path.append('/host/projects/sv_experiments/')

# Order GPUs by PCI bus id (see issue #152) and expose only device "3".
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "3",
})

### Configuration

In [3]:
from sv_system.utils.parser import set_train_config
import easydict

# Dataset names that can be used for "dataset":
#   voxc1_fbank_xvector
#   gcommand_fbank_xvector

# Raw training options; set_train_config turns them into the `config`
# mapping that the rest of the notebook reads.
args = easydict.EasyDict({
    "dataset": "voxc1_fbank_xvector",
    "input_frames": 100,
    "splice_frames": [50, 100],
    "stride_frames": 1,
    "input_format": 'fbank',
    "cuda": True,
    "lrs": [0.01, 0.01],
    "lr_schedule": [20],
    "seed": 1337,
    "no_eer": False,
    "batch_size": 128,
    "arch": "tdnn_conv",
    "loss": "softmax",
    "n_epochs": 50,
})
config = set_train_config(args)

### Dataset and Dataloader

In [4]:
import torch.utils.data as data

class embedDataset(data.Dataset):
    """Map-style dataset pairing pre-computed embedding rows with labels.

    `embeds` must expose `.shape` and positional indexing; `labels` must be
    indexable with the same positions.
    """

    def __init__(self, embeds, labels):
        super(embedDataset, self).__init__()
        self.labels = labels
        self.embeds = embeds

    def __len__(self):
        # One sample per row of the embedding container.
        n_rows = self.embeds.shape[0]
        return n_rows

    def __getitem__(self, index):
        # Return the (embedding, label) pair stored at `index`.
        sample = self.embeds[index]
        target = self.labels[index]
        return sample, target

def embedToDataset(embeds, key_df):
    """Wrap an embedding matrix and its key table into an `embedDataset`.

    Returns a 3-tuple: the dataset, `embeds.shape[1]`, and the number of
    distinct values found in `key_df.label`.
    """
    label_column = key_df.label
    samples = embedDataset(embeds, label_column.tolist())
    feature_dim = embeds.shape[1]
    n_classes = len(label_column.unique())

    return samples, feature_dim, n_classes

def key2df(keys):
    """Build a per-utterance table from '-'-separated utterance keys.

    Columns of the returned DataFrame:
        key    -- the key as given
        spk    -- the text before the first '-'
        label  -- integer group number of `spk` (from groupby().ngroup())
        origin -- 'voxc2' when `spk` starts with 'id', otherwise 'voxc1'
    """
    def speaker_of(key):
        return key.partition("-")[0]

    def corpus_of(spk):
        if spk.startswith('id'):
            return 'voxc2'
        return 'voxc1'

    key_df = pd.DataFrame(keys, columns=['key'])
    key_df['spk'] = key_df['key'].map(speaker_of)
    key_df['label'] = key_df.groupby('spk').ngroup()
    key_df['origin'] = key_df['spk'].map(corpus_of)
    return key_df

In [5]:
# Verification trial list; its enrolment_id, test_id and label columns are read
# when scoring further down.
# NOTE(review): read_pickle unpickles the file, so only load trial lists from a
# trusted location.
trial = pd.read_pickle("/dataset/SV_sets/dataframes/voxc1/voxc_trial.pkl")

In [6]:
# Load the utterance keys (key.pkl) and embedding matrices (feat.npy) of the
# train ("si") and test ("sv") splits.
# NOTE(review): pickle.load can execute code embedded in the file; only load
# key files from a trusted location.
# The `with` blocks close the key files instead of leaking the open handles.
with open("../../embeddings/voxc12/xvectors/xvectors_tdnn6b/train_feat/key.pkl", "rb") as key_file:
    si_keys = pickle.load(key_file)
si_embeds = np.load("../../embeddings/voxc12/xvectors/xvectors_tdnn6b/train_feat/feat.npy")

with open("../../embeddings/voxc12/xvectors/xvectors_tdnn6b/test_feat/key.pkl", "rb") as key_file:
    sv_keys = pickle.load(key_file)
sv_embeds = np.load("../../embeddings/voxc12/xvectors/xvectors_tdnn6b/test_feat/feat.npy")

In [7]:
# Derive the spk / label / origin columns from the utterance keys of each split.
# key2df numbers labels per table, so si and sv label ids are independent.
si_key_df = key2df(si_keys)
sv_key_df = key2df(sv_keys)

### Dataset and Dataloader

In [8]:
# Training dataset, plus the embedding width and the number of distinct training
# labels; the latter two size the model below.
si_dataset, embed_dim, n_labels = embedToDataset(si_embeds, si_key_df)
# For the verification split only the dataset itself is kept.
sv_dataset, _, _ = embedToDataset(sv_embeds, sv_key_df)

In [9]:
from torch.utils.data.dataloader import DataLoader

# Training batches are drawn in shuffled order.
si_loader = DataLoader(
    dataset=si_dataset,
    shuffle=True,
    batch_size=128,
    num_workers=0,
)
# Verification batches keep the dataset order.
sv_loader = DataLoader(
    dataset=sv_dataset,
    shuffle=False,
    batch_size=128,
    num_workers=0,
)

### Model Definition

In [10]:
import torch.nn as nn

class dda_model(nn.Module):
    """Two PReLU-activated linear layers, batch norm, then a linear output layer.

    `embed` returns the batch-normalised hidden activations (width 2 * in_dims);
    `forward` additionally applies `embedding_layer`, giving n_labels outputs.
    """

    def __init__(self, in_dims, n_labels):
        super(dda_model, self).__init__()
        hidden_dims = 2 * in_dims

        self.input_layer = nn.Sequential(nn.Linear(in_dims, hidden_dims), nn.PReLU())
        self.hidden_layer = nn.Sequential(nn.Linear(hidden_dims, hidden_dims), nn.PReLU())
        self.hidden_batch = nn.BatchNorm1d(hidden_dims)

        # Despite its name, this layer maps the hidden features to n_labels
        # outputs; embed() stops before it.
        self.embedding_layer = nn.Linear(hidden_dims, n_labels)

    def embed(self, x):
        """Return the batch-normalised hidden representation of `x`."""
        hidden = self.hidden_layer(self.input_layer(x))
        return self.hidden_batch(hidden)

    def forward(self, x):
        """Return the n_labels-wide output computed from `embed(x)`."""
        return self.embedding_layer(self.embed(x))

In [11]:
# Size the network from the training split: embedding width as input, one
# output per distinct training label.
model = dda_model(embed_dim, n_labels) 

In [12]:
model

dda_model(
  (input_layer): Sequential(
    (0): Linear(in_features=512, out_features=1024, bias=True)
    (1): PReLU(num_parameters=1)
  )
  (hidden_layer): Sequential(
    (0): Linear(in_features=1024, out_features=1024, bias=True)
    (1): PReLU(num_parameters=1)
  )
  (hidden_batch): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (embedding_layer): Linear(in_features=1024, out_features=7324, bias=True)
)

In [13]:
# Leave the model where it is only when the config sets 'no_cuda';
# otherwise move it to the GPU.
model = model if config['no_cuda'] else model.cuda()

### Model Train

In [14]:
from sv_system.train.train_utils import set_seed, find_optimizer
from torch.optim.lr_scheduler import ReduceLROnPlateau, MultiStepLR

# Loss function and optimizer are chosen by the project helper from the config.
criterion, optimizer = find_optimizer(config, model)

# Plateau-based decay (factor 0.1, patience 5) for a value that should decrease.
plateau_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)
# Milestone-based decay (factor 0.1 at milestone 10); never stepped by the
# training loop in this notebook.
step_scheduler = MultiStepLR(optimizer, milestones=[10], gamma=0.1)

In [15]:
import torch

def train(model):
    """Run one training epoch over the notebook-level `si_loader`.

    Relies on the notebook globals `si_loader`, `config`, `criterion` and
    `optimizer`.

    Args:
        model: network to optimise; `model(X)` must return per-class logits.

    Returns:
        (loss_sum, acc): the batch losses summed over the epoch (not averaged)
        and the fraction of samples whose argmax logit equals the label.
        `acc` is 0.0 when the loader yields no samples.
    """
    model.train()
    loss_sum = 0
    n_corrects = 0
    total = 0
    for X, y in si_loader:
        if not config['no_cuda']:
            X = X.cuda()
            y = y.cuda()

        optimizer.zero_grad()

        logit = model(X)
        loss = criterion(logit, y)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        n_corrects += (logit.argmax(dim=1) == y).sum().item()
        total += y.size(0)

    # Accuracy is computed once after the epoch; an empty loader would
    # otherwise divide by zero.
    acc = n_corrects / total if total else 0.0
    return loss_sum, acc

In [16]:
import torch.nn.functional as F

def embeds_utterance(config, val_dataloader, model):
    """Run `model.embed` over every batch of a loader, without gradients.

    Args:
        config: mapping read for the 'no_cuda' flag; inputs are moved to the
            GPU unless it is set.
        val_dataloader: iterable of (X, y) batches; `y` must offer `.numpy()`.
        model: module providing `eval()` and `embed(X)`.

    Returns:
        (embeddings, labels): the per-batch embeddings concatenated into one
        CPU tensor, and the per-batch labels stacked into one NumPy array,
        both in iteration order.
    """
    embed_chunks, label_chunks = [], []

    model.eval()
    with torch.no_grad():
        for X, y in val_dataloader:
            X = X if config['no_cuda'] else X.cuda()
            embed_chunks.append(model.embed(X).cpu().data)
            label_chunks.append(y.numpy())

        embeddings = torch.cat(embed_chunks)
        labels = np.hstack(label_chunks)

    return embeddings, labels

def sv_test(config, sv_loader, model, trial):
    """Score a verification trial list by cosine similarity and compute the EER.

    Args:
        config: passed through to `embeds_utterance`.
        sv_loader: loader over the verification utterances; the trial indices
            are applied to the embeddings in the order this loader yields them.
        model: network whose `embed` output is scored.
        trial: table with `enrolment_id` and `test_id` (row positions into the
            extracted embeddings) and `label` (1 marks the positive class).

    Returns:
        (eer, label_vector, score_vector): the false-positive rate at the ROC
        point where it is closest to the false-negative rate, the trial labels,
        and the cosine score of every trial pair.
    """
    # Imported locally so the function does not rely on a later notebook cell
    # having imported roc_curve.
    from sklearn.metrics import roc_curve

    embeddings, _ = embeds_utterance(config, sv_loader, model)

    # Score only the pairs named by the trial list. This avoids building the
    # full all-vs-all similarity matrix and indexing it with a list of lists.
    enrol_embeds = embeddings[trial.enrolment_id.tolist()]
    test_embeds = embeddings[trial.test_id.tolist()]
    score_vector = F.cosine_similarity(enrol_embeds, test_embeds, dim=1).numpy()
    label_vector = np.array(trial.label)

    fpr, tpr, _ = roc_curve(label_vector, score_vector, pos_label=1)
    # EER operating point: false-positive rate closest to the miss rate (1 - tpr).
    eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]

    return eer, label_vector, score_vector

In [17]:
from sv_system.train.si_train import val
from sklearn.metrics import roc_curve

for epoch_idx in range(config['n_epochs']):
    print("-" * 30)
    # Report the learning rate of the first parameter group.
    curr_lr = optimizer.param_groups[0]['lr']
    print("curr_lr: {}".format(curr_lr))

    # One pass over the training loader.
    train_loss, train_acc = train(model)
    print("epoch #{}, train accuracy: {}".format(epoch_idx, train_acc))

    # Verification EER on the trial list, unless disabled in the config.
    if not config['no_eer']:
        eer, label, score = sv_test(config, sv_loader, model, trial)
        print("epoch #{}, sv eer: {}".format(epoch_idx, eer))

    # Only the plateau scheduler adjusts the learning rate; it monitors the
    # summed training loss of the epoch. step_scheduler is not stepped.
    plateau_scheduler.step(train_loss)

------------------------------
curr_lr: 0.01
epoch #0, train accuracy: 0.8410005448806273
epoch #0, sv eer: 0.07629645405175167
------------------------------
curr_lr: 0.01
epoch #1, train accuracy: 0.9811867437432673
epoch #1, sv eer: 0.0750718773293579
------------------------------
curr_lr: 0.01
epoch #2, train accuracy: 0.9885606383245233
epoch #2, sv eer: 0.0753913321265041
------------------------------
curr_lr: 0.01
epoch #3, train accuracy: 0.9913257509331863
epoch #3, sv eer: 0.07560430199126823
------------------------------
curr_lr: 0.01
epoch #4, train accuracy: 0.9928539218879174
epoch #4, sv eer: 0.07549781705888617
------------------------------
curr_lr: 0.01
epoch #5, train accuracy: 0.9936955119372698
epoch #5, sv eer: 0.07528484719412203
------------------------------
curr_lr: 0.01
epoch #6, train accuracy: 0.9940078788486109
epoch #6, sv eer: 0.07555105952507721
------------------------------
curr_lr: 0.01
epoch #7, train accuracy: 0.9944047962021194
epoch #7, sv eer

KeyboardInterrupt: 

In [None]:
# Persist the trained weights. The network in this notebook is bound to `model`
# (`dda_net` is never defined here), and passing the path lets torch.save open
# and close the file itself instead of leaking an open handle.
torch.save(model.state_dict(), "temp_dda_net.pt")

### Extract & Save New embeddings

In [18]:
# si_loader is built with shuffle=True, so extracting through it would return
# the embeddings in a random order that no longer matches si_keys. Use an
# order-preserving loader over the same dataset so row i belongs to si_keys[i].
si_extract_loader = DataLoader(si_dataset, batch_size=128, num_workers=0, shuffle=False)
train_embeds, _ = embeds_utterance(config, si_extract_loader, model)
train_keys = si_keys

In [19]:
# sv_loader is created with shuffle=False, so the extracted rows follow the
# dataset order and line up with sv_keys.
test_embeds, _ = embeds_utterance(config, sv_loader, model)
test_keys = sv_keys

In [20]:
def save_key_embeds(save_dir, train_keys, train_embeds, test_keys, test_embeds):
    """Write the keys and embeddings of both splits under `save_dir`.

    Creates `<save_dir>/train_feat` and `<save_dir>/test_feat` when missing and
    stores in each a `key.pkl` (pickled keys) and a `feat.npy` (embeddings
    converted with `.numpy()`). Existing files are overwritten.

    Args:
        save_dir: root output directory.
        train_keys, test_keys: picklable key collections.
        train_embeds, test_embeds: objects providing `.numpy()`.
    """
    splits = (
        ("train_feat", train_keys, train_embeds),
        ("test_feat", test_keys, test_embeds),
    )
    for split_name, keys, embeds in splits:
        split_dir = os.path.join(save_dir, split_name)
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(split_dir, exist_ok=True)

        # Context managers flush and close every file, even when
        # serialisation fails half-way.
        with open(os.path.join(split_dir, "key.pkl"), "wb") as key_file:
            pickle.dump(keys, key_file)
        with open(os.path.join(split_dir, "feat.npy"), "wb") as feat_file:
            np.save(feat_file, embeds.numpy())

In [21]:
# Store the new embeddings and their keys in train_feat/ and test_feat/ under
# this directory.
save_key_embeds("../../embeddings/voxc12/dda_xvector1/", train_keys, train_embeds, test_keys, test_embeds)

In [52]:
import torch.nn.functional as F
# NOTE(review): `orig_embeddings` is not defined in the visible part of this
# notebook, and the recorded output of this cell is a NameError. Confirm which
# embedding tensor was intended before relying on `sim_matrix` from here.
sim_matrix = F.cosine_similarity(
     orig_embeddings.unsqueeze(1), orig_embeddings.unsqueeze(0), dim=2)

NameError: name 'orig_embeddings' is not defined

In [None]:
from sklearn.metrics import roc_curve

# Row and column positions of every (enrolment, test) pair in the trial list.
cord = [trial.enrolment_id.tolist(), trial.test_id.tolist()]
# Index with an explicit tuple so each (row, column) pair selects one score;
# a bare list of lists depends on legacy sequence-as-tuple index handling.
score_vector = sim_matrix[tuple(cord)].detach().numpy()
label_vector = np.array(trial.label)
fpr, tpr, thres = roc_curve(label_vector, score_vector, pos_label=1)
# EER operating point: false-positive rate closest to the miss rate (1 - tpr).
eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]

In [25]:
si_keys.index('Betty_White-zqtRF50lhsQ-0000003')

10549

### New trial

In [30]:
# Second '-'-separated field of the key, stored as the session id; used below
# to pick same-speaker utterances from a different session.
sv_key_df['session'] = sv_key_df.key.apply(lambda x: x.split('-')[1])

In [107]:
# Build a balanced trial list. For every verification utterance, draw up to
# `n_pairs` same-speaker utterances from other sessions (targets) and the same
# number of different-speaker utterances (non-targets).
n_pairs = 4
# Seeded generator so the sampled trial list can be regenerated exactly.
trial_rng = np.random.RandomState(1337)

new_trial_ids = []
new_trial_keys = []
for idx, row in sv_key_df.iterrows():
    pos_pool = sv_key_df[(sv_key_df.spk == row.spk) & (sv_key_df.session != row.session)]
    neg_pool = sv_key_df[(sv_key_df.spk != row.spk)]

    # sample() raises when asked for more rows than exist, so cap the draw at
    # the smaller pool; this also keeps targets and non-targets balanced.
    n_draw = min(n_pairs, len(pos_pool), len(neg_pool))
    pos_pair = pos_pool.sample(n=n_draw, random_state=trial_rng)
    neg_pair = neg_pool.sample(n=n_draw, random_state=trial_rng)

    # Emit the index-based and key-based form of each trial in the same pass.
    for (pos_idx, pos_key), (neg_idx, neg_key) in zip(pos_pair.key.items(), neg_pair.key.items()):
        new_trial_ids += [(idx, pos_idx, 1), (idx, neg_idx, 0)]
        new_trial_keys += [(row.key, pos_key, 'target'), (row.key, neg_key, 'nontarget')]

In [81]:
# Index-based trial table: one row per (enrolment index, test index, 0/1 label).
new_trial = pd.DataFrame(new_trial_ids, columns=['enrolment_id', 'test_id', 'label'])

In [83]:
import torch.nn.functional as F
# All-vs-all cosine similarity of the loaded verification embeddings.
# Inserting a singleton axis at position 1 and at position 0 makes the two
# views broadcast against each other; the similarity is reduced over axis 2.
sv_embed_tensor = torch.from_numpy(sv_embeds)
sim_matrix = F.cosine_similarity(sv_embed_tensor[:, None], sv_embed_tensor[None], dim=2)

In [84]:
from sklearn.metrics import roc_curve

# Row and column positions of every (enrolment, test) pair in the new trial list.
cord = [new_trial.enrolment_id.tolist(), new_trial.test_id.tolist()]
# Index with an explicit tuple so each (row, column) pair selects one score;
# a bare list of lists depends on legacy sequence-as-tuple index handling.
score_vector = sim_matrix[tuple(cord)].detach().numpy()
label_vector = np.array(new_trial.label)
fpr, tpr, thres = roc_curve(label_vector, score_vector, pos_label=1)
# EER operating point: false-positive rate closest to the miss rate (1 - tpr).
eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]

In [85]:
eer

0.09478867459991794

PLDA trial format으로 만들자.

In [112]:
# Key-based trial table: one row per (enrolment key, test key,
# 'target'/'nontarget') tuple, with default integer column names.
new_trial_key = pd.DataFrame(new_trial_keys)

In [113]:
# Write the key-based trials as space-separated text with no header and no
# index column.
new_trial_key.to_csv("voxc12_trial_sv", sep=' ', index=False, header=False)

In [89]:
# Text form of the 0/1 label: 'target' for 1, 'nontarget' for anything else.
new_trial['is_target'] = new_trial.label.apply(lambda x: 'target' if x == 1 else 'nontarget')

In [90]:
new_trial.head(1)

Unnamed: 0,enrolment_id,test_id,label,is_target
0,0,100,1,target


In [91]:
new_trial