만약 sent label로 같이 MTL을 하면

같은 sent일 때는 좀더 성능이 좋아질지?

혹은 positive도 강화되지만 negative도 강화될지?

In [1]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import pandas as pd
import pickle
import numpy as np
import sys
import os

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [2]:
sys.path.append('../')
sys.path.append('../sv_system/')
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"

### Configuration

In [3]:
from sv_system.utils.parser import set_train_config
import easydict
args = easydict.EasyDict(dict(dataset="gcommand_equal30_wav",
                              input_frames=100, splice_frames=[20, 100], stride_frames=1, input_format='fbank',
                              cuda=True,
                              lrs=[0.1, 0.01], lr_schedule=[20], seed=1337,
                              no_eer=False,
                              batch_size=128,
                              arch="ResNet34", loss="softmax",
                              n_epochs=50,
                              lamb=0.3
                             ))
config = set_train_config(args)

### Dataset and Dataloader

In [4]:
gc_si_df = pd.read_pickle("../dataset/dataframes/gcommand/equal_num_30spk/equal_num_30spk_si.pkl")
gc_sv_df = pd.read_pickle("../dataset/dataframes/gcommand/equal_num_30spk/equal_num_30spk_sv.pkl")

unique_spks = gc_si_df.spk.unique().tolist()
unique_sents = gc_si_df.sent.unique().tolist()

gc_si_df['sent_label'] = gc_si_df.sent.apply(lambda x: unique_sents.index(x))
gc_sv_df['sent_label'] = gc_sv_df.sent.apply(lambda x: unique_sents.index(x))

In [5]:
from sv_system.data.data_utils import find_dataset, find_trial


_, datasets = find_dataset(config, basedir='../')
trial = find_trial(config, basedir='../')

In [6]:
from sv_system.data.dataset import mtlSpeechDataset

train_dataset = mtlSpeechDataset.read_df(config, gc_si_df, "train")
datasets[2] = train_dataset

In [7]:
from sv_system.data.dataloader import init_loaders

dataloaders = init_loaders(config, datasets)

### Define Model

In [8]:
from sv_system.model.ResNet34 import ResNet34
import torch.nn as nn
import torch.nn.functional as F

class ResNet34_v1(ResNet34):
    """
        additional fc layer before output layer
    """
    def __init__(self, config, inplanes=16, n_labels1=1000, n_labels2=1000, fc_dims=None):
        super().__init__(config, inplanes, 10)

        extractor_output_dim = 8*inplanes
        if not fc_dims:
            fc_dims = extractor_output_dim

        fc = [nn.Linear(extractor_output_dim,fc_dims),
                  nn.ReLU(inplace=True)]

        self.fc = nn.Sequential(*fc)
        
        self.classifier_1 = nn.Linear(fc_dims, n_labels1) # for spks
        self.classifier_2 = nn.Linear(fc_dims, n_labels2) # for sents
    
    def extract(self, x):
        x = self.extractor(x)
        x = F.avg_pool2d(x,x.shape[-2:])
        x = x.view(x.size(0), -1)
        feat = self.fc(x)
        
        return feat
        
    def forward(self, x):
        x = self.spk_out(x)
        
        return x
    
    def joint_forward(self, x):
        feat = self.extract(x)
        
        out1 = self.classifier_1(feat)
        out2 = self.classifier_2(feat)
        
        return out1, out2
        
    def spk_out(self, x):
        feat = self.extract(x)
        out1 = self.classifier_1(feat)
        
        return out1
    
    def sent_out(self, x):
        feat = self.extract(x)
        out2 = self.classifier_2(feat)
        
        return out2


In [9]:
from sv_system.model.tdnnModel import gTDNN, st_pool_layer

class tdnn_xvector(gTDNN):
    """xvector architecture"""
    def __init__(self, config, n_labels_spk, n_labels_sent):
        super(tdnn_xvector, self).__init__(config, n_labels_spk)
        inDim = config['input_dim']
        self.extractor = nn.Sequential(
            nn.Conv1d(inDim, 512, stride=1, dilation=1, kernel_size=5),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Conv1d(512, 512, stride=1, dilation=3, kernel_size=3),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Conv1d(512, 512, stride=1, dilation=4, kernel_size=3),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Conv1d(512, 512, stride=1, dilation=1, kernel_size=1),
            nn.BatchNorm1d(512),
            nn.ReLU(True)
        )
        
        self.spk_seg = nn.Sequential(
            nn.Conv1d(512, 1500, stride=1, dilation=1, kernel_size=1),
            nn.BatchNorm1d(1500),
            nn.ReLU(True),
            st_pool_layer(),
            nn.Linear(3000, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            ####################
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Linear(512, n_labels_spk),
        )
        
        self.sent_seg = nn.Sequential(
            nn.Conv1d(512, 1500, stride=1, dilation=1, kernel_size=1),
            nn.BatchNorm1d(1500),
            nn.ReLU(True),
            st_pool_layer(),
            nn.Linear(3000, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            ##################3333333##
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Linear(512, n_labels_sent),
        )

        self._initialize_weights()

    def embed(self, x):
        x = x.squeeze(1)
        # (batch, time, freq) -> (batch, freq, time)
        x = x.permute(0,2,1)
        x = self.extractor(x)
        x = self.spk_seg(x)

        return x
    
    def sent_out(self, x):
        x = x.squeeze(1)
        x = x.permute(0,2,1)
        feat = self.extractor(x)
        out_sent = self.sent_seg(feat)
        
        return out_sent
    
    def spk_out(self, x):
        feat = self.embed(x)
        out_spk = self.classifier(feat)

        return out_spk

    def forward(self, x):
        x = self.spk_out(x)

        return x
    
    def joint_forward(self, x):
        out_spk = self.spk_out(x)
        out_sent = self.sent_out(x)
        
        return out_spk, out_sent

In [18]:
model = tdnn_xvector(config,  len(unique_spks), len(unique_sents))
# model = ResNet34_v1(config, n_labels1=len(unique_spks), n_labels2=len(unique_sents), fc_dims=128)

In [19]:
if not config["no_cuda"]:
    model.cuda()
else:
    model = model.cpu()

### Model Train

In [20]:
import torch
from sv_system.train.train_utils import set_seed, find_optimizer
from torch.optim.lr_scheduler import ReduceLROnPlateau

criterion, joint_optimizer = find_optimizer(config, model)

sent_optimizer = torch.optim.SGD([{'params':model.extractor.parameters()}, 
                                  {'params':model.sent_seg.parameters()}], 
                                lr=0.01,
                                momentum=config['momentum'],
                                weight_decay=config['weight_decay'],
                                nesterov=config['use_nesterov'])

spk_optimizer = torch.optim.SGD([{'params':model.extractor.parameters()}, 
                                 {'params':model.spk_seg.parameters()},
                                 {'params':model.classifier.parameters()}],
                                lr=0.01,
                                momentum=config['momentum'],
                                weight_decay=config['weight_decay'],
                                nesterov=config['use_nesterov'])

# sent_optimizer = torch.optim.SGD([{'params':model.extractor.parameters()}, 
#                                   {'params':model.classifier_2.parameters()}], 
#                                 lr=0.01,
#                                 momentum=config['momentum'],
#                                 weight_decay=config['weight_decay'],
#                                 nesterov=config['use_nesterov'])

# spk_optimizer = torch.optim.SGD([{'params':model.extractor.parameters()}, 
#                                  {'params':model.classifier_1.parameters()}],
#                                 lr=0.1,
#                                 momentum=config['momentum'],
#                                 weight_decay=config['weight_decay'],
#                                 nesterov=config['use_nesterov'])


scheduler = ReduceLROnPlateau(spk_optimizer, 'min', factor=0.1, patience=5)

In [21]:
config['seed'] = 13
set_seed(config)

In [22]:
if not config['no_eer']:
    train_loader, val_loader, joint_loader, sv_loader = dataloaders
else:
    train_loader, val_loader, joint_loader = dataloaders

In [23]:
# mtl trai
from tqdm import tqdm_notebook
from sv_system.train.train_utils import print_eval

def train(config, train_loader, model, optimizer_spk, criterion):
    model.train()
    loss_sum = 0
    corrects = 0
    total = 0
    print_steps = (np.arange(1,11)*0.1 \
                    * len(train_loader)).astype(np.int64)

    splice_frames = config['splice_frames']
    if len(splice_frames) > 1:
        splice_frames_ = np.random.randint(splice_frames[0], splice_frames[1])
    else:
        splice_frames_ = splice_frames[-1]

    for batch_idx, (X, y_spk) in enumerate(train_loader):
        # X.shape is (batch, channel, time, bank)
        X = X.narrow(2, 0, splice_frames_)
        if not config["no_cuda"]:
            X = X.cuda()
            y_spk = y_spk.cuda()
            
        logit_spk = model(X)
        loss_spk = criterion(logit_spk, y_spk)

        optimizer_spk.zero_grad()           
        loss_sum += loss_spk.item()
        loss_spk.backward()
        optimizer_spk.step()
        predicted = torch.argmax(logit_spk, dim=1)
        corrects += predicted.eq(y_spk).cpu().sum().float()
        total += y_spk.size(0)
        if batch_idx in print_steps:
            print("train loss: {:.4f}, acc: {:.5f} " \
                  .format(loss_sum/total, corrects/total))
            
    return loss_sum, corrects/total

In [24]:
# mtl trai
from tqdm import tqdm_notebook
from sv_system.train.train_utils import print_eval

def batch_switch_train(config, train_loader, model, optimizer_spk, 
                       optimizer_sent, criterion):
    model.train()
    loss_sum = 0
    corrects = 0
    total = 0
    print_steps = (np.arange(1,11)*0.1 \
                    * len(train_loader)).astype(np.int64)

    splice_frames = config['splice_frames']
    if len(splice_frames) > 1:
        splice_frames_ = np.random.randint(splice_frames[0], splice_frames[1])
    else:
        splice_frames_ = splice_frames[-1]

    for batch_idx, (X, y_spk, y_sent) in enumerate(train_loader):
        # X.shape is (batch, channel, time, bank)
        X = X.narrow(2, 0, splice_frames_)
        if not config["no_cuda"]:
            X = X.cuda()
            y_spk = y_spk.cuda()
            y_sent = y_sent.cuda()
            
        logit_spk, logit_sent = model.joint_forward(X)
        loss_spk = criterion(logit_spk, y_spk)
        loss_sent = criterion(logit_sent, y_sent)
        
        if batch_idx % 2 == 0:
            loss = loss_spk
            optimizer = optimizer_spk
        else:
            loss = loss_sent
            optimizer = optimizer_sent
        optimizer.zero_grad()           
        loss_sum += loss.item()
        loss.backward()
        optimizer.step()
        predicted = torch.argmax(logit_spk, dim=1)
        corrects += predicted.eq(y_spk).cpu().sum().float()
        total += y_spk.size(0)
        if batch_idx in print_steps:
            print("train loss: {:.4f}, acc: {:.5f} " \
                  .format(loss_sum/total, corrects/total))
            
    return loss_sum, corrects/total

In [25]:
### import torch
from sv_system.train.si_train import val, sv_test

print("lamb: {}".format(config['lamb']))
for epoch_idx in range(0, config['n_epochs']):
    print("-"*30)
    
#     train code
    train_loss, train_acc = batch_switch_train(config, joint_loader, model, spk_optimizer,
                                                   sent_optimizer, criterion)
#     validation code
    val_loss, val_acc = val(config, val_loader, model, criterion)
    
    print("epoch #{}, train accuracy: {}".format(epoch_idx, train_acc))
    print("epoch #{}, val accuracy: {}".format(epoch_idx, val_acc))

#     evaluate best_metric
    if not config['no_eer']:
        # eer validation code
        eer, label, score = sv_test(config, sv_loader, model, trial)
        print("epoch #{}, sv eer: {}".format(epoch_idx, eer))
    
    scheduler.step(train_loss)

lamb: 0.3
------------------------------
train loss: 0.0440, acc: 0.00326 
train loss: 0.0473, acc: 0.00583 
train loss: 0.0476, acc: 0.00700 
train loss: 0.0469, acc: 0.00765 
train loss: 0.0461, acc: 0.00897 
train loss: 0.0456, acc: 0.00974 
train loss: 0.0452, acc: 0.01061 
train loss: 0.0449, acc: 0.01126 
train loss: 0.0448, acc: 0.01085 
epoch #0, train accuracy: 0.009777718223631382
epoch #0, val accuracy: 0.002008928684517741
epoch #0, sv eer: 0.30195555555555553
------------------------------
train loss: 0.0440, acc: 0.00000 
train loss: 0.0429, acc: 0.00187 
train loss: 0.0420, acc: 0.00391 
train loss: 0.0418, acc: 0.00659 
train loss: 0.0414, acc: 0.00972 
train loss: 0.0410, acc: 0.01226 
train loss: 0.0408, acc: 0.01534 
train loss: 0.0406, acc: 0.01740 
train loss: 0.0405, acc: 0.01652 
epoch #1, train accuracy: 0.01487816870212555
epoch #1, val accuracy: 0.0066964286379516125
epoch #1, sv eer: 0.2922777777777778
------------------------------
train loss: 0.0430, acc: 0

Process Process-703:
Process Process-690:
Process Process-689:
Process Process-704:
Process Process-693:
Process Process-691:
Process Process-699:
Process Process-692:
Process Process-700:
Process Process-694:
Process Process-697:


KeyboardInterrupt: 

Process Process-698:
Process Process-701:
Process Process-702:
Process Process-695:
Process Process-696:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/process.py",

  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 57, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 57, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 57, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "../sv_system/data/dataset.py", line 175, in __getitem__
    return self.preprocess(os.path.join(self.data_folder, self.audio_files[index])), self.audio_labels[in

KeyboardInterrupt
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/librosa/core/audio.py", line 112, in load
    with audioread.audio_open(os.path.realpath(path)) as input_file:
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/librosa/feature/spectral.p

In [None]:
torch.save(model.cpu().state_dict(), open("gcommand_ResNet34_v1_mtl_lamb0.3.pt", "wb"))

### SV_Test

equal_sent and diff_sent

In [None]:
from sv_system.sv_score.score_utils import embeds_utterance

def sv_test(config, sv_loader, model, trial):
    embeddings, _ = embeds_utterance(config, sv_loader, model, lda=None)
    sim_matrix = F.cosine_similarity(
            embeddings.unsqueeze(1), embeddings.unsqueeze(0), dim=2)
    cord = [trial.enrolment_id.tolist(), trial.test_id.tolist()]
    score_vector = sim_matrix[cord].numpy()
    label_vector = np.array(trial.label)
    fpr, tpr, thres = roc_curve(
            label_vector, score_vector, pos_label=1)
    eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]

    return eer, label_vector, score_vector

In [None]:
equal_sent_trial = trial[trial.equal_command]
diff_sent_trial = trial[~trial.equal_command]

In [None]:
model.cuda()
equal_sent_eer, _, _ = sv_test(config, sv_loader, model, equal_sent_trial)
diff_sent_eer, _, _ = sv_test(config, sv_loader, model, diff_sent_trial)

In [None]:
# gcommand_ResNet34_v1_mtl_lamb0.1.pt
print(f"equal: {equal_sent_eer}\ndiff: {diff_sent_eer}")

In [None]:
config['lamb']