Gender-dependent training
---------------------------

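Gender loss를 붙여서 각 gender 안에서 잘 구분할 수 있는 feature를 만들 수 있을까? (Can attaching an auxiliary gender loss produce features that separate speakers well within each gender?)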

In [1]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import pandas as pd
import pickle
import numpy as np
import sys
import os

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [2]:
# Let the notebook import the project's `sv_system` package by relative path.
sys.path.append('../../sv_system/')
# Order GPUs by PCI bus id (see issue #152) and expose only device "2" to CUDA.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "2",
})

### Configuration

In [3]:
from utils.parser import set_train_config
import easydict
# Experiment settings handed to the project's config builder.
# NOTE(review): later cells read config["no_cuda"] and config["input_clip"], which
# are not set here -- presumably set_train_config() derives them; confirm.
args = easydict.EasyDict({
    # dataset / input features
    "dataset": "voxc1_mfcc30",
    "data_folder": "/dataset/SV_sets/voxceleb12/feats/mfcc30/",
    "input_frames": 800,
    "splice_frames": [200, 800],
    "stride_frames": 1,
    "input_format": 'mfcc',
    "input_dim": 30,
    # device
    "cuda": True,
    # optimisation
    "lrs": [0.1, 0.01],
    "lr_schedule": [20],
    "seed": 1337,
    # skip the per-epoch EER evaluation in the training loop
    "no_eer": True,
    "batch_size": 256,
    "num_workers": 8,
    # model / objective
    "arch": "tdnn_xvector",
    "loss": "softmax",
    "n_epochs": 50,
    "n_labels": 1211,
})
config = set_train_config(args)

### Dataset and Dataloader

In [4]:
# Utterance tables: one for speaker-identification training, one for the
# speaker-verification test set.
voxc1_si_df = pd.read_pickle(
    "/dataset/SV_sets/voxceleb12/dataframes/voxc1_si_train_dataframe.pkl"
)
voxc1_sv_df = pd.read_pickle(
    "/dataset/SV_sets/voxceleb12/dataframes/voxc12_sv_test_dataframe.pkl"
)

# Distinct speakers in the SI table; its length sizes the speaker output layer.
si_spks = voxc1_si_df["spk"].unique().tolist()

In [5]:
import torch.utils.data as data
from collections  import OrderedDict

class MTLfeatDataset(data.Dataset):
    """Feature dataset for multi-task (speaker + gender) training.

    Each item is ``(features, label)`` where ``features`` is a float tensor of
    shape ``(1, frames, input_dim)`` loaded from a ``.npy`` file and ``label``
    is whatever value was stored for that file (``read_df`` stores a
    ``(speaker_label, gender_label)`` tuple).
    """

    def __init__(self, data, set_type, config):
        """
        Args:
            data: mapping ``file path -> label``; iteration order defines indices.
            set_type: split name (e.g. ``"train"``, ``"val"``); stored only.
            config: mapping providing ``data_folder``, ``input_frames``,
                ``input_clip`` and ``input_dim``.
        """
        super().__init__()
        self.set_type = set_type
        # data
        self.files = list(data.keys())
        self.labels = list(data.values())
        self.data_folder = config["data_folder"]
        # input audio config
        self.input_frames = config["input_frames"]
        self.input_clip = config["input_clip"]
        self.input_dim = config["input_dim"]

        # Random cropping is off for every split: the original code assigned
        # False in both the "train" and the non-"train" branch, so the crop
        # always starts at frame 0.
        # NOTE(review): confirm whether "train" was meant to enable it.
        self.random_clip = False

    def preprocess(self, example):
        """Load one feature file and bring it to a fixed number of frames.

        Args:
            example: path of a ``.npy`` file holding a 2-D ``(frames, dims)`` array.

        Returns:
            Float tensor of shape ``(1, frames, input_dim)``. When
            ``input_clip`` is set, ``frames == input_frames``: longer inputs are
            cropped and shorter ones are extended by repeating themselves.

        Raises:
            ValueError: if the file has zero frames and padding is required.
        """
        feat = np.load(example)
        n_frames = len(feat)

        # clipping
        in_len = self.input_frames
        if self.input_clip:
            if n_frames > in_len:
                if self.random_clip:
                    # randint's upper bound is exclusive; +1 makes the last
                    # valid window (start == n_frames - in_len) reachable.
                    start = np.random.randint(0, n_frames - in_len + 1)
                else:
                    start = 0
                feat = feat[start:start + in_len]
            else:
                if n_frames == 0:
                    # Nothing to repeat; fail with a message that names the file
                    # instead of an opaque ZeroDivisionError.
                    raise ValueError("empty feature file: {}".format(example))
                gap = in_len - n_frames
                # Repeat the utterance (reported to work better than
                # zero-padding): `repeat` whole copies plus a partial one.
                repeat, residual = divmod(gap, n_frames)
                feat = np.concatenate(
                    [np.tile(feat, (repeat + 1, 1)), feat[:residual]], axis=0)

        # TODO why do they have different input dimension?
        # Keep only the first `input_dim` coefficients of every frame.
        feat = feat[:, :self.input_dim]
        # Add a leading singleton dimension (stands for a channel dimension).
        return torch.from_numpy(feat).unsqueeze(0).float()

    @classmethod
    def read_df(cls, config, df, set_type):
        """Build a dataset from a dataframe.

        Args:
            config: see ``__init__``.
            df: dataframe with ``feat`` (relative file path) and ``gender``
                columns and an optional ``label`` column (defaults to -1).
            set_type: split name forwarded to the constructor.

        Returns:
            Dataset whose labels are ``(label, gender)`` tuples, with gender
            encoded as 1 for ``'m'`` and 0 for anything else.
        """
        files = df.feat.tolist()
        if "label" in df.columns:
            labels = df.label.tolist()
        else:
            labels = [-1] * len(df)

        gender = df.gender.apply(lambda x: 1 if x == 'm' else 0).values
        samples = OrderedDict(zip(files, zip(labels, gender)))
        return cls(samples, set_type, config)

    def __getitem__(self, index):
        """Return ``(features, label)`` for the file at ``index``."""
        path = os.path.join(self.data_folder, self.files[index])
        return self.preprocess(path), self.labels[index]

    def __len__(self):
        """Number of utterances."""
        return len(self.labels)

In [6]:
from data.data_utils import find_dataset, find_trial

# Trial table for the configured dataset; later cells index it through its
# `enrolment_id`, `test_id` and `label` columns.
trial = find_trial(config, basedir='../../')

=> loaded trial: voxc12_test_trial


In [29]:
from data.dataset import featDataset

# Multi-task datasets (speaker + gender labels) for the two SI splits.
train_dataset = MTLfeatDataset.read_df(
    config, voxc1_si_df.loc[voxc1_si_df["set"] == 'train'], "train")
val_dataset = MTLfeatDataset.read_df(
    config, voxc1_si_df.loc[voxc1_si_df["set"] == 'val'], "val")
# The verification set uses the project's own single-label dataset class.
sv_dataset = featDataset.read_df(config, voxc1_sv_df, 'sv')

In [8]:
def _mtl_collate_fn(batch):
    """
    collate_fn for multi labels
    """
    data, label = list(zip(*batch))
    data = torch.cat(data)
    y1, y2 = list(zip(*label))
    
    return data, torch.LongTensor(y1), torch.LongTensor(y2)

In [30]:
import torch
from torch.utils.data.dataloader import default_collate
batch_size = config["batch_size"]
num_workers = config["num_workers"]

# Options shared by all three loaders.
_loader_opts = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=False)

# Train/val batches carry two labels per sample, so they need the custom collate.
train_loader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True, collate_fn=_mtl_collate_fn, **_loader_opts)

val_loader = torch.utils.data.DataLoader(
    val_dataset, shuffle=False, collate_fn=_mtl_collate_fn, **_loader_opts)

# The verification set comes from the project's featDataset and uses default collation.
sv_loader = torch.utils.data.DataLoader(
    sv_dataset, shuffle=False, collate_fn=default_collate, **_loader_opts)

### Define Model

In [10]:
import torch.nn as nn
from model.tdnnModel import st_pool_layer

class tdnn_xvector(nn.Module):
    """x-vector style TDNN with two classification heads (speaker and gender).

    A shared 1-D convolutional extractor maps an utterance to a 512-dim
    embedding; ``spk_classifier`` and ``gender_classifier`` each map that
    embedding to their own logits.
    """

    def __init__(self, config, n_spk_labels, n_gender_labels):
        """
        Args:
            config: mapping providing ``input_dim`` (feature size per frame).
            n_spk_labels: number of speaker classes.
            n_gender_labels: number of gender classes.
        """
        super(tdnn_xvector, self).__init__()

        inDim = config['input_dim']
        self.extractor = nn.Sequential(
            nn.Conv1d(inDim, 512, stride=1, dilation=1, kernel_size=5),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Conv1d(512, 512, stride=1, dilation=3, kernel_size=3),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Conv1d(512, 512, stride=1, dilation=4, kernel_size=3),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Conv1d(512, 512, stride=1, dilation=1, kernel_size=1),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Conv1d(512, 1500, stride=1, dilation=1, kernel_size=1),
            nn.BatchNorm1d(1500),
            nn.ReLU(True),
            # NOTE(review): presumably mean+std pooling over time, giving
            # 2 * 1500 = 3000 features -- confirm in model.tdnnModel.
            st_pool_layer(),
            nn.Linear(3000, 512),
        )

        self.spk_classifier = nn.Sequential(
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Linear(512, n_spk_labels)
        )

        self.gender_classifier = nn.Sequential(
            # Must NOT be in-place: this layer receives the shared embedding
            # that spk_classifier's first BatchNorm has already consumed and
            # kept for its backward pass. Overwriting it would corrupt that
            # saved tensor (autograd raises on backward()).
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Linear(512, n_gender_labels),
        )

        self._initialize_weights()

    def _initialize_weights(self):
        """Initialise conv weights with N(0, sqrt(2 / fan_out)), linear weights
        with N(0, 0.01), batch-norm scale to 1, and every bias to 0."""
        # Imported here so the class does not depend on `%pylab` having leaked
        # a `math` name into the notebook namespace.
        import math

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.Conv1d):
                n = m.kernel_size[0] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

    def embed(self, x):
        """Return the shared embedding for a batch.

        Dim 1 is squeezed if it is a singleton, then the last two axes are
        swapped so the feature axis becomes the Conv1d channel axis.
        """
        x = x.squeeze(1)
        # (batch, time, freq) -> (batch, freq, time)
        x = x.permute(0, 2, 1)
        x = self.extractor(x)

        return x

    def forward(self, x):
        """Return ``(speaker_logits, gender_logits)`` for a batch."""
        x_ = self.embed(x)
        x1 = self.spk_classifier(x_)
        x2 = self.gender_classifier(x_)

        return x1, x2


### Model Train

In [11]:
# One speaker class per speaker in the SI dataframe, and two gender classes.
model = tdnn_xvector(config,  len(si_spks), 2)

In [12]:
# nn.Module.cuda()/cpu() move parameters in place and return the module itself,
# so rebinding `model` in both cases is equivalent to the original branches.
model = model.cpu() if config["no_cuda"] else model.cuda()

In [13]:
import torch
from train.train_utils import set_seed, find_optimizer
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Loss function and optimizer come from the project's helper.
criterion, optimizer = find_optimizer(config, model)
# Cut the learning rate by 10x once the monitored value (the training loss, see
# the training cell) has not decreased for 5 consecutive epochs.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

In [14]:
# NOTE(review): this runs after the model has already been constructed and
# initialised in an earlier cell, so (assuming set_seed seeds the global RNGs --
# confirm in train.train_utils) the weight initialisation is not covered by the
# seed. Move this call before model creation for reproducible runs.
set_seed(config)

In [52]:
import torch
from train.si_train import val
from eval.sv_test import sv_test

# Multi-task training: total loss = (1 - lamb) * speaker loss + lamb * gender loss.
config['lamb'] = 0.3
print("lamb: {}".format(config['lamb']))
for epoch_idx in range(0, config['n_epochs']):
    print("-"*30)
    curr_lr = optimizer.state_dict()['param_groups'][0]['lr']
    print("curr_lr: {}".format(curr_lr))

    #==============================train code==============================
    model.train()
    loss_sum = 0
    loss_spk_sum = 0
    loss_gender_sum = 0
    corrects = 0
    total = 0
    lamb = config['lamb']
    # Batch indices at which progress is printed (every 10% of an epoch).
    print_steps = (np.arange(0, 1, 0.1) \
                    * len(train_loader)).astype(np.int64)

    # One crop length is drawn per epoch (not per batch) from [min, max).
    splice_frames = config['splice_frames']
    if len(splice_frames) > 1:
        splice_frames_ = np.random.randint(splice_frames[0], splice_frames[1])
    else:
        splice_frames_ = splice_frames[-1]

    for batch_idx, (X, y_spk, y_gender) in enumerate(train_loader):
        # _mtl_collate_fn concatenates the per-utterance (1, time, feat) tensors,
        # so X is (batch, time, feat) and dim 1 is the time axis.
        X = X.narrow(1, 0, splice_frames_)
        if not config["no_cuda"]:
            X = X.cuda()
            y_spk = y_spk.cuda()
            y_gender = y_gender.cuda()
        optimizer.zero_grad()
        logit_spk, logit_gender = model(X)

        loss_spk = criterion(logit_spk, y_spk)
        loss_gender = criterion(logit_gender, y_gender)
        loss = loss_spk * (1-lamb) + loss_gender  * (lamb)

        loss_sum += loss.item()
        loss_spk_sum += loss_spk.item()
        loss_gender_sum += loss_gender.item()

        loss.backward()
        optimizer.step()

        predicted = torch.argmax(logit_spk, dim=1)
        # .item() keeps the counters as plain Python numbers.
        corrects += predicted.eq(y_spk).sum().item()
        total += y_spk.size(0)

        if batch_idx in print_steps:
            # NOTE(review): the loss sums are divided by the number of samples.
            # If `criterion` already averages over the batch, these values are
            # too small by a factor of ~batch_size -- confirm the reduction.
            print("train loss: {:.4f}, spk_loss: {:.4f}, gender_loss: {:.4f}, acc: {:.5f} " \
                  .format(loss_sum/total, loss_spk_sum/total, loss_gender_sum/total, corrects/total)
                 )
    train_acc = corrects/total
    train_loss = loss_sum/total
    print("epoch #{}, train accuracy: {}".format(epoch_idx, train_acc))

    scheduler.step(train_loss)
    #==============================validation code==============================
    # Switch BatchNorm to its running statistics and disable autograd: validating
    # in train mode would normalise with per-batch statistics and also update
    # the running statistics from validation data.
    model.eval()
    total = 0
    corrects = 0
    with torch.no_grad():
        for batch_idx, (X, y_spk, y_gender) in enumerate(val_loader):
            if not config["no_cuda"]:
                X = X.cuda()
                y_spk = y_spk.cuda()
            logit_spk, logit_gender = model(X)
            predicted = torch.argmax(logit_spk, dim=1)
            corrects += predicted.eq(y_spk).sum().item()
            total += y_spk.size(0)

    val_acc = corrects/total

    print("epoch #{}, val accuracy: {}".format(epoch_idx, val_acc))

    #==============================evaluate best_metric==============================
    if not config['no_eer']:
        # eer validation code
        eer, label, score = sv_test(config, sv_loader, model, trial)
        print("epoch #{}, sv eer: {}".format(epoch_idx, eer))
    

lamb: 0.3
------------------------------
curr_lr: 0.00010000000000000003
train loss: 0.0000, spk_loss: 0.0000, gender_loss: 0.0000, acc: 1.00000 
train loss: 0.0000, spk_loss: 0.0000, gender_loss: 0.0000, acc: 1.00000 


Process Process-679:
Process Process-678:
Process Process-680:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Process Process-673:
Process Process-677:
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Process Process-676:
Process Process-675:
Process Process-674:
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 96, in _worker_loop
    r = index_queue.get(timeout=MANAGER_STATUS_CHECK_INTERVAL)
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/

  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/selectors.py", line 376, in select
    fd_event_list = self._poll.poll(timeout)
KeyboardInterrupt
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/numpy/lib/format.py", line 635, in read_array
    shape, fortran_order, dtype = _read_array_header(fp, version)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/numpy/lib/format.py", line 497, in _read_array_header
    header = _filter_header(header)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/numpy/lib/format.py", line 459, in _filter_header
    for token in tokenize.generate_tokens(StringIO(string).readline):
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/tokenize.py", line 601, in _tokenize
    start, end = pseudomatch.span(1)
KeyboardInterrupt
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/connection.py", line 257, in poll
    return self._poll(timeout)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/m

Traceback (most recent call last):
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-52-fe2429cd3edd>", line 42, in <module>
    loss_sum += loss.item()
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 1863, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site

RuntimeError: DataLoader worker (pid 30003) exited unexpectedly with exit code 1. Details are lost due to multiprocessing. Rerunning with num_workers=0 may give better error trace.

### SV_Test

equal_sent and diff_sent

In [35]:
from eval.sv_test import embeds_utterance

# Whole SI dataframe (all splits) through the project's single-label dataset,
# iterated in order so embeddings line up with dataframe rows.
si_dataset = featDataset.read_df(config, voxc1_si_df, "train")
si_loader = torch.utils.data.DataLoader(
    si_dataset,
    collate_fn=default_collate,
    pin_memory=False,
    shuffle=False,
    num_workers=num_workers,
    batch_size=batch_size,
)

In [36]:
# Embeddings for the SI utterances; the second return value is discarded.
si_embeddings, _ = embeds_utterance(config, si_loader, model, lda=None)

In [53]:
# Embeddings for the verification utterances; the second return value is discarded.
sv_embeddings, _ = embeds_utterance(config, sv_loader, model, lda=None)

In [54]:
from torch.nn.functional import cosine_similarity

# Score every trial as the cosine similarity between its enrolment and test embeddings.
scores = cosine_similarity(sv_embeddings[trial.enrolment_id], 
                       sv_embeddings[trial.test_id], dim=1)
# NOTE(review): `compute_eer` is not imported in any visible cell, so this line
# raises NameError on a fresh kernel -- add the import from wherever the
# project defines it.
# First argument: scores of trials labelled 1; second: scores of trials labelled 0.
compute_eer(scores[trial.label == 1], scores[trial.label == 0])

eer:50.000% at threshold 0.8963


(0.5, 0.89626014)

In [None]:
# Split the trial table on its boolean `equal_command` column.
# NOTE(review): this cell has never been executed in the saved notebook; verify
# that the loaded trial table actually has an `equal_command` column.
equal_sent_trial = trial[trial.equal_command]
diff_sent_trial = trial[~trial.equal_command]

In [None]:
# NOTE(review): moves the model to the GPU unconditionally, ignoring config["no_cuda"].
model.cuda()
# Only the first of sv_test's three return values (the EER) is kept per subset.
equal_sent_eer, _, _ = sv_test(config, sv_loader, model, equal_sent_trial)
diff_sent_eer, _, _ = sv_test(config, sv_loader, model, diff_sent_trial)

In [None]:
# Checkpoint name noted by the author: gcommand_ResNet34_v1_mtl_lamb0.1.pt
# NOTE(review): that name does not match the tdnn_xvector model or the lamb=0.3
# used in this notebook -- likely a leftover from another experiment.
print(f"equal: {equal_sent_eer}\ndiff: {diff_sent_eer}")

In [None]:
# Display the gender-loss weight set in the training cell.
config['lamb']