# Mix-up training

paper: https://arxiv.org/abs/1710.09412  
code: https://github.com/facebookresearch/mixup-cifar10

## Environment

In [1]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import pandas as pd
import pickle
import numpy as np
import sys
import os

sys.path.append('../')
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


### Configuration

In [2]:
from sv_system.utils.parser import set_train_config
import easydict
args = easydict.EasyDict(dict(dataset="voxc1_fbank_xvector",
                              input_frames=800, splice_frames=[300, 800], stride_frames=1, 
                              input_format='fbank',
                              cuda=True,
                              lrs=[0.001, 0.001], lr_schedule=[20], seed=1337,
                              no_eer=False,
                              batch_size=128,
                              arch="ResNet34_v4", loss="softmax",
                              n_epochs=10
                             ))
config = set_train_config(args)

### Dataset and Dataloader

In [3]:
from sv_system.data.data_utils import find_dataset, find_trial

_, datasets = find_dataset(config, basedir='../')
trial = find_trial(config, basedir='../')

In [4]:
from sv_system.data.dataloader import init_loaders

dataloaders = init_loaders(config, datasets)

### Define Model

In [5]:
from sv_system.model.model_utils import find_model
model = find_model(config)

### Load Model

In [8]:
import torch 

saved_model = torch.load("../best_models/voxc1/ResNet34_v4_softmax/ResNet34_v4_softmax_best.pth.tar")

In [9]:
import itertools
model_state = model.state_dict()
for k1, k2 in zip(saved_model['state_dict'], model_state):
#     print(k1, k2)
    assert saved_model['state_dict'][k1].shape == model_state[k2].shape
    model_state[k2] = saved_model['state_dict'][k1]
    

model.load_state_dict(model_state)

## Train

In [10]:
from sv_system.train.train_utils import set_seed, find_optimizer

criterion, optimizer = find_optimizer(config, model)

In [11]:
set_seed(config)

In [12]:
if not config['no_eer']:
    train_loader, val_loader, test_loader, sv_loader = dataloaders
else:
    train_loader, val_loader, test_loader = dataloaders

In [13]:
import torch 

def mixup_data(x, y, alpha=1.0, use_cuda=True):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

In [16]:
from tqdm import tqdm_notebook
from sv_system.train.train_utils import print_eval

alpha = 0.1
def train(config, train_loader, model, optimizer, criterion):
    model.train()
    loss_sum = 0
    corrects = 0
    total = 0
    print_steps = (np.array([0.25, 0.5, 0.75, 1.0]) \
                    * len(train_loader)).astype(np.int64)

    splice_frames = config['splice_frames']
    if len(splice_frames) > 1:
        splice_frames_ = np.random.randint(splice_frames[0], splice_frames[1])
    else:
        splice_frames_ = splice_frames[-1]

    for batch_idx, (X, y) in tqdm_notebook(enumerate(train_loader), ncols=300,
            total=len(train_loader)):
        # X.shape is (batch, channel, time, bank)
        X = X.narrow(2, 0, splice_frames_)
        X, y_a, y_b, lam = mixup_data(x=X, y=y, alpha=alpha, use_cuda=False)
        if not config["no_cuda"]:
            X = X.cuda()
            y_a = y_a.cuda()
            y_b = y_b.cuda()
        optimizer.zero_grad()
        scores = model(X)
        loss = mixup_criterion(criterion, scores, y_a, y_b, lam)
        loss_sum += loss.item()
        loss.backward()
        # learning rate change
        optimizer.step()
        # schedule over iteration
        predicted = torch.argmax(scores, dim=1)
        corrects += (lam * predicted.eq(y_a).cpu().sum().float()
                    + (1 - lam) * predicted.eq(y_b).cpu().sum().float())
        total += y_a.size(0)
        if batch_idx in print_steps:
            print("train loss, acc: {:.4f}, {:.5f} ".format(corrects/total, loss_sum))
            
    return loss_sum, corrects/total

In [None]:
from sv_system.train.si_train import val, sv_test

print("alpha value: {}".format(alpha))
for epoch_idx in range(0, config['n_epochs']):
    print("-"*30)
    curr_lr = optimizer.state_dict()['param_groups'][0]['lr']
    idx = 0
    while(epoch_idx >= config['lr_schedule'][idx]):
    # use new lr from schedule epoch not a next epoch
        idx += 1
        if idx == len(config['lr_schedule']):
            break
    curr_lr = config['lrs'][idx]
    optimizer.state_dict()['param_groups'][0]['lr'] = curr_lr
    print("curr_lr: {}".format(curr_lr))

#     train code
    train_loss, train_acc = train(config, train_loader, model, optimizer, criterion)

#     validation code
    val_loss, val_acc = val(config, val_loader, model, criterion, tqdm=tqdm_notebook)
    print("epoch #{}, val accuracy: {}".format(epoch_idx, val_acc))

#     evaluate best_metric
    if not config['no_eer']:
        # eer validation code
        eer, label, score = sv_test(config, sv_loader, model, trial, tqdm=tqdm_notebook)
        print("epoch #{}, sv eer: {}".format(epoch_idx, eer))

alpha value: 0.1
------------------------------
curr_lr: 0.001


HBox(children=(IntProgress(value=0, layout=Layout(flex='2'), max=1042), HTML(value='')), layout=Layout(display…

train loss, acc: 0.8992, 488.82143 
train loss, acc: 0.9019, 891.48855 
