### Environment

In [1]:
%load_ext autoreload
%autoreload 2
%pylab
%matplotlib inline

import pandas as pd
import pickle
import numpy as np
import sys
import os

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [2]:
sys.path.append('../')
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="2"

### Configuration

In [3]:
from sv_system.utils.parser import set_train_config
import easydict

args = easydict.EasyDict(dict(dataset="voxc1_fbank_xvector", 
                              data_folder="/dataset/SV_sets/voxceleb12/feats/fbank64_vad/",
                              input_frames=800, splice_frames=[200, 800], stride_frames=1, 
                              input_format='fbank', input_dim=64, random_clip=False,
                              n_epochs=200, lrs=[0.1, 0.01], lr_schedule=[20], seed=1337,
                              no_eer=False, batch_size=128,
                              gpu_no=[0], cuda=True, num_workers=4,
                              arch="tdnn_conv", loss="softmax",
                             ))
config = set_train_config(args)

### Dataset

In [4]:
voxc2_meta = pd.read_csv("/dataset/SV_sets/voxceleb2/vox2_meta.csv")
voxc2_meta.columns = ['voxc2_id', 'vggface2_id', 'gender', 'set']
voxc2_dev_spks = voxc2_meta[voxc2_meta['set'] == 'dev '].voxc2_id.str.strip()
voxc2_test_spks = voxc2_meta[voxc2_meta['set'] == 'test '].voxc2_id.str.strip()

In [5]:
# voxceleb2
voxc2_si = pd.read_csv("/dataset/SV_sets/voxceleb2/dataframes/voxc2_si.csv")
dev_df = voxc2_si[voxc2_si.spk.isin(voxc2_dev_spks)]
dev_df['label'] = dev_df.groupby("spk").ngroup()
dev_train_df = dev_df[dev_df.set == 'train']
dev_val_df = dev_df[dev_df.set == 'val']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [6]:
eval_df = voxc2_si[voxc2_si.spk.isin(voxc2_test_spks)]
eval_df['label'] = eval_df.groupby("spk").ngroup()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [7]:
# dev_df.to_csv("/dataset/SV_sets/voxceleb2/dataframes/voxc2_dev.csv", index=False)
# eval_df.to_csv("/dataset/SV_sets/voxceleb2/dataframes/voxc2_eval.csv", index=False)

In [8]:
# dev_df = pd.read_csv("/dataset/SV_sets/voxceleb1/dataframes/voxc1_dev.csv")
# # dev_df['label'] = dev_df.groupby("spk").ngroup()
# dev_train_df = dev_df[dev_df.set == 'train']
# dev_val_df = dev_df[dev_df.set == 'val']
# eval_df = pd.read_csv("/dataset/SV_sets/voxceleb1/dataframes/voxc1_eval.csv") 

In [9]:
from sv_system.data.feat_dataset import FeatDataset

dev_train_dataset = FeatDataset.read_df(config, dev_train_df, 'train')
dev_val_dataset = FeatDataset.read_df(config, dev_val_df, 'test')
eval_dataset = FeatDataset.read_df(config, eval_df, 'test')

### Dataloader

In [10]:
from sv_system.data.dataloader import init_default_loader 
dev_train_dataloader = init_default_loader(config, dev_train_dataset, shuffle=True, var_len=False) 
dev_val_dataloader = init_default_loader(config, dev_val_dataset, shuffle=False, var_len=False) 
eval_dataloader = init_default_loader(config, eval_dataset, shuffle=False, var_len=True) 

### Model

In [13]:
from tdnn_models import tdnn_xvector_deep_v0
import torch

model = tdnn_xvector_deep_v0(config, 512, n_labels=len(dev_train_df.label.unique()))
# saved_model = torch.load("trained_models/voxc2_dev_tdnn_deep_v0.pt")
# model.load_state_dict(saved_model)

if not config['no_cuda']:
    model = model.cuda()

In [14]:
import torch.nn as nn
from torch.optim import SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau, MultiStepLR
from sklearn.metrics import roc_curve
import torch.nn.functional as F
from tensorboardX import SummaryWriter
from tqdm import tqdm

optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
from fine_tune_utils import class_weight
# criterion = nn.CrossEntropyLoss(weight=class_weight(config, dev_train_df))
criterion = nn.CrossEntropyLoss()
# plateau_scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.2, patience=5)
step_scheduler = MultiStepLR(optimizer, [8, 15, 20], 0.2)

writer = SummaryWriter("logs/voxc2_dev_tdnn_deep_v0_noRandIn")
model_path = "trained_models/voxc2_dev_tdnn_deep_v0_noRandIn.pt"

for epoch_idx in range(0, config['n_epochs']):
    step_scheduler.step()
    print("-"*30)
    curr_lr = optimizer.state_dict()['param_groups'][0]['lr']
    print("curr_lr: {}".format(curr_lr))
    
# =============== train code #===============
    model.train()
    loss_sum = 0
    n_corrects = 0
    total = 0
    for batch_idx, (X, y) in tqdm(enumerate(dev_train_dataloader), ascii=None, total=len(dev_train_dataloader)):
        if not config['no_cuda']:
            X = X.cuda()
            y = y.cuda()

        optimizer.zero_grad()
        logit = model(X)
        loss = criterion(logit, y)
        loss.backward()
        optimizer.step()
                        
        loss_sum += loss.item()
        n_corrects += logit.max(1)[1].eq(y).sum().item()
        total += y.size(0)
        if (batch_idx+1) % 1000 == 0:
            print("Batch {}/{}\t Loss {:.6f}" \
                  .format(batch_idx+1, len(dev_train_dataloader), loss_sum / total))
    train_loss = loss_sum / total
    train_acc = n_corrects / total
#     plateau_scheduler.step(train_loss)
    
    print("epoch #{}, train loss: {:.4f}, train acc: {:.4f}".format(epoch_idx, train_loss, train_acc))
    writer.add_scalar("train/loss", train_loss, epoch_idx+1)
    writer.add_scalar("train/acc", train_acc, epoch_idx+1)

#=============== dev_val code #===============
    model.eval()
    loss_sum = 0
    n_corrects = 0
    total = 0
    for batch_idx, (X, y) in enumerate(dev_val_dataloader):
        if not config['no_cuda']:
            X = X.cuda()
            y = y.cuda()

        logit = model(X)
        loss = criterion(logit, y)
        loss_sum += loss.item()
        n_corrects += logit.max(1)[1].eq(y).sum().item()
        total += y.size(0)
    val_loss = loss_sum / total
    val_acc = n_corrects / total
    
    print("epoch #{}, val loss: {:.4f}, val acc: {:.4f}".format(epoch_idx, val_loss, val_acc))
    writer.add_scalar("val/loss", val_loss, epoch_idx+1)
    writer.add_scalar("val/acc", val_acc, epoch_idx+1)
    
#=============== model save #===============
    torch.save(model.state_dict(), model_path)

------------------------------
curr_lr: 0.1


 13%|█▎        | 1000/7737 [05:52<39:38,  2.83it/s]

Batch 1000/7737	 Loss 0.056144


 26%|██▌       | 2000/7737 [11:46<33:47,  2.83it/s]

Batch 2000/7737	 Loss 0.047170


 39%|███▉      | 3000/7737 [17:40<27:55,  2.83it/s]

Batch 3000/7737	 Loss 0.040755


 52%|█████▏    | 4000/7737 [23:34<22:01,  2.83it/s]

Batch 4000/7737	 Loss 0.035940


 65%|██████▍   | 5000/7737 [29:28<16:08,  2.83it/s]

Batch 5000/7737	 Loss 0.032160


 78%|███████▊  | 6000/7737 [35:22<10:14,  2.83it/s]

Batch 6000/7737	 Loss 0.029158


 90%|█████████ | 7000/7737 [41:15<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.026759


100%|██████████| 7737/7737 [45:34<00:00,  2.83it/s]

epoch #0, train loss: 0.0253, train acc: 0.4190





epoch #0, val loss: 0.0275, val acc: 0.3437
------------------------------
curr_lr: 0.1


 13%|█▎        | 1000/7737 [05:52<39:37,  2.83it/s]

Batch 1000/7737	 Loss 0.009389


 26%|██▌       | 2000/7737 [11:45<33:43,  2.83it/s]

Batch 2000/7737	 Loss 0.009182


 39%|███▉      | 3000/7737 [17:38<27:50,  2.83it/s]

Batch 3000/7737	 Loss 0.008920


 52%|█████▏    | 4000/7737 [23:31<21:58,  2.83it/s]

Batch 4000/7737	 Loss 0.008746


 65%|██████▍   | 5000/7737 [29:24<16:05,  2.83it/s]

Batch 5000/7737	 Loss 0.008564


 78%|███████▊  | 6000/7737 [35:17<10:12,  2.83it/s]

Batch 6000/7737	 Loss 0.008398


 90%|█████████ | 7000/7737 [41:10<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.008239


100%|██████████| 7737/7737 [45:30<00:00,  2.83it/s]

epoch #1, train loss: 0.0081, train acc: 0.7686





epoch #1, val loss: 0.0179, val acc: 0.5314
------------------------------
curr_lr: 0.1


 13%|█▎        | 1000/7737 [05:53<39:41,  2.83it/s]

Batch 1000/7737	 Loss 0.005692


 26%|██▌       | 2000/7737 [11:47<33:48,  2.83it/s]

Batch 2000/7737	 Loss 0.005851


 39%|███▉      | 3000/7737 [17:40<27:54,  2.83it/s]

Batch 3000/7737	 Loss 0.005911


 52%|█████▏    | 4000/7737 [23:33<22:00,  2.83it/s]

Batch 4000/7737	 Loss 0.005943


 65%|██████▍   | 5000/7737 [29:26<16:07,  2.83it/s]

Batch 5000/7737	 Loss 0.005946


 78%|███████▊  | 6000/7737 [35:19<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.005941


 90%|█████████ | 7000/7737 [41:13<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.005918


100%|██████████| 7737/7737 [45:33<00:00,  2.83it/s]

epoch #2, train loss: 0.0059, train acc: 0.8306





epoch #2, val loss: 0.0251, val acc: 0.3932
------------------------------
curr_lr: 0.1


 13%|█▎        | 1000/7737 [05:53<39:42,  2.83it/s]

Batch 1000/7737	 Loss 0.004642


 26%|██▌       | 2000/7737 [11:47<33:48,  2.83it/s]

Batch 2000/7737	 Loss 0.004786


 39%|███▉      | 3000/7737 [17:40<27:54,  2.83it/s]

Batch 3000/7737	 Loss 0.004890


 52%|█████▏    | 4000/7737 [23:33<22:00,  2.83it/s]

Batch 4000/7737	 Loss 0.004971


 65%|██████▍   | 5000/7737 [29:26<16:06,  2.83it/s]

Batch 5000/7737	 Loss 0.005013


 78%|███████▊  | 6000/7737 [35:19<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.005035


 90%|█████████ | 7000/7737 [41:12<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.005056


100%|██████████| 7737/7737 [45:32<00:00,  2.83it/s]

epoch #3, train loss: 0.0051, train acc: 0.8551





epoch #3, val loss: 0.0133, val acc: 0.6469
------------------------------
curr_lr: 0.1


 13%|█▎        | 1000/7737 [05:53<39:40,  2.83it/s]

Batch 1000/7737	 Loss 0.004003


 26%|██▌       | 2000/7737 [11:46<33:45,  2.83it/s]

Batch 2000/7737	 Loss 0.004180


 39%|███▉      | 3000/7737 [17:39<27:52,  2.83it/s]

Batch 3000/7737	 Loss 0.004323


 52%|█████▏    | 4000/7737 [23:32<21:59,  2.83it/s]

Batch 4000/7737	 Loss 0.004425


 65%|██████▍   | 5000/7737 [29:25<16:06,  2.83it/s]

Batch 5000/7737	 Loss 0.004497


 78%|███████▊  | 6000/7737 [35:18<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.004549


 90%|█████████ | 7000/7737 [41:11<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.004581


100%|██████████| 7737/7737 [45:31<00:00,  2.83it/s]

epoch #4, train loss: 0.0046, train acc: 0.8694





epoch #4, val loss: 0.0135, val acc: 0.6435
------------------------------
curr_lr: 0.1


 13%|█▎        | 1000/7737 [05:53<39:40,  2.83it/s]

Batch 1000/7737	 Loss 0.003702


 26%|██▌       | 2000/7737 [11:46<33:46,  2.83it/s]

Batch 2000/7737	 Loss 0.003909


 39%|███▉      | 3000/7737 [17:39<27:53,  2.83it/s]

Batch 3000/7737	 Loss 0.004039


 52%|█████▏    | 4000/7737 [23:32<21:59,  2.83it/s]

Batch 4000/7737	 Loss 0.004149


 65%|██████▍   | 5000/7737 [29:25<16:06,  2.83it/s]

Batch 5000/7737	 Loss 0.004226


 78%|███████▊  | 6000/7737 [35:18<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.004276


 90%|█████████ | 7000/7737 [41:12<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.004308


100%|██████████| 7737/7737 [45:32<00:00,  2.83it/s]

epoch #5, train loss: 0.0043, train acc: 0.8769





epoch #5, val loss: 0.0202, val acc: 0.4920
------------------------------
curr_lr: 0.1


 13%|█▎        | 1000/7737 [05:52<39:34,  2.84it/s]

Batch 1000/7737	 Loss 0.003501


 26%|██▌       | 2000/7737 [11:45<33:44,  2.83it/s]

Batch 2000/7737	 Loss 0.003679


 39%|███▉      | 3000/7737 [17:38<27:51,  2.83it/s]

Batch 3000/7737	 Loss 0.003813


 52%|█████▏    | 4000/7737 [23:32<21:59,  2.83it/s]

Batch 4000/7737	 Loss 0.003936


 65%|██████▍   | 5000/7737 [29:25<16:06,  2.83it/s]

Batch 5000/7737	 Loss 0.004006


 78%|███████▊  | 6000/7737 [35:18<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.004057


 90%|█████████ | 7000/7737 [41:11<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.004099


100%|██████████| 7737/7737 [45:31<00:00,  2.83it/s]

epoch #6, train loss: 0.0041, train acc: 0.8831





epoch #6, val loss: 0.0194, val acc: 0.5049
------------------------------
curr_lr: 0.1


 13%|█▎        | 1000/7737 [05:53<39:43,  2.83it/s]

Batch 1000/7737	 Loss 0.003576


 26%|██▌       | 2000/7737 [11:46<33:47,  2.83it/s]

Batch 2000/7737	 Loss 0.003583


 39%|███▉      | 3000/7737 [17:39<27:53,  2.83it/s]

Batch 3000/7737	 Loss 0.003704


 52%|█████▏    | 4000/7737 [23:32<21:59,  2.83it/s]

Batch 4000/7737	 Loss 0.003803


 65%|██████▍   | 5000/7737 [29:25<16:06,  2.83it/s]

Batch 5000/7737	 Loss 0.003885


 78%|███████▊  | 6000/7737 [35:17<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.003942


 90%|█████████ | 7000/7737 [41:09<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.003983


100%|██████████| 7737/7737 [45:28<00:00,  2.84it/s]

epoch #7, train loss: 0.0040, train acc: 0.8866





epoch #7, val loss: 0.0187, val acc: 0.5578
------------------------------
curr_lr: 0.020000000000000004


 13%|█▎        | 1000/7737 [05:52<39:37,  2.83it/s]

Batch 1000/7737	 Loss 0.001558


 26%|██▌       | 2000/7737 [11:45<33:42,  2.84it/s]

Batch 2000/7737	 Loss 0.001374


 39%|███▉      | 3000/7737 [17:37<27:49,  2.84it/s]

Batch 3000/7737	 Loss 0.001285


 52%|█████▏    | 4000/7737 [23:30<21:57,  2.84it/s]

Batch 4000/7737	 Loss 0.001228


 65%|██████▍   | 5000/7737 [29:23<16:05,  2.84it/s]

Batch 5000/7737	 Loss 0.001187


 78%|███████▊  | 6000/7737 [35:15<10:12,  2.84it/s]

Batch 6000/7737	 Loss 0.001160


 90%|█████████ | 7000/7737 [41:08<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.001139


100%|██████████| 7737/7737 [45:28<00:00,  2.84it/s]

epoch #8, train loss: 0.0011, train acc: 0.9750





epoch #8, val loss: 0.0044, val acc: 0.8986
------------------------------
curr_lr: 0.020000000000000004


 13%|█▎        | 1000/7737 [05:54<39:45,  2.82it/s]

Batch 1000/7737	 Loss 0.000677


 26%|██▌       | 2000/7737 [11:47<33:50,  2.83it/s]

Batch 2000/7737	 Loss 0.000682


 39%|███▉      | 3000/7737 [17:41<27:56,  2.83it/s]

Batch 3000/7737	 Loss 0.000696


 52%|█████▏    | 4000/7737 [23:35<22:02,  2.83it/s]

Batch 4000/7737	 Loss 0.000712


 65%|██████▍   | 5000/7737 [29:29<16:08,  2.83it/s]

Batch 5000/7737	 Loss 0.000725


 78%|███████▊  | 6000/7737 [35:23<10:14,  2.83it/s]

Batch 6000/7737	 Loss 0.000734


 90%|█████████ | 7000/7737 [41:17<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.000745


100%|██████████| 7737/7737 [45:37<00:00,  2.83it/s]

epoch #9, train loss: 0.0008, train acc: 0.9875





epoch #9, val loss: 0.0044, val acc: 0.9021
------------------------------
curr_lr: 0.020000000000000004


 13%|█▎        | 1000/7737 [05:54<39:45,  2.82it/s]

Batch 1000/7737	 Loss 0.000563


 26%|██▌       | 2000/7737 [11:47<33:49,  2.83it/s]

Batch 2000/7737	 Loss 0.000579


 39%|███▉      | 3000/7737 [17:42<27:57,  2.82it/s]

Batch 3000/7737	 Loss 0.000592


 52%|█████▏    | 4000/7737 [23:35<22:02,  2.83it/s]

Batch 4000/7737	 Loss 0.000605


 65%|██████▍   | 5000/7737 [29:29<16:08,  2.83it/s]

Batch 5000/7737	 Loss 0.000620


 78%|███████▊  | 6000/7737 [35:22<10:14,  2.83it/s]

Batch 6000/7737	 Loss 0.000634


 90%|█████████ | 7000/7737 [41:16<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.000649


100%|██████████| 7737/7737 [45:36<00:00,  2.83it/s]

epoch #10, train loss: 0.0007, train acc: 0.9908





epoch #10, val loss: 0.0046, val acc: 0.8960
------------------------------
curr_lr: 0.020000000000000004


 13%|█▎        | 1000/7737 [05:54<39:46,  2.82it/s]

Batch 1000/7737	 Loss 0.000535


 26%|██▌       | 2000/7737 [11:49<33:56,  2.82it/s]

Batch 2000/7737	 Loss 0.000536


 39%|███▉      | 3000/7737 [17:44<28:00,  2.82it/s]

Batch 3000/7737	 Loss 0.000551


 52%|█████▏    | 4000/7737 [23:41<22:07,  2.81it/s]

Batch 4000/7737	 Loss 0.000569


 65%|██████▍   | 5000/7737 [29:39<16:14,  2.81it/s]

Batch 5000/7737	 Loss 0.000587


 78%|███████▊  | 6000/7737 [35:33<10:17,  2.81it/s]

Batch 6000/7737	 Loss 0.000608


 90%|█████████ | 7000/7737 [41:27<04:21,  2.81it/s]

Batch 7000/7737	 Loss 0.000625


100%|██████████| 7737/7737 [45:48<00:00,  2.82it/s]

epoch #11, train loss: 0.0006, train acc: 0.9918





epoch #11, val loss: 0.0047, val acc: 0.8961
------------------------------
curr_lr: 0.020000000000000004


 13%|█▎        | 1000/7737 [05:55<39:56,  2.81it/s]

Batch 1000/7737	 Loss 0.000506


 26%|██▌       | 2000/7737 [11:50<33:57,  2.82it/s]

Batch 2000/7737	 Loss 0.000516


 39%|███▉      | 3000/7737 [17:45<28:02,  2.81it/s]

Batch 3000/7737	 Loss 0.000536


 52%|█████▏    | 4000/7737 [23:39<22:06,  2.82it/s]

Batch 4000/7737	 Loss 0.000555


 65%|██████▍   | 5000/7737 [29:34<16:11,  2.82it/s]

Batch 5000/7737	 Loss 0.000577


 78%|███████▊  | 6000/7737 [35:28<10:16,  2.82it/s]

Batch 6000/7737	 Loss 0.000602


 90%|█████████ | 7000/7737 [41:21<04:21,  2.82it/s]

Batch 7000/7737	 Loss 0.000632


100%|██████████| 7737/7737 [45:42<00:00,  2.82it/s]

epoch #12, train loss: 0.0007, train acc: 0.9918





epoch #12, val loss: 0.0049, val acc: 0.8919
------------------------------
curr_lr: 0.020000000000000004


 13%|█▎        | 1000/7737 [05:52<39:34,  2.84it/s]

Batch 1000/7737	 Loss 0.000527


 26%|██▌       | 2000/7737 [11:45<33:42,  2.84it/s]

Batch 2000/7737	 Loss 0.000541


 39%|███▉      | 3000/7737 [17:38<27:50,  2.84it/s]

Batch 3000/7737	 Loss 0.000563


 52%|█████▏    | 4000/7737 [23:31<21:58,  2.83it/s]

Batch 4000/7737	 Loss 0.000592


 65%|██████▍   | 5000/7737 [29:24<16:06,  2.83it/s]

Batch 5000/7737	 Loss 0.000626


 78%|███████▊  | 6000/7737 [35:18<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.000659


 90%|█████████ | 7000/7737 [41:12<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.000691


100%|██████████| 7737/7737 [45:33<00:00,  2.83it/s]

epoch #13, train loss: 0.0007, train acc: 0.9903





epoch #13, val loss: 0.0059, val acc: 0.8642
------------------------------
curr_lr: 0.020000000000000004


 13%|█▎        | 1000/7737 [05:52<39:36,  2.84it/s]

Batch 1000/7737	 Loss 0.000602


 26%|██▌       | 2000/7737 [11:45<33:44,  2.83it/s]

Batch 2000/7737	 Loss 0.000597


 39%|███▉      | 3000/7737 [17:38<27:51,  2.83it/s]

Batch 3000/7737	 Loss 0.000626


 52%|█████▏    | 4000/7737 [23:32<21:59,  2.83it/s]

Batch 4000/7737	 Loss 0.000654


 65%|██████▍   | 5000/7737 [29:25<16:06,  2.83it/s]

Batch 5000/7737	 Loss 0.000694


 78%|███████▊  | 6000/7737 [35:18<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.000736


 90%|█████████ | 7000/7737 [41:11<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.000771


100%|██████████| 7737/7737 [45:31<00:00,  2.83it/s]

epoch #14, train loss: 0.0008, train acc: 0.9879





epoch #14, val loss: 0.0068, val acc: 0.8325
------------------------------
curr_lr: 0.004000000000000001


 13%|█▎        | 1000/7737 [05:52<39:36,  2.83it/s]

Batch 1000/7737	 Loss 0.000457


 26%|██▌       | 2000/7737 [11:44<33:41,  2.84it/s]

Batch 2000/7737	 Loss 0.000396


 39%|███▉      | 3000/7737 [17:37<27:49,  2.84it/s]

Batch 3000/7737	 Loss 0.000371


 52%|█████▏    | 4000/7737 [23:30<21:57,  2.84it/s]

Batch 4000/7737	 Loss 0.000352


 65%|██████▍   | 5000/7737 [29:22<16:04,  2.84it/s]

Batch 5000/7737	 Loss 0.000340


 78%|███████▊  | 6000/7737 [35:14<10:12,  2.84it/s]

Batch 6000/7737	 Loss 0.000332


 90%|█████████ | 7000/7737 [41:06<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000327


100%|██████████| 7737/7737 [45:25<00:00,  2.84it/s]

epoch #15, train loss: 0.0003, train acc: 0.9962





epoch #15, val loss: 0.0037, val acc: 0.9223
------------------------------
curr_lr: 0.004000000000000001


 13%|█▎        | 1000/7737 [05:52<39:35,  2.84it/s]

Batch 1000/7737	 Loss 0.000231


 26%|██▌       | 2000/7737 [11:44<33:42,  2.84it/s]

Batch 2000/7737	 Loss 0.000237


 39%|███▉      | 3000/7737 [17:37<27:49,  2.84it/s]

Batch 3000/7737	 Loss 0.000237


 52%|█████▏    | 4000/7737 [23:29<21:56,  2.84it/s]

Batch 4000/7737	 Loss 0.000240


 65%|██████▍   | 5000/7737 [29:21<16:04,  2.84it/s]

Batch 5000/7737	 Loss 0.000243


 78%|███████▊  | 6000/7737 [35:13<10:11,  2.84it/s]

Batch 6000/7737	 Loss 0.000245


 90%|█████████ | 7000/7737 [41:05<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000248


100%|██████████| 7737/7737 [45:25<00:00,  2.84it/s]

epoch #16, train loss: 0.0002, train acc: 0.9977





epoch #16, val loss: 0.0036, val acc: 0.9244
------------------------------
curr_lr: 0.004000000000000001


 13%|█▎        | 1000/7737 [05:51<39:30,  2.84it/s]

Batch 1000/7737	 Loss 0.000231


 26%|██▌       | 2000/7737 [11:43<33:38,  2.84it/s]

Batch 2000/7737	 Loss 0.000231


 39%|███▉      | 3000/7737 [17:35<27:47,  2.84it/s]

Batch 3000/7737	 Loss 0.000234


 52%|█████▏    | 4000/7737 [23:28<21:55,  2.84it/s]

Batch 4000/7737	 Loss 0.000238


 65%|██████▍   | 5000/7737 [29:19<16:03,  2.84it/s]

Batch 5000/7737	 Loss 0.000240


 78%|███████▊  | 6000/7737 [35:12<10:11,  2.84it/s]

Batch 6000/7737	 Loss 0.000242


 90%|█████████ | 7000/7737 [41:04<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000245


100%|██████████| 7737/7737 [45:23<00:00,  2.84it/s]

epoch #17, train loss: 0.0002, train acc: 0.9980





epoch #17, val loss: 0.0037, val acc: 0.9248
------------------------------
curr_lr: 0.004000000000000001


 13%|█▎        | 1000/7737 [05:54<39:46,  2.82it/s]

Batch 1000/7737	 Loss 0.000238


 26%|██▌       | 2000/7737 [11:46<33:46,  2.83it/s]

Batch 2000/7737	 Loss 0.000240


 39%|███▉      | 3000/7737 [17:38<27:51,  2.83it/s]

Batch 3000/7737	 Loss 0.000242


 52%|█████▏    | 4000/7737 [23:31<21:58,  2.83it/s]

Batch 4000/7737	 Loss 0.000244


 65%|██████▍   | 5000/7737 [29:24<16:05,  2.83it/s]

Batch 5000/7737	 Loss 0.000248


 78%|███████▊  | 6000/7737 [35:17<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.000251


 90%|█████████ | 7000/7737 [41:12<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.000253


100%|██████████| 7737/7737 [45:34<00:00,  2.83it/s]

epoch #18, train loss: 0.0003, train acc: 0.9982





epoch #18, val loss: 0.0038, val acc: 0.9250
------------------------------
curr_lr: 0.004000000000000001


 13%|█▎        | 1000/7737 [05:54<39:50,  2.82it/s]

Batch 1000/7737	 Loss 0.000242


 26%|██▌       | 2000/7737 [11:47<33:49,  2.83it/s]

Batch 2000/7737	 Loss 0.000247


 39%|███▉      | 3000/7737 [17:40<27:53,  2.83it/s]

Batch 3000/7737	 Loss 0.000250


 52%|█████▏    | 4000/7737 [23:33<22:00,  2.83it/s]

Batch 4000/7737	 Loss 0.000252


 65%|██████▍   | 5000/7737 [29:25<16:06,  2.83it/s]

Batch 5000/7737	 Loss 0.000255


 78%|███████▊  | 6000/7737 [35:21<10:14,  2.83it/s]

Batch 6000/7737	 Loss 0.000259


 90%|█████████ | 7000/7737 [41:14<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.000261


100%|██████████| 7737/7737 [45:38<00:00,  2.83it/s]

epoch #19, train loss: 0.0003, train acc: 0.9983





epoch #19, val loss: 0.0037, val acc: 0.9257
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:54<39:47,  2.82it/s]

Batch 1000/7737	 Loss 0.000243


 26%|██▌       | 2000/7737 [11:46<33:46,  2.83it/s]

Batch 2000/7737	 Loss 0.000239


 39%|███▉      | 3000/7737 [17:38<27:51,  2.83it/s]

Batch 3000/7737	 Loss 0.000238


 52%|█████▏    | 4000/7737 [23:30<21:57,  2.84it/s]

Batch 4000/7737	 Loss 0.000239


 65%|██████▍   | 5000/7737 [29:22<16:04,  2.84it/s]

Batch 5000/7737	 Loss 0.000238


 78%|███████▊  | 6000/7737 [35:14<10:12,  2.84it/s]

Batch 6000/7737	 Loss 0.000238


 90%|█████████ | 7000/7737 [41:06<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000238


100%|██████████| 7737/7737 [45:25<00:00,  2.84it/s]

epoch #20, train loss: 0.0002, train acc: 0.9987





epoch #20, val loss: 0.0037, val acc: 0.9277
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:52<39:32,  2.84it/s]

Batch 1000/7737	 Loss 0.000229


 26%|██▌       | 2000/7737 [11:44<33:40,  2.84it/s]

Batch 2000/7737	 Loss 0.000230


 39%|███▉      | 3000/7737 [17:36<27:47,  2.84it/s]

Batch 3000/7737	 Loss 0.000232


 52%|█████▏    | 4000/7737 [23:28<21:55,  2.84it/s]

Batch 4000/7737	 Loss 0.000233


 65%|██████▍   | 5000/7737 [29:20<16:03,  2.84it/s]

Batch 5000/7737	 Loss 0.000234


 78%|███████▊  | 6000/7737 [35:12<10:11,  2.84it/s]

Batch 6000/7737	 Loss 0.000235


 90%|█████████ | 7000/7737 [41:04<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000236


100%|██████████| 7737/7737 [45:23<00:00,  2.84it/s]

epoch #21, train loss: 0.0002, train acc: 0.9987





epoch #21, val loss: 0.0037, val acc: 0.9272
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:52<39:31,  2.84it/s]

Batch 1000/7737	 Loss 0.000236


 26%|██▌       | 2000/7737 [11:43<33:38,  2.84it/s]

Batch 2000/7737	 Loss 0.000236


 39%|███▉      | 3000/7737 [17:35<27:46,  2.84it/s]

Batch 3000/7737	 Loss 0.000235


 52%|█████▏    | 4000/7737 [23:27<21:55,  2.84it/s]

Batch 4000/7737	 Loss 0.000235


 65%|██████▍   | 5000/7737 [29:19<16:03,  2.84it/s]

Batch 5000/7737	 Loss 0.000236


 78%|███████▊  | 6000/7737 [35:11<10:11,  2.84it/s]

Batch 6000/7737	 Loss 0.000236


 90%|█████████ | 7000/7737 [41:03<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000237


100%|██████████| 7737/7737 [45:21<00:00,  2.84it/s]

epoch #22, train loss: 0.0002, train acc: 0.9987





epoch #22, val loss: 0.0037, val acc: 0.9272
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:51<39:31,  2.84it/s]

Batch 1000/7737	 Loss 0.000238


 26%|██▌       | 2000/7737 [11:43<33:38,  2.84it/s]

Batch 2000/7737	 Loss 0.000236


 39%|███▉      | 3000/7737 [17:35<27:46,  2.84it/s]

Batch 3000/7737	 Loss 0.000238


 52%|█████▏    | 4000/7737 [23:27<21:54,  2.84it/s]

Batch 4000/7737	 Loss 0.000238


 65%|██████▍   | 5000/7737 [29:18<16:02,  2.84it/s]

Batch 5000/7737	 Loss 0.000238


 78%|███████▊  | 6000/7737 [35:10<10:11,  2.84it/s]

Batch 6000/7737	 Loss 0.000239


 90%|█████████ | 7000/7737 [41:02<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000238


100%|██████████| 7737/7737 [45:21<00:00,  2.84it/s]

epoch #23, train loss: 0.0002, train acc: 0.9987





epoch #23, val loss: 0.0037, val acc: 0.9281
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:52<39:36,  2.84it/s]

Batch 1000/7737	 Loss 0.000232


 26%|██▌       | 2000/7737 [11:45<33:45,  2.83it/s]

Batch 2000/7737	 Loss 0.000235


 39%|███▉      | 3000/7737 [17:39<27:52,  2.83it/s]

Batch 3000/7737	 Loss 0.000236


 52%|█████▏    | 4000/7737 [23:32<21:59,  2.83it/s]

Batch 4000/7737	 Loss 0.000237


 65%|██████▍   | 5000/7737 [29:25<16:06,  2.83it/s]

Batch 5000/7737	 Loss 0.000238


 78%|███████▊  | 6000/7737 [35:18<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.000239


 90%|█████████ | 7000/7737 [41:11<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.000239


100%|██████████| 7737/7737 [45:31<00:00,  2.83it/s]

epoch #24, train loss: 0.0002, train acc: 0.9987





epoch #24, val loss: 0.0037, val acc: 0.9276
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:53<39:40,  2.83it/s]

Batch 1000/7737	 Loss 0.000236


 26%|██▌       | 2000/7737 [11:46<33:47,  2.83it/s]

Batch 2000/7737	 Loss 0.000238


 39%|███▉      | 3000/7737 [17:40<27:53,  2.83it/s]

Batch 3000/7737	 Loss 0.000238


 52%|█████▏    | 4000/7737 [23:33<22:00,  2.83it/s]

Batch 4000/7737	 Loss 0.000239


 65%|██████▍   | 5000/7737 [29:25<16:06,  2.83it/s]

Batch 5000/7737	 Loss 0.000239


 78%|███████▊  | 6000/7737 [35:17<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.000240


 90%|█████████ | 7000/7737 [41:09<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.000241


100%|██████████| 7737/7737 [45:28<00:00,  2.84it/s]

epoch #25, train loss: 0.0002, train acc: 0.9987





epoch #25, val loss: 0.0037, val acc: 0.9282
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:52<39:36,  2.83it/s]

Batch 1000/7737	 Loss 0.000236


 26%|██▌       | 2000/7737 [11:44<33:41,  2.84it/s]

Batch 2000/7737	 Loss 0.000238


 39%|███▉      | 3000/7737 [17:36<27:48,  2.84it/s]

Batch 3000/7737	 Loss 0.000238


 52%|█████▏    | 4000/7737 [23:28<21:56,  2.84it/s]

Batch 4000/7737	 Loss 0.000239


 65%|██████▍   | 5000/7737 [29:20<16:03,  2.84it/s]

Batch 5000/7737	 Loss 0.000239


 78%|███████▊  | 6000/7737 [35:11<10:11,  2.84it/s]

Batch 6000/7737	 Loss 0.000241


 90%|█████████ | 7000/7737 [41:03<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000242


100%|██████████| 7737/7737 [45:21<00:00,  2.84it/s]

epoch #26, train loss: 0.0002, train acc: 0.9987





epoch #26, val loss: 0.0038, val acc: 0.9273
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:51<39:28,  2.84it/s]

Batch 1000/7737	 Loss 0.000239


 26%|██▌       | 2000/7737 [11:42<33:36,  2.85it/s]

Batch 2000/7737	 Loss 0.000240


 39%|███▉      | 3000/7737 [17:34<27:44,  2.85it/s]

Batch 3000/7737	 Loss 0.000241


 52%|█████▏    | 4000/7737 [23:25<21:53,  2.85it/s]

Batch 4000/7737	 Loss 0.000241


 65%|██████▍   | 5000/7737 [29:17<16:01,  2.85it/s]

Batch 5000/7737	 Loss 0.000242


 78%|███████▊  | 6000/7737 [35:08<10:10,  2.85it/s]

Batch 6000/7737	 Loss 0.000243


 90%|█████████ | 7000/7737 [41:00<04:19,  2.85it/s]

Batch 7000/7737	 Loss 0.000243


100%|██████████| 7737/7737 [45:19<00:00,  2.85it/s]

epoch #27, train loss: 0.0002, train acc: 0.9988





epoch #27, val loss: 0.0038, val acc: 0.9274
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:51<39:26,  2.85it/s]

Batch 1000/7737	 Loss 0.000237


 26%|██▌       | 2000/7737 [11:42<33:35,  2.85it/s]

Batch 2000/7737	 Loss 0.000240


 39%|███▉      | 3000/7737 [17:33<27:44,  2.85it/s]

Batch 3000/7737	 Loss 0.000239


 52%|█████▏    | 4000/7737 [23:25<21:52,  2.85it/s]

Batch 4000/7737	 Loss 0.000241


 65%|██████▍   | 5000/7737 [29:16<16:01,  2.85it/s]

Batch 5000/7737	 Loss 0.000242


 78%|███████▊  | 6000/7737 [35:08<10:10,  2.85it/s]

Batch 6000/7737	 Loss 0.000243


 90%|█████████ | 7000/7737 [40:59<04:18,  2.85it/s]

Batch 7000/7737	 Loss 0.000244


100%|██████████| 7737/7737 [45:18<00:00,  2.85it/s]

epoch #28, train loss: 0.0002, train acc: 0.9988





epoch #28, val loss: 0.0038, val acc: 0.9278
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:51<39:31,  2.84it/s]

Batch 1000/7737	 Loss 0.000245


 26%|██▌       | 2000/7737 [11:43<33:39,  2.84it/s]

Batch 2000/7737	 Loss 0.000244


 39%|███▉      | 3000/7737 [17:35<27:47,  2.84it/s]

Batch 3000/7737	 Loss 0.000243


 52%|█████▏    | 4000/7737 [23:27<21:54,  2.84it/s]

Batch 4000/7737	 Loss 0.000244


 65%|██████▍   | 5000/7737 [29:19<16:02,  2.84it/s]

Batch 5000/7737	 Loss 0.000244


 78%|███████▊  | 6000/7737 [35:10<10:11,  2.84it/s]

Batch 6000/7737	 Loss 0.000245


 90%|█████████ | 7000/7737 [41:02<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000245


100%|██████████| 7737/7737 [45:21<00:00,  2.84it/s]

epoch #29, train loss: 0.0002, train acc: 0.9988





epoch #29, val loss: 0.0038, val acc: 0.9284
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:51<39:26,  2.85it/s]

Batch 1000/7737	 Loss 0.000242


 26%|██▌       | 2000/7737 [11:42<33:35,  2.85it/s]

Batch 2000/7737	 Loss 0.000242


 39%|███▉      | 3000/7737 [17:33<27:43,  2.85it/s]

Batch 3000/7737	 Loss 0.000243


 52%|█████▏    | 4000/7737 [23:24<21:52,  2.85it/s]

Batch 4000/7737	 Loss 0.000244


 65%|██████▍   | 5000/7737 [29:16<16:01,  2.85it/s]

Batch 5000/7737	 Loss 0.000245


 78%|███████▊  | 6000/7737 [35:07<10:10,  2.85it/s]

Batch 6000/7737	 Loss 0.000245


 90%|█████████ | 7000/7737 [40:58<04:18,  2.85it/s]

Batch 7000/7737	 Loss 0.000246


100%|██████████| 7737/7737 [45:17<00:00,  2.85it/s]

epoch #30, train loss: 0.0002, train acc: 0.9988





epoch #30, val loss: 0.0038, val acc: 0.9282
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:51<39:27,  2.85it/s]

Batch 1000/7737	 Loss 0.000240


 26%|██▌       | 2000/7737 [11:42<33:34,  2.85it/s]

Batch 2000/7737	 Loss 0.000243


 39%|███▉      | 3000/7737 [17:33<27:43,  2.85it/s]

Batch 3000/7737	 Loss 0.000244


 52%|█████▏    | 4000/7737 [23:24<21:52,  2.85it/s]

Batch 4000/7737	 Loss 0.000245


 65%|██████▍   | 5000/7737 [29:15<16:00,  2.85it/s]

Batch 5000/7737	 Loss 0.000246


 78%|███████▊  | 6000/7737 [35:06<10:09,  2.85it/s]

Batch 6000/7737	 Loss 0.000246


 90%|█████████ | 7000/7737 [40:58<04:18,  2.85it/s]

Batch 7000/7737	 Loss 0.000247


100%|██████████| 7737/7737 [45:16<00:00,  2.85it/s]

epoch #31, train loss: 0.0002, train acc: 0.9988





epoch #31, val loss: 0.0038, val acc: 0.9278
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:51<39:27,  2.85it/s]

Batch 1000/7737	 Loss 0.000243


 26%|██▌       | 2000/7737 [11:42<33:35,  2.85it/s]

Batch 2000/7737	 Loss 0.000244


 39%|███▉      | 3000/7737 [17:33<27:43,  2.85it/s]

Batch 3000/7737	 Loss 0.000244


 52%|█████▏    | 4000/7737 [23:24<21:52,  2.85it/s]

Batch 4000/7737	 Loss 0.000245


 65%|██████▍   | 5000/7737 [29:15<16:01,  2.85it/s]

Batch 5000/7737	 Loss 0.000246


 78%|███████▊  | 6000/7737 [35:07<10:10,  2.85it/s]

Batch 6000/7737	 Loss 0.000247


 90%|█████████ | 7000/7737 [40:59<04:18,  2.85it/s]

Batch 7000/7737	 Loss 0.000248


100%|██████████| 7737/7737 [45:17<00:00,  2.85it/s]

epoch #32, train loss: 0.0002, train acc: 0.9988





epoch #32, val loss: 0.0038, val acc: 0.9278
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:51<39:30,  2.84it/s]

Batch 1000/7737	 Loss 0.000245


 26%|██▌       | 2000/7737 [11:43<33:37,  2.84it/s]

Batch 2000/7737	 Loss 0.000244


 39%|███▉      | 3000/7737 [17:34<27:45,  2.84it/s]

Batch 3000/7737	 Loss 0.000245


 52%|█████▏    | 4000/7737 [23:26<21:54,  2.84it/s]

Batch 4000/7737	 Loss 0.000246


 65%|██████▍   | 5000/7737 [29:18<16:02,  2.84it/s]

Batch 5000/7737	 Loss 0.000246


 78%|███████▊  | 6000/7737 [35:09<10:10,  2.84it/s]

Batch 6000/7737	 Loss 0.000247


 90%|█████████ | 7000/7737 [41:01<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000248


100%|██████████| 7737/7737 [45:20<00:00,  2.84it/s]

epoch #33, train loss: 0.0002, train acc: 0.9987





epoch #33, val loss: 0.0038, val acc: 0.9281
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:51<39:27,  2.85it/s]

Batch 1000/7737	 Loss 0.000244


 26%|██▌       | 2000/7737 [11:42<33:34,  2.85it/s]

Batch 2000/7737	 Loss 0.000244


 39%|███▉      | 3000/7737 [17:33<27:43,  2.85it/s]

Batch 3000/7737	 Loss 0.000245


 52%|█████▏    | 4000/7737 [23:25<21:52,  2.85it/s]

Batch 4000/7737	 Loss 0.000246


 65%|██████▍   | 5000/7737 [29:16<16:01,  2.85it/s]

Batch 5000/7737	 Loss 0.000247


 78%|███████▊  | 6000/7737 [35:07<10:10,  2.85it/s]

Batch 6000/7737	 Loss 0.000248


 90%|█████████ | 7000/7737 [40:58<04:18,  2.85it/s]

Batch 7000/7737	 Loss 0.000248


100%|██████████| 7737/7737 [45:17<00:00,  2.85it/s]

epoch #34, train loss: 0.0002, train acc: 0.9988





epoch #34, val loss: 0.0038, val acc: 0.9277
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:51<39:30,  2.84it/s]

Batch 1000/7737	 Loss 0.000243


 26%|██▌       | 2000/7737 [11:43<33:38,  2.84it/s]

Batch 2000/7737	 Loss 0.000245


 39%|███▉      | 3000/7737 [17:35<27:45,  2.84it/s]

Batch 3000/7737	 Loss 0.000245


 52%|█████▏    | 4000/7737 [23:26<21:54,  2.84it/s]

Batch 4000/7737	 Loss 0.000247


 65%|██████▍   | 5000/7737 [29:18<16:02,  2.84it/s]

Batch 5000/7737	 Loss 0.000248


 78%|███████▊  | 6000/7737 [35:10<10:11,  2.84it/s]

Batch 6000/7737	 Loss 0.000248


 90%|█████████ | 7000/7737 [41:02<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000249


100%|██████████| 7737/7737 [45:21<00:00,  2.84it/s]

epoch #35, train loss: 0.0002, train acc: 0.9988





epoch #35, val loss: 0.0038, val acc: 0.9285
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:52<39:37,  2.83it/s]

Batch 1000/7737	 Loss 0.000247


 26%|██▌       | 2000/7737 [11:45<33:45,  2.83it/s]

Batch 2000/7737	 Loss 0.000245


 39%|███▉      | 3000/7737 [17:38<27:52,  2.83it/s]

Batch 3000/7737	 Loss 0.000247


 52%|█████▏    | 4000/7737 [23:31<21:58,  2.83it/s]

Batch 4000/7737	 Loss 0.000248


 65%|██████▍   | 5000/7737 [29:24<16:05,  2.83it/s]

Batch 5000/7737	 Loss 0.000248


 78%|███████▊  | 6000/7737 [35:17<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.000248


 90%|█████████ | 7000/7737 [41:10<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.000249


100%|██████████| 7737/7737 [45:30<00:00,  2.83it/s]

epoch #36, train loss: 0.0002, train acc: 0.9988





epoch #36, val loss: 0.0038, val acc: 0.9284
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:53<39:38,  2.83it/s]

Batch 1000/7737	 Loss 0.000243


 26%|██▌       | 2000/7737 [11:45<33:43,  2.83it/s]

Batch 2000/7737	 Loss 0.000245


 39%|███▉      | 3000/7737 [17:37<27:50,  2.84it/s]

Batch 3000/7737	 Loss 0.000247


 52%|█████▏    | 4000/7737 [23:30<21:57,  2.84it/s]

Batch 4000/7737	 Loss 0.000248


 65%|██████▍   | 5000/7737 [29:22<16:05,  2.84it/s]

Batch 5000/7737	 Loss 0.000248


 78%|███████▊  | 6000/7737 [35:15<10:12,  2.84it/s]

Batch 6000/7737	 Loss 0.000249


 90%|█████████ | 7000/7737 [41:07<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000250


100%|██████████| 7737/7737 [45:27<00:00,  2.84it/s]

epoch #37, train loss: 0.0003, train acc: 0.9988





epoch #37, val loss: 0.0038, val acc: 0.9279
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:52<39:32,  2.84it/s]

Batch 1000/7737	 Loss 0.000245


 26%|██▌       | 2000/7737 [11:44<33:41,  2.84it/s]

Batch 2000/7737	 Loss 0.000246


 39%|███▉      | 3000/7737 [17:37<27:49,  2.84it/s]

Batch 3000/7737	 Loss 0.000247


 52%|█████▏    | 4000/7737 [23:29<21:56,  2.84it/s]

Batch 4000/7737	 Loss 0.000248


 65%|██████▍   | 5000/7737 [29:21<16:04,  2.84it/s]

Batch 5000/7737	 Loss 0.000248


 78%|███████▊  | 6000/7737 [35:14<10:12,  2.84it/s]

Batch 6000/7737	 Loss 0.000249


 90%|█████████ | 7000/7737 [41:06<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000249


100%|██████████| 7737/7737 [45:27<00:00,  2.84it/s]

epoch #38, train loss: 0.0003, train acc: 0.9988





epoch #38, val loss: 0.0038, val acc: 0.9283
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:52<39:37,  2.83it/s]

Batch 1000/7737	 Loss 0.000244


 26%|██▌       | 2000/7737 [11:46<33:47,  2.83it/s]

Batch 2000/7737	 Loss 0.000245


 39%|███▉      | 3000/7737 [17:39<27:53,  2.83it/s]

Batch 3000/7737	 Loss 0.000248


 52%|█████▏    | 4000/7737 [23:32<22:00,  2.83it/s]

Batch 4000/7737	 Loss 0.000247


 65%|██████▍   | 5000/7737 [29:25<16:06,  2.83it/s]

Batch 5000/7737	 Loss 0.000249


 78%|███████▊  | 6000/7737 [35:17<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.000249


 90%|█████████ | 7000/7737 [41:09<04:19,  2.83it/s]

Batch 7000/7737	 Loss 0.000250


100%|██████████| 7737/7737 [45:28<00:00,  2.84it/s]

epoch #39, train loss: 0.0003, train acc: 0.9988





epoch #39, val loss: 0.0038, val acc: 0.9288
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:52<39:33,  2.84it/s]

Batch 1000/7737	 Loss 0.000247


 26%|██▌       | 2000/7737 [11:44<33:41,  2.84it/s]

Batch 2000/7737	 Loss 0.000246


 39%|███▉      | 3000/7737 [17:37<27:49,  2.84it/s]

Batch 3000/7737	 Loss 0.000247


 52%|█████▏    | 4000/7737 [23:29<21:56,  2.84it/s]

Batch 4000/7737	 Loss 0.000247


 65%|██████▍   | 5000/7737 [29:21<16:04,  2.84it/s]

Batch 5000/7737	 Loss 0.000248


 78%|███████▊  | 6000/7737 [35:13<10:11,  2.84it/s]

Batch 6000/7737	 Loss 0.000248


 90%|█████████ | 7000/7737 [41:06<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000249


100%|██████████| 7737/7737 [45:25<00:00,  2.84it/s]

epoch #40, train loss: 0.0003, train acc: 0.9988





epoch #40, val loss: 0.0038, val acc: 0.9276
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:52<39:37,  2.83it/s]

Batch 1000/7737	 Loss 0.000246


 26%|██▌       | 2000/7737 [11:45<33:43,  2.84it/s]

Batch 2000/7737	 Loss 0.000247


 39%|███▉      | 3000/7737 [17:37<27:50,  2.84it/s]

Batch 3000/7737	 Loss 0.000248


 52%|█████▏    | 4000/7737 [23:30<21:57,  2.84it/s]

Batch 4000/7737	 Loss 0.000248


 65%|██████▍   | 5000/7737 [29:24<16:05,  2.83it/s]

Batch 5000/7737	 Loss 0.000248


 78%|███████▊  | 6000/7737 [35:17<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.000249


 90%|█████████ | 7000/7737 [41:09<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.000249


100%|██████████| 7737/7737 [45:29<00:00,  2.83it/s]

epoch #41, train loss: 0.0003, train acc: 0.9988





epoch #41, val loss: 0.0038, val acc: 0.9279
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:56<40:00,  2.81it/s]

Batch 1000/7737	 Loss 0.000243


 26%|██▌       | 2000/7737 [11:47<33:49,  2.83it/s]

Batch 2000/7737	 Loss 0.000244


 39%|███▉      | 3000/7737 [17:39<27:53,  2.83it/s]

Batch 3000/7737	 Loss 0.000246


 52%|█████▏    | 4000/7737 [23:32<21:59,  2.83it/s]

Batch 4000/7737	 Loss 0.000246


 65%|██████▍   | 5000/7737 [29:25<16:06,  2.83it/s]

Batch 5000/7737	 Loss 0.000247


 78%|███████▊  | 6000/7737 [35:18<10:13,  2.83it/s]

Batch 6000/7737	 Loss 0.000248


 90%|█████████ | 7000/7737 [41:14<04:20,  2.83it/s]

Batch 7000/7737	 Loss 0.000249


100%|██████████| 7737/7737 [45:33<00:00,  2.83it/s]

epoch #42, train loss: 0.0002, train acc: 0.9989





epoch #42, val loss: 0.0038, val acc: 0.9281
------------------------------
curr_lr: 0.0008000000000000003


 13%|█▎        | 1000/7737 [05:51<39:31,  2.84it/s]

Batch 1000/7737	 Loss 0.000244


 26%|██▌       | 2000/7737 [11:43<33:39,  2.84it/s]

Batch 2000/7737	 Loss 0.000246


 39%|███▉      | 3000/7737 [17:35<27:46,  2.84it/s]

Batch 3000/7737	 Loss 0.000246


 52%|█████▏    | 4000/7737 [23:27<21:54,  2.84it/s]

Batch 4000/7737	 Loss 0.000247


 65%|██████▍   | 5000/7737 [29:18<16:02,  2.84it/s]

Batch 5000/7737	 Loss 0.000248


 78%|███████▊  | 6000/7737 [35:10<10:10,  2.84it/s]

Batch 6000/7737	 Loss 0.000249


 90%|█████████ | 7000/7737 [41:01<04:19,  2.84it/s]

Batch 7000/7737	 Loss 0.000249


100%|██████████| 7737/7737 [45:22<00:00,  2.84it/s]

epoch #43, train loss: 0.0002, train acc: 0.9988





epoch #43, val loss: 0.0038, val acc: 0.9285
------------------------------
curr_lr: 0.0008000000000000003


 10%|█         | 811/7737 [04:46<40:46,  2.83it/s]Process Process-360:
Process Process-359:
Process Process-357:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 96, in _worker_loop
    r = index_queue.get(timeout=MANAGER_STATUS_CHECK_INTERVAL)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Process Process-358:
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/queues.py", lin

KeyboardInterrupt: 

  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 96, in _worker_loop
    r = index_queue.get(timeout=MANAGER_STATUS_CHECK_INTERVAL)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/connection.py", line 414, in _poll
    r = wait([self], timeout)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/queues.py", line 104, in get
    if not self._poll(timeout):
Traceback (most recent call last):
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/connection.py", line 911, in wait
    ready = selector.select(timeout)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/connection.py", line 257, in poll
    return self._poll(timeout)
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/conda/envs/pytorch-py3.6/lib/python3.6/selectors.py", line 376, in select
    fd_event_list = self._poll.poll(t

In [221]:
# See, Fr features
fr_feats = []
model.eval()
total = 0
for batch_idx, (X, y) in enumerate(dev_val_dataloader):
    if not config['no_cuda']:
        X = X.cuda()
        y = y.cuda()

    fr_feat = model.fr_feat(X).cpu().detach()
    fr_feats.append(fr_feat)
    break
    

In [222]:
from torch.nn.functional import cosine_similarity as cosine
cosine(fr_feats[0][0,:,0:1], fr_feats[0][0,:,1:], dim=0)

tensor([ 0.9731,  0.9194,  0.8460,  0.7652,  0.6475,  0.5775,  0.5488,  0.5367,
         0.5829,  0.5654,  0.4195,  0.2916,  0.2122,  0.1197,  0.0676,  0.0322,
        -0.0061, -0.0410, -0.0531, -0.0531, -0.0590, -0.0614, -0.0328, -0.0150,
        -0.0346, -0.0619, -0.0721, -0.0648, -0.0658, -0.0635, -0.0456, -0.0339,
        -0.0354, -0.0256, -0.0266, -0.0331, -0.0207,  0.0241,  0.0486,  0.0436,
         0.0121, -0.0017, -0.0279, -0.0312, -0.0458, -0.0290, -0.0084,  0.0147,
         0.0084,  0.0343,  0.0646,  0.0471, -0.0223, -0.0798,  0.0046,  0.2005,
         0.3268,  0.3368,  0.2863,  0.2927,  0.3569,  0.3999,  0.4128,  0.3561,
         0.1756, -0.0672, -0.1856, -0.2118, -0.2193, -0.2026, -0.1163,  0.0897,
         0.2522,  0.3210,  0.3290,  0.2939,  0.2097,  0.0964,  0.0285,  0.0154,
         0.0167,  0.0372,  0.0647,  0.0906,  0.1198,  0.1151,  0.0794,  0.0454,
         0.0297,  0.0120, -0.0102, -0.0400, -0.0716, -0.1135, -0.1244, -0.1197,
        -0.1167, -0.0907,  0.0283,  0.23

In [223]:
fr_feats[0].shape

torch.Size([64, 1500, 396])

### Untrained Classes

In [123]:
model.eval()
loss_sum = 0
n_corrects = 0
total = 0
predicts = []
labels = []
for batch_idx, (seq_len, X, y) in enumerate(dev_val_dataloader):
    if not config['no_cuda']:
        X = X.cuda()
        y = y.cuda()

    batch_logits = []
    for i in range(len(X)):
        x_in = X[i:i+1,:,:seq_len[i]]
        out_ = model(x_in)
        batch_logits.append(out_)
    logit = torch.cat(batch_logits, dim=0)
    loss = criterion(logit, y)
    loss_sum += loss.item()
    predicts.append(logit.max(1)[1])
    labels.append(y)
    n_corrects += logit.max(1)[1].eq(y).sum().item()
    total += y.size(0)
val_loss = loss_sum / total
val_acc = n_corrects / total

print("epoch #{}, val loss: {:.4f}, val acc: {:.4f}".format(epoch_idx, val_loss, val_acc))
predicts = torch.cat(predicts).cpu()
labels = torch.cat(labels).cpu()

epoch #12, val loss: 0.0084, val acc: 0.7485


In [129]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

conf_mat = confusion_matrix(labels, predicts)
is_correct = predicts.eq(labels).numpy()
predicts = predicts.numpy()
labels = labels.numpy()

pred_result = pd.DataFrame([predicts, labels, is_correct]).T
pred_result.columns = ['pred', 'label', 'is_correct']
pred_class_acc = pred_result.groupby('label').is_correct.mean()

sns.distplot(pred_class_acc, norm_hist=True)

In [114]:
# the labels where acc is 0
dev_train_df[dev_train_df.label.isin(pred_class_acc.sort_values()[:6].index)].label.value_counts()

777    67
202    44
451    41
854    37
791    32
128    30
Name: label, dtype: int64

In [97]:
untrained_labels = pred_class_acc.sort_values()[:200].index

In [101]:
untrained_dev_train_df = dev_train_df[dev_train_df.label.isin(untrained_labels)]
untrained_dev_train_dataset = FeatDataset.read_df(config, untrained_dev_train_df, 'train')
untrained_dev_train_dataloader = init_default_loader(config, untrained_dev_train_dataset, shuffle=True)

### LDA on embedding

In [20]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

global_mean = si_embeds.mean(0)
clf = LDA(solver='svd', n_components=200)
clf.fit(si_embeds - global_mean, si_key_df.label)

si_embeds = clf.transform(si_embeds - global_mean).astype(np.float32)

sv_embeds = clf.transform(sv_embeds - global_mean).astype(np.float32)

si_dataset, embed_dim, n_labels = embedToDataset(si_embeds.reshape(-1,200), si_key_df)
sv_dataset, _, _ = embedToDataset(sv_embeds, sv_key_df)