In [17]:
# Code credits: Adapted bits and pieces from https://github.com/webdataset/webdataset/blob/master/docs/gettingstarted.ipynb

import sys
sys.path.append('..')

import gc
import json
import os
from itertools import islice
from datetime import datetime
import pytz
from pytz import timezone
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
import matplotlib.pyplot as plt
import skimage.transform as st
import tqdm

import torch
import torch.optim as optim
from torchvision import transforms
import webdataset as wds

from model.selfattn_3d_cnn import *
from model.baseline_3d_cnn import *
from model.resattn_3d_cnn import *
from utils.model_utils import *
from utils.model_run import *

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Dataset locations, relative to this notebook.
data_dir = '../data'
shards_dir = os.path.join(data_dir, 'shards_new')

# Run parameters live in a JSON file one directory above the notebook.
with open('../parameters.json') as json_file:
    parameters = json.load(json_file)

# Pull out the two values the data pipeline needs.
batch_size, shard_size = parameters['batch_size'], parameters['shard_size']

# Last expression of the cell: display the loaded parameters.
parameters

{'batch_size': 4, 'shard_size': 16}

In [3]:
# Collect every .tar shard in shards_dir.
# os.listdir() returns entries in arbitrary order, so the list is sorted; without this
# the train/val/test membership could differ between kernels, machines or filesystems.
urls = sorted(
    os.path.join(shards_dir, it) for it in os.listdir(shards_dir) if it.endswith('.tar')
)

# Try to overfit on smaller data
# urls = urls[:round(len(urls)*0.3)]

# A second shard directory ('shards_new_cont') cannot be appended here: its sample keys
# would collide with the ones in shards_dir because the keys were restarted.
# shards_dir2 = os.path.join(data_dir, 'shards_new_cont')
# urls += [os.path.join(shards_dir2, it) for it in os.listdir(shards_dir2) if it.endswith('.tar')]


# Split boundaries are computed on 75% of the available shards:
#   train = first 70% of that subset, validation = next 15%.
total_num_shards = round(len(urls)*0.75)
train_end = round(total_num_shards*0.7)
val_end = round(total_num_shards*0.85)

train_urls = urls[:train_end]
val_urls = urls[train_end:val_end]
# NOTE(review): this slice runs to the end of `urls`, not to `total_num_shards`, so the
# test split also receives the 25% of shards excluded above (35 test shards vs 11
# validation shards in the recorded run). Behaviour is kept as-is; if the test split is
# meant to be the last 15% of the subset, use urls[val_end:total_num_shards] instead.
test_urls = urls[val_end:]

# Smaller data just to run model once
# train_urls = urls[:2]
# val_urls = urls[2:3]
# test_urls = urls[3:]


print("Number of train shards:", len(train_urls))
print("Number of validation shards:", len(val_urls))
print("Number of test shards:", len(test_urls))

Number of train shards: 50
Number of validation shards: 11
Number of test shards: 35


In [4]:
def make_loader(shard_urls, num_batches):
    """Build the WebDataset pipeline and its DataLoader for one data split.

    The same pipeline is used for the train, validation and test splits, so it is
    defined once here instead of being copy-pasted three times.

    Args:
        shard_urls: list of paths to the .tar shards belonging to the split.
        num_batches: value forwarded to WebDataset's ``length`` argument
            (presumably the number of batches reported per epoch; confirm against
            the installed webdataset version).

    Returns:
        (dataset, loader): the WebDataset pipeline and a DataLoader wrapping it.
    """
    dataset = (
        wds
        .WebDataset(shard_urls, length=num_batches)
        .shuffle(shard_size)
        .decode('torch')
        .to_tuple('volumes.pyd', 'labels.pyd', 'studynames.pyd')
        .batched(batch_size)
        # .map_tuple(pre_transforms, identity, identity)  # optional pre-transforms, currently disabled
    )
    # batch_size=None disables the DataLoader's own batching: the pipeline above
    # already yields complete batches.
    loader = torch.utils.data.DataLoader(dataset, num_workers=0, batch_size=None)
    return dataset, loader


# Batches per epoch for each split (assumes every shard holds `shard_size` samples).
train_iternum = len(train_urls)*shard_size//batch_size
val_iternum = len(val_urls)*shard_size//batch_size
test_iternum = len(test_urls)*shard_size//batch_size

print("Number of iterations per train epoch:", train_iternum)

# Create dataset objects and their loaders.
train_dataset, loader_train = make_loader(train_urls, train_iternum)
val_dataset, loader_val = make_loader(val_urls, val_iternum)
test_dataset, loader_test = make_loader(test_urls, test_iternum)

# Quick sanity check of the decoded samples:
# for image, target in islice(dataset, 0, 2):
#     print(image.shape)

Number of iterations per train epoch: 200


In [5]:
# Force a full garbage-collection pass before the memory-heavy cells below;
# the value shown as the cell output is gc.collect()'s return value.
gc.collect()

66

In [6]:
# Device / dtype configuration.
# The GPU is used only when it is both requested (USE_GPU) and actually available;
# otherwise everything runs on the CPU.
USE_GPU = True
dtype = torch.float

device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

# Show the resulting configuration, one value per line.
print(device, dtype, sep='\n')

cuda
torch.float32


In [410]:
# Create the log directory for this run, named after the current US/Pacific time,
# together with its 'Checkpoints' sub-directory.
# The baseline is run from this notebook (the other notebook gives CUDA memory
# errors), so logs go under ../runs/baseline.
dir_nm = datetime.now(tz=pytz.utc).astimezone(timezone('US/Pacific')).strftime('%Y-%m-%d_%H-%M-%S')
# dir_nm = "first_mini_c2fc2"
log_dir = os.path.join('../runs/baseline', dir_nm)
# log_dir = os.path.join('../runs/experiment', dir_nm)

# os.makedirs also creates missing parents: the original os.mkdir(log_dir) raised
# FileNotFoundError when ../runs/baseline did not exist yet. exist_ok=True makes
# re-running the cell within the same second harmless.
os.makedirs(os.path.join(log_dir, 'Checkpoints'), exist_ok=True)


# Model, optimizer, criterion
# NOTE(review): the model is not moved to `device` here; presumably train() does that
# with the `device` argument it receives -- confirm in utils/model_run.
model = baseline_3DCNN(in_num_ch=1)
# model = selfattn_3DCNN(in_num_ch=1)
optimizer = optim.Adam(model.parameters(), lr = 1e-4)
# BCEWithLogitsLoss applies the sigmoid internally, so the model must output raw logits.
criterion = torch.nn.BCEWithLogitsLoss()

In [7]:
# Force another garbage-collection pass right before training starts;
# the value shown as the cell output is gc.collect()'s return value.
gc.collect()

44

In [412]:
# Train the baseline model for 10 epochs, asking for validation every 5 iterations
# (val_every=5). Logs and checkpoints are expected under `log_dir`.
train_loss_dict, val_loss_dict = train(
    model,
    optimizer,
    criterion,
    loader_train,
    loader_val,
    log_dir,
    device=device,
    epochs=10,
    val_every=5,
)

Epoch 1:   2%|▎         | 5/200 [02:46<2:56:10, 54.21s/batch, loss=0.704]

Total iteration 5, validation loss = 0.7028



Epoch 1:   5%|▌         | 10/200 [05:19<2:47:13, 52.81s/batch, loss=0.708]

Total iteration 10, validation loss = 0.7026



Epoch 1:   8%|▊         | 15/200 [07:52<2:41:49, 52.49s/batch, loss=0.628]

Total iteration 15, validation loss = 0.7018



Epoch 1:  10%|█         | 20/200 [10:24<2:36:39, 52.22s/batch, loss=0.66] 

Total iteration 20, validation loss = 0.6990



Epoch 1:  12%|█▎        | 25/200 [12:55<2:31:09, 51.82s/batch, loss=0.658]

Total iteration 25, validation loss = 0.6925



Epoch 1:  15%|█▌        | 30/200 [15:26<2:26:21, 51.66s/batch, loss=0.624]

Total iteration 30, validation loss = 0.6815



Epoch 1:  18%|█▊        | 35/200 [17:50<2:16:26, 49.62s/batch, loss=0.611]

Total iteration 35, validation loss = 0.6680



Epoch 1:  20%|██        | 40/200 [20:15<2:12:07, 49.54s/batch, loss=0.588]

Total iteration 40, validation loss = 0.6541



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 1:  22%|██▎       | 45/200 [22:44<2:11:47, 51.01s/batch, loss=0.622]

Total iteration 45, validation loss = 0.6430



Epoch 1:  25%|██▌       | 50/200 [25:12<2:06:51, 50.75s/batch, loss=0.578]

Total iteration 50, validation loss = 0.6333



Epoch 1:  28%|██▊       | 55/200 [27:41<2:02:40, 50.76s/batch, loss=0.714]

Total iteration 55, validation loss = 0.6286



Epoch 1:  30%|███       | 60/200 [30:08<1:58:15, 50.68s/batch, loss=0.602]

Total iteration 60, validation loss = 0.6248



Epoch 1:  32%|███▎      | 65/200 [32:37<1:54:41, 50.97s/batch, loss=0.684]

Total iteration 65, validation loss = 0.6216



Epoch 1:  35%|███▌      | 70/200 [35:06<1:50:05, 50.81s/batch, loss=0.657]

Total iteration 70, validation loss = 0.6189



Epoch 1:  38%|███▊      | 75/200 [37:32<1:44:36, 50.21s/batch, loss=0.596]

Total iteration 75, validation loss = 0.6123



Epoch 1:  40%|████      | 80/200 [39:58<1:39:56, 49.97s/batch, loss=0.658]

Total iteration 80, validation loss = 0.6099



Epoch 1:  42%|████▎     | 85/200 [42:18<1:32:35, 48.31s/batch, loss=0.775]

Total iteration 85, validation loss = 0.6074



Epoch 1:  45%|████▌     | 90/200 [44:40<1:28:28, 48.26s/batch, loss=0.684]

Total iteration 90, validation loss = 0.6073



Epoch 1:  48%|████▊     | 95/200 [47:01<1:24:21, 48.20s/batch, loss=0.675]

Total iteration 95, validation loss = 0.6061



Epoch 1:  50%|█████     | 100/200 [49:22<1:20:21, 48.21s/batch, loss=0.535]

Total iteration 100, validation loss = 0.6014



Epoch 1:  52%|█████▎    | 105/200 [51:43<1:16:20, 48.22s/batch, loss=0.768]

Total iteration 105, validation loss = 0.6005



Epoch 1:  55%|█████▌    | 110/200 [54:09<1:14:39, 49.77s/batch, loss=0.635]

Total iteration 110, validation loss = 0.5972



Epoch 1:  57%|█████▊    | 115/200 [56:29<1:07:56, 47.95s/batch, loss=0.676]

Total iteration 115, validation loss = 0.5941



Epoch 1:  60%|██████    | 120/200 [58:47<1:03:21, 47.52s/batch, loss=0.668]

Total iteration 120, validation loss = 0.5932



Epoch 1:  62%|██████▎   | 125/200 [1:01:07<59:31, 47.62s/batch, loss=0.716]

Total iteration 125, validation loss = 0.5939



Epoch 1:  65%|██████▌   | 130/200 [1:03:37<59:05, 50.64s/batch, loss=0.71] 

Total iteration 130, validation loss = 0.5937



Epoch 1:  68%|██████▊   | 135/200 [1:06:00<53:30, 49.40s/batch, loss=0.609]

Total iteration 135, validation loss = 0.5932



Epoch 1:  70%|███████   | 140/200 [1:08:27<50:03, 50.06s/batch, loss=0.661]

Total iteration 140, validation loss = 0.5936



Epoch 1:  72%|███████▎  | 145/200 [1:10:53<45:56, 50.12s/batch, loss=0.564]

Total iteration 145, validation loss = 0.5919



Epoch 1:  75%|███████▌  | 150/200 [1:13:22<42:13, 50.66s/batch, loss=0.662]

Total iteration 150, validation loss = 0.5915



Epoch 1:  78%|███████▊  | 155/200 [1:15:45<37:01, 49.36s/batch, loss=0.657]

Total iteration 155, validation loss = 0.5982



Epoch 1:  80%|████████  | 160/200 [1:18:13<33:36, 50.40s/batch, loss=0.576]

Total iteration 160, validation loss = 0.5972



Epoch 1:  82%|████████▎ | 165/200 [1:20:35<28:27, 48.80s/batch, loss=0.654]

Total iteration 165, validation loss = 0.5994



Epoch 1:  85%|████████▌ | 170/200 [1:22:57<24:18, 48.60s/batch, loss=0.529]

Total iteration 170, validation loss = 0.6014



Epoch 1:  88%|████████▊ | 175/200 [1:25:21<20:32, 49.29s/batch, loss=0.714]

Total iteration 175, validation loss = 0.5985



Epoch 1:  90%|█████████ | 180/200 [1:27:49<16:47, 50.35s/batch, loss=0.581]

Total iteration 180, validation loss = 0.5981



Epoch 1:  92%|█████████▎| 185/200 [1:30:15<12:30, 50.04s/batch, loss=0.547]

Total iteration 185, validation loss = 0.5991



Epoch 1:  95%|█████████▌| 190/200 [1:32:42<08:22, 50.22s/batch, loss=0.693]

Total iteration 190, validation loss = 0.5969



Epoch 1:  98%|█████████▊| 195/200 [1:35:11<04:14, 50.90s/batch, loss=0.649]

Total iteration 195, validation loss = 0.5989



Epoch 1: 100%|██████████| 200/200 [1:37:21<00:00, 29.21s/batch, loss=0.508]
  0%|          | 0/200 [00:00<?, ?batch/s]

Total iteration 200, validation loss = 0.5942



Epoch 2:   2%|▎         | 5/200 [02:31<2:38:39, 48.82s/batch, loss=0.568]

Total iteration 206, validation loss = 0.5893



Epoch 2:   5%|▌         | 10/200 [04:50<2:30:30, 47.53s/batch, loss=0.635]

Total iteration 211, validation loss = 0.5868



Epoch 2:   8%|▊         | 15/200 [07:11<2:28:13, 48.07s/batch, loss=0.478]

Total iteration 216, validation loss = 0.5870



Epoch 2:  10%|█         | 20/200 [09:40<2:31:45, 50.58s/batch, loss=0.669]

Total iteration 221, validation loss = 0.5846



Epoch 2:  12%|█▎        | 25/200 [12:00<2:21:06, 48.38s/batch, loss=0.608]

Total iteration 226, validation loss = 0.5838



Epoch 2:  15%|█▌        | 30/200 [14:26<2:20:17, 49.51s/batch, loss=0.652]

Total iteration 231, validation loss = 0.5855



Epoch 2:  18%|█▊        | 35/200 [16:52<2:17:02, 49.83s/batch, loss=0.678]

Total iteration 236, validation loss = 0.5909



Epoch 2:  20%|██        | 40/200 [19:18<2:13:40, 50.13s/batch, loss=0.613]

Total iteration 241, validation loss = 0.5928



Epoch 2:  22%|██▎       | 45/200 [21:46<2:10:44, 50.61s/batch, loss=0.669]

Total iteration 246, validation loss = 0.5941



Epoch 2:  25%|██▌       | 50/200 [24:15<2:06:51, 50.74s/batch, loss=0.576]

Total iteration 251, validation loss = 0.5954



Epoch 2:  28%|██▊       | 55/200 [26:43<2:02:56, 50.87s/batch, loss=0.466]

Total iteration 256, validation loss = 0.5944



Epoch 2:  30%|███       | 60/200 [29:11<1:58:27, 50.77s/batch, loss=0.596]

Total iteration 261, validation loss = 0.5898



Epoch 2:  32%|███▎      | 65/200 [31:40<1:54:22, 50.83s/batch, loss=0.635]

Total iteration 266, validation loss = 0.5888



Epoch 2:  35%|███▌      | 70/200 [34:09<1:50:38, 51.07s/batch, loss=0.594]

Total iteration 271, validation loss = 0.5917



Epoch 2:  38%|███▊      | 75/200 [36:38<1:46:36, 51.17s/batch, loss=0.605]

Total iteration 276, validation loss = 0.5915



Epoch 2:  40%|████      | 80/200 [39:07<1:41:47, 50.90s/batch, loss=0.538]

Total iteration 281, validation loss = 0.5900



Epoch 2:  42%|████▎     | 85/200 [41:32<1:35:49, 49.99s/batch, loss=0.599]

Total iteration 286, validation loss = 0.5897



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 2:  45%|████▌     | 90/200 [43:56<1:30:05, 49.14s/batch, loss=0.492]

Total iteration 291, validation loss = 0.5910



Epoch 2:  48%|████▊     | 95/200 [46:25<1:28:47, 50.74s/batch, loss=0.667]

Total iteration 296, validation loss = 0.5882



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 2:  50%|█████     | 100/200 [48:46<1:21:18, 48.78s/batch, loss=0.59]

Total iteration 301, validation loss = 0.5858



Epoch 2:  52%|█████▎    | 105/200 [51:08<1:17:02, 48.66s/batch, loss=0.58] 

Total iteration 306, validation loss = 0.5811



Epoch 2:  55%|█████▌    | 110/200 [53:30<1:12:29, 48.33s/batch, loss=0.639]

Total iteration 311, validation loss = 0.5811



Epoch 2:  57%|█████▊    | 115/200 [55:49<1:07:26, 47.61s/batch, loss=0.65] 

Total iteration 316, validation loss = 0.5837



Epoch 2:  60%|██████    | 120/200 [58:14<1:05:49, 49.37s/batch, loss=0.566]

Total iteration 321, validation loss = 0.5835



Epoch 2:  62%|██████▎   | 125/200 [1:00:41<1:02:45, 50.21s/batch, loss=0.703]

Total iteration 326, validation loss = 0.5893



Epoch 2:  65%|██████▌   | 130/200 [1:03:04<57:07, 48.96s/batch, loss=0.628]  

Total iteration 331, validation loss = 0.5885



Epoch 2:  68%|██████▊   | 135/200 [1:05:22<51:31, 47.56s/batch, loss=0.62] 

Total iteration 336, validation loss = 0.5866



Epoch 2:  70%|███████   | 140/200 [1:07:41<47:25, 47.43s/batch, loss=0.526]

Total iteration 341, validation loss = 0.5829



Epoch 2:  72%|███████▎  | 145/200 [1:10:00<43:25, 47.38s/batch, loss=0.661]

Total iteration 346, validation loss = 0.5794



Epoch 2:  75%|███████▌  | 150/200 [1:12:20<39:41, 47.64s/batch, loss=0.516]

Total iteration 351, validation loss = 0.5788



Epoch 2:  78%|███████▊  | 155/200 [1:14:39<35:41, 47.60s/batch, loss=0.64] 

Total iteration 356, validation loss = 0.5812



Epoch 2:  80%|████████  | 160/200 [1:17:01<32:16, 48.40s/batch, loss=0.68] 

Total iteration 361, validation loss = 0.5842



Epoch 2:  82%|████████▎ | 165/200 [1:19:26<28:51, 49.47s/batch, loss=0.685]

Total iteration 366, validation loss = 0.5869



Epoch 2:  85%|████████▌ | 170/200 [1:21:53<24:57, 49.93s/batch, loss=0.572]

Total iteration 371, validation loss = 0.5906



Epoch 2:  88%|████████▊ | 175/200 [1:24:19<20:53, 50.14s/batch, loss=0.558]

Total iteration 376, validation loss = 0.5922



Epoch 2:  90%|█████████ | 180/200 [1:26:39<16:04, 48.24s/batch, loss=0.583]

Total iteration 381, validation loss = 0.5905



Epoch 2:  92%|█████████▎| 185/200 [1:28:59<11:57, 47.85s/batch, loss=0.592]

Total iteration 386, validation loss = 0.5844



Epoch 2:  95%|█████████▌| 190/200 [1:31:22<08:07, 48.75s/batch, loss=0.558]

Total iteration 391, validation loss = 0.5820



Epoch 2:  98%|█████████▊| 195/200 [1:33:44<04:01, 48.30s/batch, loss=0.521]

Total iteration 396, validation loss = 0.5829



Epoch 2: 100%|██████████| 200/200 [1:35:54<00:00, 28.77s/batch, loss=0.679]
  0%|          | 0/200 [00:00<?, ?batch/s]

Total iteration 401, validation loss = 0.5806



Epoch 3:   2%|▎         | 5/200 [02:33<2:40:04, 49.26s/batch, loss=0.583]

Total iteration 407, validation loss = 0.5797



Epoch 3:   5%|▌         | 10/200 [04:53<2:32:28, 48.15s/batch, loss=0.64]

Total iteration 412, validation loss = 0.5802



Epoch 3:   8%|▊         | 15/200 [07:12<2:26:48, 47.62s/batch, loss=0.585]

Total iteration 417, validation loss = 0.5826



Epoch 3:  10%|█         | 20/200 [09:32<2:22:44, 47.58s/batch, loss=0.662]

Total iteration 422, validation loss = 0.5880



Epoch 3:  12%|█▎        | 25/200 [11:51<2:18:22, 47.44s/batch, loss=0.586]

Total iteration 427, validation loss = 0.5918



Epoch 3:  15%|█▌        | 30/200 [14:12<2:16:26, 48.15s/batch, loss=0.561]

Total iteration 432, validation loss = 0.5888



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 3:  18%|█▊        | 35/200 [16:36<2:15:06, 49.13s/batch, loss=0.517]

Total iteration 437, validation loss = 0.5897



Epoch 3:  20%|██        | 40/200 [19:02<2:12:29, 49.69s/batch, loss=0.625]

Total iteration 442, validation loss = 0.5915



Epoch 3:  22%|██▎       | 45/200 [21:25<2:06:29, 48.97s/batch, loss=0.713]

Total iteration 447, validation loss = 0.5873



Epoch 3:  25%|██▌       | 50/200 [23:44<1:59:21, 47.74s/batch, loss=0.65] 

Total iteration 452, validation loss = 0.5862



Epoch 3:  28%|██▊       | 55/200 [26:12<2:01:40, 50.35s/batch, loss=0.599]

Total iteration 457, validation loss = 0.5859



Epoch 3:  30%|███       | 60/200 [28:37<1:56:03, 49.74s/batch, loss=0.603]

Total iteration 462, validation loss = 0.5814



Epoch 3:  32%|███▎      | 65/200 [30:59<1:49:20, 48.59s/batch, loss=0.581]

Total iteration 467, validation loss = 0.5807



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 3:  35%|███▌      | 70/200 [33:21<1:45:13, 48.57s/batch, loss=0.524]

Total iteration 472, validation loss = 0.5776



Epoch 3:  38%|███▊      | 75/200 [35:44<1:41:37, 48.78s/batch, loss=0.529]

Total iteration 477, validation loss = 0.5773



Epoch 3:  40%|████      | 80/200 [38:11<1:39:56, 49.97s/batch, loss=0.525]

Total iteration 482, validation loss = 0.5777



Epoch 3:  42%|████▎     | 85/200 [40:40<1:37:32, 50.89s/batch, loss=0.583]

Total iteration 487, validation loss = 0.5787



Epoch 3:  45%|████▌     | 90/200 [43:01<1:29:27, 48.80s/batch, loss=0.618]

Total iteration 492, validation loss = 0.5799



Epoch 3:  48%|████▊     | 95/200 [45:26<1:26:21, 49.34s/batch, loss=0.654]

Total iteration 497, validation loss = 0.5783



Epoch 3:  50%|█████     | 100/200 [47:47<1:20:37, 48.38s/batch, loss=0.55]

Total iteration 502, validation loss = 0.5787



Epoch 3:  52%|█████▎    | 105/200 [50:06<1:15:31, 47.70s/batch, loss=0.515]

Total iteration 507, validation loss = 0.5782



Epoch 3:  55%|█████▌    | 110/200 [52:26<1:11:29, 47.67s/batch, loss=0.525]

Total iteration 512, validation loss = 0.5743



Epoch 3:  57%|█████▊    | 115/200 [54:46<1:07:49, 47.88s/batch, loss=0.503]

Total iteration 517, validation loss = 0.5745



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 3:  60%|██████    | 120/200 [57:06<1:03:51, 47.90s/batch, loss=0.49]

Total iteration 522, validation loss = 0.5731



Epoch 3:  62%|██████▎   | 125/200 [59:27<1:00:13, 48.18s/batch, loss=0.564]

Total iteration 527, validation loss = 0.5716



Epoch 3:  65%|██████▌   | 130/200 [1:01:47<55:39, 47.71s/batch, loss=0.571]

Total iteration 532, validation loss = 0.5717



Epoch 3:  68%|██████▊   | 135/200 [1:04:07<51:52, 47.88s/batch, loss=0.505]

Total iteration 537, validation loss = 0.5708



Epoch 3:  70%|███████   | 140/200 [1:06:29<48:33, 48.55s/batch, loss=0.542]

Total iteration 542, validation loss = 0.5724



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 3:  72%|███████▎  | 145/200 [1:08:50<44:07, 48.15s/batch, loss=0.592]

Total iteration 547, validation loss = 0.5719



Epoch 3:  75%|███████▌  | 150/200 [1:11:11<40:01, 48.03s/batch, loss=0.482]

Total iteration 552, validation loss = 0.5697



Epoch 3:  78%|███████▊  | 155/200 [1:13:33<36:24, 48.54s/batch, loss=0.658]

Total iteration 557, validation loss = 0.5697



Epoch 3:  80%|████████  | 160/200 [1:15:55<32:19, 48.49s/batch, loss=0.55] 

Total iteration 562, validation loss = 0.5707



Epoch 3:  82%|████████▎ | 165/200 [1:18:19<28:38, 49.10s/batch, loss=0.599]

Total iteration 567, validation loss = 0.5683



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 3:  85%|████████▌ | 170/200 [1:20:38<23:56, 47.88s/batch, loss=0.488]

Total iteration 572, validation loss = 0.5721



Epoch 3:  88%|████████▊ | 175/200 [1:23:00<20:10, 48.44s/batch, loss=0.585]

Total iteration 577, validation loss = 0.5712



Epoch 3:  90%|█████████ | 180/200 [1:25:25<16:28, 49.42s/batch, loss=0.637]

Total iteration 582, validation loss = 0.5703



Epoch 3:  92%|█████████▎| 185/200 [1:27:52<12:30, 50.01s/batch, loss=0.656]

Total iteration 587, validation loss = 0.5697



Epoch 3:  95%|█████████▌| 190/200 [1:30:11<07:59, 47.94s/batch, loss=0.63] 

Total iteration 592, validation loss = 0.5684



Epoch 3:  98%|█████████▊| 195/200 [1:32:29<03:56, 47.34s/batch, loss=0.596]

Total iteration 597, validation loss = 0.5703



Epoch 3: 100%|██████████| 200/200 [1:34:34<00:00, 28.37s/batch, loss=0.558]
  0%|          | 0/200 [00:00<?, ?batch/s]

Total iteration 602, validation loss = 0.5748



Epoch 4:   2%|▎         | 5/200 [02:30<2:36:52, 48.27s/batch, loss=0.708]

Total iteration 608, validation loss = 0.5749



Epoch 4:   5%|▌         | 10/200 [04:57<2:38:44, 50.13s/batch, loss=0.601]

Total iteration 613, validation loss = 0.5796



Epoch 4:   8%|▊         | 15/200 [07:17<2:28:58, 48.32s/batch, loss=0.47] 

Total iteration 618, validation loss = 0.5811



Epoch 4:  10%|█         | 20/200 [09:34<2:20:24, 46.80s/batch, loss=0.535]

Total iteration 623, validation loss = 0.5830



Epoch 4:  12%|█▎        | 25/200 [11:50<2:15:20, 46.40s/batch, loss=0.543]

Total iteration 628, validation loss = 0.5837



Epoch 4:  15%|█▌        | 30/200 [14:05<2:11:03, 46.26s/batch, loss=0.5]  

Total iteration 633, validation loss = 0.5824



Epoch 4:  18%|█▊        | 35/200 [16:29<2:13:46, 48.65s/batch, loss=0.575]

Total iteration 638, validation loss = 0.5802



Epoch 4:  20%|██        | 40/200 [18:55<2:12:57, 49.86s/batch, loss=0.55] 

Total iteration 643, validation loss = 0.5795



Epoch 4:  22%|██▎       | 45/200 [21:14<2:03:19, 47.74s/batch, loss=0.538]

Total iteration 648, validation loss = 0.5797



Epoch 4:  25%|██▌       | 50/200 [23:37<2:02:24, 48.96s/batch, loss=0.585]

Total iteration 653, validation loss = 0.5803



Epoch 4:  28%|██▊       | 55/200 [26:00<1:57:34, 48.65s/batch, loss=0.614]

Total iteration 658, validation loss = 0.5800



Epoch 4:  30%|███       | 60/200 [28:22<1:53:48, 48.78s/batch, loss=0.564]

Total iteration 663, validation loss = 0.5776



Epoch 4:  32%|███▎      | 65/200 [30:42<1:47:39, 47.85s/batch, loss=0.584]

Total iteration 668, validation loss = 0.5725



Epoch 4:  35%|███▌      | 70/200 [33:07<1:46:47, 49.29s/batch, loss=0.52] 

Total iteration 673, validation loss = 0.5684



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 4:  38%|███▊      | 75/200 [35:22<1:37:20, 46.73s/batch, loss=0.589]

Total iteration 678, validation loss = 0.5648



Epoch 4:  40%|████      | 80/200 [37:47<1:38:21, 49.18s/batch, loss=0.546]

Total iteration 683, validation loss = 0.5648



Epoch 4:  42%|████▎     | 85/200 [40:05<1:30:41, 47.32s/batch, loss=0.728]

Total iteration 688, validation loss = 0.5625



Epoch 4:  45%|████▌     | 90/200 [42:23<1:26:19, 47.08s/batch, loss=0.513]

Total iteration 693, validation loss = 0.5638



Epoch 4:  48%|████▊     | 95/200 [44:42<1:22:46, 47.30s/batch, loss=0.707]

Total iteration 698, validation loss = 0.5646



Epoch 4:  50%|█████     | 100/200 [47:00<1:18:49, 47.29s/batch, loss=0.642]

Total iteration 703, validation loss = 0.5644



Epoch 4:  52%|█████▎    | 105/200 [49:19<1:15:12, 47.50s/batch, loss=0.634]

Total iteration 708, validation loss = 0.5633



Epoch 4:  55%|█████▍    | 109/200 [51:38<21:03, 13.89s/batch, loss=0.56]   

Total iteration 713, validation loss = 0.5612



Epoch 4:  57%|█████▊    | 115/200 [53:57<1:07:11, 47.43s/batch, loss=0.699]

Total iteration 718, validation loss = 0.5605



Epoch 4:  60%|██████    | 120/200 [56:16<1:03:08, 47.36s/batch, loss=0.573]

Total iteration 723, validation loss = 0.5656



Epoch 4:  62%|██████▎   | 125/200 [58:39<1:00:59, 48.79s/batch, loss=0.563]

Total iteration 728, validation loss = 0.5681



Epoch 4:  65%|██████▌   | 130/200 [1:01:00<56:27, 48.39s/batch, loss=0.626]

Total iteration 733, validation loss = 0.5669



Epoch 4:  68%|██████▊   | 135/200 [1:03:21<52:05, 48.08s/batch, loss=0.462]

Total iteration 738, validation loss = 0.5677



Epoch 4:  70%|███████   | 140/200 [1:05:42<48:04, 48.07s/batch, loss=0.509]

Total iteration 743, validation loss = 0.5655



Epoch 4:  72%|███████▎  | 145/200 [1:08:04<44:25, 48.46s/batch, loss=0.547]

Total iteration 748, validation loss = 0.5645



Epoch 4:  75%|███████▌  | 150/200 [1:10:31<41:45, 50.10s/batch, loss=0.554]

Total iteration 753, validation loss = 0.5657



Epoch 4:  78%|███████▊  | 155/200 [1:12:50<35:52, 47.83s/batch, loss=0.648]

Total iteration 758, validation loss = 0.5656



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 4:  80%|████████  | 160/200 [1:15:11<32:11, 48.28s/batch, loss=0.662]

Total iteration 763, validation loss = 0.5641



Epoch 4:  82%|████████▎ | 165/200 [1:17:28<27:22, 46.94s/batch, loss=0.551]

Total iteration 768, validation loss = 0.5605



Epoch 4:  85%|████████▌ | 170/200 [1:19:45<23:25, 46.84s/batch, loss=0.546]

Total iteration 773, validation loss = 0.5594



Epoch 4:  88%|████████▊ | 175/200 [1:22:02<19:26, 46.65s/batch, loss=0.551]

Total iteration 778, validation loss = 0.5584



Epoch 4:  90%|█████████ | 180/200 [1:24:20<15:41, 47.09s/batch, loss=0.436]

Total iteration 783, validation loss = 0.5573



Epoch 4:  92%|█████████▎| 185/200 [1:26:38<11:44, 46.97s/batch, loss=0.428]

Total iteration 788, validation loss = 0.5563



Epoch 4:  95%|█████████▌| 190/200 [1:28:57<07:52, 47.25s/batch, loss=0.489]

Total iteration 793, validation loss = 0.5544



Epoch 4:  98%|█████████▊| 195/200 [1:31:15<03:55, 47.19s/batch, loss=0.598]

Total iteration 798, validation loss = 0.5542



Epoch 4: 100%|██████████| 200/200 [1:33:18<00:00, 27.99s/batch, loss=0.541]
  0%|          | 0/200 [00:00<?, ?batch/s]

Total iteration 803, validation loss = 0.5533



Epoch 5:   2%|▎         | 5/200 [02:29<2:36:00, 48.00s/batch, loss=0.561]

Total iteration 809, validation loss = 0.5536



Epoch 5:   5%|▌         | 10/200 [04:56<2:37:50, 49.84s/batch, loss=0.388]

Total iteration 814, validation loss = 0.5565



Epoch 5:   8%|▊         | 15/200 [07:18<2:30:17, 48.75s/batch, loss=0.521]

Total iteration 819, validation loss = 0.5567



Epoch 5:  10%|█         | 20/200 [09:39<2:25:31, 48.51s/batch, loss=0.497]

Total iteration 824, validation loss = 0.5597



Epoch 5:  12%|█▎        | 25/200 [12:02<2:22:08, 48.74s/batch, loss=0.584]

Total iteration 829, validation loss = 0.5615



Epoch 5:  15%|█▌        | 30/200 [14:28<2:20:30, 49.59s/batch, loss=0.654]

Total iteration 834, validation loss = 0.5634



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 5:  18%|█▊        | 35/200 [16:51<2:15:32, 49.29s/batch, loss=0.533]

Total iteration 839, validation loss = 0.5658



Epoch 5:  20%|██        | 40/200 [19:14<2:10:15, 48.85s/batch, loss=0.616]

Total iteration 844, validation loss = 0.5676



Epoch 5:  22%|██▎       | 45/200 [21:37<2:06:10, 48.84s/batch, loss=0.601]

Total iteration 849, validation loss = 0.5658



Epoch 5:  25%|██▌       | 50/200 [23:59<2:01:14, 48.50s/batch, loss=0.673]

Total iteration 854, validation loss = 0.5655



Epoch 5:  28%|██▊       | 55/200 [26:22<1:58:37, 49.09s/batch, loss=0.649]

Total iteration 859, validation loss = 0.5629



Epoch 5:  30%|███       | 60/200 [28:50<1:57:03, 50.17s/batch, loss=0.644]

Total iteration 864, validation loss = 0.5613



Epoch 5:  32%|███▎      | 65/200 [31:10<1:48:53, 48.40s/batch, loss=0.546]

Total iteration 869, validation loss = 0.5627



Epoch 5:  35%|███▌      | 70/200 [33:30<1:43:36, 47.82s/batch, loss=0.556]

Total iteration 874, validation loss = 0.5633



Epoch 5:  38%|███▊      | 75/200 [35:50<1:39:52, 47.94s/batch, loss=0.547]

Total iteration 879, validation loss = 0.5621



Epoch 5:  40%|████      | 80/200 [38:12<1:36:38, 48.32s/batch, loss=0.627]

Total iteration 884, validation loss = 0.5616



Epoch 5:  42%|████▎     | 85/200 [40:33<1:32:18, 48.16s/batch, loss=0.485]

Total iteration 889, validation loss = 0.5592



Epoch 5:  45%|████▌     | 90/200 [42:58<1:30:43, 49.49s/batch, loss=0.56] 

Total iteration 894, validation loss = 0.5579



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 5:  48%|████▊     | 95/200 [45:20<1:24:59, 48.57s/batch, loss=0.791]

Total iteration 899, validation loss = 0.5563



Epoch 5:  50%|█████     | 100/200 [47:44<1:22:06, 49.26s/batch, loss=0.453]

Total iteration 904, validation loss = 0.5559



Epoch 5:  52%|█████▎    | 105/200 [50:03<1:15:42, 47.82s/batch, loss=0.505]

Total iteration 909, validation loss = 0.5567



Epoch 5:  55%|█████▌    | 110/200 [52:25<1:12:18, 48.21s/batch, loss=0.795]

Total iteration 914, validation loss = 0.5578



Epoch 5:  57%|█████▊    | 115/200 [54:51<1:10:29, 49.76s/batch, loss=0.679]

Total iteration 919, validation loss = 0.5569



Epoch 5:  60%|██████    | 120/200 [57:13<1:04:53, 48.67s/batch, loss=0.554]

Total iteration 924, validation loss = 0.5561



Epoch 5:  62%|██████▎   | 125/200 [59:35<1:00:45, 48.60s/batch, loss=0.675]

Total iteration 929, validation loss = 0.5563



Epoch 5:  65%|██████▌   | 130/200 [1:01:59<57:07, 48.96s/batch, loss=0.458]

Total iteration 934, validation loss = 0.5596



Epoch 5:  68%|██████▊   | 135/200 [1:04:24<53:36, 49.48s/batch, loss=0.568]

Total iteration 939, validation loss = 0.5619



Epoch 5:  70%|███████   | 140/200 [1:06:47<49:10, 49.18s/batch, loss=0.613]

Total iteration 944, validation loss = 0.5623



Epoch 5:  72%|███████▎  | 145/200 [1:09:12<45:16, 49.39s/batch, loss=0.633]

Total iteration 949, validation loss = 0.5616



Epoch 5:  75%|███████▌  | 150/200 [1:11:37<41:22, 49.65s/batch, loss=0.55] 

Total iteration 954, validation loss = 0.5597



Epoch 5:  78%|███████▊  | 155/200 [1:14:02<37:08, 49.52s/batch, loss=0.474]

Total iteration 959, validation loss = 0.5577



Epoch 5:  80%|████████  | 160/200 [1:16:26<32:58, 49.47s/batch, loss=0.407]

Total iteration 964, validation loss = 0.5541



Epoch 5:  82%|████████▎ | 165/200 [1:18:52<29:10, 50.00s/batch, loss=0.471]

Total iteration 969, validation loss = 0.5527



Epoch 5:  85%|████████▌ | 170/200 [1:21:17<24:45, 49.51s/batch, loss=0.509]

Total iteration 974, validation loss = 0.5521



Epoch 5:  88%|████████▊ | 175/200 [1:23:42<20:36, 49.44s/batch, loss=0.445]

Total iteration 979, validation loss = 0.5523



Epoch 5:  90%|█████████ | 180/200 [1:25:58<15:43, 47.16s/batch, loss=0.499]

Total iteration 984, validation loss = 0.5525



Epoch 5:  92%|█████████▎| 185/200 [1:28:18<11:55, 47.70s/batch, loss=0.528]

Total iteration 989, validation loss = 0.5547



Epoch 5:  95%|█████████▌| 190/200 [1:30:45<08:16, 49.67s/batch, loss=0.665]

Total iteration 994, validation loss = 0.5557



Epoch 5:  98%|█████████▊| 195/200 [1:33:06<04:02, 48.47s/batch, loss=0.521]

Total iteration 999, validation loss = 0.5558



Epoch 5: 100%|██████████| 200/200 [1:35:14<00:00, 28.57s/batch, loss=0.508]
  0%|          | 0/200 [00:00<?, ?batch/s]

Total iteration 1004, validation loss = 0.5554



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 6:   2%|▏         | 4/200 [02:28<16:24,  5.02s/batch, loss=0.513]

Total iteration 1010, validation loss = 0.5583



Epoch 6:   5%|▌         | 10/200 [04:43<2:25:56, 46.09s/batch, loss=0.499]

Total iteration 1015, validation loss = 0.5584



Epoch 6:   8%|▊         | 15/200 [06:58<2:21:54, 46.02s/batch, loss=0.557]

Total iteration 1020, validation loss = 0.5591



Epoch 6:  10%|█         | 20/200 [09:13<2:18:03, 46.02s/batch, loss=0.564]

Total iteration 1025, validation loss = 0.5605



Epoch 6:  12%|█▎        | 25/200 [11:27<2:13:54, 45.91s/batch, loss=0.469]

Total iteration 1030, validation loss = 0.5636



Epoch 6:  15%|█▌        | 30/200 [13:52<2:18:52, 49.01s/batch, loss=0.447]

Total iteration 1035, validation loss = 0.5645



Epoch 6:  18%|█▊        | 35/200 [16:11<2:11:12, 47.71s/batch, loss=0.548]

Total iteration 1040, validation loss = 0.5635



Epoch 6:  20%|██        | 40/200 [18:37<2:11:46, 49.42s/batch, loss=0.501]

Total iteration 1045, validation loss = 0.5621



Epoch 6:  22%|██▎       | 45/200 [20:55<2:03:14, 47.71s/batch, loss=0.505]

Total iteration 1050, validation loss = 0.5590



Epoch 6:  25%|██▌       | 50/200 [23:12<1:57:20, 46.94s/batch, loss=0.417]

Total iteration 1055, validation loss = 0.5556



Epoch 6:  28%|██▊       | 55/200 [25:29<1:52:51, 46.70s/batch, loss=0.494]

Total iteration 1060, validation loss = 0.5537



Epoch 6:  30%|███       | 60/200 [27:47<1:49:29, 46.93s/batch, loss=0.553]

Total iteration 1065, validation loss = 0.5529



Epoch 6:  32%|███▎      | 65/200 [30:06<1:46:17, 47.24s/batch, loss=0.694]

Total iteration 1070, validation loss = 0.5523



Epoch 6:  35%|███▌      | 70/200 [32:25<1:43:14, 47.65s/batch, loss=0.458]

Total iteration 1075, validation loss = 0.5536



Epoch 6:  38%|███▊      | 75/200 [34:46<1:39:58, 47.99s/batch, loss=0.457]

Total iteration 1080, validation loss = 0.5542



Epoch 6:  40%|████      | 80/200 [37:06<1:35:38, 47.82s/batch, loss=0.653]

Total iteration 1085, validation loss = 0.5564



Epoch 6:  42%|████▎     | 85/200 [39:29<1:33:31, 48.80s/batch, loss=0.479]

Total iteration 1090, validation loss = 0.5564



Epoch 6:  45%|████▌     | 90/200 [41:47<1:26:43, 47.30s/batch, loss=0.628]

Total iteration 1095, validation loss = 0.5560



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 6:  48%|████▊     | 95/200 [44:15<1:27:38, 50.08s/batch, loss=0.773]

Total iteration 1100, validation loss = 0.5552



Epoch 6:  50%|█████     | 100/200 [46:40<1:23:04, 49.85s/batch, loss=0.452]

Total iteration 1105, validation loss = 0.5573



Epoch 6:  52%|█████▎    | 105/200 [48:58<1:15:13, 47.51s/batch, loss=0.595]

Total iteration 1110, validation loss = 0.5597



Epoch 6:  55%|█████▌    | 110/200 [51:21<1:12:43, 48.49s/batch, loss=0.494]

Total iteration 1115, validation loss = 0.5604



Epoch 6:  57%|█████▊    | 115/200 [53:42<1:08:24, 48.29s/batch, loss=0.464]

Total iteration 1120, validation loss = 0.5603



Epoch 6:  60%|██████    | 120/200 [56:08<1:06:23, 49.79s/batch, loss=0.457]

Total iteration 1125, validation loss = 0.5614



Epoch 6:  62%|██████▎   | 125/200 [58:28<1:00:03, 48.05s/batch, loss=0.615]

Total iteration 1130, validation loss = 0.5595



Epoch 6:  65%|██████▌   | 130/200 [1:00:49<56:16, 48.24s/batch, loss=0.653]

Total iteration 1135, validation loss = 0.5578



Epoch 6:  68%|██████▊   | 135/200 [1:03:12<52:50, 48.78s/batch, loss=0.617]

Total iteration 1140, validation loss = 0.5577



Epoch 6:  70%|███████   | 140/200 [1:05:35<48:44, 48.74s/batch, loss=0.548]

Total iteration 1145, validation loss = 0.5575



Epoch 6:  72%|███████▎  | 145/200 [1:07:59<45:02, 49.13s/batch, loss=0.53] 

Total iteration 1150, validation loss = 0.5587



Epoch 6:  75%|███████▌  | 150/200 [1:10:22<40:48, 48.97s/batch, loss=0.534]

Total iteration 1155, validation loss = 0.5569



Epoch 6:  78%|███████▊  | 155/200 [1:12:45<36:35, 48.80s/batch, loss=0.517]

Total iteration 1160, validation loss = 0.5571



Epoch 6:  80%|████████  | 160/200 [1:15:10<33:09, 49.74s/batch, loss=0.518]

Total iteration 1165, validation loss = 0.5555



Epoch 6:  82%|████████▎ | 165/200 [1:17:29<27:45, 47.59s/batch, loss=0.584]

Total iteration 1170, validation loss = 0.5517



Epoch 6:  85%|████████▌ | 170/200 [1:19:47<23:34, 47.14s/batch, loss=0.598]

Total iteration 1175, validation loss = 0.5516



Epoch 6:  88%|████████▊ | 175/200 [1:22:05<19:37, 47.09s/batch, loss=0.607]

Total iteration 1180, validation loss = 0.5506



Epoch 6:  90%|█████████ | 180/200 [1:24:21<15:34, 46.71s/batch, loss=0.523]

Total iteration 1185, validation loss = 0.5494



Epoch 6:  92%|█████████▎| 185/200 [1:26:42<11:57, 47.83s/batch, loss=0.662]

Total iteration 1190, validation loss = 0.5499



Epoch 6:  95%|█████████▌| 190/200 [1:29:05<08:07, 48.78s/batch, loss=0.542]

Total iteration 1195, validation loss = 0.5511



Epoch 6:  98%|█████████▊| 195/200 [1:31:28<04:04, 48.83s/batch, loss=0.464]

Total iteration 1200, validation loss = 0.5499



Epoch 6: 100%|██████████| 200/200 [1:33:40<00:00, 28.10s/batch, loss=0.504]
  0%|          | 0/200 [00:00<?, ?batch/s]

Total iteration 1205, validation loss = 0.5505



Epoch 7:   2%|▎         | 5/200 [02:32<2:39:28, 49.07s/batch, loss=0.447]

Total iteration 1211, validation loss = 0.5514



Epoch 7:   5%|▌         | 10/200 [04:52<2:31:50, 47.95s/batch, loss=0.743]

Total iteration 1216, validation loss = 0.5516



Epoch 7:   8%|▊         | 15/200 [07:18<2:32:49, 49.57s/batch, loss=0.39] 

Total iteration 1221, validation loss = 0.5515



Epoch 7:  10%|█         | 20/200 [09:37<2:23:24, 47.81s/batch, loss=0.553]

Total iteration 1226, validation loss = 0.5492



Epoch 7:  12%|█▎        | 25/200 [11:55<2:18:07, 47.36s/batch, loss=0.553]

Total iteration 1231, validation loss = 0.5534



Epoch 7:  15%|█▌        | 30/200 [14:18<2:17:45, 48.62s/batch, loss=0.525]

Total iteration 1236, validation loss = 0.5580



Epoch 7:  18%|█▊        | 35/200 [16:38<2:11:31, 47.83s/batch, loss=0.435]

Total iteration 1241, validation loss = 0.5580



Epoch 7:  20%|██        | 40/200 [18:59<2:08:43, 48.27s/batch, loss=0.433]

Total iteration 1246, validation loss = 0.5568



Epoch 7:  22%|██▎       | 45/200 [21:20<2:04:28, 48.18s/batch, loss=0.49] 

Total iteration 1251, validation loss = 0.5526



Epoch 7:  25%|██▌       | 50/200 [23:48<2:05:26, 50.18s/batch, loss=0.667]

Total iteration 1256, validation loss = 0.5500



Epoch 7:  28%|██▊       | 55/200 [26:10<1:57:59, 48.83s/batch, loss=0.453]

Total iteration 1261, validation loss = 0.5484



Epoch 7:  30%|███       | 60/200 [28:32<1:53:09, 48.50s/batch, loss=0.603]

Total iteration 1266, validation loss = 0.5472



Epoch 7:  32%|███▎      | 65/200 [30:54<1:49:15, 48.56s/batch, loss=0.379]

Total iteration 1271, validation loss = 0.5465



Epoch 7:  35%|███▌      | 70/200 [33:16<1:45:08, 48.52s/batch, loss=0.586]

Total iteration 1276, validation loss = 0.5461



Epoch 7:  38%|███▊      | 75/200 [35:39<1:41:42, 48.82s/batch, loss=0.576]

Total iteration 1281, validation loss = 0.5471



Epoch 7:  40%|████      | 80/200 [38:00<1:36:48, 48.40s/batch, loss=0.454]

Total iteration 1286, validation loss = 0.5498



Epoch 7:  42%|████▎     | 85/200 [40:22<1:32:34, 48.30s/batch, loss=0.48] 

Total iteration 1291, validation loss = 0.5536



Epoch 7:  45%|████▌     | 90/200 [42:42<1:27:45, 47.87s/batch, loss=0.452]

Total iteration 1296, validation loss = 0.5556



Epoch 7:  48%|████▊     | 95/200 [45:07<1:26:29, 49.42s/batch, loss=0.53] 

Total iteration 1301, validation loss = 0.5559



Epoch 7:  50%|█████     | 100/200 [47:28<1:20:52, 48.52s/batch, loss=0.592]

Total iteration 1306, validation loss = 0.5534



Epoch 7:  52%|█████▎    | 105/200 [49:51<1:16:53, 48.56s/batch, loss=0.504]

Total iteration 1311, validation loss = 0.5527



Epoch 7:  55%|█████▌    | 110/200 [52:20<1:16:05, 50.73s/batch, loss=0.481]

Total iteration 1316, validation loss = 0.5522



Epoch 7:  57%|█████▊    | 115/200 [54:40<1:08:27, 48.32s/batch, loss=0.577]

Total iteration 1321, validation loss = 0.5547



Epoch 7:  60%|██████    | 120/200 [57:07<1:06:39, 49.99s/batch, loss=0.492]

Total iteration 1326, validation loss = 0.5533



Epoch 7:  62%|██████▎   | 125/200 [59:27<1:00:24, 48.33s/batch, loss=0.609]

Total iteration 1331, validation loss = 0.5527



Epoch 7:  65%|██████▌   | 130/200 [1:01:49<56:17, 48.26s/batch, loss=0.444]

Total iteration 1336, validation loss = 0.5535



Epoch 7:  68%|██████▊   | 135/200 [1:04:09<51:57, 47.97s/batch, loss=0.474]

Total iteration 1341, validation loss = 0.5539



Epoch 7:  70%|███████   | 140/200 [1:06:34<49:24, 49.40s/batch, loss=0.476]

Total iteration 1346, validation loss = 0.5550



Epoch 7:  72%|███████▎  | 145/200 [1:08:52<43:32, 47.50s/batch, loss=0.371]

Total iteration 1351, validation loss = 0.5521



Epoch 7:  75%|███████▌  | 150/200 [1:11:22<42:13, 50.67s/batch, loss=0.4]  

Total iteration 1356, validation loss = 0.5495



Epoch 7:  78%|███████▊  | 155/200 [1:13:47<37:16, 49.69s/batch, loss=0.304]

Total iteration 1361, validation loss = 0.5482



Epoch 7:  80%|████████  | 160/200 [1:16:12<33:13, 49.83s/batch, loss=0.552]

Total iteration 1366, validation loss = 0.5494



Epoch 7:  82%|████████▎ | 165/200 [1:18:42<29:50, 51.16s/batch, loss=0.451]

Total iteration 1371, validation loss = 0.5508



Epoch 7:  85%|████████▌ | 170/200 [1:21:02<24:07, 48.25s/batch, loss=0.616]

Total iteration 1376, validation loss = 0.5529



Epoch 7:  88%|████████▊ | 175/200 [1:23:28<20:42, 49.72s/batch, loss=0.688]

Total iteration 1381, validation loss = 0.5537



Epoch 7:  90%|█████████ | 180/200 [1:25:54<16:35, 49.77s/batch, loss=0.524]

Total iteration 1386, validation loss = 0.5528



Epoch 7:  92%|█████████▎| 185/200 [1:28:15<12:09, 48.65s/batch, loss=0.65] 

Total iteration 1391, validation loss = 0.5529



Epoch 7:  95%|█████████▌| 190/200 [1:30:37<08:03, 48.40s/batch, loss=0.626]

Total iteration 1396, validation loss = 0.5524



Epoch 7:  98%|█████████▊| 195/200 [1:32:58<04:01, 48.21s/batch, loss=0.549]

Total iteration 1401, validation loss = 0.5495



Epoch 7: 100%|██████████| 200/200 [1:35:09<00:00, 28.55s/batch, loss=0.532]
  0%|          | 0/200 [00:00<?, ?batch/s]

Total iteration 1406, validation loss = 0.5499



Epoch 8:   2%|▎         | 5/200 [02:35<2:43:10, 50.21s/batch, loss=0.585]

Total iteration 1412, validation loss = 0.5502



Epoch 8:   5%|▌         | 10/200 [04:58<2:34:55, 48.92s/batch, loss=0.6] 

Total iteration 1417, validation loss = 0.5505



Epoch 8:   8%|▊         | 15/200 [07:21<2:31:06, 49.01s/batch, loss=0.548]

Total iteration 1422, validation loss = 0.5490



Epoch 8:  10%|█         | 20/200 [09:51<2:32:23, 50.79s/batch, loss=0.541]

Total iteration 1427, validation loss = 0.5488



Epoch 8:  12%|█▎        | 25/200 [12:17<2:26:55, 50.37s/batch, loss=0.422]

Total iteration 1432, validation loss = 0.5492



Epoch 8:  15%|█▌        | 30/200 [14:42<2:20:30, 49.59s/batch, loss=0.509]

Total iteration 1437, validation loss = 0.5506



Epoch 8:  18%|█▊        | 35/200 [17:05<2:14:53, 49.05s/batch, loss=0.737]

Total iteration 1442, validation loss = 0.5538



Epoch 8:  20%|██        | 40/200 [19:29<2:10:44, 49.03s/batch, loss=0.472]

Total iteration 1447, validation loss = 0.5571



Epoch 8:  22%|██▎       | 45/200 [21:52<2:06:47, 49.08s/batch, loss=0.379]

Total iteration 1452, validation loss = 0.5562



Epoch 8:  25%|██▌       | 50/200 [24:15<2:02:14, 48.89s/batch, loss=0.812]

Total iteration 1457, validation loss = 0.5577



Epoch 8:  28%|██▊       | 55/200 [26:38<1:57:56, 48.81s/batch, loss=0.577]

Total iteration 1462, validation loss = 0.5597



Epoch 8:  30%|███       | 60/200 [29:06<1:57:46, 50.48s/batch, loss=0.384]

Total iteration 1467, validation loss = 0.5586



Epoch 8:  32%|███▎      | 65/200 [31:28<1:49:54, 48.85s/batch, loss=0.549]

Total iteration 1472, validation loss = 0.5542



Epoch 8:  35%|███▌      | 70/200 [33:52<1:46:36, 49.20s/batch, loss=0.416]

Total iteration 1477, validation loss = 0.5516



Epoch 8:  38%|███▊      | 75/200 [36:14<1:41:09, 48.56s/batch, loss=0.641]

Total iteration 1482, validation loss = 0.5508



Epoch 8:  40%|████      | 80/200 [38:36<1:37:08, 48.57s/batch, loss=0.573]

Total iteration 1487, validation loss = 0.5506



Epoch 8:  42%|████▎     | 85/200 [40:58<1:32:42, 48.37s/batch, loss=0.51] 

Total iteration 1492, validation loss = 0.5518



Epoch 8:  45%|████▌     | 90/200 [43:21<1:29:33, 48.85s/batch, loss=0.424]

Total iteration 1497, validation loss = 0.5548



Epoch 8:  48%|████▊     | 95/200 [45:44<1:25:34, 48.90s/batch, loss=0.439]

Total iteration 1502, validation loss = 0.5556



Epoch 8:  50%|█████     | 100/200 [48:07<1:21:21, 48.82s/batch, loss=0.459]

Total iteration 1507, validation loss = 0.5548



Epoch 8:  52%|█████▎    | 105/200 [50:33<1:19:01, 49.91s/batch, loss=0.532]

Total iteration 1512, validation loss = 0.5539



Epoch 8:  55%|█████▌    | 110/200 [52:56<1:13:37, 49.08s/batch, loss=0.495]

Total iteration 1517, validation loss = 0.5529



Epoch 8:  57%|█████▊    | 115/200 [55:18<1:09:02, 48.73s/batch, loss=0.418]

Total iteration 1522, validation loss = 0.5532



Epoch 8:  60%|██████    | 120/200 [57:48<1:07:47, 50.84s/batch, loss=0.57] 

Total iteration 1527, validation loss = 0.5547



Epoch 8:  62%|██████▎   | 125/200 [1:00:08<1:00:25, 48.34s/batch, loss=0.469]

Total iteration 1532, validation loss = 0.5587



Epoch 8:  65%|██████▌   | 130/200 [1:02:28<55:58, 47.98s/batch, loss=0.44]   

Total iteration 1537, validation loss = 0.5646



Epoch 8:  68%|██████▊   | 135/200 [1:04:49<52:10, 48.16s/batch, loss=0.521]

Total iteration 1542, validation loss = 0.5634



Epoch 8:  70%|███████   | 140/200 [1:07:10<48:03, 48.05s/batch, loss=0.413]

Total iteration 1547, validation loss = 0.5632



Epoch 8:  72%|███████▎  | 145/200 [1:09:30<43:53, 47.89s/batch, loss=0.352]

Total iteration 1552, validation loss = 0.5571



Epoch 8:  75%|███████▌  | 150/200 [1:11:55<41:08, 49.38s/batch, loss=0.301]

Total iteration 1557, validation loss = 0.5513



Epoch 8:  78%|███████▊  | 155/200 [1:14:14<35:40, 47.58s/batch, loss=0.453]

Total iteration 1562, validation loss = 0.5503



Epoch 8:  80%|████████  | 160/200 [1:16:32<31:35, 47.38s/batch, loss=0.432]

Total iteration 1567, validation loss = 0.5478



Epoch 8:  82%|████████▎ | 165/200 [1:18:51<27:31, 47.19s/batch, loss=0.859]

Total iteration 1572, validation loss = 0.5469



Epoch 8:  85%|████████▌ | 170/200 [1:21:08<23:26, 46.87s/batch, loss=0.351]

Total iteration 1577, validation loss = 0.5482



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 8:  88%|████████▊ | 175/200 [1:23:25<19:30, 46.81s/batch, loss=0.529]

Total iteration 1582, validation loss = 0.5502



Epoch 8:  90%|█████████ | 180/200 [1:25:45<15:52, 47.65s/batch, loss=0.553]

Total iteration 1587, validation loss = 0.5516



Epoch 8:  92%|█████████▎| 185/200 [1:28:10<12:18, 49.21s/batch, loss=0.492]

Total iteration 1592, validation loss = 0.5528



Epoch 8:  95%|█████████▌| 190/200 [1:30:32<08:07, 48.75s/batch, loss=0.653]

Total iteration 1597, validation loss = 0.5515



Epoch 8:  98%|█████████▊| 195/200 [1:32:55<04:04, 48.82s/batch, loss=0.469]

Total iteration 1602, validation loss = 0.5494



Epoch 8: 100%|██████████| 200/200 [1:35:04<00:00, 28.52s/batch, loss=0.539]
  0%|          | 0/200 [00:00<?, ?batch/s]

Total iteration 1607, validation loss = 0.5513



Epoch 9:   2%|▎         | 5/200 [02:29<2:35:57, 47.99s/batch, loss=0.423]

Total iteration 1613, validation loss = 0.5556



Epoch 9:   5%|▌         | 10/200 [04:46<2:29:00, 47.06s/batch, loss=0.3] 

Total iteration 1618, validation loss = 0.5558



Epoch 9:   8%|▊         | 15/200 [07:04<2:25:14, 47.10s/batch, loss=0.347]

Total iteration 1623, validation loss = 0.5554



Epoch 9:  10%|█         | 20/200 [09:23<2:21:27, 47.15s/batch, loss=0.743]

Total iteration 1628, validation loss = 0.5561



Epoch 9:  12%|█▎        | 25/200 [11:47<2:22:49, 48.97s/batch, loss=0.487]

Total iteration 1633, validation loss = 0.5561



Epoch 9:  15%|█▌        | 30/200 [14:10<2:18:12, 48.78s/batch, loss=0.466]

Total iteration 1638, validation loss = 0.5593



Epoch 9:  18%|█▊        | 35/200 [16:34<2:15:27, 49.26s/batch, loss=0.603]

Total iteration 1643, validation loss = 0.5611



Epoch 9:  20%|██        | 40/200 [18:56<2:09:47, 48.67s/batch, loss=0.52] 

Total iteration 1648, validation loss = 0.5592



Epoch 9:  22%|██▎       | 45/200 [21:25<2:10:38, 50.57s/batch, loss=0.368]

Total iteration 1653, validation loss = 0.5591



Epoch 9:  25%|██▌       | 50/200 [23:47<2:02:15, 48.90s/batch, loss=0.46] 

Total iteration 1658, validation loss = 0.5604



Epoch 9:  28%|██▊       | 55/200 [26:13<2:00:06, 49.70s/batch, loss=0.589]

Total iteration 1663, validation loss = 0.5602



Epoch 9:  30%|███       | 60/200 [28:32<1:52:01, 48.01s/batch, loss=0.478]

Total iteration 1668, validation loss = 0.5599



Epoch 9:  32%|███▎      | 65/200 [30:52<1:47:29, 47.77s/batch, loss=0.346]

Total iteration 1673, validation loss = 0.5585



Epoch 9:  35%|███▌      | 70/200 [33:12<1:43:30, 47.77s/batch, loss=0.362]

Total iteration 1678, validation loss = 0.5580



Epoch 9:  38%|███▊      | 75/200 [35:33<1:40:24, 48.20s/batch, loss=0.402]

Total iteration 1683, validation loss = 0.5571



Epoch 9:  40%|████      | 80/200 [37:55<1:36:31, 48.27s/batch, loss=0.357]

Total iteration 1688, validation loss = 0.5552



Epoch 9:  42%|████▎     | 85/200 [40:24<1:37:08, 50.68s/batch, loss=0.408]

Total iteration 1693, validation loss = 0.5539



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 9:  45%|████▌     | 90/200 [42:46<1:29:47, 48.97s/batch, loss=0.513]

Total iteration 1698, validation loss = 0.5498



Epoch 9:  48%|████▊     | 95/200 [45:06<1:23:56, 47.97s/batch, loss=0.54] 

Total iteration 1703, validation loss = 0.5475



Epoch 9:  50%|█████     | 100/200 [47:26<1:19:35, 47.76s/batch, loss=0.662]

Total iteration 1708, validation loss = 0.5476



Epoch 9:  52%|█████▎    | 105/200 [49:45<1:15:20, 47.58s/batch, loss=0.609]

Total iteration 1713, validation loss = 0.5487



Epoch 9:  55%|█████▌    | 110/200 [52:04<1:11:20, 47.56s/batch, loss=0.435]

Total iteration 1718, validation loss = 0.5494



Epoch 9:  57%|█████▊    | 115/200 [54:24<1:07:24, 47.59s/batch, loss=0.557]

Total iteration 1723, validation loss = 0.5505



Epoch 9:  60%|██████    | 120/200 [56:43<1:03:21, 47.52s/batch, loss=0.51] 

Total iteration 1728, validation loss = 0.5541



Epoch 9:  62%|██████▎   | 125/200 [59:07<1:01:15, 49.00s/batch, loss=0.466]

Total iteration 1733, validation loss = 0.5541



Epoch 9:  65%|██████▌   | 130/200 [1:01:27<55:54, 47.92s/batch, loss=0.524]

Total iteration 1738, validation loss = 0.5537



Epoch 9:  68%|██████▊   | 135/200 [1:03:52<53:27, 49.34s/batch, loss=0.493]

Total iteration 1743, validation loss = 0.5556



Epoch 9:  70%|███████   | 140/200 [1:06:22<51:09, 51.17s/batch, loss=0.611]

Total iteration 1748, validation loss = 0.5614



Epoch 9:  72%|███████▎  | 145/200 [1:08:48<46:05, 50.28s/batch, loss=0.47] 

Total iteration 1753, validation loss = 0.5611



Epoch 9:  75%|███████▌  | 150/200 [1:11:08<40:12, 48.24s/batch, loss=0.483]

Total iteration 1758, validation loss = 0.5567



Epoch 9:  78%|███████▊  | 155/200 [1:13:28<35:55, 47.90s/batch, loss=0.374]

Total iteration 1763, validation loss = 0.5508



Epoch 9:  80%|████████  | 160/200 [1:15:56<33:27, 50.18s/batch, loss=0.578]

Total iteration 1768, validation loss = 0.5483



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 9:  82%|████████▎ | 165/200 [1:18:20<28:52, 49.49s/batch, loss=0.553]

Total iteration 1773, validation loss = 0.5481



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 9:  85%|████████▌ | 170/200 [1:20:47<25:01, 50.05s/batch, loss=0.454]

Total iteration 1778, validation loss = 0.5479



Epoch 9:  88%|████████▊ | 175/200 [1:23:04<19:37, 47.08s/batch, loss=0.376]

Total iteration 1783, validation loss = 0.5479



Epoch 9:  90%|█████████ | 180/200 [1:25:19<15:29, 46.49s/batch, loss=0.48] 

Total iteration 1788, validation loss = 0.5493



Epoch 9:  92%|█████████▎| 185/200 [1:27:36<11:40, 46.69s/batch, loss=0.416]

Total iteration 1793, validation loss = 0.5519



Epoch 9:  95%|█████████▌| 190/200 [1:29:55<07:51, 47.15s/batch, loss=0.59] 

Total iteration 1798, validation loss = 0.5525



Epoch 9:  98%|█████████▊| 195/200 [1:32:13<03:56, 47.20s/batch, loss=0.53] 

Total iteration 1803, validation loss = 0.5554



Epoch 9: 100%|██████████| 200/200 [1:34:22<00:00, 28.31s/batch, loss=0.575]
  0%|          | 0/200 [00:00<?, ?batch/s]

Total iteration 1808, validation loss = 0.5589



Epoch 10:   2%|▎         | 5/200 [02:29<2:35:44, 47.92s/batch, loss=0.458]

Total iteration 1814, validation loss = 0.5613



Epoch 10:   5%|▌         | 10/200 [04:50<2:32:52, 48.28s/batch, loss=0.392]

Total iteration 1819, validation loss = 0.5639



Epoch 10:   8%|▊         | 15/200 [07:11<2:28:45, 48.25s/batch, loss=0.289]

Total iteration 1824, validation loss = 0.5623



Epoch 10:  10%|█         | 20/200 [09:34<2:25:46, 48.59s/batch, loss=0.45] 

Total iteration 1829, validation loss = 0.5623



Epoch 10:  12%|█▎        | 25/200 [11:55<2:20:47, 48.27s/batch, loss=0.435]

Total iteration 1834, validation loss = 0.5622



Epoch 10:  15%|█▌        | 30/200 [14:17<2:17:18, 48.46s/batch, loss=0.339]

Total iteration 1839, validation loss = 0.5597



Epoch 10:  18%|█▊        | 35/200 [16:39<2:13:53, 48.69s/batch, loss=0.451]

Total iteration 1844, validation loss = 0.5609



Epoch 10:  20%|██        | 40/200 [19:03<2:10:43, 49.02s/batch, loss=0.448]

Total iteration 1849, validation loss = 0.5602



Epoch 10:  22%|██▎       | 45/200 [21:24<2:05:13, 48.47s/batch, loss=0.429]

Total iteration 1854, validation loss = 0.5603



Epoch 10:  25%|██▌       | 50/200 [23:46<2:00:42, 48.28s/batch, loss=0.602]

Total iteration 1859, validation loss = 0.5624



Epoch 10:  28%|██▊       | 55/200 [26:07<1:56:59, 48.41s/batch, loss=0.407]

Total iteration 1864, validation loss = 0.5652



Epoch 10:  30%|███       | 60/200 [28:29<1:53:06, 48.47s/batch, loss=0.294]

Total iteration 1869, validation loss = 0.5704



Epoch 10:  32%|███▎      | 65/200 [30:54<1:50:52, 49.28s/batch, loss=0.408]

Total iteration 1874, validation loss = 0.5684



Epoch 10:  35%|███▌      | 70/200 [33:20<1:48:13, 49.95s/batch, loss=0.361]

Total iteration 1879, validation loss = 0.5703



Epoch 10:  38%|███▊      | 75/200 [35:46<1:43:51, 49.85s/batch, loss=0.482]

Total iteration 1884, validation loss = 0.5720



Epoch 10:  40%|████      | 80/200 [38:10<1:38:41, 49.34s/batch, loss=0.384]

Total iteration 1889, validation loss = 0.5740



Epoch 10:  42%|████▎     | 85/200 [40:29<1:31:55, 47.96s/batch, loss=0.505]

Total iteration 1894, validation loss = 0.5752



Epoch 10:  45%|████▌     | 90/200 [42:54<1:30:10, 49.19s/batch, loss=0.278]

Total iteration 1899, validation loss = 0.5774



Epoch 10:  47%|████▋     | 94/200 [45:17<25:15, 14.30s/batch, loss=0.471]  

Total iteration 1904, validation loss = 0.5775



Epoch 10:  50%|█████     | 100/200 [47:36<1:19:35, 47.76s/batch, loss=0.501]

Total iteration 1909, validation loss = 0.5766



Epoch 10:  52%|█████▎    | 105/200 [49:56<1:15:40, 47.79s/batch, loss=0.378]

Total iteration 1914, validation loss = 0.5769



Epoch 10:  55%|█████▌    | 110/200 [52:15<1:11:32, 47.69s/batch, loss=0.38] 

Total iteration 1919, validation loss = 0.5772



Epoch 10:  57%|█████▊    | 115/200 [54:37<1:08:22, 48.26s/batch, loss=0.534]

Total iteration 1924, validation loss = 0.5730



Epoch 10:  60%|█████▉    | 119/200 [57:07<18:59, 14.07s/batch, loss=0.413]  

Total iteration 1929, validation loss = 0.5661



Epoch 10:  62%|██████▎   | 125/200 [59:31<1:01:59, 49.60s/batch, loss=0.322]

Total iteration 1934, validation loss = 0.5622



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 10:  65%|██████▌   | 130/200 [1:01:54<57:04, 48.92s/batch, loss=0.28] 

Total iteration 1939, validation loss = 0.5597



Epoch 10:  68%|██████▊   | 135/200 [1:04:11<50:54, 46.99s/batch, loss=0.422]

Total iteration 1944, validation loss = 0.5562



Epoch 10:  70%|███████   | 140/200 [1:06:33<48:23, 48.40s/batch, loss=0.432]

Total iteration 1949, validation loss = 0.5549



Epoch 10:  72%|███████▎  | 145/200 [1:08:52<43:44, 47.72s/batch, loss=0.64] 

Total iteration 1954, validation loss = 0.5570



Epoch 10:  75%|███████▌  | 150/200 [1:11:12<39:46, 47.73s/batch, loss=0.603]

Total iteration 1959, validation loss = 0.5576



Epoch 10:  78%|███████▊  | 155/200 [1:13:32<35:47, 47.72s/batch, loss=0.508]

Total iteration 1964, validation loss = 0.5589



  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 10:  80%|████████  | 160/200 [1:15:52<31:46, 47.66s/batch, loss=0.276]

Total iteration 1969, validation loss = 0.5561



Epoch 10:  82%|████████▎ | 165/200 [1:18:10<27:40, 47.44s/batch, loss=0.308]

Total iteration 1974, validation loss = 0.5565



Epoch 10:  85%|████████▌ | 170/200 [1:20:28<23:35, 47.17s/batch, loss=0.323]

Total iteration 1979, validation loss = 0.5570



Epoch 10:  88%|████████▊ | 175/200 [1:22:50<20:03, 48.16s/batch, loss=0.227]

Total iteration 1984, validation loss = 0.5572



Epoch 10:  90%|█████████ | 180/200 [1:25:10<15:57, 47.88s/batch, loss=0.393]

Total iteration 1989, validation loss = 0.5548



Epoch 10:  92%|█████████▎| 185/200 [1:27:34<12:15, 49.01s/batch, loss=0.565]

Total iteration 1994, validation loss = 0.5572



Epoch 10:  95%|█████████▌| 190/200 [1:29:57<08:08, 48.81s/batch, loss=0.342]

Total iteration 1999, validation loss = 0.5581



Epoch 10:  98%|█████████▊| 195/200 [1:32:19<04:02, 48.57s/batch, loss=0.501]

Total iteration 2004, validation loss = 0.5571



Epoch 10: 100%|██████████| 200/200 [1:34:29<00:00, 28.35s/batch, loss=0.32] 

Total iteration 2009, validation loss = 0.5541






In [413]:
# Write the state_dict of `model` (not the pickled module object) to the baseline run folder.
torch.save(
    obj=model.state_dict(),
    f='../runs/baseline/baseline_final_model.pt',
)

In [414]:
# Common errors and how to fix them:

# Error:
#   RuntimeError: running_mean should contain 1 elements not 8
# Fix: The num_features of one of your BatchNorm3d layers does not match the number of channels of its input

# Error:
#   RuntimeError: CUDA out of memory.
# Fix: Make the model / batch size smaller
# First try a smaller batch size. It may require a longer training time, but it does not reduce the expressivity of the model.
# Only if that is not enough, decrease the complexity of the model.

# Error:
#   RuntimeError: Given groups=1, weight of size [1, 1, 1, 1, 1], expected input[8, 4, 5, 32, 32] to have 1 channels, but got 4 channels instead
# Fix: The self-attention layer was built with the wrong in_channels; set it to the channel count of its input (4 in the example above)

# Experiment 1: 3D Self-Attention after 3D Conv Layers

In [9]:
# Make a fresh, timestamped log directory with a Checkpoints sub-directory
# (DIFFERENT DIRECTORY FROM BASELINE, so the two sets of runs never mix).
dir_nm = datetime.now(tz=pytz.utc).astimezone(timezone('US/Pacific')).strftime('%Y-%m-%d_%H-%M-%S')
# dir_nm = "first_mini_c2fc2"
# log_dir = os.path.join('../runs/baseline', dir_nm) # running from this notebook since the other one gives cuda memory errors
log_dir = os.path.join('../runs/experiment_att', dir_nm)
# os.makedirs also creates missing parents (e.g. ../runs/experiment_att on a fresh
# checkout), where os.mkdir would raise FileNotFoundError. exist_ok is left at its
# default, so an already existing Checkpoints directory still raises FileExistsError
# instead of silently reusing an earlier run.
os.makedirs(os.path.join(log_dir, 'Checkpoints'))


# Model, optimizer, criterion
# model = baseline_3DCNN(in_num_ch=1)
model2 = selfattn_3DCNN(in_num_ch=1)
optimizer2 = optim.Adam(model2.parameters(), lr = 1e-4)
# BCEWithLogitsLoss applies the sigmoid itself, so it must be fed raw logits.
criterion2 = torch.nn.BCEWithLogitsLoss()

In [10]:
# Train the experimental (self-attention) model for 10 epochs.
# With val_every=5 the output below reports a validation loss every 5 iterations.
# The two returned objects are presumably the train / validation loss histories
# (going by their names) - confirm against utils.model_run.train.
train_loss_dict2, val_loss_dict2 = train(
    model2,
    optimizer2,
    criterion2,
    loader_train,
    loader_val,
    log_dir,
    device=device,
    epochs=10,
    val_every=5,
)

Epoch 1:   2%|▏         | 4/200 [00:28<16:18,  4.99s/batch, loss=0.669]

Total iteration 5, validation loss = 0.6871


Epoch 1:   2%|▎         | 5/200 [02:46<2:55:06, 53.88s/batch, loss=0.671]




Epoch 1:   4%|▍         | 9/200 [03:03<44:29, 13.98s/batch, loss=0.675]  

Total iteration 10, validation loss = 0.6864


Epoch 1:   5%|▌         | 10/200 [05:13<2:41:10, 50.90s/batch, loss=0.655]




Epoch 1:   7%|▋         | 14/200 [05:29<44:48, 14.45s/batch, loss=0.597]  

Total iteration 15, validation loss = 0.6857


Epoch 1:   8%|▊         | 15/200 [07:40<2:35:21, 50.39s/batch, loss=0.597]




Epoch 1:  10%|▉         | 19/200 [07:56<43:53, 14.55s/batch, loss=0.607]  

Total iteration 20, validation loss = 0.6846


Epoch 1:  10%|█         | 20/200 [10:07<2:31:17, 50.43s/batch, loss=0.713]




Epoch 1:  12%|█▏        | 24/200 [10:23<42:46, 14.58s/batch, loss=0.696]  

Total iteration 25, validation loss = 0.6833


Epoch 1:  12%|█▎        | 25/200 [12:34<2:26:11, 50.12s/batch, loss=0.659]




Epoch 1:  14%|█▍        | 29/200 [12:50<41:33, 14.58s/batch, loss=0.596]  

Total iteration 30, validation loss = 0.6817


Epoch 1:  15%|█▌        | 30/200 [15:00<2:22:18, 50.23s/batch, loss=0.642]




Epoch 1:  17%|█▋        | 34/200 [15:17<40:50, 14.76s/batch, loss=0.672]  

Total iteration 35, validation loss = 0.6791


Epoch 1:  18%|█▊        | 35/200 [17:27<2:18:06, 50.22s/batch, loss=0.604]




Epoch 1:  20%|█▉        | 39/200 [17:44<39:22, 14.68s/batch, loss=0.578]  

Total iteration 40, validation loss = 0.6757


Epoch 1:  20%|██        | 40/200 [19:52<2:11:44, 49.40s/batch, loss=0.582]




Epoch 1:  22%|██▏       | 44/200 [20:08<37:19, 14.36s/batch, loss=0.611]  

Total iteration 45, validation loss = 0.6713


Epoch 1:  22%|██▎       | 45/200 [22:15<2:07:08, 49.22s/batch, loss=0.569]




Epoch 1:  24%|██▍       | 49/200 [22:32<36:30, 14.51s/batch, loss=0.734]  

Total iteration 50, validation loss = 0.6655


Epoch 1:  25%|██▌       | 50/200 [24:40<2:03:24, 49.36s/batch, loss=0.578]




Epoch 1:  27%|██▋       | 54/200 [24:56<34:53, 14.34s/batch, loss=0.674]  

Total iteration 55, validation loss = 0.6589


Epoch 1:  28%|██▊       | 55/200 [27:09<2:02:45, 50.80s/batch, loss=0.592]




Epoch 1:  30%|██▉       | 59/200 [27:26<34:42, 14.77s/batch, loss=0.596]  

Total iteration 60, validation loss = 0.6551


Epoch 1:  30%|███       | 60/200 [29:30<1:53:02, 48.45s/batch, loss=0.594]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 1:  32%|███▏      | 64/200 [29:46<32:02, 14.14s/batch, loss=0.546]  

Total iteration 65, validation loss = 0.6456


Epoch 1:  32%|███▎      | 65/200 [31:50<1:47:49, 47.92s/batch, loss=0.544]




Epoch 1:  34%|███▍      | 69/200 [32:06<30:33, 13.99s/batch, loss=0.754]  

Total iteration 70, validation loss = 0.6345


Epoch 1:  35%|███▌      | 70/200 [34:07<1:41:53, 47.03s/batch, loss=0.559]




Epoch 1:  37%|███▋      | 74/200 [34:23<28:58, 13.80s/batch, loss=0.644]  

Total iteration 75, validation loss = 0.6231


Epoch 1:  38%|███▊      | 75/200 [36:30<1:41:06, 48.53s/batch, loss=0.667]




Epoch 1:  40%|███▉      | 79/200 [36:46<28:55, 14.34s/batch, loss=0.595]  

Total iteration 80, validation loss = 0.6123


Epoch 1:  40%|████      | 80/200 [38:53<1:37:20, 48.67s/batch, loss=0.581]




Epoch 1:  42%|████▏     | 84/200 [39:08<27:22, 14.16s/batch, loss=0.67]   

Total iteration 85, validation loss = 0.6066


Epoch 1:  42%|████▎     | 85/200 [41:16<1:34:10, 49.13s/batch, loss=0.688]




Epoch 1:  44%|████▍     | 89/200 [41:32<26:24, 14.28s/batch, loss=0.71]   

Total iteration 90, validation loss = 0.6009


Epoch 1:  45%|████▌     | 90/200 [43:40<1:30:19, 49.27s/batch, loss=0.562]




Epoch 1:  47%|████▋     | 94/200 [43:57<25:22, 14.36s/batch, loss=0.638]  

Total iteration 95, validation loss = 0.5961


Epoch 1:  48%|████▊     | 95/200 [46:06<1:27:02, 49.74s/batch, loss=0.621]




Epoch 1:  50%|████▉     | 99/200 [46:22<24:16, 14.42s/batch, loss=0.617]  

Total iteration 100, validation loss = 0.5929


Epoch 1:  50%|█████     | 100/200 [48:31<1:22:30, 49.50s/batch, loss=0.559]




Epoch 1:  52%|█████▏    | 104/200 [48:46<22:59, 14.37s/batch, loss=0.601]  

Total iteration 105, validation loss = 0.5930


Epoch 1:  52%|█████▎    | 105/200 [50:51<1:16:27, 48.29s/batch, loss=0.623]




Epoch 1:  55%|█████▍    | 109/200 [51:07<21:20, 14.07s/batch, loss=0.677]  

Total iteration 110, validation loss = 0.5908


Epoch 1:  55%|█████▌    | 110/200 [53:14<1:13:17, 48.86s/batch, loss=0.563]




Epoch 1:  57%|█████▋    | 114/200 [53:30<20:22, 14.21s/batch, loss=0.566]  

Total iteration 115, validation loss = 0.5896


Epoch 1:  57%|█████▊    | 115/200 [55:40<1:10:26, 49.73s/batch, loss=0.58]




Epoch 1:  60%|█████▉    | 119/200 [55:56<19:33, 14.48s/batch, loss=0.615]  

Total iteration 120, validation loss = 0.5873


Epoch 1:  60%|██████    | 120/200 [58:08<1:07:08, 50.36s/batch, loss=0.652]




Epoch 1:  62%|██████▏   | 124/200 [58:23<18:27, 14.57s/batch, loss=0.588]  

Total iteration 125, validation loss = 0.5848


Epoch 1:  62%|██████▎   | 125/200 [1:00:35<1:03:06, 50.49s/batch, loss=0.711]




Epoch 1:  64%|██████▍   | 129/200 [1:00:51<17:20, 14.65s/batch, loss=0.641]  

Total iteration 130, validation loss = 0.5875


Epoch 1:  65%|██████▌   | 130/200 [1:03:03<59:09, 50.71s/batch, loss=0.579]




Epoch 1:  67%|██████▋   | 134/200 [1:03:19<16:13, 14.75s/batch, loss=0.657]

Total iteration 135, validation loss = 0.5885


Epoch 1:  68%|██████▊   | 135/200 [1:05:28<54:02, 49.88s/batch, loss=0.542]




Epoch 1:  70%|██████▉   | 139/200 [1:05:44<14:43, 14.48s/batch, loss=0.685]

Total iteration 140, validation loss = 0.5918


Epoch 1:  70%|███████   | 140/200 [1:07:49<48:12, 48.22s/batch, loss=0.696]




Epoch 1:  72%|███████▏  | 144/200 [1:08:04<13:06, 14.04s/batch, loss=0.596]

Total iteration 145, validation loss = 0.5959


Epoch 1:  72%|███████▎  | 145/200 [1:10:06<43:09, 47.08s/batch, loss=0.666]




Epoch 1:  74%|███████▍  | 149/200 [1:10:22<11:43, 13.79s/batch, loss=0.633]

Total iteration 150, validation loss = 0.5933


Epoch 1:  75%|███████▌  | 150/200 [1:12:32<41:13, 49.46s/batch, loss=0.62] 




Epoch 1:  77%|███████▋  | 154/200 [1:12:48<11:09, 14.55s/batch, loss=0.788]

Total iteration 155, validation loss = 0.5953


Epoch 1:  78%|███████▊  | 155/200 [1:14:54<36:38, 48.86s/batch, loss=0.601]




Epoch 1:  80%|███████▉  | 159/200 [1:15:10<09:42, 14.21s/batch, loss=0.541]

Total iteration 160, validation loss = 0.6050


Epoch 1:  80%|████████  | 160/200 [1:17:20<33:12, 49.80s/batch, loss=0.52] 




Epoch 1:  82%|████████▏ | 164/200 [1:17:36<08:39, 14.44s/batch, loss=0.622]

Total iteration 165, validation loss = 0.6051


Epoch 1:  82%|████████▎ | 165/200 [1:19:44<28:41, 49.17s/batch, loss=0.694]




Epoch 1:  84%|████████▍ | 169/200 [1:20:00<07:24, 14.35s/batch, loss=0.703]

Total iteration 170, validation loss = 0.6025


Epoch 1:  85%|████████▌ | 170/200 [1:22:06<24:16, 48.53s/batch, loss=0.673]




Epoch 1:  87%|████████▋ | 174/200 [1:22:22<06:12, 14.34s/batch, loss=0.669]

Total iteration 175, validation loss = 0.5980


Epoch 1:  88%|████████▊ | 175/200 [1:24:27<20:06, 48.27s/batch, loss=0.658]




Epoch 1:  90%|████████▉ | 179/200 [1:24:44<04:58, 14.23s/batch, loss=0.574]

Total iteration 180, validation loss = 0.5917


Epoch 1:  90%|█████████ | 180/200 [1:26:48<16:04, 48.24s/batch, loss=0.594]




Epoch 1:  92%|█████████▏| 184/200 [1:27:05<03:47, 14.23s/batch, loss=0.601]

Total iteration 185, validation loss = 0.5877


Epoch 1:  92%|█████████▎| 185/200 [1:29:18<12:37, 50.51s/batch, loss=0.573]




Epoch 1:  94%|█████████▍| 189/200 [1:29:33<02:40, 14.62s/batch, loss=0.749]

Total iteration 190, validation loss = 0.5809


Epoch 1:  95%|█████████▌| 190/200 [1:31:40<08:10, 49.05s/batch, loss=0.637]




Epoch 1:  97%|█████████▋| 194/200 [1:31:56<01:26, 14.34s/batch, loss=0.536]

Total iteration 195, validation loss = 0.5782


Epoch 1:  98%|█████████▊| 195/200 [1:34:06<04:08, 49.61s/batch, loss=0.512]




Epoch 1: 100%|█████████▉| 199/200 [1:34:11<00:12, 12.71s/batch, loss=0.656]

Total iteration 200, validation loss = 0.5775


Epoch 1: 100%|██████████| 200/200 [1:36:08<00:00, 28.84s/batch, loss=0.636]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 2:   2%|▏         | 4/200 [00:28<16:29,  5.05s/batch, loss=0.68] 

Total iteration 206, validation loss = 0.5779


Epoch 2:   2%|▎         | 5/200 [02:31<2:37:54, 48.59s/batch, loss=0.655]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 2:   4%|▍         | 9/200 [02:47<40:29, 12.72s/batch, loss=0.647]  

Total iteration 211, validation loss = 0.5777


Epoch 2:   5%|▌         | 10/200 [04:47<2:27:42, 46.64s/batch, loss=0.646]




Epoch 2:   7%|▋         | 14/200 [05:03<41:48, 13.49s/batch, loss=0.613]  

Total iteration 216, validation loss = 0.5800


Epoch 2:   8%|▊         | 15/200 [07:06<2:26:25, 47.49s/batch, loss=0.586]




Epoch 2:  10%|▉         | 19/200 [07:23<41:53, 13.89s/batch, loss=0.664]  

Total iteration 221, validation loss = 0.5786


Epoch 2:  10%|█         | 20/200 [09:27<2:23:23, 47.79s/batch, loss=0.778]




Epoch 2:  12%|█▏        | 24/200 [09:43<40:58, 13.97s/batch, loss=0.535]  

Total iteration 226, validation loss = 0.5775


Epoch 2:  12%|█▎        | 25/200 [11:49<2:21:13, 48.42s/batch, loss=0.567]




Epoch 2:  14%|█▍        | 29/200 [12:05<40:14, 14.12s/batch, loss=0.571]  

Total iteration 231, validation loss = 0.5748


Epoch 2:  15%|█▌        | 30/200 [14:08<2:15:10, 47.71s/batch, loss=0.644]




Epoch 2:  17%|█▋        | 34/200 [14:24<38:38, 13.97s/batch, loss=0.511]  

Total iteration 236, validation loss = 0.5726


Epoch 2:  18%|█▊        | 35/200 [16:33<2:15:47, 49.38s/batch, loss=0.672]




Epoch 2:  20%|█▉        | 39/200 [16:49<38:37, 14.39s/batch, loss=0.646]  

Total iteration 241, validation loss = 0.5737


Epoch 2:  20%|██        | 40/200 [18:55<2:09:09, 48.44s/batch, loss=0.57]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 2:  22%|██▏       | 44/200 [19:11<37:03, 14.25s/batch, loss=0.587]  

Total iteration 246, validation loss = 0.5722


Epoch 2:  22%|██▎       | 45/200 [21:20<2:07:52, 49.50s/batch, loss=0.634]




Epoch 2:  24%|██▍       | 49/200 [21:36<36:14, 14.40s/batch, loss=0.778]  

Total iteration 251, validation loss = 0.5690


Epoch 2:  25%|██▌       | 50/200 [23:47<2:05:07, 50.05s/batch, loss=0.493]




Epoch 2:  27%|██▋       | 54/200 [24:03<35:19, 14.52s/batch, loss=0.546]  

Total iteration 256, validation loss = 0.5678


Epoch 2:  28%|██▊       | 55/200 [26:14<2:01:51, 50.42s/batch, loss=0.501]




Epoch 2:  30%|██▉       | 59/200 [26:30<34:17, 14.59s/batch, loss=0.513]  

Total iteration 261, validation loss = 0.5644


Epoch 2:  30%|███       | 60/200 [28:36<1:53:31, 48.65s/batch, loss=0.423]




Epoch 2:  32%|███▏      | 64/200 [28:53<32:30, 14.34s/batch, loss=0.539]  

Total iteration 266, validation loss = 0.5627


Epoch 2:  32%|███▎      | 65/200 [30:59<1:49:21, 48.60s/batch, loss=0.599]




Epoch 2:  34%|███▍      | 69/200 [31:15<30:56, 14.17s/batch, loss=0.765]  

Total iteration 271, validation loss = 0.5567


Epoch 2:  35%|███▌      | 70/200 [33:22<1:46:13, 49.03s/batch, loss=0.728]




Epoch 2:  37%|███▋      | 74/200 [33:38<30:01, 14.29s/batch, loss=0.653]  

Total iteration 276, validation loss = 0.5737


Epoch 2:  38%|███▊      | 75/200 [35:50<1:44:58, 50.39s/batch, loss=0.589]




Epoch 2:  40%|███▉      | 79/200 [36:07<29:40, 14.71s/batch, loss=0.619]  

Total iteration 281, validation loss = 0.5774


Epoch 2:  40%|████      | 80/200 [38:17<1:40:17, 50.15s/batch, loss=0.631]




Epoch 2:  42%|████▏     | 84/200 [38:34<28:33, 14.77s/batch, loss=0.546]  

Total iteration 286, validation loss = 0.5771


Epoch 2:  42%|████▎     | 85/200 [40:43<1:35:46, 49.97s/batch, loss=0.667]




Epoch 2:  44%|████▍     | 89/200 [40:59<26:47, 14.49s/batch, loss=0.638]  

Total iteration 291, validation loss = 0.5801


Epoch 2:  45%|████▌     | 90/200 [43:10<1:32:19, 50.36s/batch, loss=0.676]




Epoch 2:  47%|████▋     | 94/200 [43:26<25:46, 14.59s/batch, loss=0.585]  

Total iteration 296, validation loss = 0.5832


Epoch 2:  48%|████▊     | 95/200 [45:38<1:28:20, 50.48s/batch, loss=0.52]




Epoch 2:  50%|████▉     | 99/200 [45:54<24:42, 14.68s/batch, loss=0.643]  

Total iteration 301, validation loss = 0.5833


Epoch 2:  50%|█████     | 100/200 [48:05<1:24:15, 50.56s/batch, loss=0.515]




Epoch 2:  52%|█████▏    | 104/200 [48:22<23:27, 14.67s/batch, loss=0.622]  

Total iteration 306, validation loss = 0.5758


Epoch 2:  52%|█████▎    | 105/200 [50:28<1:17:20, 48.84s/batch, loss=0.599]




Epoch 2:  55%|█████▍    | 109/200 [50:43<21:33, 14.22s/batch, loss=0.709]  

Total iteration 311, validation loss = 0.5707


Epoch 2:  55%|█████▌    | 110/200 [52:51<1:13:33, 49.04s/batch, loss=0.548]




Epoch 2:  57%|█████▋    | 114/200 [53:08<20:40, 14.43s/batch, loss=0.538]  

Total iteration 316, validation loss = 0.5668


Epoch 2:  57%|█████▊    | 115/200 [55:18<1:10:36, 49.85s/batch, loss=0.642]




Epoch 2:  60%|█████▉    | 119/200 [55:33<19:30, 14.45s/batch, loss=0.531]  

Total iteration 321, validation loss = 0.5690


Epoch 2:  60%|██████    | 120/200 [57:48<1:08:32, 51.40s/batch, loss=0.565]




Epoch 2:  62%|██████▏   | 124/200 [58:04<18:48, 14.85s/batch, loss=0.715]  

Total iteration 326, validation loss = 0.5659


Epoch 2:  62%|██████▎   | 125/200 [1:00:07<1:00:09, 48.12s/batch, loss=0.611]




Epoch 2:  64%|██████▍   | 129/200 [1:00:24<16:52, 14.27s/batch, loss=0.519]  

Total iteration 331, validation loss = 0.5647


Epoch 2:  65%|██████▌   | 130/200 [1:02:32<57:09, 48.99s/batch, loss=0.489]




Epoch 2:  67%|██████▋   | 134/200 [1:02:48<15:54, 14.45s/batch, loss=0.499]

Total iteration 336, validation loss = 0.5665


Epoch 2:  68%|██████▊   | 135/200 [1:04:55<52:55, 48.85s/batch, loss=0.61] 




Epoch 2:  70%|██████▉   | 139/200 [1:05:11<14:28, 14.24s/batch, loss=0.642]

Total iteration 341, validation loss = 0.5670


Epoch 2:  70%|███████   | 140/200 [1:07:24<50:36, 50.60s/batch, loss=0.591]




Epoch 2:  72%|███████▏  | 144/200 [1:07:40<13:40, 14.65s/batch, loss=0.538]

Total iteration 346, validation loss = 0.5699


Epoch 2:  72%|███████▎  | 145/200 [1:09:51<46:18, 50.51s/batch, loss=0.534]




Epoch 2:  74%|███████▍  | 149/200 [1:10:07<12:26, 14.63s/batch, loss=0.722]

Total iteration 351, validation loss = 0.5692


Epoch 2:  75%|███████▌  | 150/200 [1:12:18<42:04, 50.48s/batch, loss=0.589]




Epoch 2:  77%|███████▋  | 154/200 [1:12:34<11:11, 14.61s/batch, loss=0.757]

Total iteration 356, validation loss = 0.5712


Epoch 2:  78%|███████▊  | 155/200 [1:14:40<36:36, 48.81s/batch, loss=0.595]




Epoch 2:  80%|███████▉  | 159/200 [1:14:56<09:41, 14.19s/batch, loss=0.665]

Total iteration 361, validation loss = 0.5715


Epoch 2:  80%|████████  | 160/200 [1:17:07<33:24, 50.12s/batch, loss=0.53] 




Epoch 2:  82%|████████▏ | 164/200 [1:17:23<08:43, 14.53s/batch, loss=0.556]

Total iteration 366, validation loss = 0.5720


Epoch 2:  82%|████████▎ | 165/200 [1:19:32<28:59, 49.71s/batch, loss=0.515]




Epoch 2:  84%|████████▍ | 169/200 [1:19:49<07:33, 14.64s/batch, loss=0.681]

Total iteration 371, validation loss = 0.5707


Epoch 2:  85%|████████▌ | 170/200 [1:21:54<24:14, 48.47s/batch, loss=0.562]




Epoch 2:  87%|████████▋ | 174/200 [1:22:11<06:14, 14.40s/batch, loss=0.595]

Total iteration 376, validation loss = 0.5735


Epoch 2:  88%|████████▊ | 175/200 [1:24:17<20:16, 48.67s/batch, loss=0.642]




Epoch 2:  90%|████████▉ | 179/200 [1:24:33<04:57, 14.17s/batch, loss=0.6]  

Total iteration 381, validation loss = 0.5708


Epoch 2:  90%|█████████ | 180/200 [1:26:36<15:52, 47.62s/batch, loss=0.572]




Epoch 2:  92%|█████████▏| 184/200 [1:26:53<03:46, 14.14s/batch, loss=0.585]

Total iteration 386, validation loss = 0.5692


Epoch 2:  92%|█████████▎| 185/200 [1:28:55<11:52, 47.50s/batch, loss=0.617]




Epoch 2:  94%|█████████▍| 189/200 [1:29:11<02:32, 13.90s/batch, loss=0.585]

Total iteration 391, validation loss = 0.5672


Epoch 2:  95%|█████████▌| 190/200 [1:31:19<08:09, 48.90s/batch, loss=0.575]




Epoch 2:  97%|█████████▋| 194/200 [1:31:35<01:25, 14.24s/batch, loss=0.692]

Total iteration 396, validation loss = 0.5653


Epoch 2:  98%|█████████▊| 195/200 [1:33:40<04:02, 48.40s/batch, loss=0.618]




Epoch 2: 100%|█████████▉| 199/200 [1:33:46<00:12, 12.42s/batch, loss=0.655]

Total iteration 401, validation loss = 0.5636


Epoch 2: 100%|██████████| 200/200 [1:35:41<00:00, 28.71s/batch, loss=0.574]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 3:   2%|▏         | 4/200 [00:28<16:33,  5.07s/batch, loss=0.537]

Total iteration 407, validation loss = 0.5628


Epoch 3:   2%|▎         | 5/200 [02:28<2:35:01, 47.70s/batch, loss=0.598]




Epoch 3:   4%|▍         | 9/200 [02:45<40:21, 12.68s/batch, loss=0.676]  

Total iteration 412, validation loss = 0.5618


Epoch 3:   5%|▌         | 10/200 [04:51<2:33:35, 48.50s/batch, loss=0.476]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 3:   7%|▋         | 14/200 [05:07<43:20, 13.98s/batch, loss=0.452]

Total iteration 417, validation loss = 0.5617


Epoch 3:   8%|▊         | 15/200 [07:15<2:31:59, 49.29s/batch, loss=0.673]




Epoch 3:  10%|▉         | 19/200 [07:32<43:44, 14.50s/batch, loss=0.685]  

Total iteration 422, validation loss = 0.5550


Epoch 3:  10%|█         | 20/200 [09:39<2:26:41, 48.90s/batch, loss=0.706]




Epoch 3:  12%|█▏        | 24/200 [09:55<41:45, 14.23s/batch, loss=0.581]  

Total iteration 427, validation loss = 0.5544


Epoch 3:  12%|█▎        | 25/200 [11:59<2:20:28, 48.16s/batch, loss=0.604]




Epoch 3:  14%|█▍        | 29/200 [12:15<40:08, 14.08s/batch, loss=0.675]  

Total iteration 432, validation loss = 0.5568


Epoch 3:  15%|█▌        | 30/200 [14:21<2:17:17, 48.46s/batch, loss=0.51]




Epoch 3:  17%|█▋        | 34/200 [14:38<39:21, 14.23s/batch, loss=0.51]   

Total iteration 437, validation loss = 0.5583


Epoch 3:  18%|█▊        | 35/200 [16:43<2:13:32, 48.56s/batch, loss=0.728]




Epoch 3:  20%|█▉        | 39/200 [17:00<38:31, 14.36s/batch, loss=0.629]  

Total iteration 442, validation loss = 0.5548


Epoch 3:  20%|██        | 40/200 [19:06<2:09:15, 48.47s/batch, loss=0.482]




Epoch 3:  22%|██▏       | 44/200 [19:22<36:41, 14.11s/batch, loss=0.609]  

Total iteration 447, validation loss = 0.5529


Epoch 3:  22%|██▎       | 45/200 [21:25<2:03:47, 47.92s/batch, loss=0.563]




Epoch 3:  24%|██▍       | 49/200 [21:42<35:17, 14.02s/batch, loss=0.546]  

Total iteration 452, validation loss = 0.5525


Epoch 3:  25%|██▌       | 50/200 [23:56<2:07:22, 50.95s/batch, loss=0.558]




Epoch 3:  27%|██▋       | 54/200 [24:12<35:49, 14.72s/batch, loss=0.536]  

Total iteration 457, validation loss = 0.5534


Epoch 3:  28%|██▊       | 55/200 [26:19<1:58:51, 49.18s/batch, loss=0.396]




Epoch 3:  30%|██▉       | 59/200 [26:35<33:34, 14.28s/batch, loss=0.57]   

Total iteration 462, validation loss = 0.5554


Epoch 3:  30%|███       | 60/200 [28:46<1:57:18, 50.28s/batch, loss=0.633]




Epoch 3:  32%|███▏      | 64/200 [29:02<33:09, 14.63s/batch, loss=0.55]   

Total iteration 467, validation loss = 0.5593


Epoch 3:  32%|███▎      | 65/200 [31:09<1:50:14, 49.00s/batch, loss=0.521]




Epoch 3:  34%|███▍      | 69/200 [31:25<31:07, 14.26s/batch, loss=0.813]  

Total iteration 472, validation loss = 0.5580


Epoch 3:  35%|███▌      | 70/200 [33:30<1:45:11, 48.55s/batch, loss=0.587]




Epoch 3:  37%|███▋      | 74/200 [33:46<29:42, 14.14s/batch, loss=0.502]  

Total iteration 477, validation loss = 0.5611


Epoch 3:  38%|███▊      | 75/200 [35:56<1:43:35, 49.72s/batch, loss=0.566]




Epoch 3:  40%|███▉      | 79/200 [36:12<29:05, 14.43s/batch, loss=0.598]  

Total iteration 482, validation loss = 0.5578


Epoch 3:  40%|████      | 80/200 [38:22<1:39:38, 49.82s/batch, loss=0.525]




Epoch 3:  42%|████▏     | 84/200 [38:38<27:55, 14.44s/batch, loss=0.429]  

Total iteration 487, validation loss = 0.5500


Epoch 3:  42%|████▎     | 85/200 [40:40<1:31:31, 47.75s/batch, loss=0.5]




Epoch 3:  44%|████▍     | 89/200 [40:57<25:50, 13.97s/batch, loss=0.483]  

Total iteration 492, validation loss = 0.5469


Epoch 3:  45%|████▌     | 90/200 [43:01<1:27:51, 47.92s/batch, loss=0.742]




Epoch 3:  47%|████▋     | 94/200 [43:18<24:58, 14.14s/batch, loss=0.641]  

Total iteration 497, validation loss = 0.5443


Epoch 3:  48%|████▊     | 95/200 [45:21<1:23:38, 47.79s/batch, loss=0.644]




Epoch 3:  50%|████▉     | 99/200 [45:37<23:35, 14.02s/batch, loss=0.468]  

Total iteration 502, validation loss = 0.5432


Epoch 3:  50%|█████     | 100/200 [47:45<1:21:41, 49.02s/batch, loss=0.775]




Epoch 3:  52%|█████▏    | 104/200 [48:01<22:48, 14.26s/batch, loss=0.495]  

Total iteration 507, validation loss = 0.5427


Epoch 3:  52%|█████▎    | 105/200 [50:11<1:18:43, 49.72s/batch, loss=0.572]




Epoch 3:  55%|█████▍    | 109/200 [50:27<22:01, 14.52s/batch, loss=0.58]   

Total iteration 512, validation loss = 0.5397


Epoch 3:  55%|█████▌    | 110/200 [52:35<1:13:59, 49.32s/batch, loss=0.477]




Epoch 3:  57%|█████▋    | 114/200 [52:51<20:34, 14.35s/batch, loss=0.72]   

Total iteration 517, validation loss = 0.5398


Epoch 3:  57%|█████▊    | 115/200 [54:52<1:06:49, 47.17s/batch, loss=0.557]




Epoch 3:  60%|█████▉    | 119/200 [55:08<18:39, 13.82s/batch, loss=0.423]  

Total iteration 522, validation loss = 0.5404


Epoch 3:  60%|██████    | 120/200 [57:10<1:02:55, 47.20s/batch, loss=0.497]




Epoch 3:  62%|██████▏   | 124/200 [57:27<17:30, 13.82s/batch, loss=0.417]  

Total iteration 527, validation loss = 0.5429


Epoch 3:  62%|██████▎   | 125/200 [59:31<59:49, 47.86s/batch, loss=0.835]




Epoch 3:  64%|██████▍   | 129/200 [59:48<16:45, 14.16s/batch, loss=0.556]

Total iteration 532, validation loss = 0.5439


Epoch 3:  65%|██████▌   | 130/200 [1:01:48<54:48, 46.98s/batch, loss=0.486]




Epoch 3:  67%|██████▋   | 134/200 [1:02:05<15:11, 13.81s/batch, loss=0.929]

Total iteration 537, validation loss = 0.5501


Epoch 3:  68%|██████▊   | 135/200 [1:04:17<54:26, 50.26s/batch, loss=0.505]




Epoch 3:  70%|██████▉   | 139/200 [1:04:33<14:48, 14.56s/batch, loss=0.665]

Total iteration 542, validation loss = 0.5677


Epoch 3:  70%|███████   | 140/200 [1:06:41<49:33, 49.55s/batch, loss=0.653]




Epoch 3:  72%|███████▏  | 144/200 [1:06:57<13:27, 14.42s/batch, loss=0.703]

Total iteration 547, validation loss = 0.5762


Epoch 3:  72%|███████▎  | 145/200 [1:09:10<46:30, 50.73s/batch, loss=0.535]




Epoch 3:  74%|███████▍  | 149/200 [1:09:27<12:30, 14.71s/batch, loss=0.644]

Total iteration 552, validation loss = 0.5787


Epoch 3:  75%|███████▌  | 150/200 [1:11:38<42:05, 50.51s/batch, loss=0.679]




Epoch 3:  77%|███████▋  | 154/200 [1:11:55<11:18, 14.75s/batch, loss=0.683]

Total iteration 557, validation loss = 0.5748


Epoch 3:  78%|███████▊  | 155/200 [1:14:05<37:52, 50.51s/batch, loss=0.408]




Epoch 3:  80%|███████▉  | 159/200 [1:14:21<09:59, 14.63s/batch, loss=0.625]

Total iteration 562, validation loss = 0.5674


Epoch 3:  80%|████████  | 160/200 [1:16:30<33:09, 49.73s/batch, loss=0.589]




Epoch 3:  82%|████████▏ | 164/200 [1:16:47<08:46, 14.64s/batch, loss=0.525]

Total iteration 567, validation loss = 0.5660


Epoch 3:  82%|████████▎ | 165/200 [1:18:55<28:56, 49.61s/batch, loss=0.669]




Epoch 3:  84%|████████▍ | 169/200 [1:19:11<07:26, 14.40s/batch, loss=0.589]

Total iteration 572, validation loss = 0.5670


Epoch 3:  85%|████████▌ | 170/200 [1:21:21<24:51, 49.71s/batch, loss=0.53] 




Epoch 3:  87%|████████▋ | 174/200 [1:21:37<06:16, 14.47s/batch, loss=0.723]

Total iteration 577, validation loss = 0.5655


Epoch 3:  88%|████████▊ | 175/200 [1:23:52<21:25, 51.41s/batch, loss=0.557]




Epoch 3:  90%|████████▉ | 179/200 [1:24:08<05:12, 14.89s/batch, loss=0.521]

Total iteration 582, validation loss = 0.5661


Epoch 3:  90%|█████████ | 180/200 [1:26:12<16:08, 48.41s/batch, loss=0.637]




Epoch 3:  92%|█████████▏| 184/200 [1:26:28<03:45, 14.11s/batch, loss=0.544]

Total iteration 587, validation loss = 0.5678


Epoch 3:  92%|█████████▎| 185/200 [1:28:36<12:17, 49.19s/batch, loss=0.537]




Epoch 3:  94%|█████████▍| 189/200 [1:28:52<02:37, 14.32s/batch, loss=0.508]

Total iteration 592, validation loss = 0.5648


Epoch 3:  95%|█████████▌| 190/200 [1:30:59<08:08, 48.83s/batch, loss=0.447]




Epoch 3:  97%|█████████▋| 194/200 [1:31:14<01:25, 14.22s/batch, loss=0.472]

Total iteration 597, validation loss = 0.5610


Epoch 3:  98%|█████████▊| 195/200 [1:33:24<04:08, 49.68s/batch, loss=0.477]




Epoch 3: 100%|█████████▉| 199/200 [1:33:30<00:12, 12.73s/batch, loss=0.701]

Total iteration 602, validation loss = 0.5601


Epoch 3: 100%|██████████| 200/200 [1:35:27<00:00, 28.64s/batch, loss=0.574]
  0%|          | 0/200 [00:00<?, ?batch/s]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 4:   2%|▏         | 4/200 [00:28<16:31,  5.06s/batch, loss=0.461]

Total iteration 608, validation loss = 0.5575


Epoch 4:   2%|▎         | 5/200 [02:34<2:42:03, 49.87s/batch, loss=0.38]




Epoch 4:   4%|▍         | 9/200 [02:51<41:42, 13.10s/batch, loss=0.675]  

Total iteration 613, validation loss = 0.5520


Epoch 4:   5%|▌         | 10/200 [04:57<2:34:58, 48.94s/batch, loss=0.594]




Epoch 4:   7%|▋         | 14/200 [05:13<43:32, 14.04s/batch, loss=0.571]  

Total iteration 618, validation loss = 0.5491


Epoch 4:   8%|▊         | 15/200 [07:18<2:28:53, 48.29s/batch, loss=0.546]




Epoch 4:  10%|▉         | 19/200 [07:34<42:28, 14.08s/batch, loss=0.593]  

Total iteration 623, validation loss = 0.5477


Epoch 4:  10%|█         | 20/200 [09:46<2:30:41, 50.23s/batch, loss=0.584]




Epoch 4:  12%|█▏        | 24/200 [10:03<43:06, 14.70s/batch, loss=0.529]  

Total iteration 628, validation loss = 0.5456


Epoch 4:  12%|█▎        | 25/200 [12:10<2:23:58, 49.37s/batch, loss=0.46]




Epoch 4:  14%|█▍        | 29/200 [12:27<41:04, 14.41s/batch, loss=0.566]  

Total iteration 633, validation loss = 0.5455


Epoch 4:  15%|█▌        | 30/200 [14:30<2:15:52, 47.96s/batch, loss=0.413]




Epoch 4:  17%|█▋        | 34/200 [14:46<38:45, 14.01s/batch, loss=0.538]  

Total iteration 638, validation loss = 0.5445


Epoch 4:  18%|█▊        | 35/200 [16:51<2:12:25, 48.15s/batch, loss=0.511]




Epoch 4:  20%|█▉        | 39/200 [17:08<37:50, 14.10s/batch, loss=0.383]  

Total iteration 643, validation loss = 0.5441


Epoch 4:  20%|██        | 40/200 [19:15<2:10:14, 48.84s/batch, loss=0.501]




Epoch 4:  22%|██▏       | 44/200 [19:31<36:58, 14.22s/batch, loss=0.62]   

Total iteration 648, validation loss = 0.5451


Epoch 4:  22%|██▎       | 45/200 [21:39<2:07:29, 49.35s/batch, loss=0.689]




Epoch 4:  24%|██▍       | 49/200 [21:56<36:25, 14.47s/batch, loss=0.502]  

Total iteration 653, validation loss = 0.5469


Epoch 4:  25%|██▌       | 50/200 [24:01<2:01:35, 48.63s/batch, loss=0.505]




Epoch 4:  27%|██▋       | 54/200 [24:17<34:28, 14.17s/batch, loss=0.451]  

Total iteration 658, validation loss = 0.5464


Epoch 4:  28%|██▊       | 55/200 [26:15<1:51:41, 46.22s/batch, loss=0.562]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)


Total iteration 663, validation loss = 0.5447


Epoch 4:  30%|███       | 60/200 [28:33<1:49:12, 46.80s/batch, loss=0.339]




Epoch 4:  32%|███▏      | 64/200 [28:49<31:08, 13.74s/batch, loss=0.498]  

Total iteration 668, validation loss = 0.5426


Epoch 4:  32%|███▎      | 65/200 [30:54<1:47:43, 47.87s/batch, loss=0.535]




Epoch 4:  34%|███▍      | 69/200 [31:09<30:31, 13.98s/batch, loss=0.514]  

Total iteration 673, validation loss = 0.5453


Epoch 4:  35%|███▌      | 70/200 [33:15<1:44:28, 48.22s/batch, loss=0.771]




Epoch 4:  37%|███▋      | 74/200 [33:32<30:14, 14.40s/batch, loss=1.1]    

Total iteration 678, validation loss = 0.5452


Epoch 4:  38%|███▊      | 75/200 [35:36<1:40:21, 48.18s/batch, loss=0.448]




Epoch 4:  40%|███▉      | 79/200 [35:52<28:25, 14.10s/batch, loss=0.332]  

Total iteration 683, validation loss = 0.5446


Epoch 4:  40%|████      | 80/200 [38:01<1:38:54, 49.45s/batch, loss=0.866]




Epoch 4:  42%|████▏     | 84/200 [38:17<27:48, 14.39s/batch, loss=0.577]  

Total iteration 688, validation loss = 0.5454


Epoch 4:  42%|████▎     | 85/200 [40:23<1:32:59, 48.52s/batch, loss=0.483]




Epoch 4:  44%|████▍     | 89/200 [40:39<26:13, 14.18s/batch, loss=0.474]  

Total iteration 693, validation loss = 0.5435


Epoch 4:  45%|████▌     | 90/200 [42:46<1:29:42, 48.93s/batch, loss=0.608]




Epoch 4:  47%|████▋     | 94/200 [43:03<25:13, 14.28s/batch, loss=0.58]   

Total iteration 698, validation loss = 0.5438


Epoch 4:  48%|████▊     | 95/200 [45:03<1:22:02, 46.88s/batch, loss=0.746]




Epoch 4:  50%|████▉     | 99/200 [45:19<23:09, 13.76s/batch, loss=0.655]  

Total iteration 703, validation loss = 0.5487


Epoch 4:  50%|█████     | 100/200 [47:25<1:20:44, 48.45s/batch, loss=0.542]




Epoch 4:  52%|█████▏    | 104/200 [47:41<22:38, 14.15s/batch, loss=0.497]  

Total iteration 708, validation loss = 0.5533


Epoch 4:  52%|█████▎    | 105/200 [49:50<1:18:05, 49.32s/batch, loss=0.516]




Epoch 4:  55%|█████▍    | 109/200 [50:07<21:59, 14.50s/batch, loss=0.762]  

Total iteration 713, validation loss = 0.5560


Epoch 4:  55%|█████▌    | 110/200 [52:08<1:11:06, 47.40s/batch, loss=0.589]




Epoch 4:  57%|█████▋    | 114/200 [52:25<20:13, 14.10s/batch, loss=0.543]  

Total iteration 718, validation loss = 0.5551


Epoch 4:  57%|█████▊    | 115/200 [54:29<1:07:43, 47.80s/batch, loss=0.57]




Epoch 4:  60%|█████▉    | 119/200 [54:45<18:53, 14.00s/batch, loss=0.527]  

Total iteration 723, validation loss = 0.5533


Epoch 4:  60%|██████    | 120/200 [56:49<1:04:02, 48.03s/batch, loss=0.436]




Epoch 4:  62%|██████▏   | 124/200 [57:07<18:07, 14.30s/batch, loss=0.394]  

Total iteration 728, validation loss = 0.5488


Epoch 4:  62%|██████▎   | 125/200 [59:12<1:00:39, 48.53s/batch, loss=0.679]




Epoch 4:  64%|██████▍   | 129/200 [59:28<16:45, 14.17s/batch, loss=0.486]  

Total iteration 733, validation loss = 0.5468


Epoch 4:  65%|██████▌   | 130/200 [1:01:36<57:22, 49.18s/batch, loss=0.42]




Epoch 4:  67%|██████▋   | 134/200 [1:01:52<15:44, 14.31s/batch, loss=0.558]

Total iteration 738, validation loss = 0.5439


Epoch 4:  68%|██████▊   | 135/200 [1:03:57<52:23, 48.36s/batch, loss=0.692]




Epoch 4:  70%|██████▉   | 139/200 [1:04:13<14:20, 14.11s/batch, loss=0.527]

Total iteration 743, validation loss = 0.5431


Epoch 4:  70%|███████   | 140/200 [1:06:19<48:14, 48.25s/batch, loss=0.497]




Epoch 4:  72%|███████▏  | 144/200 [1:06:36<13:17, 14.25s/batch, loss=0.516]

Total iteration 748, validation loss = 0.5402


Epoch 4:  72%|███████▎  | 145/200 [1:08:40<44:00, 48.02s/batch, loss=0.486]




Epoch 4:  74%|███████▍  | 149/200 [1:08:57<12:06, 14.24s/batch, loss=0.569]

Total iteration 753, validation loss = 0.5427


Epoch 4:  75%|███████▌  | 150/200 [1:11:00<39:53, 47.86s/batch, loss=0.681]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 4:  77%|███████▋  | 154/200 [1:11:16<10:42, 13.98s/batch, loss=0.509]

Total iteration 758, validation loss = 0.5432


Epoch 4:  78%|███████▊  | 155/200 [1:13:27<37:25, 49.89s/batch, loss=0.564]




Epoch 4:  80%|███████▉  | 159/200 [1:13:43<09:56, 14.54s/batch, loss=0.507]

Total iteration 763, validation loss = 0.5378


Epoch 4:  80%|████████  | 160/200 [1:15:51<32:56, 49.40s/batch, loss=0.44] 




Epoch 4:  82%|████████▏ | 164/200 [1:16:07<08:36, 14.35s/batch, loss=0.786]

Total iteration 768, validation loss = 0.5365


Epoch 4:  82%|████████▎ | 165/200 [1:18:14<28:25, 48.74s/batch, loss=0.533]




Epoch 4:  84%|████████▍ | 169/200 [1:18:29<07:20, 14.20s/batch, loss=0.675]

Total iteration 773, validation loss = 0.5369


Epoch 4:  85%|████████▌ | 170/200 [1:20:32<23:46, 47.54s/batch, loss=0.708]




Epoch 4:  87%|████████▋ | 174/200 [1:20:49<06:04, 14.03s/batch, loss=0.542]

Total iteration 778, validation loss = 0.5369


Epoch 4:  88%|████████▊ | 175/200 [1:23:00<20:48, 49.94s/batch, loss=0.586]




Epoch 4:  90%|████████▉ | 179/200 [1:23:17<05:07, 14.63s/batch, loss=0.411]

Total iteration 783, validation loss = 0.5347


Epoch 4:  90%|█████████ | 180/200 [1:25:23<16:20, 49.05s/batch, loss=0.517]




Epoch 4:  92%|█████████▏| 184/200 [1:25:40<03:49, 14.33s/batch, loss=0.538]

Total iteration 788, validation loss = 0.5340


Epoch 4:  92%|█████████▎| 185/200 [1:27:52<12:36, 50.44s/batch, loss=0.439]




Epoch 4:  94%|█████████▍| 189/200 [1:28:08<02:41, 14.68s/batch, loss=0.436]

Total iteration 793, validation loss = 0.5352


Epoch 4:  95%|█████████▌| 190/200 [1:30:11<08:00, 48.09s/batch, loss=0.545]




Epoch 4:  97%|█████████▋| 194/200 [1:30:28<01:24, 14.07s/batch, loss=0.751]

Total iteration 798, validation loss = 0.5375


Epoch 4:  98%|█████████▊| 195/200 [1:32:35<04:04, 48.84s/batch, loss=0.509]




Epoch 4: 100%|█████████▉| 199/200 [1:32:40<00:12, 12.54s/batch, loss=0.473]

Total iteration 803, validation loss = 0.5438


Epoch 4: 100%|██████████| 200/200 [1:34:39<00:00, 28.40s/batch, loss=0.533]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 5:   2%|▏         | 4/200 [00:28<16:37,  5.09s/batch, loss=0.498]

Total iteration 809, validation loss = 0.5500


Epoch 5:   2%|▎         | 5/200 [02:37<2:44:41, 50.68s/batch, loss=0.497]




Epoch 5:   4%|▍         | 9/200 [02:53<42:26, 13.33s/batch, loss=0.737]  

Total iteration 814, validation loss = 0.5488


Epoch 5:   5%|▌         | 10/200 [05:00<2:36:06, 49.30s/batch, loss=0.583]




Epoch 5:   7%|▋         | 14/200 [05:17<44:19, 14.30s/batch, loss=0.467]  

Total iteration 819, validation loss = 0.5468


Epoch 5:   8%|▊         | 15/200 [07:27<2:34:13, 50.02s/batch, loss=0.744]




Epoch 5:  10%|▉         | 19/200 [07:44<44:15, 14.67s/batch, loss=0.483]  

Total iteration 824, validation loss = 0.5449


Epoch 5:  10%|█         | 20/200 [09:43<2:20:21, 46.79s/batch, loss=0.42]




Epoch 5:  12%|█▏        | 24/200 [09:59<40:21, 13.76s/batch, loss=0.732]  

Total iteration 829, validation loss = 0.5403


Epoch 5:  12%|█▎        | 25/200 [12:03<2:18:48, 47.59s/batch, loss=0.507]




Epoch 5:  14%|█▍        | 29/200 [12:20<40:11, 14.10s/batch, loss=0.531]  

Total iteration 834, validation loss = 0.5386


Epoch 5:  15%|█▌        | 30/200 [14:27<2:18:16, 48.80s/batch, loss=0.297]




Epoch 5:  17%|█▋        | 34/200 [14:43<39:23, 14.24s/batch, loss=0.459]  

Total iteration 839, validation loss = 0.5404


Epoch 5:  18%|█▊        | 35/200 [16:55<2:18:12, 50.26s/batch, loss=0.618]




Epoch 5:  20%|█▉        | 39/200 [17:11<39:02, 14.55s/batch, loss=0.512]  

Total iteration 844, validation loss = 0.5409


Epoch 5:  20%|██        | 40/200 [19:18<2:11:17, 49.23s/batch, loss=0.353]




Epoch 5:  22%|██▏       | 44/200 [19:34<37:17, 14.34s/batch, loss=0.44]   

Total iteration 849, validation loss = 0.5417


Epoch 5:  22%|██▎       | 45/200 [21:41<2:06:33, 48.99s/batch, loss=0.532]




Epoch 5:  24%|██▍       | 49/200 [21:57<35:53, 14.26s/batch, loss=0.556]  

Total iteration 854, validation loss = 0.5428


Epoch 5:  25%|██▌       | 50/200 [24:03<2:00:55, 48.37s/batch, loss=0.649]




Epoch 5:  27%|██▋       | 54/200 [24:18<34:20, 14.11s/batch, loss=0.468]  

Total iteration 859, validation loss = 0.5474


Epoch 5:  28%|██▊       | 55/200 [26:28<1:59:48, 49.57s/batch, loss=0.37]




Epoch 5:  30%|██▉       | 59/200 [26:45<34:16, 14.59s/batch, loss=0.661]  

Total iteration 864, validation loss = 0.5460


Epoch 5:  30%|███       | 60/200 [28:57<1:58:01, 50.58s/batch, loss=0.519]




Epoch 5:  32%|███▏      | 64/200 [29:13<33:30, 14.78s/batch, loss=0.383]  

Total iteration 869, validation loss = 0.5403


Epoch 5:  32%|███▎      | 65/200 [31:19<1:49:57, 48.87s/batch, loss=0.475]




Epoch 5:  34%|███▍      | 69/200 [31:35<31:04, 14.23s/batch, loss=0.448]  

Total iteration 874, validation loss = 0.5382


Epoch 5:  35%|███▌      | 70/200 [33:36<1:41:48, 46.99s/batch, loss=0.541]




Epoch 5:  37%|███▋      | 74/200 [33:52<29:13, 13.92s/batch, loss=0.558]  

Total iteration 879, validation loss = 0.5382


Epoch 5:  38%|███▊      | 75/200 [35:56<1:39:11, 47.61s/batch, loss=0.511]




Epoch 5:  40%|███▉      | 79/200 [36:12<28:04, 13.92s/batch, loss=0.468]  

Total iteration 884, validation loss = 0.5404


Epoch 5:  40%|████      | 80/200 [38:18<1:36:37, 48.31s/batch, loss=0.786]




Epoch 5:  42%|████▏     | 84/200 [38:34<27:17, 14.12s/batch, loss=0.597]  

Total iteration 889, validation loss = 0.5473


Epoch 5:  42%|████▎     | 85/200 [40:44<1:35:25, 49.79s/batch, loss=0.57]




Epoch 5:  44%|████▍     | 89/200 [41:01<27:09, 14.68s/batch, loss=0.699]  

Total iteration 894, validation loss = 0.5486


Epoch 5:  45%|████▌     | 90/200 [43:10<1:31:21, 49.83s/batch, loss=0.622]




Epoch 5:  47%|████▋     | 94/200 [43:27<25:52, 14.65s/batch, loss=0.562]  

Total iteration 899, validation loss = 0.5469


Epoch 5:  48%|████▊     | 95/200 [45:30<1:24:01, 48.02s/batch, loss=0.364]




Epoch 5:  50%|████▉     | 99/200 [45:46<23:37, 14.03s/batch, loss=0.469]  

Total iteration 904, validation loss = 0.5446


Epoch 5:  50%|█████     | 100/200 [47:43<1:16:21, 45.81s/batch, loss=0.484]




Epoch 5:  52%|█████▏    | 104/200 [47:59<21:38, 13.52s/batch, loss=0.372]  

Total iteration 909, validation loss = 0.5453


Epoch 5:  52%|█████▎    | 105/200 [49:59<1:13:21, 46.33s/batch, loss=0.673]




Epoch 5:  55%|█████▍    | 109/200 [50:15<20:40, 13.63s/batch, loss=0.379]  

Total iteration 914, validation loss = 0.5447


Epoch 5:  55%|█████▌    | 110/200 [52:20<1:11:29, 47.66s/batch, loss=0.51]




Epoch 5:  57%|█████▋    | 114/200 [52:36<20:15, 14.14s/batch, loss=0.593]  

Total iteration 919, validation loss = 0.5427


Epoch 5:  57%|█████▊    | 115/200 [54:46<1:10:16, 49.61s/batch, loss=0.559]




Epoch 5:  60%|█████▉    | 119/200 [55:03<19:35, 14.51s/batch, loss=0.83]   

Total iteration 924, validation loss = 0.5406


Epoch 5:  60%|██████    | 120/200 [57:06<1:04:10, 48.13s/batch, loss=0.61]




Epoch 5:  62%|██████▏   | 124/200 [57:23<18:02, 14.24s/batch, loss=0.553]  

Total iteration 929, validation loss = 0.5416


Epoch 5:  62%|██████▎   | 125/200 [59:34<1:02:25, 49.94s/batch, loss=0.709]




Epoch 5:  64%|██████▍   | 129/200 [59:50<17:08, 14.49s/batch, loss=0.4]    

Total iteration 934, validation loss = 0.5416


Epoch 5:  65%|██████▌   | 130/200 [1:01:54<56:19, 48.27s/batch, loss=0.357]




Epoch 5:  67%|██████▋   | 134/200 [1:02:10<15:31, 14.12s/batch, loss=0.382]

Total iteration 939, validation loss = 0.5423


Epoch 5:  68%|██████▊   | 135/200 [1:04:13<51:40, 47.70s/batch, loss=0.519]




Epoch 5:  70%|██████▉   | 139/200 [1:04:30<14:15, 14.03s/batch, loss=0.544]

Total iteration 944, validation loss = 0.5442


Epoch 5:  70%|███████   | 140/200 [1:06:36<48:23, 48.39s/batch, loss=0.383]




Epoch 5:  72%|███████▏  | 144/200 [1:06:53<13:21, 14.31s/batch, loss=0.626]

Total iteration 949, validation loss = 0.5445


Epoch 5:  72%|███████▎  | 145/200 [1:08:57<43:59, 48.00s/batch, loss=0.717]




Epoch 5:  74%|███████▍  | 149/200 [1:09:13<11:55, 14.03s/batch, loss=0.446]

Total iteration 954, validation loss = 0.5491


Epoch 5:  75%|███████▌  | 150/200 [1:11:20<40:37, 48.75s/batch, loss=0.484]




Epoch 5:  77%|███████▋  | 154/200 [1:11:36<10:57, 14.29s/batch, loss=0.531]

Total iteration 959, validation loss = 0.5495


Epoch 5:  78%|███████▊  | 155/200 [1:13:46<37:29, 50.00s/batch, loss=0.555]




Epoch 5:  80%|███████▉  | 159/200 [1:14:02<09:54, 14.49s/batch, loss=0.489]

Total iteration 964, validation loss = 0.5442


Epoch 5:  80%|████████  | 160/200 [1:16:06<32:06, 48.17s/batch, loss=0.462]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 5:  82%|████████▏ | 164/200 [1:16:22<08:28, 14.13s/batch, loss=0.549]

Total iteration 969, validation loss = 0.5372


Epoch 5:  82%|████████▎ | 165/200 [1:18:34<29:17, 50.20s/batch, loss=0.726]




Epoch 5:  84%|████████▍ | 169/200 [1:18:51<07:34, 14.65s/batch, loss=0.494]

Total iteration 974, validation loss = 0.5354


Epoch 5:  85%|████████▌ | 170/200 [1:20:55<24:06, 48.23s/batch, loss=0.805]




Epoch 5:  87%|████████▋ | 174/200 [1:21:11<06:08, 14.15s/batch, loss=0.682]

Total iteration 979, validation loss = 0.5362


Epoch 5:  88%|████████▊ | 175/200 [1:23:15<20:00, 48.01s/batch, loss=0.517]




Epoch 5:  90%|████████▉ | 179/200 [1:23:31<04:55, 14.06s/batch, loss=0.471]

Total iteration 984, validation loss = 0.5369


Epoch 5:  90%|█████████ | 180/200 [1:25:39<16:18, 48.94s/batch, loss=0.61] 




Epoch 5:  92%|█████████▏| 184/200 [1:25:55<03:47, 14.25s/batch, loss=0.58] 

Total iteration 989, validation loss = 0.5370


Epoch 5:  92%|█████████▎| 185/200 [1:28:00<12:04, 48.30s/batch, loss=0.674]




Epoch 5:  94%|█████████▍| 189/200 [1:28:16<02:34, 14.09s/batch, loss=0.489]

Total iteration 994, validation loss = 0.5370


Epoch 5:  95%|█████████▌| 190/200 [1:30:23<08:08, 48.81s/batch, loss=0.66] 




Epoch 5:  97%|█████████▋| 194/200 [1:30:39<01:25, 14.24s/batch, loss=0.49] 

Total iteration 999, validation loss = 0.5375


Epoch 5:  98%|█████████▊| 195/200 [1:32:48<04:06, 49.33s/batch, loss=0.494]




Epoch 5: 100%|█████████▉| 199/200 [1:32:53<00:12, 12.71s/batch, loss=0.587]

Total iteration 1004, validation loss = 0.5371


Epoch 5: 100%|██████████| 200/200 [1:34:57<00:00, 28.49s/batch, loss=0.4]  
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 6:   2%|▏         | 4/200 [00:28<16:14,  4.97s/batch, loss=0.454]

Total iteration 1010, validation loss = 0.5363


Epoch 6:   2%|▎         | 5/200 [02:35<2:42:36, 50.03s/batch, loss=0.502]




Epoch 6:   4%|▍         | 9/200 [02:52<41:57, 13.18s/batch, loss=0.454]  

Total iteration 1015, validation loss = 0.5392


Epoch 6:   5%|▌         | 10/200 [04:58<2:34:43, 48.86s/batch, loss=0.483]




Epoch 6:   7%|▋         | 14/200 [05:14<43:24, 14.00s/batch, loss=0.463]  

Total iteration 1020, validation loss = 0.5419


Epoch 6:   8%|▊         | 15/200 [07:23<2:33:27, 49.77s/batch, loss=0.625]




Epoch 6:  10%|▉         | 19/200 [07:39<43:23, 14.39s/batch, loss=0.529]  

Total iteration 1025, validation loss = 0.5465


Epoch 6:  10%|█         | 20/200 [09:52<2:32:11, 50.73s/batch, loss=0.582]




Epoch 6:  12%|█▏        | 24/200 [10:08<43:01, 14.67s/batch, loss=0.496]  

Total iteration 1030, validation loss = 0.5477


Epoch 6:  12%|█▎        | 25/200 [12:13<2:21:57, 48.67s/batch, loss=0.381]




Epoch 6:  14%|█▍        | 29/200 [12:29<40:23, 14.17s/batch, loss=0.508]  

Total iteration 1035, validation loss = 0.5450


Epoch 6:  15%|█▌        | 30/200 [14:38<2:20:21, 49.54s/batch, loss=0.507]




Epoch 6:  17%|█▋        | 34/200 [14:55<39:57, 14.44s/batch, loss=0.614]  

Total iteration 1040, validation loss = 0.5414


Epoch 6:  18%|█▊        | 35/200 [16:59<2:12:18, 48.11s/batch, loss=0.635]




Epoch 6:  20%|█▉        | 39/200 [17:15<37:40, 14.04s/batch, loss=0.563]  

Total iteration 1045, validation loss = 0.5393


Epoch 6:  20%|██        | 40/200 [19:17<2:06:35, 47.47s/batch, loss=0.412]




Epoch 6:  22%|██▏       | 44/200 [19:33<36:05, 13.88s/batch, loss=0.408]  

Total iteration 1050, validation loss = 0.5394


Epoch 6:  22%|██▎       | 45/200 [21:40<2:05:42, 48.66s/batch, loss=0.362]




Epoch 6:  24%|██▍       | 49/200 [21:56<35:39, 14.17s/batch, loss=0.341]  

Total iteration 1055, validation loss = 0.5393


Epoch 6:  25%|██▌       | 50/200 [24:02<2:01:13, 48.49s/batch, loss=0.582]




Epoch 6:  27%|██▋       | 54/200 [24:19<34:48, 14.30s/batch, loss=0.594]  

Total iteration 1060, validation loss = 0.5392


Epoch 6:  28%|██▊       | 55/200 [26:23<1:56:30, 48.21s/batch, loss=0.58]




Epoch 6:  30%|██▉       | 59/200 [26:39<33:01, 14.06s/batch, loss=0.443]  

Total iteration 1065, validation loss = 0.5391


Epoch 6:  30%|███       | 60/200 [28:37<1:47:11, 45.94s/batch, loss=0.54]




Epoch 6:  32%|███▏      | 64/200 [28:53<30:57, 13.66s/batch, loss=0.484]  

Total iteration 1070, validation loss = 0.5392


Epoch 6:  32%|███▎      | 65/200 [30:52<1:43:44, 46.11s/batch, loss=0.642]




Epoch 6:  34%|███▍      | 69/200 [31:09<29:50, 13.67s/batch, loss=0.669]  

Total iteration 1075, validation loss = 0.5402


Epoch 6:  35%|███▌      | 70/200 [33:07<1:39:37, 45.98s/batch, loss=0.605]




Epoch 6:  37%|███▋      | 74/200 [33:23<28:27, 13.55s/batch, loss=0.422]  

Total iteration 1080, validation loss = 0.5440


Epoch 6:  38%|███▊      | 75/200 [35:35<1:44:00, 49.93s/batch, loss=0.437]




Epoch 6:  40%|███▉      | 79/200 [35:53<29:33, 14.66s/batch, loss=0.425]  

Total iteration 1085, validation loss = 0.5441


Epoch 6:  40%|████      | 80/200 [37:57<1:36:52, 48.44s/batch, loss=0.531]




Epoch 6:  42%|████▏     | 84/200 [38:13<27:17, 14.12s/batch, loss=0.537]  

Total iteration 1090, validation loss = 0.5433


Epoch 6:  42%|████▎     | 85/200 [40:26<1:37:00, 50.62s/batch, loss=0.555]




Epoch 6:  44%|████▍     | 89/200 [40:42<27:07, 14.66s/batch, loss=0.397]  

Total iteration 1095, validation loss = 0.5425


Epoch 6:  45%|████▌     | 90/200 [42:54<1:32:52, 50.66s/batch, loss=0.433]




Epoch 6:  47%|████▋     | 94/200 [43:11<26:16, 14.87s/batch, loss=0.39]   

Total iteration 1100, validation loss = 0.5419


Epoch 6:  48%|████▊     | 95/200 [45:18<1:26:17, 49.31s/batch, loss=0.58]




Epoch 6:  50%|████▉     | 99/200 [45:33<24:09, 14.35s/batch, loss=0.528]  

Total iteration 1105, validation loss = 0.5422


Epoch 6:  50%|█████     | 100/200 [47:34<1:18:25, 47.06s/batch, loss=0.461]




Epoch 6:  52%|█████▏    | 104/200 [47:50<22:05, 13.81s/batch, loss=0.534]  

Total iteration 1110, validation loss = 0.5416


Epoch 6:  52%|█████▎    | 105/200 [50:00<1:18:19, 49.47s/batch, loss=0.399]




Epoch 6:  55%|█████▍    | 109/200 [50:16<21:52, 14.42s/batch, loss=0.584]  

Total iteration 1115, validation loss = 0.5425


Epoch 6:  55%|█████▌    | 110/200 [52:24<1:13:53, 49.26s/batch, loss=0.458]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 6:  57%|█████▋    | 114/200 [52:40<20:34, 14.35s/batch, loss=0.445]

Total iteration 1120, validation loss = 0.5437


Epoch 6:  57%|█████▊    | 115/200 [54:53<1:11:53, 50.75s/batch, loss=0.615]




Epoch 6:  60%|█████▉    | 119/200 [55:10<19:55, 14.76s/batch, loss=0.506]  

Total iteration 1125, validation loss = 0.5458


Epoch 6:  60%|██████    | 120/200 [57:11<1:03:36, 47.71s/batch, loss=0.792]




Epoch 6:  62%|██████▏   | 124/200 [57:27<17:42, 13.98s/batch, loss=0.488]  

Total iteration 1130, validation loss = 0.5421


Epoch 6:  62%|██████▎   | 125/200 [59:33<1:00:16, 48.23s/batch, loss=0.627]




Epoch 6:  64%|██████▍   | 129/200 [59:49<16:43, 14.13s/batch, loss=0.719]  

Total iteration 1135, validation loss = 0.5402


Epoch 6:  65%|██████▌   | 130/200 [1:01:55<56:40, 48.58s/batch, loss=0.656]




Epoch 6:  67%|██████▋   | 134/200 [1:02:12<15:38, 14.23s/batch, loss=0.584]

Total iteration 1140, validation loss = 0.5469


Epoch 6:  68%|██████▊   | 135/200 [1:04:18<52:30, 48.47s/batch, loss=0.613]




Epoch 6:  70%|██████▉   | 139/200 [1:04:34<14:21, 14.13s/batch, loss=0.6]  

Total iteration 1145, validation loss = 0.5536


Epoch 6:  70%|███████   | 140/200 [1:06:41<48:54, 48.91s/batch, loss=0.54]




Epoch 6:  72%|███████▏  | 144/200 [1:06:57<13:17, 14.24s/batch, loss=0.603]

Total iteration 1150, validation loss = 0.5593


Epoch 6:  72%|███████▎  | 145/200 [1:09:04<44:50, 48.92s/batch, loss=0.495]




Epoch 6:  74%|███████▍  | 149/200 [1:09:21<12:14, 14.40s/batch, loss=0.705]

Total iteration 1155, validation loss = 0.5637


Epoch 6:  75%|███████▌  | 150/200 [1:11:28<40:59, 49.19s/batch, loss=0.435]




Epoch 6:  77%|███████▋  | 154/200 [1:11:45<11:02, 14.41s/batch, loss=0.685]

Total iteration 1160, validation loss = 0.5646


Epoch 6:  78%|███████▊  | 155/200 [1:13:52<36:52, 49.17s/batch, loss=0.507]




Epoch 6:  80%|███████▉  | 159/200 [1:14:09<09:54, 14.50s/batch, loss=0.439]

Total iteration 1165, validation loss = 0.5548


Epoch 6:  80%|████████  | 160/200 [1:16:13<32:08, 48.22s/batch, loss=0.554]




Epoch 6:  82%|████████▏ | 164/200 [1:16:30<08:30, 14.19s/batch, loss=0.595]

Total iteration 1170, validation loss = 0.5419


Epoch 6:  82%|████████▎ | 165/200 [1:18:28<27:02, 46.34s/batch, loss=0.646]




Epoch 6:  84%|████████▍ | 169/200 [1:18:45<07:06, 13.76s/batch, loss=0.54] 

Total iteration 1175, validation loss = 0.5411


Epoch 6:  85%|████████▌ | 170/200 [1:20:44<23:05, 46.17s/batch, loss=0.556]




Epoch 6:  87%|████████▋ | 174/200 [1:21:01<05:58, 13.78s/batch, loss=0.71] 

Total iteration 1180, validation loss = 0.5415


Epoch 6:  88%|████████▊ | 175/200 [1:23:07<20:02, 48.11s/batch, loss=0.644]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)


Total iteration 1185, validation loss = 0.5339


Epoch 6:  90%|█████████ | 180/200 [1:25:25<15:45, 47.26s/batch, loss=0.298]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 6:  92%|█████████▏| 184/200 [1:25:41<03:41, 13.85s/batch, loss=0.493]

Total iteration 1190, validation loss = 0.5334


Epoch 6:  92%|█████████▎| 185/200 [1:27:43<11:48, 47.26s/batch, loss=0.582]




Epoch 6:  94%|█████████▍| 189/200 [1:28:00<02:33, 13.93s/batch, loss=0.985]

Total iteration 1195, validation loss = 0.5375


Epoch 6:  95%|█████████▌| 190/200 [1:30:01<07:50, 47.07s/batch, loss=0.558]




Epoch 6:  97%|█████████▋| 194/200 [1:30:17<01:22, 13.83s/batch, loss=0.452]

Total iteration 1200, validation loss = 0.5392


Epoch 6:  98%|█████████▊| 195/200 [1:32:18<03:53, 46.69s/batch, loss=0.461]




Epoch 6: 100%|█████████▉| 199/200 [1:32:23<00:12, 12.02s/batch, loss=0.54] 

Total iteration 1205, validation loss = 0.5434


Epoch 6: 100%|██████████| 200/200 [1:34:37<00:00, 28.39s/batch, loss=0.494]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 7:   2%|▏         | 4/200 [00:27<16:12,  4.96s/batch, loss=0.413]

Total iteration 1211, validation loss = 0.5442


Epoch 7:   2%|▎         | 5/200 [02:27<2:33:38, 47.27s/batch, loss=0.464]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 7:   4%|▍         | 9/200 [02:44<40:14, 12.64s/batch, loss=0.684]  

Total iteration 1216, validation loss = 0.5419


Epoch 7:   5%|▌         | 10/200 [04:56<2:39:22, 50.33s/batch, loss=0.483]




Epoch 7:   7%|▋         | 14/200 [05:12<44:57, 14.50s/batch, loss=0.462]  

Total iteration 1221, validation loss = 0.5392


Epoch 7:   8%|▊         | 15/200 [07:16<2:28:47, 48.26s/batch, loss=0.52]




Epoch 7:  10%|▉         | 19/200 [07:32<42:25, 14.06s/batch, loss=0.496]  

Total iteration 1226, validation loss = 0.5373


Epoch 7:  10%|█         | 20/200 [09:37<2:24:16, 48.09s/batch, loss=0.371]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 7:  12%|█▏        | 24/200 [09:54<41:47, 14.25s/batch, loss=0.593]  

Total iteration 1231, validation loss = 0.5368


Epoch 7:  12%|█▎        | 25/200 [11:59<2:20:40, 48.23s/batch, loss=0.503]




Epoch 7:  14%|█▍        | 29/200 [12:15<40:09, 14.09s/batch, loss=0.553]  

Total iteration 1236, validation loss = 0.5356


Epoch 7:  15%|█▌        | 30/200 [14:21<2:17:46, 48.63s/batch, loss=0.615]




Epoch 7:  17%|█▋        | 34/200 [14:37<39:10, 14.16s/batch, loss=0.546]  

Total iteration 1241, validation loss = 0.5368


Epoch 7:  18%|█▊        | 35/200 [16:48<2:18:01, 50.19s/batch, loss=0.513]




Epoch 7:  20%|█▉        | 39/200 [17:04<39:00, 14.54s/batch, loss=0.519]  

Total iteration 1246, validation loss = 0.5368


Epoch 7:  20%|██        | 40/200 [19:08<2:07:59, 48.00s/batch, loss=0.407]




Epoch 7:  22%|██▏       | 44/200 [19:24<36:28, 14.03s/batch, loss=0.716]  

Total iteration 1251, validation loss = 0.5368


Epoch 7:  22%|██▎       | 45/200 [21:27<2:02:52, 47.56s/batch, loss=0.414]




Epoch 7:  24%|██▍       | 49/200 [21:43<35:02, 13.92s/batch, loss=0.374]  

Total iteration 1256, validation loss = 0.5363


Epoch 7:  25%|██▌       | 50/200 [23:49<2:00:52, 48.35s/batch, loss=0.461]




Epoch 7:  27%|██▋       | 54/200 [24:04<34:18, 14.10s/batch, loss=0.459]  

Total iteration 1261, validation loss = 0.5371


Epoch 7:  28%|██▊       | 55/200 [26:10<1:56:39, 48.27s/batch, loss=0.656]




Epoch 7:  30%|██▉       | 59/200 [26:27<33:24, 14.21s/batch, loss=0.563]  

Total iteration 1266, validation loss = 0.5375


Epoch 7:  30%|███       | 60/200 [28:34<1:54:15, 48.97s/batch, loss=0.294]




Epoch 7:  32%|███▏      | 64/200 [28:51<32:35, 14.38s/batch, loss=0.507]  

Total iteration 1271, validation loss = 0.5390


Epoch 7:  32%|███▎      | 65/200 [30:54<1:47:52, 47.94s/batch, loss=0.389]




Epoch 7:  34%|███▍      | 69/200 [31:11<31:00, 14.20s/batch, loss=0.402]  

Total iteration 1276, validation loss = 0.5381


Epoch 7:  35%|███▌      | 70/200 [33:17<1:45:09, 48.54s/batch, loss=0.56]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 7:  37%|███▋      | 74/200 [33:33<29:44, 14.16s/batch, loss=0.559]  

Total iteration 1281, validation loss = 0.5392


Epoch 7:  38%|███▊      | 75/200 [35:37<1:39:48, 47.91s/batch, loss=0.451]




Epoch 7:  40%|███▉      | 79/200 [35:53<28:15, 14.01s/batch, loss=0.435]  

Total iteration 1286, validation loss = 0.5379


Epoch 7:  40%|████      | 80/200 [38:00<1:37:38, 48.82s/batch, loss=0.283]




Epoch 7:  42%|████▏     | 84/200 [38:17<27:45, 14.36s/batch, loss=0.53]   

Total iteration 1291, validation loss = 0.5390


Epoch 7:  42%|████▎     | 85/200 [40:22<1:32:31, 48.27s/batch, loss=0.402]




Epoch 7:  44%|████▍     | 89/200 [40:38<26:02, 14.08s/batch, loss=0.516]  

Total iteration 1296, validation loss = 0.5393


Epoch 7:  45%|████▌     | 90/200 [42:46<1:30:01, 49.10s/batch, loss=0.489]




Epoch 7:  47%|████▋     | 94/200 [43:03<25:34, 14.47s/batch, loss=0.332]  

Total iteration 1301, validation loss = 0.5385


Epoch 7:  48%|████▊     | 95/200 [45:10<1:25:52, 49.08s/batch, loss=0.414]




Epoch 7:  50%|████▉     | 99/200 [45:26<24:07, 14.33s/batch, loss=0.48]   

Total iteration 1306, validation loss = 0.5389


Epoch 7:  50%|█████     | 100/200 [47:25<1:17:40, 46.60s/batch, loss=0.306]




Epoch 7:  52%|█████▏    | 104/200 [47:41<21:53, 13.68s/batch, loss=0.466]  

Total iteration 1311, validation loss = 0.5402


Epoch 7:  52%|█████▎    | 105/200 [49:49<1:17:18, 48.83s/batch, loss=0.542]




Epoch 7:  55%|█████▍    | 109/200 [50:05<21:33, 14.21s/batch, loss=0.529]  

Total iteration 1316, validation loss = 0.5385


Epoch 7:  55%|█████▌    | 110/200 [52:11<1:12:39, 48.44s/batch, loss=0.369]




Epoch 7:  57%|█████▋    | 114/200 [52:27<20:14, 14.12s/batch, loss=0.359]  

Total iteration 1321, validation loss = 0.5355


Epoch 7:  57%|█████▊    | 115/200 [54:32<1:08:34, 48.40s/batch, loss=0.435]




Epoch 7:  60%|█████▉    | 119/200 [54:49<19:14, 14.25s/batch, loss=0.422]  

Total iteration 1326, validation loss = 0.5343


Epoch 7:  60%|██████    | 120/200 [56:55<1:05:00, 48.75s/batch, loss=0.608]




Epoch 7:  62%|██████▏   | 124/200 [57:12<18:14, 14.41s/batch, loss=0.437]  

Total iteration 1331, validation loss = 0.5348


Epoch 7:  62%|██████▎   | 125/200 [59:18<1:00:49, 48.66s/batch, loss=0.367]




Epoch 7:  64%|██████▍   | 129/200 [59:34<16:45, 14.17s/batch, loss=0.544]  

Total iteration 1336, validation loss = 0.5357


Epoch 7:  65%|██████▌   | 130/200 [1:01:43<57:42, 49.47s/batch, loss=0.518]




Epoch 7:  67%|██████▋   | 134/200 [1:01:59<15:48, 14.37s/batch, loss=0.473]

Total iteration 1341, validation loss = 0.5383


Epoch 7:  68%|██████▊   | 135/200 [1:04:06<53:17, 49.19s/batch, loss=0.402]




Epoch 7:  70%|██████▉   | 139/200 [1:04:22<14:32, 14.30s/batch, loss=0.317]

Total iteration 1346, validation loss = 0.5393


Epoch 7:  70%|███████   | 140/200 [1:06:25<47:45, 47.76s/batch, loss=0.481]




Epoch 7:  72%|███████▏  | 144/200 [1:06:42<13:11, 14.13s/batch, loss=0.486]

Total iteration 1351, validation loss = 0.5408


Epoch 7:  72%|███████▎  | 145/200 [1:08:44<43:18, 47.24s/batch, loss=0.786]




Epoch 7:  74%|███████▍  | 149/200 [1:09:00<11:45, 13.84s/batch, loss=0.589]

Total iteration 1356, validation loss = 0.5418


Epoch 7:  75%|███████▌  | 150/200 [1:11:01<38:56, 46.72s/batch, loss=0.559]




Epoch 7:  77%|███████▋  | 154/200 [1:11:17<10:31, 13.73s/batch, loss=0.396]

Total iteration 1361, validation loss = 0.5409


Epoch 7:  78%|███████▊  | 155/200 [1:13:20<35:32, 47.39s/batch, loss=0.495]




Epoch 7:  80%|███████▉  | 159/200 [1:13:36<09:28, 13.87s/batch, loss=0.532]

Total iteration 1366, validation loss = 0.5412


Epoch 7:  80%|████████  | 160/200 [1:15:41<32:07, 48.20s/batch, loss=0.631]




Epoch 7:  82%|████████▏ | 164/200 [1:15:57<08:27, 14.09s/batch, loss=0.476]

Total iteration 1371, validation loss = 0.5391


Epoch 7:  82%|████████▎ | 165/200 [1:18:08<29:06, 49.89s/batch, loss=0.61] 




Epoch 7:  84%|████████▍ | 169/200 [1:18:24<07:28, 14.48s/batch, loss=0.629]

Total iteration 1376, validation loss = 0.5405


Epoch 7:  85%|████████▌ | 170/200 [1:20:27<23:59, 47.98s/batch, loss=0.386]




Epoch 7:  87%|████████▋ | 174/200 [1:20:43<06:04, 14.01s/batch, loss=0.812]

Total iteration 1381, validation loss = 0.5427


Epoch 7:  88%|████████▊ | 175/200 [1:22:48<20:03, 48.13s/batch, loss=0.336]




Epoch 7:  90%|████████▉ | 179/200 [1:23:04<04:55, 14.05s/batch, loss=0.427]

Total iteration 1386, validation loss = 0.5401


Epoch 7:  90%|█████████ | 180/200 [1:25:09<16:02, 48.14s/batch, loss=0.502]




Epoch 7:  92%|█████████▏| 184/200 [1:25:25<03:44, 14.05s/batch, loss=0.404]

Total iteration 1391, validation loss = 0.5373


Epoch 7:  92%|█████████▎| 185/200 [1:27:29<11:56, 47.75s/batch, loss=0.509]




Epoch 7:  94%|█████████▍| 189/200 [1:27:45<02:33, 13.97s/batch, loss=0.552]

Total iteration 1396, validation loss = 0.5369


Epoch 7:  95%|█████████▌| 190/200 [1:29:47<07:52, 47.25s/batch, loss=0.398]




Epoch 7:  97%|█████████▋| 194/200 [1:30:03<01:23, 13.85s/batch, loss=0.621]

Total iteration 1401, validation loss = 0.5381


Epoch 7:  98%|█████████▊| 195/200 [1:32:10<04:03, 48.69s/batch, loss=0.465]




Epoch 7: 100%|█████████▉| 199/200 [1:32:15<00:12, 12.50s/batch, loss=0.403]

Total iteration 1406, validation loss = 0.5394


Epoch 7: 100%|██████████| 200/200 [1:34:15<00:00, 28.28s/batch, loss=0.629]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 8:   2%|▏         | 4/200 [00:28<16:11,  4.96s/batch, loss=0.512]

Total iteration 1412, validation loss = 0.5412


Epoch 8:   2%|▎         | 5/200 [02:26<2:31:53, 46.74s/batch, loss=0.66]




Epoch 8:   4%|▍         | 9/200 [02:42<39:16, 12.34s/batch, loss=0.478]  

Total iteration 1417, validation loss = 0.5429


Epoch 8:   5%|▌         | 10/200 [04:43<2:28:50, 47.00s/batch, loss=0.388]




Epoch 8:   7%|▋         | 14/200 [05:01<42:22, 13.67s/batch, loss=0.353]  

Total iteration 1422, validation loss = 0.5419


Epoch 8:   8%|▊         | 15/200 [07:04<2:27:16, 47.76s/batch, loss=0.474]




Epoch 8:  10%|▉         | 19/200 [07:20<41:59, 13.92s/batch, loss=0.507]  

Total iteration 1427, validation loss = 0.5393


Epoch 8:  10%|█         | 20/200 [09:26<2:24:46, 48.26s/batch, loss=0.403]




Epoch 8:  12%|█▏        | 24/200 [09:43<41:43, 14.22s/batch, loss=0.531]  

Total iteration 1432, validation loss = 0.5387


Epoch 8:  12%|█▎        | 25/200 [11:52<2:24:17, 49.47s/batch, loss=0.466]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 8:  14%|█▍        | 29/200 [12:09<41:27, 14.55s/batch, loss=0.431]  

Total iteration 1437, validation loss = 0.5378


Epoch 8:  15%|█▌        | 30/200 [14:14<2:17:41, 48.60s/batch, loss=0.405]




Epoch 8:  17%|█▋        | 34/200 [14:31<39:30, 14.28s/batch, loss=0.377]  

Total iteration 1442, validation loss = 0.5357


Epoch 8:  18%|█▊        | 35/200 [16:36<2:13:04, 48.39s/batch, loss=0.279]




Epoch 8:  20%|█▉        | 39/200 [16:52<37:51, 14.11s/batch, loss=0.368]  

Total iteration 1447, validation loss = 0.5378


Epoch 8:  20%|██        | 40/200 [19:02<2:12:32, 49.70s/batch, loss=0.489]




Epoch 8:  22%|██▏       | 44/200 [19:18<37:35, 14.46s/batch, loss=0.515]  

Total iteration 1452, validation loss = 0.5371


Epoch 8:  22%|██▎       | 45/200 [21:27<2:08:10, 49.62s/batch, loss=0.48]




Epoch 8:  24%|██▍       | 49/200 [21:43<36:39, 14.57s/batch, loss=0.529]  

Total iteration 1457, validation loss = 0.5370


Epoch 8:  25%|██▌       | 50/200 [23:47<2:00:06, 48.05s/batch, loss=0.468]




Epoch 8:  27%|██▋       | 54/200 [24:03<34:13, 14.06s/batch, loss=0.439]  

Total iteration 1462, validation loss = 0.5373


Epoch 8:  28%|██▊       | 55/200 [26:06<1:55:14, 47.68s/batch, loss=0.383]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 8:  30%|██▉       | 59/200 [26:22<32:47, 13.95s/batch, loss=0.478]

Total iteration 1467, validation loss = 0.5372


Epoch 8:  30%|███       | 60/200 [28:32<1:55:54, 49.68s/batch, loss=0.502]




Epoch 8:  32%|███▏      | 64/200 [28:49<33:04, 14.59s/batch, loss=0.412]  

Total iteration 1472, validation loss = 0.5386


Epoch 8:  32%|███▎      | 65/200 [30:58<1:51:34, 49.59s/batch, loss=0.405]




Epoch 8:  34%|███▍      | 69/200 [31:14<31:34, 14.46s/batch, loss=0.353]  

Total iteration 1477, validation loss = 0.5398


Epoch 8:  35%|███▌      | 70/200 [33:22<1:46:57, 49.36s/batch, loss=0.317]




Epoch 8:  37%|███▋      | 74/200 [33:38<30:09, 14.36s/batch, loss=0.398]  

Total iteration 1482, validation loss = 0.5402


Epoch 8:  38%|███▊      | 75/200 [35:42<1:39:37, 47.82s/batch, loss=0.43]




Epoch 8:  40%|███▉      | 79/200 [35:58<28:15, 14.01s/batch, loss=0.548]  

Total iteration 1487, validation loss = 0.5394


Epoch 8:  40%|████      | 80/200 [38:00<1:34:22, 47.19s/batch, loss=0.593]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)


Total iteration 1492, validation loss = 0.5394


Epoch 8:  42%|████▎     | 85/200 [40:18<1:30:43, 47.33s/batch, loss=0.226]




Epoch 8:  44%|████▍     | 89/200 [40:35<26:03, 14.08s/batch, loss=0.444]  

Total iteration 1497, validation loss = 0.5425


Epoch 8:  45%|████▌     | 90/200 [42:43<1:29:52, 49.02s/batch, loss=0.511]




Epoch 8:  47%|████▋     | 94/200 [42:59<25:16, 14.30s/batch, loss=0.524]  

Total iteration 1502, validation loss = 0.5441


Epoch 8:  48%|████▊     | 95/200 [45:09<1:27:01, 49.73s/batch, loss=0.537]




Epoch 8:  50%|████▉     | 99/200 [45:25<24:23, 14.49s/batch, loss=0.685]  

Total iteration 1507, validation loss = 0.5414


Epoch 8:  50%|█████     | 100/200 [47:25<1:18:09, 46.89s/batch, loss=0.47]




Epoch 8:  52%|█████▏    | 104/200 [47:42<22:16, 13.93s/batch, loss=0.475]  

Total iteration 1512, validation loss = 0.5399


Epoch 8:  52%|█████▎    | 105/200 [49:41<1:13:33, 46.46s/batch, loss=0.518]




Epoch 8:  55%|█████▍    | 109/200 [49:57<20:43, 13.66s/batch, loss=0.603]  

Total iteration 1517, validation loss = 0.5412


Epoch 8:  55%|█████▌    | 110/200 [52:05<1:13:01, 48.68s/batch, loss=0.582]




Epoch 8:  57%|█████▋    | 114/200 [52:21<20:19, 14.18s/batch, loss=0.502]  

Total iteration 1522, validation loss = 0.5433


Epoch 8:  57%|█████▊    | 115/200 [54:26<1:08:30, 48.36s/batch, loss=0.677]




Epoch 8:  60%|█████▉    | 119/200 [54:42<19:02, 14.11s/batch, loss=0.363]  

Total iteration 1527, validation loss = 0.5435


Epoch 8:  60%|██████    | 120/200 [56:54<1:06:51, 50.15s/batch, loss=0.511]




Epoch 8:  62%|██████▏   | 124/200 [57:10<18:24, 14.53s/batch, loss=0.613]  

Total iteration 1532, validation loss = 0.5395


Epoch 8:  62%|██████▎   | 125/200 [59:16<1:01:18, 49.04s/batch, loss=0.435]




Epoch 8:  64%|██████▍   | 129/200 [59:32<16:52, 14.26s/batch, loss=0.71]   

Total iteration 1537, validation loss = 0.5400


Epoch 8:  65%|██████▌   | 130/200 [1:01:41<57:41, 49.46s/batch, loss=0.53]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 8:  67%|██████▋   | 134/200 [1:01:57<15:50, 14.40s/batch, loss=0.243]

Total iteration 1542, validation loss = 0.5412


Epoch 8:  68%|██████▊   | 135/200 [1:04:04<52:50, 48.77s/batch, loss=0.528]




Epoch 8:  70%|██████▉   | 139/200 [1:04:20<14:26, 14.21s/batch, loss=0.544]

Total iteration 1547, validation loss = 0.5427


Epoch 8:  70%|███████   | 140/200 [1:06:28<49:23, 49.40s/batch, loss=0.57] 




Epoch 8:  72%|███████▏  | 144/200 [1:06:44<13:24, 14.36s/batch, loss=0.436]

Total iteration 1552, validation loss = 0.5427


Epoch 8:  72%|███████▎  | 145/200 [1:08:53<45:27, 49.60s/batch, loss=0.451]




Epoch 8:  74%|███████▍  | 149/200 [1:09:09<12:14, 14.41s/batch, loss=0.549]

Total iteration 1557, validation loss = 0.5420


Epoch 8:  75%|███████▌  | 150/200 [1:11:19<41:28, 49.76s/batch, loss=0.57] 




Epoch 8:  77%|███████▋  | 154/200 [1:11:36<11:09, 14.56s/batch, loss=0.639]

Total iteration 1562, validation loss = 0.5396


Epoch 8:  78%|███████▊  | 155/200 [1:13:43<36:51, 49.15s/batch, loss=0.56] 




Epoch 8:  80%|███████▉  | 159/200 [1:13:59<09:46, 14.30s/batch, loss=0.438]

Total iteration 1567, validation loss = 0.5405


Epoch 8:  80%|████████  | 160/200 [1:16:11<33:43, 50.60s/batch, loss=0.518]




Epoch 8:  82%|████████▏ | 164/200 [1:16:28<08:54, 14.83s/batch, loss=0.457]

Total iteration 1572, validation loss = 0.5459


Epoch 8:  82%|████████▎ | 165/200 [1:18:34<28:30, 48.88s/batch, loss=0.498]




Epoch 8:  84%|████████▍ | 169/200 [1:18:50<07:20, 14.22s/batch, loss=0.558]

Total iteration 1577, validation loss = 0.5461


Epoch 8:  85%|████████▌ | 170/200 [1:20:59<24:46, 49.55s/batch, loss=0.398]




Epoch 8:  87%|████████▋ | 174/200 [1:21:15<06:14, 14.39s/batch, loss=0.668]

Total iteration 1582, validation loss = 0.5486


Epoch 8:  88%|████████▊ | 175/200 [1:23:20<20:13, 48.53s/batch, loss=0.556]




Epoch 8:  90%|████████▉ | 179/200 [1:23:36<04:57, 14.18s/batch, loss=0.366]

Total iteration 1587, validation loss = 0.5505


Epoch 8:  90%|█████████ | 180/200 [1:25:39<15:49, 47.48s/batch, loss=0.622]




Epoch 8:  92%|█████████▏| 184/200 [1:25:55<03:42, 13.93s/batch, loss=0.405]

Total iteration 1592, validation loss = 0.5462


Epoch 8:  92%|█████████▎| 185/200 [1:27:59<11:59, 47.98s/batch, loss=0.438]




Epoch 8:  94%|█████████▍| 189/200 [1:28:15<02:34, 14.04s/batch, loss=0.569]

Total iteration 1597, validation loss = 0.5407


Epoch 8:  95%|█████████▌| 190/200 [1:30:22<08:07, 48.78s/batch, loss=0.65] 




Epoch 8:  97%|█████████▋| 194/200 [1:30:39<01:25, 14.23s/batch, loss=0.475]

Total iteration 1602, validation loss = 0.5416


Epoch 8:  98%|█████████▊| 195/200 [1:32:41<03:58, 47.62s/batch, loss=0.375]




Epoch 8: 100%|█████████▉| 199/200 [1:32:47<00:12, 12.24s/batch, loss=0.316]

Total iteration 1607, validation loss = 0.5392


Epoch 8: 100%|██████████| 200/200 [1:34:44<00:00, 28.42s/batch, loss=0.423]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 9:   2%|▏         | 4/200 [00:28<16:40,  5.10s/batch, loss=0.496]

Total iteration 1613, validation loss = 0.5399


Epoch 9:   2%|▎         | 5/200 [02:32<2:38:16, 48.70s/batch, loss=0.534]




Epoch 9:   4%|▍         | 9/200 [02:48<40:57, 12.87s/batch, loss=0.645]  

Total iteration 1618, validation loss = 0.5397


Epoch 9:   5%|▌         | 10/200 [04:53<2:32:29, 48.16s/batch, loss=0.292]




Epoch 9:   7%|▋         | 14/200 [05:09<42:51, 13.83s/batch, loss=0.499]  

Total iteration 1623, validation loss = 0.5384


Epoch 9:   8%|▊         | 15/200 [07:11<2:26:20, 47.46s/batch, loss=0.373]




Epoch 9:  10%|▉         | 19/200 [07:27<41:48, 13.86s/batch, loss=0.4]    

Total iteration 1628, validation loss = 0.5368


Epoch 9:  10%|█         | 20/200 [09:35<2:26:49, 48.94s/batch, loss=0.473]




Epoch 9:  12%|█▏        | 24/200 [09:51<41:47, 14.25s/batch, loss=0.555]  

Total iteration 1633, validation loss = 0.5374


Epoch 9:  12%|█▎        | 25/200 [12:03<2:26:38, 50.27s/batch, loss=0.53]




Epoch 9:  14%|█▍        | 29/200 [12:19<41:29, 14.56s/batch, loss=0.543]  

Total iteration 1638, validation loss = 0.5386


Epoch 9:  15%|█▌        | 30/200 [14:27<2:20:40, 49.65s/batch, loss=0.551]




Epoch 9:  17%|█▋        | 34/200 [14:44<40:06, 14.50s/batch, loss=0.475]  

Total iteration 1643, validation loss = 0.5384


Epoch 9:  18%|█▊        | 35/200 [16:52<2:15:38, 49.32s/batch, loss=0.449]




Epoch 9:  20%|█▉        | 39/200 [17:08<38:27, 14.33s/batch, loss=0.536]  

Total iteration 1648, validation loss = 0.5356


Epoch 9:  20%|██        | 40/200 [19:20<2:14:49, 50.56s/batch, loss=0.466]




Epoch 9:  22%|██▏       | 44/200 [19:36<38:04, 14.64s/batch, loss=0.529]  

Total iteration 1653, validation loss = 0.5360


Epoch 9:  22%|██▎       | 45/200 [21:42<2:06:04, 48.80s/batch, loss=0.542]




Epoch 9:  24%|██▍       | 49/200 [21:58<35:59, 14.30s/batch, loss=0.285]  

Total iteration 1658, validation loss = 0.5365


Epoch 9:  25%|██▌       | 50/200 [24:04<2:01:38, 48.66s/batch, loss=0.489]




Epoch 9:  27%|██▋       | 54/200 [24:20<34:31, 14.19s/batch, loss=0.39]   

Total iteration 1663, validation loss = 0.5360


Epoch 9:  28%|██▊       | 55/200 [26:27<1:58:16, 48.94s/batch, loss=0.309]




Epoch 9:  30%|██▉       | 59/200 [26:43<33:27, 14.24s/batch, loss=0.467]  

Total iteration 1668, validation loss = 0.5359


Epoch 9:  30%|███       | 60/200 [28:48<1:52:14, 48.10s/batch, loss=0.379]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 9:  32%|███▏      | 64/200 [29:04<31:52, 14.06s/batch, loss=0.34]   

Total iteration 1673, validation loss = 0.5374


Epoch 9:  32%|███▎      | 65/200 [31:12<1:50:31, 49.13s/batch, loss=0.509]




Epoch 9:  34%|███▍      | 69/200 [31:28<31:11, 14.29s/batch, loss=0.454]  

Total iteration 1678, validation loss = 0.5385


Epoch 9:  35%|███▌      | 70/200 [33:38<1:48:19, 50.00s/batch, loss=0.508]




Epoch 9:  37%|███▋      | 74/200 [33:55<30:45, 14.65s/batch, loss=0.605]  

Total iteration 1683, validation loss = 0.5429


Epoch 9:  38%|███▊      | 75/200 [35:57<1:39:29, 47.75s/batch, loss=0.614]




Epoch 9:  40%|███▉      | 79/200 [36:14<28:23, 14.08s/batch, loss=0.585]  

Total iteration 1688, validation loss = 0.5434


Epoch 9:  40%|████      | 80/200 [38:17<1:34:52, 47.44s/batch, loss=0.413]




Epoch 9:  42%|████▏     | 84/200 [38:33<26:52, 13.90s/batch, loss=0.321]  

Total iteration 1693, validation loss = 0.5409


Epoch 9:  42%|████▎     | 85/200 [40:41<1:33:43, 48.90s/batch, loss=0.437]




Epoch 9:  44%|████▍     | 89/200 [40:57<26:36, 14.38s/batch, loss=0.32]   

Total iteration 1698, validation loss = 0.5401


Epoch 9:  45%|████▌     | 90/200 [43:04<1:29:20, 48.73s/batch, loss=0.299]




Epoch 9:  47%|████▋     | 94/200 [43:19<25:02, 14.18s/batch, loss=0.481]  

Total iteration 1703, validation loss = 0.5442


Epoch 9:  48%|████▊     | 95/200 [45:23<1:23:27, 47.69s/batch, loss=0.429]




Epoch 9:  50%|████▉     | 99/200 [45:39<23:40, 14.07s/batch, loss=0.34]   

Total iteration 1708, validation loss = 0.5413


Epoch 9:  50%|█████     | 100/200 [47:45<1:20:37, 48.38s/batch, loss=0.513]




Epoch 9:  52%|█████▏    | 104/200 [48:01<22:33, 14.10s/batch, loss=0.364]  

Total iteration 1713, validation loss = 0.5419


Epoch 9:  52%|█████▎    | 105/200 [50:08<1:17:14, 48.79s/batch, loss=0.655]




Epoch 9:  55%|█████▍    | 109/200 [50:25<21:43, 14.33s/batch, loss=0.37]   

Total iteration 1718, validation loss = 0.5434


Epoch 9:  55%|█████▌    | 110/200 [52:30<1:12:43, 48.49s/batch, loss=0.414]




Epoch 9:  57%|█████▋    | 114/200 [52:46<20:14, 14.12s/batch, loss=0.363]  

Total iteration 1723, validation loss = 0.5424


Epoch 9:  57%|█████▊    | 115/200 [54:57<1:11:00, 50.13s/batch, loss=0.619]




Epoch 9:  60%|█████▉    | 119/200 [55:14<19:53, 14.74s/batch, loss=0.594]  

Total iteration 1728, validation loss = 0.5428


Epoch 9:  60%|██████    | 120/200 [57:19<1:04:43, 48.55s/batch, loss=0.502]




Epoch 9:  62%|██████▏   | 124/200 [57:35<18:01, 14.22s/batch, loss=0.588]  

Total iteration 1733, validation loss = 0.5451


Epoch 9:  62%|██████▎   | 125/200 [59:40<1:00:24, 48.33s/batch, loss=0.521]




Epoch 9:  64%|██████▍   | 129/200 [59:56<16:42, 14.12s/batch, loss=0.644]  

Total iteration 1738, validation loss = 0.5459


Epoch 9:  65%|██████▌   | 130/200 [1:02:04<57:10, 49.00s/batch, loss=0.393]




Epoch 9:  67%|██████▋   | 134/200 [1:02:20<15:42, 14.28s/batch, loss=0.375]

Total iteration 1743, validation loss = 0.5446


Epoch 9:  68%|██████▊   | 135/200 [1:04:28<53:12, 49.11s/batch, loss=0.382]




Epoch 9:  70%|██████▉   | 139/200 [1:04:44<14:31, 14.29s/batch, loss=0.264]

Total iteration 1748, validation loss = 0.5422


Epoch 9:  70%|███████   | 140/200 [1:06:52<49:26, 49.45s/batch, loss=0.452]




Epoch 9:  72%|███████▏  | 144/200 [1:07:08<13:24, 14.36s/batch, loss=0.592]

Total iteration 1753, validation loss = 0.5400


Epoch 9:  72%|███████▎  | 145/200 [1:09:16<44:58, 49.06s/batch, loss=0.655]




Epoch 9:  74%|███████▍  | 149/200 [1:09:31<12:07, 14.26s/batch, loss=0.468]

Total iteration 1758, validation loss = 0.5399


Epoch 9:  75%|███████▌  | 150/200 [1:11:40<41:13, 49.47s/batch, loss=0.68] 




Epoch 9:  77%|███████▋  | 154/200 [1:11:56<11:00, 14.36s/batch, loss=0.524]

Total iteration 1763, validation loss = 0.5429


Epoch 9:  78%|███████▊  | 155/200 [1:14:08<37:43, 50.31s/batch, loss=0.508]




Epoch 9:  80%|███████▉  | 159/200 [1:14:24<09:57, 14.57s/batch, loss=0.212]

Total iteration 1768, validation loss = 0.5457


Epoch 9:  80%|████████  | 160/200 [1:16:28<32:11, 48.28s/batch, loss=0.371]




Epoch 9:  82%|████████▏ | 164/200 [1:16:44<08:30, 14.19s/batch, loss=0.222]

Total iteration 1773, validation loss = 0.5456


Epoch 9:  82%|████████▎ | 165/200 [1:18:54<28:53, 49.54s/batch, loss=0.357]




Epoch 9:  84%|████████▍ | 169/200 [1:19:09<07:25, 14.38s/batch, loss=0.349]

Total iteration 1778, validation loss = 0.5452


Epoch 9:  85%|████████▌ | 170/200 [1:21:08<23:18, 46.61s/batch, loss=0.509]




Epoch 9:  87%|████████▋ | 174/200 [1:21:24<05:56, 13.70s/batch, loss=0.342]

Total iteration 1783, validation loss = 0.5463


Epoch 9:  88%|████████▊ | 175/200 [1:23:28<19:50, 47.62s/batch, loss=0.323]




Epoch 9:  90%|████████▉ | 179/200 [1:23:44<04:52, 13.95s/batch, loss=0.67] 

Total iteration 1788, validation loss = 0.5457


Epoch 9:  90%|█████████ | 180/200 [1:25:49<15:57, 47.89s/batch, loss=0.44]




Epoch 9:  92%|█████████▏| 184/200 [1:26:05<03:43, 14.00s/batch, loss=0.504]

Total iteration 1793, validation loss = 0.5416


Epoch 9:  92%|█████████▎| 185/200 [1:28:10<12:04, 48.28s/batch, loss=0.519]




Epoch 9:  94%|█████████▍| 189/200 [1:28:26<02:34, 14.09s/batch, loss=0.413]

Total iteration 1798, validation loss = 0.5410


Epoch 9:  95%|█████████▌| 190/200 [1:30:31<08:00, 48.01s/batch, loss=0.409]




Epoch 9:  97%|█████████▋| 194/200 [1:30:47<01:24, 14.05s/batch, loss=0.51] 

Total iteration 1803, validation loss = 0.5416


Epoch 9:  98%|█████████▊| 195/200 [1:32:52<04:00, 48.15s/batch, loss=0.566]




Epoch 9: 100%|█████████▉| 199/200 [1:32:57<00:12, 12.36s/batch, loss=0.431]

Total iteration 1808, validation loss = 0.5449


Epoch 9: 100%|██████████| 200/200 [1:35:03<00:00, 28.52s/batch, loss=0.632]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 10:   2%|▏         | 4/200 [00:28<16:27,  5.04s/batch, loss=0.437]

Total iteration 1814, validation loss = 0.5468


Epoch 10:   2%|▎         | 5/200 [02:32<2:38:42, 48.83s/batch, loss=0.313]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 10:   4%|▍         | 9/200 [02:48<40:44, 12.80s/batch, loss=0.549]  

Total iteration 1819, validation loss = 0.5464


Epoch 10:   5%|▌         | 10/200 [04:49<2:29:06, 47.09s/batch, loss=0.403]




Epoch 10:   7%|▋         | 14/200 [05:05<42:11, 13.61s/batch, loss=0.389]  

Total iteration 1824, validation loss = 0.5439


Epoch 10:   8%|▊         | 15/200 [07:19<2:36:35, 50.79s/batch, loss=0.356]




Epoch 10:  10%|▉         | 19/200 [07:36<44:18, 14.69s/batch, loss=0.377]  

Total iteration 1829, validation loss = 0.5415


Epoch 10:  10%|█         | 20/200 [09:49<2:33:09, 51.05s/batch, loss=0.34]




Epoch 10:  12%|█▏        | 24/200 [10:05<43:34, 14.86s/batch, loss=0.43]   

Total iteration 1834, validation loss = 0.5407


Epoch 10:  12%|█▎        | 25/200 [12:12<2:23:42, 49.27s/batch, loss=0.348]




Epoch 10:  14%|█▍        | 29/200 [12:28<41:03, 14.41s/batch, loss=0.496]  

Total iteration 1839, validation loss = 0.5433


Epoch 10:  15%|█▌        | 30/200 [14:34<2:18:00, 48.71s/batch, loss=0.496]




Epoch 10:  17%|█▋        | 34/200 [14:51<39:14, 14.18s/batch, loss=0.59]   

Total iteration 1844, validation loss = 0.5449


Epoch 10:  18%|█▊        | 35/200 [17:00<2:16:37, 49.68s/batch, loss=0.243]




Epoch 10:  20%|█▉        | 39/200 [17:16<38:39, 14.41s/batch, loss=0.255]  

Total iteration 1849, validation loss = 0.5396


Epoch 10:  20%|██        | 40/200 [19:25<2:12:36, 49.73s/batch, loss=0.278]




Epoch 10:  22%|██▏       | 44/200 [19:42<37:51, 14.56s/batch, loss=0.316]  

Total iteration 1854, validation loss = 0.5379


Epoch 10:  22%|██▎       | 45/200 [21:48<2:05:55, 48.74s/batch, loss=0.477]




Epoch 10:  24%|██▍       | 49/200 [22:04<35:57, 14.29s/batch, loss=0.471]  

Total iteration 1859, validation loss = 0.5380


Epoch 10:  25%|██▌       | 50/200 [24:15<2:05:20, 50.14s/batch, loss=0.67]




Epoch 10:  27%|██▋       | 54/200 [24:31<35:26, 14.56s/batch, loss=0.464]  

Total iteration 1864, validation loss = 0.5391


Epoch 10:  28%|██▊       | 55/200 [26:35<1:56:09, 48.07s/batch, loss=0.486]




Epoch 10:  30%|██▉       | 59/200 [26:51<33:11, 14.12s/batch, loss=0.506]  

Total iteration 1869, validation loss = 0.5375


Epoch 10:  30%|███       | 60/200 [28:55<1:52:04, 48.03s/batch, loss=0.334]




Epoch 10:  32%|███▏      | 64/200 [29:11<31:50, 14.05s/batch, loss=0.354]  

Total iteration 1874, validation loss = 0.5364


Epoch 10:  32%|███▎      | 65/200 [31:16<1:47:54, 47.96s/batch, loss=0.354]




Epoch 10:  34%|███▍      | 69/200 [31:32<30:37, 14.03s/batch, loss=0.666]  

Total iteration 1879, validation loss = 0.5359


Epoch 10:  35%|███▌      | 70/200 [33:40<1:46:18, 49.06s/batch, loss=0.601]




Epoch 10:  37%|███▋      | 74/200 [33:56<30:02, 14.31s/batch, loss=0.376]  

Total iteration 1884, validation loss = 0.5364


Epoch 10:  38%|███▊      | 75/200 [36:04<1:42:28, 49.19s/batch, loss=0.456]




Epoch 10:  40%|███▉      | 79/200 [36:20<28:50, 14.31s/batch, loss=0.359]  

Total iteration 1889, validation loss = 0.5369


Epoch 10:  40%|████      | 80/200 [38:29<1:39:10, 49.59s/batch, loss=0.437]




Epoch 10:  42%|████▏     | 84/200 [38:45<28:04, 14.52s/batch, loss=0.44]   

Total iteration 1894, validation loss = 0.5368


Epoch 10:  42%|████▎     | 85/200 [40:58<1:37:03, 50.64s/batch, loss=0.463]




Epoch 10:  44%|████▍     | 89/200 [41:14<27:06, 14.66s/batch, loss=0.518]  

Total iteration 1899, validation loss = 0.5372


Epoch 10:  45%|████▌     | 90/200 [43:17<1:28:19, 48.18s/batch, loss=0.347]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 10:  47%|████▋     | 94/200 [43:34<25:03, 14.18s/batch, loss=0.427]

Total iteration 1904, validation loss = 0.5390


Epoch 10:  48%|████▊     | 95/200 [45:38<1:23:51, 47.92s/batch, loss=0.506]




Epoch 10:  50%|████▉     | 99/200 [45:54<23:32, 13.99s/batch, loss=0.538]  

Total iteration 1909, validation loss = 0.5385


Epoch 10:  50%|█████     | 100/200 [47:56<1:18:41, 47.21s/batch, loss=0.437]




Epoch 10:  52%|█████▏    | 104/200 [48:11<22:06, 13.81s/batch, loss=0.617]  

Total iteration 1914, validation loss = 0.5382


Epoch 10:  52%|█████▎    | 105/200 [50:20<1:17:39, 49.04s/batch, loss=0.385]




Epoch 10:  55%|█████▍    | 109/200 [50:36<21:40, 14.30s/batch, loss=0.242]  

Total iteration 1919, validation loss = 0.5387


Epoch 10:  55%|█████▌    | 110/200 [52:45<1:14:17, 49.53s/batch, loss=0.385]




Epoch 10:  57%|█████▋    | 114/200 [53:01<20:36, 14.38s/batch, loss=0.517]  

Total iteration 1924, validation loss = 0.5389


Epoch 10:  57%|█████▊    | 115/200 [55:07<1:08:47, 48.56s/batch, loss=0.397]




Epoch 10:  60%|█████▉    | 119/200 [55:22<19:04, 14.13s/batch, loss=0.337]  

Total iteration 1929, validation loss = 0.5380


Epoch 10:  60%|██████    | 120/200 [57:29<1:04:57, 48.72s/batch, loss=0.543]




Epoch 10:  62%|██████▏   | 124/200 [57:46<18:06, 14.30s/batch, loss=0.398]  

Total iteration 1934, validation loss = 0.5378


Epoch 10:  62%|██████▎   | 125/200 [59:56<1:02:21, 49.88s/batch, loss=0.538]




Epoch 10:  64%|██████▍   | 129/200 [1:00:12<17:08, 14.48s/batch, loss=0.456]

Total iteration 1939, validation loss = 0.5393


Epoch 10:  65%|██████▌   | 130/200 [1:02:19<57:16, 49.09s/batch, loss=0.513]




Epoch 10:  67%|██████▋   | 134/200 [1:02:36<15:52, 14.43s/batch, loss=0.405]

Total iteration 1944, validation loss = 0.5421


Epoch 10:  68%|██████▊   | 135/200 [1:04:35<50:27, 46.57s/batch, loss=0.618]




Epoch 10:  70%|██████▉   | 139/200 [1:04:51<13:54, 13.68s/batch, loss=0.437]

Total iteration 1949, validation loss = 0.5465


Epoch 10:  70%|███████   | 140/200 [1:06:52<46:50, 46.85s/batch, loss=0.484]




Epoch 10:  72%|███████▏  | 144/200 [1:07:08<12:51, 13.78s/batch, loss=0.502]

Total iteration 1954, validation loss = 0.5465


Epoch 10:  72%|███████▎  | 145/200 [1:09:10<43:08, 47.06s/batch, loss=0.529]




Epoch 10:  74%|███████▍  | 149/200 [1:09:26<11:44, 13.81s/batch, loss=0.346]

Total iteration 1959, validation loss = 0.5431


Epoch 10:  75%|███████▌  | 150/200 [1:11:29<39:28, 47.37s/batch, loss=0.429]




Epoch 10:  77%|███████▋  | 154/200 [1:11:45<10:38, 13.89s/batch, loss=0.471]

Total iteration 1964, validation loss = 0.5403


Epoch 10:  78%|███████▊  | 155/200 [1:13:55<37:08, 49.52s/batch, loss=0.513]




Epoch 10:  80%|███████▉  | 159/200 [1:14:11<09:51, 14.42s/batch, loss=0.355]

Total iteration 1969, validation loss = 0.5395


Epoch 10:  80%|████████  | 160/200 [1:16:23<33:40, 50.52s/batch, loss=0.693]




Epoch 10:  82%|████████▏ | 164/200 [1:16:39<08:46, 14.61s/batch, loss=0.412]

Total iteration 1974, validation loss = 0.5407


Epoch 10:  82%|████████▎ | 165/200 [1:18:50<29:25, 50.44s/batch, loss=0.509]




Epoch 10:  84%|████████▍ | 169/200 [1:19:06<07:32, 14.61s/batch, loss=0.482]

Total iteration 1979, validation loss = 0.5425


Epoch 10:  85%|████████▌ | 170/200 [1:21:12<24:23, 48.77s/batch, loss=0.405]




Epoch 10:  87%|████████▋ | 174/200 [1:21:27<06:08, 14.19s/batch, loss=0.467]

Total iteration 1984, validation loss = 0.5437


Epoch 10:  88%|████████▊ | 175/200 [1:23:36<20:35, 49.43s/batch, loss=0.485]




Epoch 10:  90%|████████▉ | 179/200 [1:23:53<05:04, 14.49s/batch, loss=0.582]

Total iteration 1989, validation loss = 0.5416


Epoch 10:  90%|█████████ | 180/200 [1:25:58<16:06, 48.33s/batch, loss=0.533]




Epoch 10:  92%|█████████▏| 184/200 [1:26:14<03:47, 14.20s/batch, loss=0.416]

Total iteration 1994, validation loss = 0.5416


Epoch 10:  92%|█████████▎| 185/200 [1:28:16<11:51, 47.47s/batch, loss=0.355]




Epoch 10:  94%|█████████▍| 189/200 [1:28:33<02:33, 13.99s/batch, loss=0.457]

Total iteration 1999, validation loss = 0.5435


Epoch 10:  95%|█████████▌| 190/200 [1:30:36<07:54, 47.43s/batch, loss=0.546]




Epoch 10:  97%|█████████▋| 194/200 [1:30:52<01:23, 13.93s/batch, loss=0.62] 

Total iteration 2004, validation loss = 0.5410


Epoch 10:  98%|█████████▊| 195/200 [1:32:54<03:55, 47.19s/batch, loss=0.43]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 10: 100%|█████████▉| 199/200 [1:32:59<00:12, 12.13s/batch, loss=0.129]

Total iteration 2009, validation loss = 0.5382


Epoch 10: 100%|██████████| 200/200 [1:35:04<00:00, 28.52s/batch, loss=0.377]







In [11]:
# Persist only the weights (state_dict) of model2 after the last epoch.
# NOTE(review): model2 is presumably the Experiment 1 self-attention model,
# given the 'experiment_att' target folder — confirm against the cell that builds it.
torch.save(
    model2.state_dict(),
    '../runs/experiment_att/experiment_final_model.pt',
)

In [None]:
# Scratch check (disabled): inspect the output size of transforms.Resize((256, 256))
# when applied to a random (4, 40, 512, 512) tensor.
# a = torch.randn(4, 40, 512, 512)
# print(a.size())
# transforms.Resize(size=(256, 256))(a).size()

# Experiment 2: Residual/Skip Connection after Self-Attention

In [17]:
# Make log directory and checkpoint directory (DIFFERENT DIRECTORY FROM BASELINE AND EXPERIMENT 1)
# Each run gets its own folder, named after the current US/Pacific wall-clock time.
dir_nm = datetime.now(tz=pytz.utc).astimezone(timezone('US/Pacific')).strftime('%Y-%m-%d_%H-%M-%S')
# dir_nm = "first_mini_c2fc2"
# log_dir = os.path.join('../runs/baseline', dir_nm) # running from this notebook since the other one gives cuda memory errors
log_dir = os.path.join('../runs/experiment_res', dir_nm)
# Use makedirs rather than two mkdir calls: it also creates '../runs/experiment_res'
# when that parent folder does not exist yet, and creating the nested 'Checkpoints'
# folder creates log_dir along the way. exist_ok=True keeps the cell safe to re-run.
os.makedirs(os.path.join(log_dir, 'Checkpoints'), exist_ok=True)


# Model, optimizer, criterion for Experiment 2 (residual/skip connection after self-attention)
# model = baseline_3DCNN(in_num_ch=1)
model3 = resattn_3DCNN(in_num_ch=1)
optimizer3 = optim.Adam(model3.parameters(), lr = 1e-4)
# BCEWithLogitsLoss applies the sigmoid internally, so the loss is fed raw logits.
# NOTE(review): assumes resattn_3DCNN outputs un-normalized logits — confirm in model/resattn_3d_cnn.py.
criterion3 = torch.nn.BCEWithLogitsLoss()

In [18]:
# Experiment 2 training run: fit model3 for 10 epochs, writing into log_dir.
# val_every=5 matches the output below, which reports a validation loss every 5 iterations.
# NOTE(review): the two returned objects are presumably per-iteration loss records
# (train / validation) — confirm against train() in utils/model_run.py.
train_loss_dict3, val_loss_dict3 = train(
    model3,
    optimizer3,
    criterion3,
    loader_train,
    loader_val,
    log_dir,
    device=device,
    epochs=10,
    val_every=5,
)

Epoch 1:   2%|▏         | 4/200 [00:20<11:57,  3.66s/batch, loss=0.698]

Total iteration 5, validation loss = 0.6742


Epoch 1:   2%|▎         | 5/200 [02:04<2:11:56, 40.60s/batch, loss=0.754]




Epoch 1:   4%|▍         | 9/200 [02:17<34:20, 10.79s/batch, loss=0.722]  

Total iteration 10, validation loss = 0.6685


Epoch 1:   5%|▌         | 10/200 [04:00<2:06:04, 39.81s/batch, loss=0.669]




Epoch 1:   7%|▋         | 14/200 [04:12<35:03, 11.31s/batch, loss=0.8]    

Total iteration 15, validation loss = 0.6655


Epoch 1:   8%|▊         | 15/200 [05:57<2:03:37, 40.09s/batch, loss=0.705]




Epoch 1:  10%|▉         | 19/200 [06:10<34:51, 11.55s/batch, loss=0.585]  

Total iteration 20, validation loss = 0.6775


Epoch 1:  10%|█         | 20/200 [07:58<2:03:43, 41.24s/batch, loss=0.759]




Epoch 1:  12%|█▏        | 24/200 [08:11<34:40, 11.82s/batch, loss=0.674]  

Total iteration 25, validation loss = 0.6782


Epoch 1:  12%|█▎        | 25/200 [09:54<1:56:31, 39.95s/batch, loss=0.636]




Epoch 1:  14%|█▍        | 29/200 [10:06<32:50, 11.52s/batch, loss=0.692]  

Total iteration 30, validation loss = 0.6813


Epoch 1:  15%|█▌        | 30/200 [11:50<1:52:52, 39.84s/batch, loss=0.696]




Epoch 1:  17%|█▋        | 34/200 [12:01<31:37, 11.43s/batch, loss=0.631]  

Total iteration 35, validation loss = 0.6725


Epoch 1:  18%|█▊        | 35/200 [13:44<1:48:26, 39.43s/batch, loss=0.642]




Epoch 1:  20%|█▉        | 39/200 [13:58<30:52, 11.50s/batch, loss=0.6]    

Total iteration 40, validation loss = 0.6598


Epoch 1:  20%|██        | 40/200 [15:34<1:40:27, 37.67s/batch, loss=0.75]




Epoch 1:  22%|██▏       | 44/200 [15:46<28:24, 10.93s/batch, loss=0.601]  

Total iteration 45, validation loss = 0.6417


Epoch 1:  22%|██▎       | 45/200 [17:19<1:33:35, 36.23s/batch, loss=0.586]




Epoch 1:  24%|██▍       | 49/200 [17:31<26:39, 10.59s/batch, loss=0.702]  

Total iteration 50, validation loss = 0.6309


Epoch 1:  25%|██▌       | 50/200 [19:02<1:27:58, 35.19s/batch, loss=0.651]




Epoch 1:  27%|██▋       | 54/200 [19:14<25:21, 10.42s/batch, loss=0.678]  

Total iteration 55, validation loss = 0.6377


Epoch 1:  28%|██▊       | 55/200 [20:45<1:24:33, 34.99s/batch, loss=0.708]




Epoch 1:  30%|██▉       | 59/200 [20:56<24:05, 10.25s/batch, loss=0.675]  

Total iteration 60, validation loss = 0.6429


Epoch 1:  30%|███       | 60/200 [22:30<1:23:47, 35.91s/batch, loss=0.738]




Epoch 1:  32%|███▏      | 64/200 [22:44<24:16, 10.71s/batch, loss=0.717]  

Total iteration 65, validation loss = 0.6409


Epoch 1:  32%|███▎      | 65/200 [24:24<1:26:12, 38.32s/batch, loss=0.695]




Epoch 1:  34%|███▍      | 69/200 [24:36<24:10, 11.07s/batch, loss=0.679]  

Total iteration 70, validation loss = 0.6453


Epoch 1:  35%|███▌      | 70/200 [26:18<1:24:02, 38.79s/batch, loss=0.691]




Epoch 1:  37%|███▋      | 74/200 [26:31<23:38, 11.26s/batch, loss=0.558]  

Total iteration 75, validation loss = 0.6472


Epoch 1:  38%|███▊      | 75/200 [28:07<1:18:15, 37.56s/batch, loss=0.669]




Epoch 1:  40%|███▉      | 79/200 [28:20<22:03, 10.94s/batch, loss=0.739]  

Total iteration 80, validation loss = 0.6432


Epoch 1:  40%|████      | 80/200 [30:00<1:16:41, 38.35s/batch, loss=0.633]




Epoch 1:  42%|████▏     | 84/200 [30:13<21:36, 11.18s/batch, loss=0.748]  

Total iteration 85, validation loss = 0.6441


Epoch 1:  42%|████▎     | 85/200 [31:57<1:16:30, 39.92s/batch, loss=0.739]




Epoch 1:  44%|████▍     | 89/200 [32:11<21:55, 11.86s/batch, loss=0.66]   

Total iteration 90, validation loss = 0.6365


Epoch 1:  45%|████▌     | 90/200 [33:50<1:10:52, 38.66s/batch, loss=0.681]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)


Total iteration 95, validation loss = 0.6288


Epoch 1:  48%|████▊     | 95/200 [35:43<1:07:33, 38.60s/batch, loss=0.687]




Epoch 1:  50%|████▉     | 99/200 [35:55<18:47, 11.17s/batch, loss=0.576]  

Total iteration 100, validation loss = 0.6271


Epoch 1:  50%|█████     | 100/200 [37:33<1:02:49, 37.69s/batch, loss=0.556]




Epoch 1:  52%|█████▏    | 104/200 [37:45<17:32, 10.97s/batch, loss=0.664]  

Total iteration 105, validation loss = 0.6358


Epoch 1:  52%|█████▎    | 105/200 [39:26<1:01:20, 38.74s/batch, loss=0.619]




Epoch 1:  55%|█████▍    | 109/200 [39:40<17:26, 11.50s/batch, loss=0.639]  

Total iteration 110, validation loss = 0.6404


Epoch 1:  55%|█████▌    | 110/200 [41:21<58:40, 39.12s/batch, loss=0.743]




Epoch 1:  57%|█████▋    | 114/200 [41:35<16:29, 11.50s/batch, loss=0.695]

Total iteration 115, validation loss = 0.6418


Epoch 1:  57%|█████▊    | 115/200 [43:20<56:56, 40.20s/batch, loss=0.635]




Epoch 1:  60%|█████▉    | 119/200 [43:32<15:33, 11.52s/batch, loss=0.665]

Total iteration 120, validation loss = 0.6450


Epoch 1:  60%|██████    | 120/200 [45:15<52:53, 39.67s/batch, loss=0.621]




Epoch 1:  62%|██████▏   | 124/200 [45:28<14:41, 11.60s/batch, loss=0.594]

Total iteration 125, validation loss = 0.6360


Epoch 1:  62%|██████▎   | 125/200 [47:00<45:36, 36.48s/batch, loss=0.595]




Epoch 1:  64%|██████▍   | 129/200 [47:12<12:40, 10.70s/batch, loss=0.672]

Total iteration 130, validation loss = 0.6249


Epoch 1:  65%|██████▌   | 130/200 [48:44<41:35, 35.65s/batch, loss=0.593]




Epoch 1:  67%|██████▋   | 134/200 [48:55<11:18, 10.28s/batch, loss=0.573]

Total iteration 135, validation loss = 0.6143


Epoch 1:  68%|██████▊   | 135/200 [50:33<40:11, 37.10s/batch, loss=0.498]




Epoch 1:  70%|██████▉   | 139/200 [50:46<11:16, 11.09s/batch, loss=0.577]

Total iteration 140, validation loss = 0.6126


Epoch 1:  70%|███████   | 140/200 [52:25<37:59, 37.99s/batch, loss=0.675]




Epoch 1:  72%|███████▏  | 144/200 [52:37<10:16, 11.00s/batch, loss=0.58] 

Total iteration 145, validation loss = 0.6196


Epoch 1:  72%|███████▎  | 145/200 [54:13<34:04, 37.16s/batch, loss=0.758]




Epoch 1:  74%|███████▍  | 149/200 [54:25<09:08, 10.76s/batch, loss=0.604]

Total iteration 150, validation loss = 0.6084


Epoch 1:  75%|███████▌  | 150/200 [56:06<31:53, 38.28s/batch, loss=0.625]




Epoch 1:  77%|███████▋  | 154/200 [56:17<08:28, 11.07s/batch, loss=0.623]

Total iteration 155, validation loss = 0.6110


Epoch 1:  78%|███████▊  | 155/200 [57:55<28:09, 37.55s/batch, loss=0.635]




Epoch 1:  80%|███████▉  | 159/200 [58:08<07:40, 11.22s/batch, loss=0.578]

Total iteration 160, validation loss = 0.6161


Epoch 1:  80%|████████  | 160/200 [59:47<25:16, 37.91s/batch, loss=0.709]




Epoch 1:  82%|████████▏ | 164/200 [59:59<06:40, 11.14s/batch, loss=0.608]

Total iteration 165, validation loss = 0.6150


Epoch 1:  82%|████████▎ | 165/200 [1:01:42<22:53, 39.23s/batch, loss=0.643]




Epoch 1:  84%|████████▍ | 169/200 [1:01:55<05:58, 11.55s/batch, loss=0.65] 

Total iteration 170, validation loss = 0.6065


Epoch 1:  85%|████████▌ | 170/200 [1:03:41<20:14, 40.48s/batch, loss=0.78]




Epoch 1:  87%|████████▋ | 174/200 [1:03:54<05:03, 11.67s/batch, loss=0.594]

Total iteration 175, validation loss = 0.6066


Epoch 1:  88%|████████▊ | 175/200 [1:05:38<16:39, 39.99s/batch, loss=0.544]




Epoch 1:  90%|████████▉ | 179/200 [1:05:50<04:00, 11.44s/batch, loss=0.552]

Total iteration 180, validation loss = 0.6055


Epoch 1:  90%|█████████ | 180/200 [1:07:34<13:15, 39.78s/batch, loss=0.552]




Epoch 1:  92%|█████████▏| 184/200 [1:07:46<03:05, 11.56s/batch, loss=0.718]

Total iteration 185, validation loss = 0.6032


Epoch 1:  92%|█████████▎| 185/200 [1:09:31<10:03, 40.23s/batch, loss=0.68] 




Epoch 1:  94%|█████████▍| 189/200 [1:09:44<02:11, 11.94s/batch, loss=0.553]

Total iteration 190, validation loss = 0.6057


Epoch 1:  95%|█████████▌| 190/200 [1:11:32<06:50, 41.03s/batch, loss=0.626]




Epoch 1:  97%|█████████▋| 194/200 [1:11:44<01:10, 11.72s/batch, loss=0.664]

Total iteration 195, validation loss = 0.5996


Epoch 1:  98%|█████████▊| 195/200 [1:13:28<03:20, 40.05s/batch, loss=0.754]




Epoch 1: 100%|█████████▉| 199/200 [1:13:32<00:10, 10.37s/batch, loss=0.726]

Total iteration 200, validation loss = 0.6014


Epoch 1: 100%|██████████| 200/200 [1:15:13<00:00, 22.57s/batch, loss=0.628]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 2:   2%|▏         | 4/200 [00:20<12:06,  3.71s/batch, loss=0.745]

Total iteration 206, validation loss = 0.6091


Epoch 2:   2%|▎         | 5/200 [02:03<2:10:11, 40.06s/batch, loss=0.548]




Epoch 2:   4%|▍         | 9/200 [02:14<32:40, 10.26s/batch, loss=0.608]  

Total iteration 211, validation loss = 0.6114


Epoch 2:   5%|▌         | 10/200 [04:03<2:10:36, 41.24s/batch, loss=0.714]




Epoch 2:   7%|▋         | 14/200 [04:16<36:24, 11.75s/batch, loss=0.487]  

Total iteration 216, validation loss = 0.6125


Epoch 2:   8%|▊         | 15/200 [06:00<2:03:53, 40.18s/batch, loss=0.683]




Epoch 2:  10%|▉         | 19/200 [06:12<34:55, 11.58s/batch, loss=0.751]  

Total iteration 221, validation loss = 0.6195


Epoch 2:  10%|█         | 20/200 [07:57<2:00:25, 40.14s/batch, loss=0.713]




Epoch 2:  12%|█▏        | 24/200 [08:09<34:04, 11.61s/batch, loss=0.84]   

Total iteration 226, validation loss = 0.6125


Epoch 2:  12%|█▎        | 25/200 [09:55<1:58:17, 40.56s/batch, loss=0.549]




Epoch 2:  14%|█▍        | 29/200 [10:07<33:15, 11.67s/batch, loss=0.673]  

Total iteration 231, validation loss = 0.6218


Epoch 2:  15%|█▌        | 30/200 [11:53<1:54:36, 40.45s/batch, loss=0.689]




Epoch 2:  17%|█▋        | 34/200 [12:06<32:25, 11.72s/batch, loss=0.64]   

Total iteration 236, validation loss = 0.6263


Epoch 2:  18%|█▊        | 35/200 [13:45<1:46:26, 38.70s/batch, loss=0.663]




Epoch 2:  20%|█▉        | 39/200 [13:58<30:05, 11.22s/batch, loss=0.658]  

Total iteration 241, validation loss = 0.6333


Epoch 2:  20%|██        | 40/200 [15:47<1:49:47, 41.17s/batch, loss=0.748]




Epoch 2:  22%|██▏       | 44/200 [16:00<30:47, 11.84s/batch, loss=0.489]  

Total iteration 246, validation loss = 0.6370


Epoch 2:  22%|██▎       | 45/200 [17:38<1:39:49, 38.64s/batch, loss=0.727]




Epoch 2:  24%|██▍       | 49/200 [17:51<28:15, 11.23s/batch, loss=0.66]   

Total iteration 251, validation loss = 0.6471


Epoch 2:  25%|██▌       | 50/200 [19:35<1:39:19, 39.73s/batch, loss=0.71]




Epoch 2:  27%|██▋       | 54/200 [19:47<28:17, 11.63s/batch, loss=0.662]  

Total iteration 256, validation loss = 0.6531


Epoch 2:  28%|██▊       | 55/200 [21:29<1:34:43, 39.20s/batch, loss=0.657]




Epoch 2:  30%|██▉       | 59/200 [21:42<26:31, 11.28s/batch, loss=0.645]  

Total iteration 261, validation loss = 0.6441


Epoch 2:  30%|███       | 60/200 [23:28<1:35:02, 40.73s/batch, loss=0.656]




Epoch 2:  32%|███▏      | 64/200 [23:41<26:37, 11.75s/batch, loss=0.696]  

Total iteration 266, validation loss = 0.6367


Epoch 2:  32%|███▎      | 65/200 [25:29<1:33:09, 41.41s/batch, loss=0.62]




Epoch 2:  34%|███▍      | 69/200 [25:42<26:04, 11.94s/batch, loss=0.641]  

Total iteration 271, validation loss = 0.6324


Epoch 2:  35%|███▌      | 70/200 [27:20<1:23:05, 38.35s/batch, loss=0.609]




Epoch 2:  37%|███▋      | 74/200 [27:33<23:55, 11.39s/batch, loss=0.599]  

Total iteration 276, validation loss = 0.6226


Epoch 2:  38%|███▊      | 75/200 [29:19<1:23:45, 40.21s/batch, loss=0.57]




Epoch 2:  40%|███▉      | 79/200 [29:32<23:45, 11.78s/batch, loss=0.75]   

Total iteration 281, validation loss = 0.6181


Epoch 2:  40%|████      | 80/200 [31:17<1:20:52, 40.44s/batch, loss=0.64]




Epoch 2:  42%|████▏     | 84/200 [31:29<22:36, 11.69s/batch, loss=0.756]  

Total iteration 286, validation loss = 0.6282


Epoch 2:  42%|████▎     | 85/200 [33:14<1:17:02, 40.19s/batch, loss=0.572]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 2:  44%|████▍     | 89/200 [33:28<21:50, 11.80s/batch, loss=0.64] 

Total iteration 291, validation loss = 0.6470


Epoch 2:  45%|████▌     | 90/200 [35:12<1:13:41, 40.20s/batch, loss=0.695]




Epoch 2:  47%|████▋     | 94/200 [35:24<20:32, 11.62s/batch, loss=0.703]  

Total iteration 296, validation loss = 0.6351


Epoch 2:  48%|████▊     | 95/200 [37:12<1:11:39, 40.95s/batch, loss=0.66]




Epoch 2:  50%|████▉     | 99/200 [37:25<19:44, 11.73s/batch, loss=0.709]  

Total iteration 301, validation loss = 0.6374


Epoch 2:  50%|█████     | 100/200 [39:12<1:08:44, 41.25s/batch, loss=0.644]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 2:  52%|█████▏    | 104/200 [39:25<18:59, 11.87s/batch, loss=0.654]

Total iteration 306, validation loss = 0.6363


Epoch 2:  52%|█████▎    | 105/200 [41:13<1:05:54, 41.62s/batch, loss=0.735]




Epoch 2:  55%|█████▍    | 109/200 [41:26<18:29, 12.20s/batch, loss=0.676]  

Total iteration 311, validation loss = 0.6419


Epoch 2:  55%|█████▌    | 110/200 [43:11<1:00:34, 40.38s/batch, loss=0.689]




Epoch 2:  57%|█████▋    | 114/200 [43:24<17:04, 11.92s/batch, loss=0.565]  

Total iteration 316, validation loss = 0.6444


Epoch 2:  57%|█████▊    | 115/200 [45:12<58:36, 41.38s/batch, loss=0.649]




Epoch 2:  60%|█████▉    | 119/200 [45:26<16:17, 12.06s/batch, loss=0.798]

Total iteration 321, validation loss = 0.6386


Epoch 2:  60%|██████    | 120/200 [47:09<53:40, 40.25s/batch, loss=0.732]




Epoch 2:  62%|██████▏   | 124/200 [47:22<14:36, 11.54s/batch, loss=0.714]

Total iteration 326, validation loss = 0.6285


Epoch 2:  62%|██████▎   | 125/200 [49:07<50:38, 40.51s/batch, loss=0.65] 




Epoch 2:  64%|██████▍   | 129/200 [49:20<14:00, 11.84s/batch, loss=0.564]

Total iteration 331, validation loss = 0.6243


Epoch 2:  65%|██████▌   | 130/200 [50:58<44:26, 38.09s/batch, loss=0.674]




Epoch 2:  67%|██████▋   | 134/200 [51:10<12:19, 11.20s/batch, loss=0.564]

Total iteration 336, validation loss = 0.6173


Epoch 2:  68%|██████▊   | 135/200 [52:52<42:10, 38.93s/batch, loss=0.659]




Epoch 2:  70%|██████▉   | 139/200 [53:05<11:33, 11.36s/batch, loss=0.609]

Total iteration 341, validation loss = 0.6133


Epoch 2:  70%|███████   | 140/200 [54:51<40:20, 40.34s/batch, loss=0.655]




Epoch 2:  72%|███████▏  | 144/200 [55:04<10:54, 11.69s/batch, loss=0.649]

Total iteration 346, validation loss = 0.6104


Epoch 2:  72%|███████▎  | 145/200 [56:47<36:27, 39.77s/batch, loss=0.662]




Epoch 2:  74%|███████▍  | 149/200 [57:01<10:04, 11.86s/batch, loss=0.736]

Total iteration 351, validation loss = 0.6094


Epoch 2:  75%|███████▌  | 150/200 [58:46<33:49, 40.59s/batch, loss=0.663]




Epoch 2:  77%|███████▋  | 154/200 [58:59<09:06, 11.89s/batch, loss=0.682]

Total iteration 356, validation loss = 0.6196


Epoch 2:  78%|███████▊  | 155/200 [1:00:44<30:22, 40.50s/batch, loss=0.719]




Epoch 2:  80%|███████▉  | 159/200 [1:00:57<08:00, 11.72s/batch, loss=0.712]

Total iteration 361, validation loss = 0.6196


Epoch 2:  80%|████████  | 160/200 [1:02:41<26:37, 39.93s/batch, loss=0.612]




Epoch 2:  82%|████████▏ | 164/200 [1:02:55<07:04, 11.80s/batch, loss=0.597]

Total iteration 366, validation loss = 0.6142


Epoch 2:  82%|████████▎ | 165/200 [1:04:40<23:39, 40.56s/batch, loss=0.646]




Epoch 2:  84%|████████▍ | 169/200 [1:04:54<06:04, 11.74s/batch, loss=0.647]

Total iteration 371, validation loss = 0.6166


Epoch 2:  85%|████████▌ | 170/200 [1:06:37<19:56, 39.88s/batch, loss=0.593]




Epoch 2:  87%|████████▋ | 174/200 [1:06:50<05:05, 11.77s/batch, loss=0.655]

Total iteration 376, validation loss = 0.6138


Epoch 2:  88%|████████▊ | 175/200 [1:08:39<17:18, 41.55s/batch, loss=0.685]




Epoch 2:  90%|████████▉ | 179/200 [1:08:52<04:09, 11.89s/batch, loss=0.708]

Total iteration 381, validation loss = 0.6246


Epoch 2:  90%|█████████ | 180/200 [1:10:33<13:06, 39.31s/batch, loss=0.778]




Epoch 2:  92%|█████████▏| 184/200 [1:10:45<03:00, 11.29s/batch, loss=0.625]

Total iteration 386, validation loss = 0.6437


Epoch 2:  92%|█████████▎| 185/200 [1:12:25<09:40, 38.69s/batch, loss=0.642]




Epoch 2:  94%|█████████▍| 189/200 [1:12:38<02:04, 11.34s/batch, loss=0.657]

Total iteration 391, validation loss = 0.6293


Epoch 2:  95%|█████████▌| 190/200 [1:14:20<06:29, 38.98s/batch, loss=0.586]




Epoch 2:  97%|█████████▋| 194/200 [1:14:32<01:08, 11.43s/batch, loss=0.567]

Total iteration 396, validation loss = 0.6081


Epoch 2:  98%|█████████▊| 195/200 [1:16:14<03:14, 38.96s/batch, loss=0.531]




Epoch 2: 100%|█████████▉| 199/200 [1:16:18<00:10, 10.10s/batch, loss=0.536]

Total iteration 401, validation loss = 0.5998


Epoch 2: 100%|██████████| 200/200 [1:18:02<00:00, 23.41s/batch, loss=0.534]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 3:   2%|▏         | 4/200 [00:21<12:09,  3.72s/batch, loss=0.541]

Total iteration 407, validation loss = 0.5975


Epoch 3:   2%|▎         | 5/200 [02:00<2:07:14, 39.15s/batch, loss=0.64]




Epoch 3:   4%|▍         | 9/200 [02:13<32:33, 10.23s/batch, loss=0.592]  

Total iteration 412, validation loss = 0.5976


Epoch 3:   5%|▌         | 10/200 [03:53<2:01:47, 38.46s/batch, loss=0.476]




Epoch 3:   7%|▋         | 14/200 [04:05<34:10, 11.02s/batch, loss=0.558]  

Total iteration 417, validation loss = 0.6024


Epoch 3:   8%|▊         | 15/200 [05:50<2:03:18, 39.99s/batch, loss=0.642]




Epoch 3:  10%|▉         | 19/200 [06:02<34:38, 11.48s/batch, loss=0.608]  

Total iteration 422, validation loss = 0.6062


Epoch 3:  10%|█         | 20/200 [07:43<1:56:48, 38.94s/batch, loss=0.724]




Epoch 3:  12%|█▏        | 24/200 [07:56<33:23, 11.38s/batch, loss=0.602]  

Total iteration 427, validation loss = 0.6130


Epoch 3:  12%|█▎        | 25/200 [09:32<1:49:38, 37.59s/batch, loss=0.703]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 3:  14%|█▍        | 29/200 [09:44<31:03, 10.90s/batch, loss=0.647]  

Total iteration 432, validation loss = 0.6250


Epoch 3:  15%|█▌        | 30/200 [11:28<1:51:04, 39.20s/batch, loss=0.635]




Epoch 3:  17%|█▋        | 34/200 [11:39<31:12, 11.28s/batch, loss=0.721]  

Total iteration 437, validation loss = 0.6298


Epoch 3:  18%|█▊        | 35/200 [13:18<1:44:19, 37.94s/batch, loss=0.642]




Epoch 3:  20%|█▉        | 39/200 [13:29<29:19, 10.93s/batch, loss=0.68]   

Total iteration 442, validation loss = 0.6308


Epoch 3:  20%|██        | 40/200 [15:17<1:47:51, 40.44s/batch, loss=0.579]




Epoch 3:  22%|██▏       | 44/200 [15:29<30:19, 11.66s/batch, loss=0.617]  

Total iteration 447, validation loss = 0.6156


Epoch 3:  22%|██▎       | 45/200 [17:14<1:43:54, 40.22s/batch, loss=0.807]




Epoch 3:  24%|██▍       | 49/200 [17:25<29:05, 11.56s/batch, loss=0.641]  

Total iteration 452, validation loss = 0.6080


Epoch 3:  25%|██▌       | 50/200 [19:13<1:42:11, 40.88s/batch, loss=0.61]




Epoch 3:  27%|██▋       | 54/200 [19:26<29:04, 11.95s/batch, loss=0.618]  

Total iteration 457, validation loss = 0.6044


Epoch 3:  28%|██▊       | 55/200 [21:05<1:33:33, 38.72s/batch, loss=0.646]




Epoch 3:  30%|██▉       | 59/200 [21:17<26:05, 11.10s/batch, loss=0.663]  

Total iteration 462, validation loss = 0.6042


Epoch 3:  30%|███       | 60/200 [22:58<1:30:20, 38.72s/batch, loss=0.62]




Epoch 3:  32%|███▏      | 64/200 [23:10<25:12, 11.12s/batch, loss=0.646]  

Total iteration 467, validation loss = 0.6038


Epoch 3:  32%|███▎      | 65/200 [24:53<1:28:35, 39.38s/batch, loss=0.621]




Epoch 3:  34%|███▍      | 69/200 [25:06<25:01, 11.46s/batch, loss=0.591]  

Total iteration 472, validation loss = 0.6067


Epoch 3:  35%|███▌      | 70/200 [26:48<1:24:52, 39.18s/batch, loss=0.598]




Epoch 3:  37%|███▋      | 74/200 [27:00<23:46, 11.32s/batch, loss=0.563]  

Total iteration 477, validation loss = 0.6062


Epoch 3:  38%|███▊      | 75/200 [28:42<1:21:25, 39.09s/batch, loss=0.65]




Epoch 3:  40%|███▉      | 79/200 [28:53<22:38, 11.23s/batch, loss=0.616]  

Total iteration 482, validation loss = 0.5973


Epoch 3:  40%|████      | 80/200 [30:41<1:21:30, 40.76s/batch, loss=0.571]




Epoch 3:  42%|████▏     | 84/200 [30:54<22:46, 11.78s/batch, loss=0.59]   

Total iteration 487, validation loss = 0.5947


Epoch 3:  42%|████▎     | 85/200 [32:32<1:13:38, 38.42s/batch, loss=0.647]




Epoch 3:  44%|████▍     | 89/200 [32:45<20:50, 11.26s/batch, loss=0.729]  

Total iteration 492, validation loss = 0.5973


Epoch 3:  45%|████▌     | 90/200 [34:19<1:07:13, 36.67s/batch, loss=0.623]




Epoch 3:  47%|████▋     | 94/200 [34:31<19:00, 10.76s/batch, loss=0.553]  

Total iteration 497, validation loss = 0.5904


Epoch 3:  48%|████▊     | 95/200 [36:13<1:07:34, 38.61s/batch, loss=0.625]




Epoch 3:  50%|████▉     | 99/200 [36:24<18:33, 11.02s/batch, loss=0.615]  

Total iteration 502, validation loss = 0.5917


Epoch 3:  50%|█████     | 100/200 [38:05<1:04:15, 38.56s/batch, loss=0.588]




Epoch 3:  52%|█████▏    | 104/200 [38:17<17:56, 11.22s/batch, loss=0.589]  

Total iteration 507, validation loss = 0.5967


Epoch 3:  52%|█████▎    | 105/200 [39:58<1:00:54, 38.47s/batch, loss=0.644]




Epoch 3:  55%|█████▍    | 109/200 [40:10<16:52, 11.13s/batch, loss=0.662]  

Total iteration 512, validation loss = 0.6038


Epoch 3:  55%|█████▌    | 110/200 [41:48<56:56, 37.97s/batch, loss=0.625]




Epoch 3:  57%|█████▋    | 114/200 [42:02<16:12, 11.30s/batch, loss=0.681]

Total iteration 517, validation loss = 0.6082


Epoch 3:  57%|█████▊    | 115/200 [43:41<54:40, 38.60s/batch, loss=0.597]




Epoch 3:  60%|█████▉    | 119/200 [43:53<14:55, 11.06s/batch, loss=0.674]

Total iteration 522, validation loss = 0.6099


Epoch 3:  60%|██████    | 120/200 [45:37<52:54, 39.69s/batch, loss=0.599]




Epoch 3:  62%|██████▏   | 124/200 [45:49<14:21, 11.33s/batch, loss=0.677]

Total iteration 527, validation loss = 0.6059


Epoch 3:  62%|██████▎   | 125/200 [47:32<49:25, 39.54s/batch, loss=0.654]




Epoch 3:  64%|██████▍   | 129/200 [47:44<13:25, 11.34s/batch, loss=0.623]

Total iteration 532, validation loss = 0.6112


Epoch 3:  65%|██████▌   | 130/200 [49:23<44:41, 38.31s/batch, loss=0.609]




Epoch 3:  67%|██████▋   | 134/200 [49:36<12:18, 11.19s/batch, loss=0.665]

Total iteration 537, validation loss = 0.6169


Epoch 3:  68%|██████▊   | 135/200 [51:15<41:14, 38.07s/batch, loss=0.517]




Epoch 3:  70%|██████▉   | 139/200 [51:27<11:14, 11.06s/batch, loss=0.58] 

Total iteration 542, validation loss = 0.6146


Epoch 3:  70%|███████   | 140/200 [53:11<39:30, 39.51s/batch, loss=0.794]




Epoch 3:  72%|███████▏  | 144/200 [53:23<10:36, 11.37s/batch, loss=0.632]

Total iteration 547, validation loss = 0.6194


Epoch 3:  72%|███████▎  | 145/200 [55:04<35:30, 38.74s/batch, loss=0.694]




Epoch 3:  74%|███████▍  | 149/200 [55:15<09:30, 11.20s/batch, loss=0.654]

Total iteration 552, validation loss = 0.6210


Epoch 3:  75%|███████▌  | 150/200 [56:56<32:11, 38.62s/batch, loss=0.582]




Epoch 3:  77%|███████▋  | 154/200 [57:08<08:35, 11.20s/batch, loss=0.695]

Total iteration 557, validation loss = 0.6101


Epoch 3:  78%|███████▊  | 155/200 [58:50<29:16, 39.04s/batch, loss=0.717]




Epoch 3:  80%|███████▉  | 159/200 [59:02<07:40, 11.23s/batch, loss=0.619]

Total iteration 562, validation loss = 0.6029


Epoch 3:  80%|████████  | 160/200 [1:00:36<24:29, 36.73s/batch, loss=0.703]




Epoch 3:  82%|████████▏ | 164/200 [1:00:48<06:21, 10.59s/batch, loss=0.611]

Total iteration 567, validation loss = 0.5965


Epoch 3:  82%|████████▎ | 165/200 [1:02:28<22:15, 38.16s/batch, loss=0.697]




Epoch 3:  84%|████████▍ | 169/200 [1:02:39<05:38, 10.91s/batch, loss=0.64] 

Total iteration 572, validation loss = 0.5991


Epoch 3:  85%|████████▌ | 170/200 [1:04:06<17:01, 34.06s/batch, loss=0.568]




Epoch 3:  87%|████████▋ | 174/200 [1:04:17<04:19, 10.00s/batch, loss=0.469]

Total iteration 577, validation loss = 0.6073


Epoch 3:  88%|████████▊ | 175/200 [1:05:53<15:05, 36.21s/batch, loss=0.493]




Epoch 3:  90%|████████▉ | 179/200 [1:06:05<03:42, 10.60s/batch, loss=0.499]

Total iteration 582, validation loss = 0.6081


Epoch 3:  90%|█████████ | 180/200 [1:07:45<12:37, 37.88s/batch, loss=0.49] 




Epoch 3:  92%|█████████▏| 184/200 [1:07:57<02:55, 11.00s/batch, loss=0.566]

Total iteration 587, validation loss = 0.5963


Epoch 3:  92%|█████████▎| 185/200 [1:09:35<09:24, 37.66s/batch, loss=0.705]




Epoch 3:  94%|█████████▍| 189/200 [1:09:47<01:59, 10.88s/batch, loss=0.714]

Total iteration 592, validation loss = 0.5863


Epoch 3:  95%|█████████▌| 190/200 [1:11:27<06:24, 38.47s/batch, loss=0.58] 




Epoch 3:  97%|█████████▋| 194/200 [1:11:40<01:07, 11.17s/batch, loss=0.644]

Total iteration 597, validation loss = 0.5901


Epoch 3:  98%|█████████▊| 195/200 [1:13:21<03:14, 38.91s/batch, loss=0.613]




Epoch 3: 100%|█████████▉| 199/200 [1:13:26<00:10, 10.08s/batch, loss=0.548]

Total iteration 602, validation loss = 0.5902


Epoch 3: 100%|██████████| 200/200 [1:15:02<00:00, 22.51s/batch, loss=0.582]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 4:   2%|▏         | 4/200 [00:20<12:15,  3.75s/batch, loss=0.937]

Total iteration 608, validation loss = 0.5878


Epoch 4:   2%|▎         | 5/200 [01:56<2:03:21, 37.96s/batch, loss=0.653]




Epoch 4:   4%|▍         | 9/200 [02:08<31:19,  9.84s/batch, loss=0.65]   

Total iteration 613, validation loss = 0.5929


Epoch 4:   5%|▌         | 10/200 [03:51<2:03:58, 39.15s/batch, loss=0.545]




Epoch 4:   7%|▋         | 14/200 [04:04<34:23, 11.09s/batch, loss=0.587]  

Total iteration 618, validation loss = 0.5996


Epoch 4:   8%|▊         | 15/200 [05:40<1:55:44, 37.54s/batch, loss=0.595]




Epoch 4:  10%|▉         | 19/200 [05:52<32:43, 10.85s/batch, loss=0.651]  

Total iteration 623, validation loss = 0.6057


Epoch 4:  10%|█         | 20/200 [07:28<1:51:50, 37.28s/batch, loss=0.68]




Epoch 4:  12%|█▏        | 24/200 [07:41<32:25, 11.05s/batch, loss=0.611]  

Total iteration 628, validation loss = 0.6026


Epoch 4:  12%|█▎        | 25/200 [09:22<1:52:26, 38.55s/batch, loss=0.74]




Epoch 4:  14%|█▍        | 29/200 [09:34<31:50, 11.17s/batch, loss=0.651]  

Total iteration 633, validation loss = 0.6059


Epoch 4:  15%|█▌        | 30/200 [11:10<1:45:40, 37.30s/batch, loss=0.658]




Epoch 4:  17%|█▋        | 34/200 [11:23<30:37, 11.07s/batch, loss=0.545]  

Total iteration 638, validation loss = 0.6290


Epoch 4:  18%|█▊        | 35/200 [13:02<1:44:28, 37.99s/batch, loss=0.667]




Epoch 4:  20%|█▉        | 39/200 [13:14<29:45, 11.09s/batch, loss=0.62]   

Total iteration 643, validation loss = 0.6357


Epoch 4:  20%|██        | 40/200 [14:51<1:39:21, 37.26s/batch, loss=0.707]




Epoch 4:  22%|██▏       | 44/200 [15:04<28:53, 11.11s/batch, loss=0.707]  

Total iteration 648, validation loss = 0.6284


Epoch 4:  22%|██▎       | 45/200 [16:41<1:37:00, 37.55s/batch, loss=0.645]




Epoch 4:  24%|██▍       | 49/200 [16:54<27:57, 11.11s/batch, loss=0.655]  

Total iteration 653, validation loss = 0.6131


Epoch 4:  25%|██▌       | 50/200 [18:32<1:34:01, 37.61s/batch, loss=0.671]




Epoch 4:  27%|██▋       | 54/200 [18:45<26:26, 10.87s/batch, loss=0.493]  

Total iteration 658, validation loss = 0.6034


Epoch 4:  28%|██▊       | 55/200 [20:26<1:33:49, 38.83s/batch, loss=0.747]




Epoch 4:  30%|██▉       | 59/200 [20:37<26:05, 11.10s/batch, loss=0.5]    

Total iteration 663, validation loss = 0.5991


Epoch 4:  30%|███       | 60/200 [22:20<1:31:05, 39.04s/batch, loss=0.675]




Epoch 4:  32%|███▏      | 64/200 [22:32<25:36, 11.30s/batch, loss=0.555]  

Total iteration 668, validation loss = 0.5899


Epoch 4:  32%|███▎      | 65/200 [24:13<1:27:09, 38.73s/batch, loss=0.651]




Epoch 4:  34%|███▍      | 69/200 [24:25<24:28, 11.21s/batch, loss=0.642]  

Total iteration 673, validation loss = 0.5876


Epoch 4:  35%|███▌      | 70/200 [26:03<1:22:33, 38.10s/batch, loss=0.639]




Epoch 4:  37%|███▋      | 74/200 [26:15<23:05, 11.00s/batch, loss=0.7]    

Total iteration 678, validation loss = 0.5888


Epoch 4:  38%|███▊      | 75/200 [27:56<1:20:01, 38.41s/batch, loss=0.582]




Epoch 4:  40%|███▉      | 79/200 [28:09<22:26, 11.13s/batch, loss=0.534]  

Total iteration 683, validation loss = 0.5905


Epoch 4:  40%|████      | 80/200 [29:49<1:17:35, 38.80s/batch, loss=0.756]




Epoch 4:  42%|████▏     | 84/200 [30:01<21:39, 11.20s/batch, loss=0.541]  

Total iteration 688, validation loss = 0.5908


Epoch 4:  42%|████▎     | 85/200 [31:42<1:14:17, 38.76s/batch, loss=0.606]




Epoch 4:  44%|████▍     | 89/200 [31:54<20:44, 11.21s/batch, loss=0.596]  

Total iteration 693, validation loss = 0.5916


Epoch 4:  45%|████▌     | 90/200 [33:33<1:09:45, 38.05s/batch, loss=0.629]




Epoch 4:  47%|████▋     | 94/200 [33:46<19:51, 11.24s/batch, loss=0.569]  

Total iteration 698, validation loss = 0.5942


Epoch 4:  48%|████▊     | 95/200 [35:26<1:07:13, 38.41s/batch, loss=0.564]




Epoch 4:  50%|████▉     | 99/200 [35:38<18:36, 11.05s/batch, loss=0.577]  

Total iteration 703, validation loss = 0.5931


Epoch 4:  50%|█████     | 100/200 [37:16<1:03:07, 37.88s/batch, loss=0.655]




Epoch 4:  52%|█████▏    | 104/200 [37:28<17:32, 10.97s/batch, loss=0.696]  

Total iteration 708, validation loss = 0.5893


Epoch 4:  52%|█████▎    | 105/200 [39:11<1:01:56, 39.12s/batch, loss=0.468]




Epoch 4:  55%|█████▍    | 109/200 [39:23<17:05, 11.27s/batch, loss=0.533]  

Total iteration 713, validation loss = 0.5893


Epoch 4:  55%|█████▌    | 110/200 [41:05<58:56, 39.29s/batch, loss=0.732]




Epoch 4:  57%|█████▋    | 114/200 [41:17<16:16, 11.36s/batch, loss=0.447]

Total iteration 718, validation loss = 0.5941


Epoch 4:  57%|█████▊    | 115/200 [43:02<56:30, 39.89s/batch, loss=0.571]




Epoch 4:  60%|█████▉    | 119/200 [43:14<15:24, 11.42s/batch, loss=0.618]

Total iteration 723, validation loss = 0.5928


Epoch 4:  60%|██████    | 120/200 [45:01<54:06, 40.58s/batch, loss=0.571]




Epoch 4:  62%|██████▏   | 124/200 [45:12<14:47, 11.67s/batch, loss=0.623]

Total iteration 728, validation loss = 0.5963


Epoch 4:  62%|██████▎   | 125/200 [46:58<50:25, 40.34s/batch, loss=0.552]




Epoch 4:  64%|██████▍   | 129/200 [47:10<13:44, 11.62s/batch, loss=0.583]

Total iteration 733, validation loss = 0.5909


Epoch 4:  65%|██████▌   | 130/200 [48:57<47:31, 40.73s/batch, loss=0.828]




Epoch 4:  67%|██████▋   | 134/200 [49:08<12:52, 11.70s/batch, loss=0.603]

Total iteration 738, validation loss = 0.5932


Epoch 4:  68%|██████▊   | 135/200 [50:57<44:45, 41.31s/batch, loss=0.629]




Epoch 4:  70%|██████▉   | 139/200 [51:10<12:06, 11.91s/batch, loss=0.582]

Total iteration 743, validation loss = 0.5930


Epoch 4:  70%|███████   | 140/200 [52:53<40:03, 40.05s/batch, loss=0.7]  




Epoch 4:  72%|███████▏  | 144/200 [53:06<10:49, 11.59s/batch, loss=0.567]

Total iteration 748, validation loss = 0.5877


Epoch 4:  72%|███████▎  | 145/200 [54:49<36:21, 39.66s/batch, loss=0.531]




Epoch 4:  74%|███████▍  | 149/200 [55:02<09:48, 11.54s/batch, loss=0.577]

Total iteration 753, validation loss = 0.5881


Epoch 4:  75%|███████▌  | 150/200 [56:46<33:15, 39.90s/batch, loss=0.662]




Epoch 4:  77%|███████▋  | 154/200 [57:00<09:05, 11.86s/batch, loss=0.583]

Total iteration 758, validation loss = 0.5868


Epoch 4:  78%|███████▊  | 155/200 [58:41<29:32, 39.40s/batch, loss=0.55] 




Epoch 4:  80%|███████▉  | 159/200 [58:54<07:52, 11.52s/batch, loss=0.555]

Total iteration 763, validation loss = 0.5892


Epoch 4:  80%|████████  | 160/200 [1:00:40<27:03, 40.58s/batch, loss=0.781]




Epoch 4:  82%|████████▏ | 164/200 [1:00:53<07:04, 11.78s/batch, loss=0.599]

Total iteration 768, validation loss = 0.5904


Epoch 4:  82%|████████▎ | 165/200 [1:02:39<23:38, 40.53s/batch, loss=0.667]




Epoch 4:  84%|████████▍ | 169/200 [1:02:51<06:01, 11.67s/batch, loss=0.564]

Total iteration 773, validation loss = 0.5900


Epoch 4:  85%|████████▌ | 170/200 [1:04:35<20:02, 40.10s/batch, loss=0.576]




Epoch 4:  87%|████████▋ | 174/200 [1:04:49<05:07, 11.83s/batch, loss=0.65] 

Total iteration 778, validation loss = 0.5893


Epoch 4:  88%|████████▊ | 175/200 [1:06:34<16:53, 40.53s/batch, loss=0.66]




Epoch 4:  90%|████████▉ | 179/200 [1:06:46<04:03, 11.60s/batch, loss=0.558]

Total iteration 783, validation loss = 0.5852


Epoch 4:  90%|█████████ | 180/200 [1:08:35<13:44, 41.21s/batch, loss=0.616]




Epoch 4:  92%|█████████▏| 184/200 [1:08:47<03:08, 11.79s/batch, loss=0.608]

Total iteration 788, validation loss = 0.5797


Epoch 4:  92%|█████████▎| 185/200 [1:10:30<09:56, 39.77s/batch, loss=0.665]




Epoch 4:  94%|█████████▍| 189/200 [1:10:43<02:06, 11.49s/batch, loss=0.619]

Total iteration 793, validation loss = 0.5801


Epoch 4:  95%|█████████▌| 190/200 [1:12:29<06:47, 40.73s/batch, loss=0.676]




Epoch 4:  97%|█████████▋| 194/200 [1:12:42<01:10, 11.71s/batch, loss=0.604]

Total iteration 798, validation loss = 0.5845


Epoch 4:  98%|█████████▊| 195/200 [1:14:28<03:23, 40.70s/batch, loss=0.81] 




Epoch 4: 100%|█████████▉| 199/200 [1:14:33<00:10, 10.53s/batch, loss=0.54] 

Total iteration 803, validation loss = 0.5887


Epoch 4: 100%|██████████| 200/200 [1:16:06<00:00, 22.83s/batch, loss=0.622]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 5:   2%|▏         | 4/200 [00:19<11:28,  3.51s/batch, loss=0.587]

Total iteration 809, validation loss = 0.5839


Epoch 5:   2%|▎         | 5/200 [02:01<2:09:10, 39.75s/batch, loss=0.516]




Epoch 5:   4%|▍         | 9/200 [02:12<32:23, 10.18s/batch, loss=0.699]  

Total iteration 814, validation loss = 0.5828


Epoch 5:   5%|▌         | 10/200 [03:55<2:04:31, 39.32s/batch, loss=0.572]




Epoch 5:   7%|▋         | 14/200 [04:07<34:36, 11.16s/batch, loss=0.594]  

Total iteration 819, validation loss = 0.5818


Epoch 5:   8%|▊         | 15/200 [05:46<1:58:29, 38.43s/batch, loss=0.579]




Epoch 5:  10%|▉         | 19/200 [05:58<33:12, 11.01s/batch, loss=0.471]  

Total iteration 824, validation loss = 0.5791


Epoch 5:  10%|█         | 20/200 [07:40<1:56:39, 38.89s/batch, loss=0.618]




Epoch 5:  12%|█▏        | 24/200 [07:52<33:01, 11.26s/batch, loss=0.587]  

Total iteration 829, validation loss = 0.5770


Epoch 5:  12%|█▎        | 25/200 [09:32<1:52:16, 38.49s/batch, loss=0.557]




Epoch 5:  14%|█▍        | 29/200 [09:43<31:16, 10.98s/batch, loss=0.615]  

Total iteration 834, validation loss = 0.5774


Epoch 5:  15%|█▌        | 30/200 [11:23<1:48:03, 38.14s/batch, loss=0.536]




Epoch 5:  17%|█▋        | 34/200 [11:35<30:22, 10.98s/batch, loss=0.798]  

Total iteration 839, validation loss = 0.5815


Epoch 5:  18%|█▊        | 35/200 [13:13<1:43:54, 37.78s/batch, loss=0.735]




Epoch 5:  20%|█▉        | 39/200 [13:26<29:21, 10.94s/batch, loss=0.659]  

Total iteration 844, validation loss = 0.5938


Epoch 5:  20%|██        | 40/200 [15:00<1:37:45, 36.66s/batch, loss=0.649]




Epoch 5:  22%|██▏       | 44/200 [15:12<27:48, 10.70s/batch, loss=0.591]  

Total iteration 849, validation loss = 0.6094


Epoch 5:  22%|██▎       | 45/200 [16:54<1:40:17, 38.82s/batch, loss=0.652]




Epoch 5:  24%|██▍       | 49/200 [17:05<28:05, 11.16s/batch, loss=0.58]   

Total iteration 854, validation loss = 0.6059


Epoch 5:  25%|██▌       | 50/200 [18:48<1:38:12, 39.28s/batch, loss=0.636]




Epoch 5:  27%|██▋       | 54/200 [19:00<27:28, 11.29s/batch, loss=0.615]  

Total iteration 859, validation loss = 0.5944


Epoch 5:  28%|██▊       | 55/200 [20:47<1:37:34, 40.37s/batch, loss=0.65]




Epoch 5:  30%|██▉       | 59/200 [20:59<27:12, 11.58s/batch, loss=0.751]  

Total iteration 864, validation loss = 0.5831


Epoch 5:  30%|███       | 60/200 [22:45<1:34:38, 40.56s/batch, loss=0.547]




Epoch 5:  32%|███▏      | 64/200 [22:57<26:34, 11.73s/batch, loss=0.605]  

Total iteration 869, validation loss = 0.5813


Epoch 5:  32%|███▎      | 65/200 [24:45<1:32:48, 41.25s/batch, loss=0.642]




Epoch 5:  34%|███▍      | 69/200 [24:57<25:37, 11.74s/batch, loss=0.725]  

Total iteration 874, validation loss = 0.5903


Epoch 5:  35%|███▌      | 70/200 [26:38<1:25:08, 39.29s/batch, loss=0.818]




Epoch 5:  37%|███▋      | 74/200 [26:51<23:51, 11.36s/batch, loss=0.671]  

Total iteration 879, validation loss = 0.6049


Epoch 5:  38%|███▊      | 75/200 [28:32<1:21:08, 38.95s/batch, loss=0.522]




Epoch 5:  40%|███▉      | 79/200 [28:44<22:31, 11.17s/batch, loss=0.565]  

Total iteration 884, validation loss = 0.5985


Epoch 5:  40%|████      | 80/200 [30:26<1:18:03, 39.03s/batch, loss=0.629]




Epoch 5:  42%|████▏     | 84/200 [30:37<21:46, 11.26s/batch, loss=0.595]  

Total iteration 889, validation loss = 0.5904


Epoch 5:  42%|████▎     | 85/200 [32:23<1:16:41, 40.02s/batch, loss=0.697]




Epoch 5:  44%|████▍     | 89/200 [32:35<21:15, 11.49s/batch, loss=0.581]  

Total iteration 894, validation loss = 0.5853


Epoch 5:  45%|████▌     | 90/200 [34:17<1:12:14, 39.41s/batch, loss=0.625]




Epoch 5:  47%|████▋     | 94/200 [34:29<20:03, 11.36s/batch, loss=0.612]  

Total iteration 899, validation loss = 0.5825


Epoch 5:  48%|████▊     | 95/200 [36:15<1:10:26, 40.25s/batch, loss=0.579]




Epoch 5:  50%|████▉     | 99/200 [36:27<19:27, 11.56s/batch, loss=0.6]    

Total iteration 904, validation loss = 0.5828


Epoch 5:  50%|█████     | 100/200 [38:10<1:06:07, 39.68s/batch, loss=0.665]




Epoch 5:  52%|█████▏    | 104/200 [38:23<18:20, 11.47s/batch, loss=0.595]  

Total iteration 909, validation loss = 0.5809


Epoch 5:  52%|█████▎    | 105/200 [39:57<58:22, 36.87s/batch, loss=0.571]




Epoch 5:  55%|█████▍    | 109/200 [40:08<16:15, 10.72s/batch, loss=0.755]

Total iteration 914, validation loss = 0.5841


Epoch 5:  55%|█████▌    | 110/200 [41:51<58:18, 38.88s/batch, loss=0.639]




Epoch 5:  57%|█████▋    | 114/200 [42:02<15:52, 11.07s/batch, loss=0.607]

Total iteration 919, validation loss = 0.5999


Epoch 5:  57%|█████▊    | 115/200 [43:42<54:29, 38.47s/batch, loss=0.65] 




Epoch 5:  60%|█████▉    | 119/200 [43:55<15:07, 11.20s/batch, loss=0.544]

Total iteration 924, validation loss = 0.6007


Epoch 5:  60%|██████    | 120/200 [45:34<50:54, 38.18s/batch, loss=0.714]




Epoch 5:  62%|██████▏   | 124/200 [45:46<14:06, 11.14s/batch, loss=0.494]

Total iteration 929, validation loss = 0.5924


Epoch 5:  62%|██████▎   | 125/200 [47:24<46:58, 37.58s/batch, loss=0.601]




Epoch 5:  64%|██████▍   | 129/200 [47:36<12:51, 10.87s/batch, loss=0.659]

Total iteration 934, validation loss = 0.5790


Epoch 5:  65%|██████▌   | 130/200 [49:16<44:52, 38.46s/batch, loss=0.695]




Epoch 5:  67%|██████▋   | 134/200 [49:29<12:21, 11.24s/batch, loss=0.519]

Total iteration 939, validation loss = 0.5787


Epoch 5:  68%|██████▊   | 135/200 [51:06<40:39, 37.53s/batch, loss=0.574]




Epoch 5:  70%|██████▉   | 139/200 [51:18<11:03, 10.88s/batch, loss=0.574]

Total iteration 944, validation loss = 0.5766


Epoch 5:  70%|███████   | 140/200 [52:58<38:18, 38.31s/batch, loss=0.507]




Epoch 5:  72%|███████▏  | 144/200 [53:11<10:29, 11.24s/batch, loss=0.627]

Total iteration 949, validation loss = 0.5760


Epoch 5:  72%|███████▎  | 145/200 [54:48<34:29, 37.62s/batch, loss=0.558]




Epoch 5:  74%|███████▍  | 149/200 [55:01<09:15, 10.88s/batch, loss=0.543]

Total iteration 954, validation loss = 0.5721


Epoch 5:  75%|███████▌  | 150/200 [56:44<32:52, 39.44s/batch, loss=0.545]




Epoch 5:  77%|███████▋  | 154/200 [56:56<08:39, 11.28s/batch, loss=0.493]

Total iteration 959, validation loss = 0.5731


Epoch 5:  78%|███████▊  | 155/200 [58:34<28:28, 37.96s/batch, loss=0.757]




Epoch 5:  80%|███████▉  | 159/200 [58:46<07:33, 11.06s/batch, loss=0.501]

Total iteration 964, validation loss = 0.5747


Epoch 5:  80%|████████  | 160/200 [1:00:28<25:50, 38.77s/batch, loss=0.63]




Epoch 5:  82%|████████▏ | 164/200 [1:00:40<06:46, 11.30s/batch, loss=0.619]

Total iteration 969, validation loss = 0.5793


Epoch 5:  82%|████████▎ | 165/200 [1:02:21<22:33, 38.68s/batch, loss=0.602]




Epoch 5:  84%|████████▍ | 169/200 [1:02:34<05:52, 11.37s/batch, loss=0.641]

Total iteration 974, validation loss = 0.5871


Epoch 5:  85%|████████▌ | 170/200 [1:04:16<19:38, 39.28s/batch, loss=0.619]




Epoch 5:  87%|████████▋ | 174/200 [1:04:29<04:57, 11.46s/batch, loss=0.665]

Total iteration 979, validation loss = 0.6058


Epoch 5:  88%|████████▊ | 175/200 [1:06:08<15:55, 38.21s/batch, loss=0.645]




Epoch 5:  90%|████████▉ | 179/200 [1:06:21<03:57, 11.31s/batch, loss=0.709]

Total iteration 984, validation loss = 0.6108


Epoch 5:  90%|█████████ | 180/200 [1:08:03<13:06, 39.33s/batch, loss=0.674]




Epoch 5:  92%|█████████▏| 184/200 [1:08:16<03:03, 11.47s/batch, loss=0.713]

Total iteration 989, validation loss = 0.6065


Epoch 5:  92%|█████████▎| 185/200 [1:10:03<10:12, 40.86s/batch, loss=0.594]




Epoch 5:  94%|█████████▍| 189/200 [1:10:16<02:09, 11.75s/batch, loss=0.562]

Total iteration 994, validation loss = 0.5894


Epoch 5:  95%|█████████▌| 190/200 [1:12:03<06:50, 41.07s/batch, loss=0.564]




Epoch 5:  97%|█████████▋| 194/200 [1:12:16<01:12, 12.04s/batch, loss=0.641]

Total iteration 999, validation loss = 0.5807


Epoch 5:  98%|█████████▊| 195/200 [1:13:59<03:19, 39.99s/batch, loss=0.603]




Epoch 5: 100%|█████████▉| 199/200 [1:14:05<00:10, 10.41s/batch, loss=0.574]

Total iteration 1004, validation loss = 0.5795


Epoch 5: 100%|██████████| 200/200 [1:15:46<00:00, 22.73s/batch, loss=0.596]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 6:   2%|▏         | 4/200 [00:20<12:36,  3.86s/batch, loss=0.723]

Total iteration 1010, validation loss = 0.5754


Epoch 6:   2%|▎         | 5/200 [01:59<2:06:05, 38.80s/batch, loss=0.636]




Epoch 6:   4%|▍         | 9/200 [02:12<32:39, 10.26s/batch, loss=0.67]   

Total iteration 1015, validation loss = 0.5748


Epoch 6:   5%|▌         | 10/200 [03:54<2:04:42, 39.38s/batch, loss=0.642]




Epoch 6:   7%|▋         | 14/200 [04:06<34:23, 11.10s/batch, loss=0.57]   

Total iteration 1020, validation loss = 0.5753


Epoch 6:   8%|▊         | 15/200 [05:48<2:00:11, 38.98s/batch, loss=0.471]




Epoch 6:  10%|▉         | 19/200 [06:00<33:49, 11.21s/batch, loss=0.585]  

Total iteration 1025, validation loss = 0.5773


Epoch 6:  10%|█         | 20/200 [07:46<2:00:53, 40.30s/batch, loss=0.56]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 6:  12%|█▏        | 24/200 [07:59<34:53, 11.89s/batch, loss=0.571]  

Total iteration 1030, validation loss = 0.5855


Epoch 6:  12%|█▎        | 25/200 [09:45<1:58:12, 40.53s/batch, loss=0.68]




Epoch 6:  14%|█▍        | 29/200 [09:56<33:04, 11.61s/batch, loss=0.667]  

Total iteration 1035, validation loss = 0.5930


Epoch 6:  15%|█▌        | 30/200 [11:42<1:54:51, 40.54s/batch, loss=0.643]




Epoch 6:  17%|█▋        | 34/200 [11:56<32:54, 11.90s/batch, loss=0.596]  

Total iteration 1040, validation loss = 0.5955


Epoch 6:  18%|█▊        | 35/200 [13:41<1:51:01, 40.37s/batch, loss=0.591]




Epoch 6:  20%|█▉        | 39/200 [13:53<30:54, 11.52s/batch, loss=0.766]  

Total iteration 1045, validation loss = 0.6008


Epoch 6:  20%|██        | 40/200 [15:31<1:42:05, 38.28s/batch, loss=0.623]




Epoch 6:  22%|██▏       | 44/200 [15:43<28:46, 11.07s/batch, loss=0.546]  

Total iteration 1050, validation loss = 0.5938


Epoch 6:  22%|██▎       | 45/200 [17:26<1:41:18, 39.22s/batch, loss=0.612]




Epoch 6:  24%|██▍       | 49/200 [17:39<28:42, 11.41s/batch, loss=0.67]   

Total iteration 1055, validation loss = 0.5894


Epoch 6:  25%|██▌       | 50/200 [19:27<1:42:34, 41.03s/batch, loss=0.546]




Epoch 6:  27%|██▋       | 54/200 [19:38<28:26, 11.69s/batch, loss=0.518]  

Total iteration 1060, validation loss = 0.5847


Epoch 6:  28%|██▊       | 55/200 [21:27<1:39:47, 41.30s/batch, loss=0.494]




Epoch 6:  30%|██▉       | 59/200 [21:39<27:39, 11.77s/batch, loss=0.608]  

Total iteration 1065, validation loss = 0.5792


Epoch 6:  30%|███       | 60/200 [23:26<1:35:35, 40.97s/batch, loss=0.614]




Epoch 6:  32%|███▏      | 64/200 [23:38<26:40, 11.77s/batch, loss=0.665]  

Total iteration 1070, validation loss = 0.5806


Epoch 6:  32%|███▎      | 65/200 [25:19<1:27:38, 38.95s/batch, loss=0.583]




Epoch 6:  34%|███▍      | 69/200 [25:30<24:20, 11.15s/batch, loss=0.606]  

Total iteration 1075, validation loss = 0.5790


Epoch 6:  35%|███▌      | 70/200 [27:15<1:26:19, 39.85s/batch, loss=0.69]




Epoch 6:  37%|███▋      | 74/200 [27:27<24:04, 11.46s/batch, loss=0.647]  

Total iteration 1080, validation loss = 0.5830


Epoch 6:  38%|███▊      | 75/200 [29:10<1:22:12, 39.46s/batch, loss=0.647]




Epoch 6:  40%|███▉      | 79/200 [29:22<23:04, 11.45s/batch, loss=0.547]  

Total iteration 1085, validation loss = 0.5923


Epoch 6:  40%|████      | 80/200 [31:05<1:19:08, 39.57s/batch, loss=0.597]




Epoch 6:  42%|████▏     | 84/200 [31:19<22:18, 11.54s/batch, loss=0.662]  

Total iteration 1090, validation loss = 0.5949


Epoch 6:  42%|████▎     | 85/200 [33:02<1:16:26, 39.88s/batch, loss=0.564]




Epoch 6:  44%|████▍     | 89/200 [33:15<21:42, 11.73s/batch, loss=0.621]  

Total iteration 1095, validation loss = 0.5909


Epoch 6:  45%|████▌     | 90/200 [34:55<1:10:48, 38.62s/batch, loss=0.62]




Epoch 6:  47%|████▋     | 94/200 [35:07<19:40, 11.14s/batch, loss=0.596] 

Total iteration 1100, validation loss = 0.5879


Epoch 6:  48%|████▊     | 95/200 [36:45<1:06:01, 37.73s/batch, loss=0.711]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)


Total iteration 1105, validation loss = 0.5863


Epoch 6:  50%|█████     | 100/200 [38:37<1:03:38, 38.18s/batch, loss=0.547]




Epoch 6:  52%|█████▏    | 104/200 [38:49<17:45, 11.10s/batch, loss=0.668]  

Total iteration 1110, validation loss = 0.5861


Epoch 6:  52%|█████▎    | 105/200 [40:31<1:01:46, 39.02s/batch, loss=0.595]




Epoch 6:  55%|█████▍    | 109/200 [40:43<17:03, 11.25s/batch, loss=0.728]  

Total iteration 1115, validation loss = 0.5901


Epoch 6:  55%|█████▌    | 110/200 [42:23<57:27, 38.30s/batch, loss=0.638]




Epoch 6:  57%|█████▋    | 114/200 [42:35<15:57, 11.13s/batch, loss=0.614]

Total iteration 1120, validation loss = 0.5964


Epoch 6:  57%|█████▊    | 115/200 [44:18<55:45, 39.36s/batch, loss=0.678]




Epoch 6:  60%|█████▉    | 119/200 [44:30<15:19, 11.35s/batch, loss=0.65] 

Total iteration 1125, validation loss = 0.5979


Epoch 6:  60%|██████    | 120/200 [46:07<50:00, 37.50s/batch, loss=0.555]




Epoch 6:  62%|██████▏   | 124/200 [46:20<14:11, 11.21s/batch, loss=0.585]

Total iteration 1130, validation loss = 0.5903


Epoch 6:  62%|██████▎   | 125/200 [47:59<47:42, 38.17s/batch, loss=0.621]




Epoch 6:  64%|██████▍   | 129/200 [48:11<13:10, 11.13s/batch, loss=0.581]

Total iteration 1135, validation loss = 0.5852


Epoch 6:  65%|██████▌   | 130/200 [49:51<44:33, 38.19s/batch, loss=0.655]




Epoch 6:  67%|██████▋   | 134/200 [50:03<12:04, 10.97s/batch, loss=0.569]

Total iteration 1140, validation loss = 0.5822


Epoch 6:  68%|██████▊   | 135/200 [51:44<42:01, 38.79s/batch, loss=0.643]




Epoch 6:  70%|██████▉   | 139/200 [51:57<11:26, 11.26s/batch, loss=0.57] 

Total iteration 1145, validation loss = 0.5799


Epoch 6:  70%|███████   | 140/200 [53:39<39:07, 39.13s/batch, loss=0.548]




Epoch 6:  72%|███████▏  | 144/200 [53:51<10:31, 11.28s/batch, loss=0.685]

Total iteration 1150, validation loss = 0.5816


Epoch 6:  72%|███████▎  | 145/200 [55:36<36:41, 40.03s/batch, loss=0.724]




Epoch 6:  74%|███████▍  | 149/200 [55:47<09:43, 11.44s/batch, loss=0.577]

Total iteration 1155, validation loss = 0.5912


Epoch 6:  75%|███████▌  | 150/200 [57:28<32:17, 38.76s/batch, loss=0.668]




Epoch 6:  77%|███████▋  | 154/200 [57:40<08:37, 11.25s/batch, loss=0.696]

Total iteration 1160, validation loss = 0.6066


Epoch 6:  78%|███████▊  | 155/200 [59:26<30:11, 40.26s/batch, loss=0.585]




Epoch 6:  80%|███████▉  | 159/200 [59:38<07:50, 11.47s/batch, loss=0.677]

Total iteration 1165, validation loss = 0.6147


Epoch 6:  80%|████████  | 160/200 [1:01:22<26:37, 39.93s/batch, loss=0.648]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)


Total iteration 1170, validation loss = 0.6003


Epoch 6:  82%|████████▎ | 165/200 [1:03:22<23:53, 40.96s/batch, loss=0.584]




Epoch 6:  84%|████████▍ | 169/200 [1:03:35<06:09, 11.93s/batch, loss=0.553]

Total iteration 1175, validation loss = 0.5838


Epoch 6:  85%|████████▌ | 170/200 [1:05:21<20:20, 40.69s/batch, loss=0.582]




Epoch 6:  87%|████████▋ | 174/200 [1:05:34<05:10, 11.94s/batch, loss=0.528]

Total iteration 1180, validation loss = 0.5718


Epoch 6:  88%|████████▊ | 175/200 [1:07:18<16:49, 40.36s/batch, loss=0.515]




Epoch 6:  90%|████████▉ | 179/200 [1:07:31<04:00, 11.47s/batch, loss=0.565]

Total iteration 1185, validation loss = 0.5700


Epoch 6:  90%|█████████ | 180/200 [1:09:16<13:28, 40.44s/batch, loss=1.02] 




Epoch 6:  92%|█████████▏| 184/200 [1:09:28<03:06, 11.67s/batch, loss=0.526]

Total iteration 1190, validation loss = 0.5763


Epoch 6:  92%|█████████▎| 185/200 [1:11:10<09:47, 39.17s/batch, loss=0.524]




Epoch 6:  94%|█████████▍| 189/200 [1:11:22<02:04, 11.31s/batch, loss=0.633]

Total iteration 1195, validation loss = 0.5795


Epoch 6:  95%|█████████▌| 190/200 [1:13:04<06:32, 39.23s/batch, loss=0.697]




Epoch 6:  97%|█████████▋| 194/200 [1:13:16<01:08, 11.42s/batch, loss=0.589]

Total iteration 1200, validation loss = 0.5836


Epoch 6:  98%|█████████▊| 195/200 [1:14:59<03:16, 39.39s/batch, loss=0.617]




Epoch 6: 100%|█████████▉| 199/200 [1:15:04<00:10, 10.21s/batch, loss=0.636]

Total iteration 1205, validation loss = 0.5881


Epoch 6: 100%|██████████| 200/200 [1:16:42<00:00, 23.01s/batch, loss=0.573]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 7:   2%|▏         | 4/200 [00:19<11:44,  3.59s/batch, loss=0.588]

Total iteration 1211, validation loss = 0.5881


Epoch 7:   2%|▎         | 5/200 [02:01<2:09:01, 39.70s/batch, loss=0.577]




Epoch 7:   4%|▍         | 9/200 [02:13<32:35, 10.24s/batch, loss=0.571]  

Total iteration 1216, validation loss = 0.5936


Epoch 7:   5%|▌         | 10/200 [03:53<2:02:32, 38.70s/batch, loss=0.534]




Epoch 7:   7%|▋         | 14/200 [04:06<34:19, 11.07s/batch, loss=0.638]  

Total iteration 1221, validation loss = 0.5855


Epoch 7:   8%|▊         | 15/200 [05:49<2:02:11, 39.63s/batch, loss=0.584]




Epoch 7:  10%|▉         | 19/200 [06:01<34:11, 11.33s/batch, loss=0.499]  

Total iteration 1226, validation loss = 0.5820


Epoch 7:  10%|█         | 20/200 [07:46<1:59:48, 39.94s/batch, loss=0.715]




Epoch 7:  12%|█▏        | 24/200 [07:59<34:08, 11.64s/batch, loss=0.593]  

Total iteration 1231, validation loss = 0.5867


Epoch 7:  12%|█▎        | 25/200 [09:39<1:53:11, 38.81s/batch, loss=0.767]




Epoch 7:  14%|█▍        | 29/200 [09:51<31:57, 11.21s/batch, loss=0.637]  

Total iteration 1236, validation loss = 0.5877


Epoch 7:  15%|█▌        | 30/200 [11:34<1:51:30, 39.36s/batch, loss=0.523]




Epoch 7:  17%|█▋        | 34/200 [11:47<31:48, 11.50s/batch, loss=0.581]  

Total iteration 1241, validation loss = 0.5813


Epoch 7:  18%|█▊        | 35/200 [13:23<1:43:15, 37.55s/batch, loss=0.599]




Epoch 7:  20%|█▉        | 39/200 [13:35<29:23, 10.95s/batch, loss=0.541]  

Total iteration 1246, validation loss = 0.5746


Epoch 7:  20%|██        | 40/200 [15:16<1:42:33, 38.46s/batch, loss=0.702]




Epoch 7:  22%|██▏       | 44/200 [15:28<29:00, 11.15s/batch, loss=0.647]  

Total iteration 1251, validation loss = 0.5744


Epoch 7:  22%|██▎       | 45/200 [17:12<1:42:08, 39.54s/batch, loss=0.731]




Epoch 7:  24%|██▍       | 49/200 [17:25<29:20, 11.66s/batch, loss=0.67]   

Total iteration 1256, validation loss = 0.5798


Epoch 7:  25%|██▌       | 50/200 [19:06<1:37:08, 38.86s/batch, loss=0.605]




Epoch 7:  27%|██▋       | 54/200 [19:17<27:16, 11.21s/batch, loss=0.58]   

Total iteration 1261, validation loss = 0.5877


Epoch 7:  28%|██▊       | 55/200 [21:02<1:35:57, 39.71s/batch, loss=0.578]




Epoch 7:  30%|██▉       | 59/200 [21:13<26:40, 11.35s/batch, loss=0.653]  

Total iteration 1266, validation loss = 0.5815


Epoch 7:  30%|███       | 60/200 [22:57<1:32:25, 39.61s/batch, loss=0.539]




Epoch 7:  32%|███▏      | 64/200 [23:09<25:42, 11.34s/batch, loss=0.619]  

Total iteration 1271, validation loss = 0.5725


Epoch 7:  32%|███▎      | 65/200 [24:46<1:25:15, 37.89s/batch, loss=0.718]




Epoch 7:  34%|███▍      | 69/200 [24:58<23:55, 10.96s/batch, loss=0.716]  

Total iteration 1276, validation loss = 0.5714


Epoch 7:  35%|███▌      | 70/200 [26:38<1:23:05, 38.35s/batch, loss=0.713]




Epoch 7:  37%|███▋      | 74/200 [26:51<23:18, 11.10s/batch, loss=0.66]   

Total iteration 1281, validation loss = 0.5832


Epoch 7:  38%|███▊      | 75/200 [28:26<1:16:46, 36.85s/batch, loss=0.525]




Epoch 7:  40%|███▉      | 79/200 [28:37<21:30, 10.67s/batch, loss=0.55]   

Total iteration 1286, validation loss = 0.5832


Epoch 7:  40%|████      | 80/200 [30:17<1:15:46, 37.89s/batch, loss=0.658]




Epoch 7:  42%|████▏     | 84/200 [30:29<21:20, 11.04s/batch, loss=0.567]  

Total iteration 1291, validation loss = 0.5839


Epoch 7:  42%|████▎     | 85/200 [32:12<1:15:31, 39.40s/batch, loss=0.607]




Epoch 7:  44%|████▍     | 89/200 [32:25<21:09, 11.44s/batch, loss=0.598]  

Total iteration 1296, validation loss = 0.5880


Epoch 7:  45%|████▌     | 90/200 [34:06<1:11:28, 38.99s/batch, loss=0.664]




Epoch 7:  47%|████▋     | 94/200 [34:19<20:21, 11.52s/batch, loss=0.751]  

Total iteration 1301, validation loss = 0.5861


Epoch 7:  48%|████▊     | 95/200 [36:01<1:08:28, 39.13s/batch, loss=0.597]




Epoch 7:  50%|████▉     | 99/200 [36:13<19:01, 11.31s/batch, loss=0.597]  

Total iteration 1306, validation loss = 0.5915


Epoch 7:  50%|█████     | 100/200 [37:58<1:06:39, 39.99s/batch, loss=0.66]




Epoch 7:  52%|█████▏    | 104/200 [38:10<18:30, 11.56s/batch, loss=0.71]   

Total iteration 1311, validation loss = 0.5947


Epoch 7:  52%|█████▎    | 105/200 [39:47<1:00:03, 37.93s/batch, loss=0.646]




Epoch 7:  55%|█████▍    | 109/200 [39:59<16:36, 10.95s/batch, loss=0.532]  

Total iteration 1316, validation loss = 0.5921


Epoch 7:  55%|█████▌    | 110/200 [41:35<55:46, 37.18s/batch, loss=0.695]




Epoch 7:  57%|█████▋    | 114/200 [41:47<15:34, 10.87s/batch, loss=0.574]

Total iteration 1321, validation loss = 0.5751


Epoch 7:  57%|█████▊    | 115/200 [43:26<53:22, 37.68s/batch, loss=0.591]




Epoch 7:  60%|█████▉    | 119/200 [43:39<14:57, 11.08s/batch, loss=0.713]

Total iteration 1326, validation loss = 0.5691


Epoch 7:  60%|██████    | 120/200 [45:23<52:47, 39.60s/batch, loss=0.52] 




Epoch 7:  62%|██████▏   | 124/200 [45:35<14:44, 11.63s/batch, loss=0.529]

Total iteration 1331, validation loss = 0.5675


Epoch 7:  62%|██████▎   | 125/200 [47:21<50:41, 40.55s/batch, loss=0.58] 




Epoch 7:  64%|██████▍   | 129/200 [47:33<13:45, 11.62s/batch, loss=0.727]

Total iteration 1336, validation loss = 0.5685


Epoch 7:  65%|██████▌   | 130/200 [49:20<47:22, 40.61s/batch, loss=0.532]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 7:  67%|██████▋   | 134/200 [49:33<13:04, 11.88s/batch, loss=0.5]  

Total iteration 1341, validation loss = 0.5698


Epoch 7:  68%|██████▊   | 135/200 [51:17<43:31, 40.18s/batch, loss=0.626]




Epoch 7:  70%|██████▉   | 139/200 [51:31<12:02, 11.84s/batch, loss=0.484]

Total iteration 1346, validation loss = 0.5702


Epoch 7:  70%|███████   | 140/200 [53:12<39:23, 39.39s/batch, loss=0.637]




Epoch 7:  72%|███████▏  | 144/200 [53:24<10:36, 11.37s/batch, loss=0.644]

Total iteration 1351, validation loss = 0.5793


Epoch 7:  72%|███████▎  | 145/200 [55:04<35:26, 38.66s/batch, loss=0.568]




Epoch 7:  74%|███████▍  | 149/200 [55:16<09:25, 11.09s/batch, loss=0.6]  

Total iteration 1356, validation loss = 0.5828


Epoch 7:  75%|███████▌  | 150/200 [57:01<33:10, 39.82s/batch, loss=0.598]




Epoch 7:  77%|███████▋  | 154/200 [57:13<08:46, 11.44s/batch, loss=0.686]

Total iteration 1361, validation loss = 0.5795


Epoch 7:  78%|███████▊  | 155/200 [58:56<29:38, 39.53s/batch, loss=0.636]




Epoch 7:  80%|███████▉  | 159/200 [59:09<07:59, 11.70s/batch, loss=0.569]

Total iteration 1366, validation loss = 0.5922


Epoch 7:  80%|████████  | 160/200 [1:00:55<26:56, 40.40s/batch, loss=0.569]




Epoch 7:  82%|████████▏ | 164/200 [1:01:07<07:00, 11.69s/batch, loss=0.573]

Total iteration 1371, validation loss = 0.5885


Epoch 7:  82%|████████▎ | 165/200 [1:02:54<23:55, 41.01s/batch, loss=0.582]




Epoch 7:  84%|████████▍ | 169/200 [1:03:07<06:03, 11.72s/batch, loss=0.726]

Total iteration 1376, validation loss = 0.5771


Epoch 7:  85%|████████▌ | 170/200 [1:04:58<21:07, 42.25s/batch, loss=0.611]




Epoch 7:  87%|████████▋ | 174/200 [1:05:10<05:14, 12.09s/batch, loss=0.679]

Total iteration 1381, validation loss = 0.5784


Epoch 7:  88%|████████▊ | 175/200 [1:06:56<16:57, 40.71s/batch, loss=0.701]




Epoch 7:  90%|████████▉ | 179/200 [1:07:09<04:10, 11.94s/batch, loss=0.543]

Total iteration 1386, validation loss = 0.5794


Epoch 7:  90%|█████████ | 180/200 [1:08:51<13:14, 39.70s/batch, loss=0.637]




Epoch 7:  92%|█████████▏| 184/200 [1:09:03<03:02, 11.39s/batch, loss=0.655]

Total iteration 1391, validation loss = 0.5792


Epoch 7:  92%|█████████▎| 185/200 [1:10:50<10:10, 40.71s/batch, loss=0.69] 




Epoch 7:  94%|█████████▍| 189/200 [1:11:03<02:09, 11.74s/batch, loss=0.565]

Total iteration 1396, validation loss = 0.5786


Epoch 7:  95%|█████████▌| 190/200 [1:12:44<06:32, 39.28s/batch, loss=0.5]  




Epoch 7:  97%|█████████▋| 194/200 [1:12:56<01:07, 11.30s/batch, loss=0.704]

Total iteration 1401, validation loss = 0.5856


Epoch 7:  98%|█████████▊| 195/200 [1:14:43<03:23, 40.77s/batch, loss=0.765]




Epoch 7: 100%|█████████▉| 199/200 [1:14:49<00:10, 10.57s/batch, loss=0.672]

Total iteration 1406, validation loss = 0.5933


Epoch 7: 100%|██████████| 200/200 [1:16:40<00:00, 23.00s/batch, loss=0.655]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 8:   2%|▏         | 4/200 [00:21<12:31,  3.83s/batch, loss=0.593]

Total iteration 1412, validation loss = 0.5892


Epoch 8:   2%|▎         | 5/200 [02:09<2:17:25, 42.29s/batch, loss=0.568]




Epoch 8:   4%|▍         | 9/200 [02:22<34:34, 10.86s/batch, loss=0.58]   

Total iteration 1417, validation loss = 0.5843


Epoch 8:   5%|▌         | 10/200 [04:12<2:13:06, 42.03s/batch, loss=0.697]




Epoch 8:   7%|▋         | 14/200 [04:24<36:41, 11.84s/batch, loss=0.541]  

Total iteration 1422, validation loss = 0.5750


Epoch 8:   8%|▊         | 15/200 [06:15<2:10:37, 42.36s/batch, loss=0.67]




Epoch 8:  10%|▉         | 19/200 [06:28<36:45, 12.18s/batch, loss=0.499]  

Total iteration 1427, validation loss = 0.5741


Epoch 8:  10%|█         | 20/200 [08:14<2:03:15, 41.09s/batch, loss=0.577]




Epoch 8:  12%|█▏        | 24/200 [08:27<34:34, 11.79s/batch, loss=0.553]  

Total iteration 1432, validation loss = 0.5751


Epoch 8:  12%|█▎        | 25/200 [10:11<1:56:47, 40.04s/batch, loss=0.638]




Epoch 8:  14%|█▍        | 29/200 [10:23<32:49, 11.52s/batch, loss=0.52]   

Total iteration 1437, validation loss = 0.5777


Epoch 8:  15%|█▌        | 30/200 [12:05<1:51:15, 39.27s/batch, loss=0.684]




Epoch 8:  17%|█▋        | 34/200 [12:18<31:59, 11.56s/batch, loss=0.67]   

Total iteration 1442, validation loss = 0.5848


Epoch 8:  18%|█▊        | 35/200 [14:00<1:48:29, 39.45s/batch, loss=0.598]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 8:  20%|█▉        | 39/200 [14:12<30:24, 11.33s/batch, loss=0.6]    

Total iteration 1447, validation loss = 0.5958


Epoch 8:  20%|██        | 40/200 [15:52<1:42:03, 38.27s/batch, loss=0.486]




Epoch 8:  22%|██▏       | 44/200 [16:04<29:12, 11.23s/batch, loss=0.492]  

Total iteration 1452, validation loss = 0.5937


Epoch 8:  22%|██▎       | 45/200 [17:52<1:45:32, 40.85s/batch, loss=0.496]




Epoch 8:  24%|██▍       | 49/200 [18:05<30:08, 11.98s/batch, loss=0.69]   

Total iteration 1457, validation loss = 0.5791


Epoch 8:  25%|██▌       | 50/200 [19:48<1:39:27, 39.79s/batch, loss=0.639]




Epoch 8:  27%|██▋       | 54/200 [20:00<27:54, 11.47s/batch, loss=0.653]  

Total iteration 1462, validation loss = 0.5768


Epoch 8:  28%|██▊       | 55/200 [21:42<1:34:38, 39.16s/batch, loss=0.619]




Epoch 8:  30%|██▉       | 59/200 [21:55<27:17, 11.61s/batch, loss=0.653]  

Total iteration 1467, validation loss = 0.5850


Epoch 8:  30%|███       | 60/200 [23:38<1:32:19, 39.57s/batch, loss=0.694]




Epoch 8:  32%|███▏      | 64/200 [23:51<26:21, 11.63s/batch, loss=0.586]  

Total iteration 1472, validation loss = 0.5917


Epoch 8:  32%|███▎      | 65/200 [25:34<1:29:05, 39.60s/batch, loss=0.625]




Epoch 8:  34%|███▍      | 69/200 [25:46<25:16, 11.58s/batch, loss=0.621]  

Total iteration 1477, validation loss = 0.6140


Epoch 8:  35%|███▌      | 70/200 [27:32<1:27:33, 40.41s/batch, loss=0.594]




Epoch 8:  37%|███▋      | 74/200 [27:44<24:23, 11.62s/batch, loss=0.661]  

Total iteration 1482, validation loss = 0.5958


Epoch 8:  38%|███▊      | 75/200 [29:25<1:20:46, 38.77s/batch, loss=0.647]




Epoch 8:  40%|███▉      | 79/200 [29:37<22:30, 11.16s/batch, loss=0.672]  

Total iteration 1487, validation loss = 0.5849


Epoch 8:  40%|████      | 80/200 [31:19<1:17:56, 38.97s/batch, loss=0.597]




Epoch 8:  42%|████▏     | 84/200 [31:31<21:48, 11.28s/batch, loss=0.647]  

Total iteration 1492, validation loss = 0.5837


Epoch 8:  42%|████▎     | 85/200 [33:16<1:17:02, 40.20s/batch, loss=0.659]




Epoch 8:  44%|████▍     | 89/200 [33:28<21:23, 11.56s/batch, loss=0.516]  

Total iteration 1497, validation loss = 0.5840


Epoch 8:  45%|████▌     | 90/200 [35:11<1:12:05, 39.32s/batch, loss=0.559]




Epoch 8:  47%|████▋     | 94/200 [35:23<20:02, 11.34s/batch, loss=0.65]   

Total iteration 1502, validation loss = 0.5849


Epoch 8:  48%|████▊     | 95/200 [37:08<1:10:05, 40.06s/batch, loss=0.573]




Epoch 8:  50%|████▉     | 99/200 [37:21<19:57, 11.85s/batch, loss=0.662]  

Total iteration 1507, validation loss = 0.5843


Epoch 8:  50%|█████     | 100/200 [39:08<1:08:01, 40.82s/batch, loss=0.471]




Epoch 8:  52%|█████▏    | 104/200 [39:21<19:03, 11.92s/batch, loss=0.579]  

Total iteration 1512, validation loss = 0.5726


Epoch 8:  52%|█████▎    | 105/200 [41:06<1:03:55, 40.37s/batch, loss=0.423]




Epoch 8:  55%|█████▍    | 109/200 [41:18<17:42, 11.68s/batch, loss=0.549]  

Total iteration 1517, validation loss = 0.5697


Epoch 8:  55%|█████▌    | 110/200 [43:07<1:02:16, 41.52s/batch, loss=0.579]




Epoch 8:  57%|█████▋    | 114/200 [43:19<17:05, 11.92s/batch, loss=0.688]  

Total iteration 1522, validation loss = 0.5683


Epoch 8:  57%|█████▊    | 115/200 [45:02<56:20, 39.78s/batch, loss=0.588]




Epoch 8:  60%|█████▉    | 119/200 [45:15<15:30, 11.49s/batch, loss=0.496]

Total iteration 1527, validation loss = 0.5672


Epoch 8:  60%|██████    | 120/200 [47:03<54:48, 41.11s/batch, loss=0.676]




Epoch 8:  62%|██████▏   | 124/200 [47:15<15:01, 11.87s/batch, loss=0.593]

Total iteration 1532, validation loss = 0.5767


Epoch 8:  62%|██████▎   | 125/200 [48:59<49:48, 39.85s/batch, loss=0.552]




Epoch 8:  64%|██████▍   | 129/200 [49:11<13:36, 11.51s/batch, loss=0.609]

Total iteration 1537, validation loss = 0.5887


Epoch 8:  65%|██████▌   | 130/200 [51:03<49:30, 42.43s/batch, loss=0.546]




Epoch 8:  67%|██████▋   | 134/200 [51:16<13:23, 12.18s/batch, loss=0.66] 

Total iteration 1542, validation loss = 0.5895


Epoch 8:  68%|██████▊   | 135/200 [53:07<45:54, 42.37s/batch, loss=0.674]




Epoch 8:  70%|██████▉   | 139/200 [53:20<12:21, 12.16s/batch, loss=0.669]

Total iteration 1547, validation loss = 0.5924


Epoch 8:  70%|███████   | 140/200 [55:09<42:03, 42.07s/batch, loss=0.622]




Epoch 8:  72%|███████▏  | 144/200 [55:23<11:23, 12.21s/batch, loss=0.613]

Total iteration 1552, validation loss = 0.5841


Epoch 8:  72%|███████▎  | 145/200 [57:07<37:06, 40.48s/batch, loss=0.587]




Epoch 8:  74%|███████▍  | 149/200 [57:19<09:58, 11.74s/batch, loss=0.81] 

Total iteration 1557, validation loss = 0.5870


Epoch 8:  75%|███████▌  | 150/200 [59:07<34:19, 41.19s/batch, loss=0.621]




Epoch 8:  77%|███████▋  | 154/200 [59:19<09:01, 11.76s/batch, loss=0.565]

Total iteration 1562, validation loss = 0.5860


Epoch 8:  78%|███████▊  | 155/200 [1:01:05<30:31, 40.71s/batch, loss=0.545]




Epoch 8:  80%|███████▉  | 159/200 [1:01:18<07:59, 11.70s/batch, loss=0.54] 

Total iteration 1567, validation loss = 0.5832


Epoch 8:  80%|████████  | 160/200 [1:03:06<27:29, 41.24s/batch, loss=0.622]




Epoch 8:  82%|████████▏ | 164/200 [1:03:19<07:12, 12.00s/batch, loss=0.612]

Total iteration 1572, validation loss = 0.5781


Epoch 8:  82%|████████▎ | 165/200 [1:05:05<23:51, 40.89s/batch, loss=0.631]




Epoch 8:  84%|████████▍ | 169/200 [1:05:18<06:09, 11.91s/batch, loss=0.677]

Total iteration 1577, validation loss = 0.5725


Epoch 8:  85%|████████▌ | 170/200 [1:07:02<19:59, 39.98s/batch, loss=0.523]




Epoch 8:  87%|████████▋ | 174/200 [1:07:14<04:59, 11.53s/batch, loss=0.612]

Total iteration 1582, validation loss = 0.5690


Epoch 8:  88%|████████▊ | 175/200 [1:08:53<15:56, 38.26s/batch, loss=0.642]




Epoch 8:  90%|████████▉ | 179/200 [1:09:04<03:51, 11.04s/batch, loss=0.646]

Total iteration 1587, validation loss = 0.5688


Epoch 8:  90%|█████████ | 180/200 [1:10:45<12:51, 38.58s/batch, loss=0.505]




Epoch 8:  92%|█████████▏| 184/200 [1:10:56<02:57, 11.11s/batch, loss=0.704]

Total iteration 1592, validation loss = 0.5722


Epoch 8:  92%|█████████▎| 185/200 [1:12:33<09:16, 37.09s/batch, loss=0.662]




Epoch 8:  94%|█████████▍| 189/200 [1:12:45<01:58, 10.81s/batch, loss=0.594]

Total iteration 1597, validation loss = 0.5783


Epoch 8:  95%|█████████▌| 190/200 [1:14:20<06:09, 36.93s/batch, loss=0.706]




Epoch 8:  97%|█████████▋| 194/200 [1:14:32<01:04, 10.78s/batch, loss=0.554]

Total iteration 1602, validation loss = 0.5760


Epoch 8:  98%|█████████▊| 195/200 [1:16:16<03:15, 39.03s/batch, loss=0.495]




Epoch 8: 100%|█████████▉| 199/200 [1:16:20<00:10, 10.10s/batch, loss=0.65] 

Total iteration 1607, validation loss = 0.5697


Epoch 8: 100%|██████████| 200/200 [1:18:02<00:00, 23.41s/batch, loss=0.693]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 9:   2%|▏         | 4/200 [00:19<12:00,  3.67s/batch, loss=0.604]

Total iteration 1613, validation loss = 0.5651


Epoch 9:   2%|▎         | 5/200 [02:05<2:13:37, 41.11s/batch, loss=0.523]




Epoch 9:   4%|▍         | 9/200 [02:18<34:29, 10.84s/batch, loss=0.599]  

Total iteration 1618, validation loss = 0.5635


Epoch 9:   5%|▌         | 10/200 [04:03<2:07:54, 40.39s/batch, loss=0.722]




Epoch 9:   7%|▋         | 14/200 [04:15<35:17, 11.38s/batch, loss=0.55]   

Total iteration 1623, validation loss = 0.5718


Epoch 9:   8%|▊         | 15/200 [05:59<2:03:06, 39.93s/batch, loss=0.699]




Epoch 9:  10%|▉         | 19/200 [06:10<34:19, 11.38s/batch, loss=0.667]  

Total iteration 1628, validation loss = 0.5803


Epoch 9:  10%|█         | 20/200 [07:54<1:59:00, 39.67s/batch, loss=0.575]




Epoch 9:  12%|█▏        | 24/200 [08:07<34:14, 11.68s/batch, loss=0.499]  

Total iteration 1633, validation loss = 0.5794


Epoch 9:  12%|█▎        | 25/200 [09:52<1:57:27, 40.27s/batch, loss=0.544]




Epoch 9:  14%|█▍        | 29/200 [10:03<32:41, 11.47s/batch, loss=0.621]  

Total iteration 1638, validation loss = 0.5745


Epoch 9:  15%|█▌        | 30/200 [11:51<1:55:45, 40.85s/batch, loss=0.558]




Epoch 9:  17%|█▋        | 34/200 [12:03<32:16, 11.67s/batch, loss=0.69]   

Total iteration 1643, validation loss = 0.5754


Epoch 9:  18%|█▊        | 35/200 [13:48<1:51:08, 40.42s/batch, loss=0.568]




Epoch 9:  20%|█▉        | 39/200 [14:00<31:01, 11.56s/batch, loss=0.489]  

Total iteration 1648, validation loss = 0.5755


Epoch 9:  20%|██        | 40/200 [15:44<1:46:36, 39.98s/batch, loss=0.527]




Epoch 9:  22%|██▏       | 44/200 [15:56<29:55, 11.51s/batch, loss=0.602]  

Total iteration 1653, validation loss = 0.5718


Epoch 9:  22%|██▎       | 45/200 [17:36<1:39:54, 38.68s/batch, loss=0.629]




Epoch 9:  24%|██▍       | 49/200 [17:48<28:08, 11.18s/batch, loss=0.594]  

Total iteration 1658, validation loss = 0.5704


Epoch 9:  25%|██▌       | 50/200 [19:31<1:38:00, 39.20s/batch, loss=0.568]




Epoch 9:  27%|██▋       | 54/200 [19:43<27:26, 11.27s/batch, loss=0.575]  

Total iteration 1663, validation loss = 0.5685


Epoch 9:  28%|██▊       | 55/200 [21:22<1:32:00, 38.07s/batch, loss=0.716]




Epoch 9:  30%|██▉       | 59/200 [21:34<26:09, 11.13s/batch, loss=0.636]  

Total iteration 1668, validation loss = 0.5754


Epoch 9:  30%|███       | 60/200 [23:13<1:29:03, 38.17s/batch, loss=0.528]




Epoch 9:  32%|███▏      | 64/200 [23:26<25:15, 11.15s/batch, loss=0.51]   

Total iteration 1673, validation loss = 0.5831


Epoch 9:  32%|███▎      | 65/200 [25:08<1:27:58, 39.10s/batch, loss=0.472]




Epoch 9:  34%|███▍      | 69/200 [25:20<24:39, 11.29s/batch, loss=0.584]  

Total iteration 1678, validation loss = 0.5796


Epoch 9:  35%|███▌      | 70/200 [27:01<1:23:46, 38.67s/batch, loss=0.7]




Epoch 9:  37%|███▋      | 74/200 [27:13<23:39, 11.27s/batch, loss=0.654]  

Total iteration 1683, validation loss = 0.5800


Epoch 9:  38%|███▊      | 75/200 [29:00<1:24:38, 40.62s/batch, loss=0.58]




Epoch 9:  40%|███▉      | 79/200 [29:14<23:59, 11.90s/batch, loss=0.455]  

Total iteration 1688, validation loss = 0.5763


Epoch 9:  40%|████      | 80/200 [30:54<1:18:27, 39.23s/batch, loss=0.684]




Epoch 9:  42%|████▏     | 84/200 [31:07<22:19, 11.55s/batch, loss=0.543]  

Total iteration 1693, validation loss = 0.5742


Epoch 9:  42%|████▎     | 85/200 [32:52<1:16:34, 39.95s/batch, loss=0.611]




Epoch 9:  44%|████▍     | 89/200 [33:05<21:55, 11.85s/batch, loss=0.759]  

Total iteration 1698, validation loss = 0.5807


Epoch 9:  45%|████▌     | 90/200 [34:48<1:12:56, 39.79s/batch, loss=0.596]




Epoch 9:  47%|████▋     | 94/200 [35:02<20:44, 11.74s/batch, loss=0.739]  

Total iteration 1703, validation loss = 0.5959


Epoch 9:  48%|████▊     | 95/200 [36:45<1:09:27, 39.69s/batch, loss=0.565]




Epoch 9:  50%|████▉     | 99/200 [36:57<19:12, 11.41s/batch, loss=0.607]  

Total iteration 1708, validation loss = 0.5898


Epoch 9:  50%|█████     | 100/200 [38:42<1:06:44, 40.05s/batch, loss=0.641]




Epoch 9:  52%|█████▏    | 104/200 [38:53<18:19, 11.46s/batch, loss=0.642]  

Total iteration 1713, validation loss = 0.5766


Epoch 9:  52%|█████▎    | 105/200 [40:41<1:04:36, 40.80s/batch, loss=0.628]




Epoch 9:  55%|█████▍    | 109/200 [40:52<17:39, 11.65s/batch, loss=0.561]  

Total iteration 1718, validation loss = 0.5679


Epoch 9:  55%|█████▌    | 110/200 [42:41<1:01:43, 41.15s/batch, loss=0.473]




Epoch 9:  57%|█████▋    | 114/200 [42:54<17:10, 11.99s/batch, loss=0.601]  

Total iteration 1723, validation loss = 0.5688


Epoch 9:  57%|█████▊    | 115/200 [44:41<58:30, 41.30s/batch, loss=0.643]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)


Total iteration 1728, validation loss = 0.5765


Epoch 9:  60%|██████    | 120/200 [46:42<55:14, 41.44s/batch, loss=0.509]




Epoch 9:  62%|██████▏   | 124/200 [46:54<15:03, 11.89s/batch, loss=0.484]

Total iteration 1733, validation loss = 0.5778


Epoch 9:  62%|██████▎   | 125/200 [48:44<52:22, 41.90s/batch, loss=0.533]




Epoch 9:  64%|██████▍   | 129/200 [48:57<14:11, 11.99s/batch, loss=0.592]

Total iteration 1738, validation loss = 0.5831


Epoch 9:  65%|██████▌   | 130/200 [50:45<48:28, 41.54s/batch, loss=0.747]




Epoch 9:  67%|██████▋   | 134/200 [50:58<13:08, 11.94s/batch, loss=0.604]

Total iteration 1743, validation loss = 0.5829


Epoch 9:  68%|██████▊   | 135/200 [52:42<43:46, 40.41s/batch, loss=0.485]




Epoch 9:  70%|██████▉   | 139/200 [52:54<11:45, 11.57s/batch, loss=0.673]

Total iteration 1748, validation loss = 0.5721


Epoch 9:  70%|███████   | 140/200 [54:39<40:03, 40.06s/batch, loss=0.597]




Epoch 9:  72%|███████▏  | 144/200 [54:50<10:41, 11.45s/batch, loss=0.623]

Total iteration 1753, validation loss = 0.5689


Epoch 9:  72%|███████▎  | 145/200 [56:28<34:45, 37.92s/batch, loss=0.766]




Epoch 9:  74%|███████▍  | 149/200 [56:41<09:39, 11.37s/batch, loss=0.623]

Total iteration 1758, validation loss = 0.5722


Epoch 9:  75%|███████▌  | 150/200 [58:25<32:52, 39.45s/batch, loss=0.584]




Epoch 9:  77%|███████▋  | 154/200 [58:36<08:41, 11.33s/batch, loss=0.688]

Total iteration 1763, validation loss = 0.5817


Epoch 9:  78%|███████▊  | 155/200 [1:00:19<29:26, 39.26s/batch, loss=0.632]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 9:  80%|███████▉  | 159/200 [1:00:32<07:55, 11.61s/batch, loss=0.619]

Total iteration 1768, validation loss = 0.5846


Epoch 9:  80%|████████  | 160/200 [1:02:14<26:14, 39.37s/batch, loss=0.613]




Epoch 9:  82%|████████▏ | 164/200 [1:02:26<06:47, 11.31s/batch, loss=0.836]

Total iteration 1773, validation loss = 0.5811


Epoch 9:  82%|████████▎ | 165/200 [1:04:10<23:13, 39.82s/batch, loss=0.528]




Epoch 9:  84%|████████▍ | 169/200 [1:04:23<05:59, 11.58s/batch, loss=0.612]

Total iteration 1778, validation loss = 0.5772


Epoch 9:  85%|████████▌ | 170/200 [1:06:04<19:29, 38.99s/batch, loss=0.629]




Epoch 9:  87%|████████▋ | 174/200 [1:06:16<04:55, 11.36s/batch, loss=0.687]

Total iteration 1783, validation loss = 0.5713


Epoch 9:  88%|████████▊ | 175/200 [1:07:54<15:47, 37.88s/batch, loss=0.622]




Epoch 9:  90%|████████▉ | 179/200 [1:08:05<03:48, 10.90s/batch, loss=0.596]

Total iteration 1788, validation loss = 0.5733


Epoch 9:  90%|█████████ | 180/200 [1:09:44<12:36, 37.83s/batch, loss=0.565]




Epoch 9:  92%|█████████▏| 184/200 [1:09:56<02:54, 10.88s/batch, loss=0.694]

Total iteration 1793, validation loss = 0.5953


Epoch 9:  92%|█████████▎| 185/200 [1:11:39<09:47, 39.15s/batch, loss=0.659]




Epoch 9:  94%|█████████▍| 189/200 [1:11:50<02:04, 11.30s/batch, loss=0.623]

Total iteration 1798, validation loss = 0.5867


Epoch 9:  95%|█████████▌| 190/200 [1:13:35<06:38, 39.89s/batch, loss=0.633]




Epoch 9:  97%|█████████▋| 194/200 [1:13:47<01:08, 11.43s/batch, loss=0.539]

Total iteration 1803, validation loss = 0.5843


Epoch 9:  98%|█████████▊| 195/200 [1:15:31<03:18, 39.76s/batch, loss=0.726]




Epoch 9: 100%|█████████▉| 199/200 [1:15:36<00:10, 10.31s/batch, loss=0.597]

Total iteration 1808, validation loss = 0.5746


Epoch 9: 100%|██████████| 200/200 [1:17:13<00:00, 23.17s/batch, loss=0.562]
  0%|          | 0/200 [00:00<?, ?batch/s]




Epoch 10:   2%|▏         | 4/200 [00:20<12:22,  3.79s/batch, loss=0.528]

Total iteration 1814, validation loss = 0.5677


Epoch 10:   2%|▎         | 5/200 [02:06<2:14:39, 41.43s/batch, loss=0.581]




Epoch 10:   4%|▍         | 9/200 [02:19<33:58, 10.67s/batch, loss=0.629]  

Total iteration 1819, validation loss = 0.5679


Epoch 10:   5%|▌         | 10/200 [04:01<2:05:42, 39.70s/batch, loss=0.618]




Epoch 10:   7%|▋         | 14/200 [04:13<34:28, 11.12s/batch, loss=0.582]  

Total iteration 1824, validation loss = 0.5780


Epoch 10:   8%|▊         | 15/200 [05:56<2:01:17, 39.34s/batch, loss=0.519]




Epoch 10:  10%|▉         | 19/200 [06:09<34:42, 11.50s/batch, loss=0.599]  

Total iteration 1829, validation loss = 0.5743


Epoch 10:  10%|█         | 20/200 [07:53<2:00:26, 40.15s/batch, loss=0.642]




Epoch 10:  12%|█▏        | 24/200 [08:06<34:15, 11.68s/batch, loss=0.664]  

Total iteration 1834, validation loss = 0.5727


Epoch 10:  12%|█▎        | 25/200 [09:51<1:57:03, 40.13s/batch, loss=0.528]




Epoch 10:  14%|█▍        | 29/200 [10:02<32:47, 11.50s/batch, loss=0.584]  

Total iteration 1839, validation loss = 0.5779


Epoch 10:  15%|█▌        | 30/200 [11:46<1:52:37, 39.75s/batch, loss=0.547]




Epoch 10:  17%|█▋        | 34/200 [11:58<31:23, 11.35s/batch, loss=0.592]  

Total iteration 1844, validation loss = 0.5851


Epoch 10:  18%|█▊        | 35/200 [13:44<1:51:09, 40.42s/batch, loss=0.496]




Epoch 10:  20%|█▉        | 39/200 [13:56<30:54, 11.52s/batch, loss=0.523]  

Total iteration 1849, validation loss = 0.5882


Epoch 10:  20%|██        | 40/200 [15:38<1:44:27, 39.17s/batch, loss=0.604]




Epoch 10:  22%|██▏       | 44/200 [15:49<29:32, 11.36s/batch, loss=0.559]  

Total iteration 1854, validation loss = 0.5829


Epoch 10:  22%|██▎       | 45/200 [17:31<1:40:26, 38.88s/batch, loss=0.564]




Epoch 10:  24%|██▍       | 49/200 [17:42<28:15, 11.23s/batch, loss=0.588]  

Total iteration 1859, validation loss = 0.5725


Epoch 10:  25%|██▌       | 50/200 [19:21<1:34:48, 37.92s/batch, loss=0.566]




Epoch 10:  27%|██▋       | 54/200 [19:33<26:55, 11.06s/batch, loss=0.505]  

Total iteration 1864, validation loss = 0.5715


Epoch 10:  28%|██▊       | 55/200 [21:18<1:35:55, 39.69s/batch, loss=0.643]




Epoch 10:  30%|██▉       | 59/200 [21:30<26:41, 11.36s/batch, loss=0.56]   

Total iteration 1869, validation loss = 0.5689


Epoch 10:  30%|███       | 60/200 [23:11<1:31:18, 39.13s/batch, loss=0.534]




Epoch 10:  32%|███▏      | 64/200 [23:24<25:39, 11.32s/batch, loss=0.691]  

Total iteration 1874, validation loss = 0.5662


Epoch 10:  32%|███▎      | 65/200 [25:06<1:28:11, 39.20s/batch, loss=0.696]




Epoch 10:  34%|███▍      | 69/200 [25:18<24:43, 11.33s/batch, loss=0.688]  

Total iteration 1879, validation loss = 0.5695


Epoch 10:  35%|███▌      | 70/200 [26:56<1:21:59, 37.84s/batch, loss=0.565]




Epoch 10:  37%|███▋      | 74/200 [27:08<23:19, 11.10s/batch, loss=0.528]  

Total iteration 1884, validation loss = 0.5751


Epoch 10:  38%|███▊      | 75/200 [28:50<1:20:52, 38.82s/batch, loss=0.561]




Epoch 10:  40%|███▉      | 79/200 [29:03<22:56, 11.38s/batch, loss=0.583]  

Total iteration 1889, validation loss = 0.5738


Epoch 10:  40%|████      | 80/200 [30:44<1:17:51, 38.93s/batch, loss=0.646]




Epoch 10:  42%|████▏     | 84/200 [30:56<21:40, 11.21s/batch, loss=0.573]  

Total iteration 1894, validation loss = 0.5687


Epoch 10:  42%|████▎     | 85/200 [32:42<1:16:47, 40.06s/batch, loss=0.627]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 10:  44%|████▍     | 89/200 [32:54<21:20, 11.54s/batch, loss=0.528]

Total iteration 1899, validation loss = 0.5642


Epoch 10:  45%|████▌     | 90/200 [34:36<1:12:05, 39.32s/batch, loss=0.618]




Epoch 10:  47%|████▋     | 94/200 [34:48<20:01, 11.33s/batch, loss=0.81]   

Total iteration 1904, validation loss = 0.5655


Epoch 10:  48%|████▊     | 95/200 [36:30<1:08:20, 39.05s/batch, loss=0.671]




  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
  out=out, **kwargs)
  ret, rcount, out=ret, casting='unsafe', subok=False)
Epoch 10:  50%|████▉     | 99/200 [36:42<18:55, 11.24s/batch, loss=0.489]

Total iteration 1909, validation loss = 0.5760


Epoch 10:  50%|█████     | 100/200 [38:26<1:06:29, 39.89s/batch, loss=0.67]




Epoch 10:  52%|█████▏    | 104/200 [38:38<18:18, 11.44s/batch, loss=0.567]  

Total iteration 1914, validation loss = 0.5829


Epoch 10:  52%|█████▎    | 105/200 [40:23<1:03:13, 39.93s/batch, loss=0.564]




Epoch 10:  55%|█████▍    | 109/200 [40:35<17:35, 11.60s/batch, loss=0.622]  

Total iteration 1919, validation loss = 0.5806


Epoch 10:  55%|█████▌    | 110/200 [42:18<59:15, 39.51s/batch, loss=0.662]




Epoch 10:  57%|█████▋    | 114/200 [42:30<16:15, 11.34s/batch, loss=0.597]

Total iteration 1924, validation loss = 0.5697


Epoch 10:  57%|█████▊    | 115/200 [44:11<55:14, 39.00s/batch, loss=0.704]




Epoch 10:  60%|█████▉    | 119/200 [44:25<15:23, 11.40s/batch, loss=0.574]

Total iteration 1929, validation loss = 0.5649


Epoch 10:  60%|██████    | 120/200 [45:59<49:28, 37.10s/batch, loss=0.602]




Epoch 10:  62%|██████▏   | 124/200 [46:10<13:34, 10.71s/batch, loss=0.577]

Total iteration 1934, validation loss = 0.5642


Epoch 10:  62%|██████▎   | 125/200 [47:47<46:17, 37.04s/batch, loss=0.457]




Epoch 10:  64%|██████▍   | 129/200 [47:59<12:53, 10.89s/batch, loss=0.611]

Total iteration 1939, validation loss = 0.5673


Epoch 10:  65%|██████▌   | 130/200 [49:42<45:34, 39.07s/batch, loss=0.681]




Epoch 10:  67%|██████▋   | 134/200 [49:55<12:26, 11.30s/batch, loss=0.739]

Total iteration 1944, validation loss = 0.5678


Epoch 10:  68%|██████▊   | 135/200 [51:38<42:46, 39.48s/batch, loss=0.499]




Epoch 10:  70%|██████▉   | 139/200 [51:51<11:43, 11.53s/batch, loss=0.509]

Total iteration 1949, validation loss = 0.5687


Epoch 10:  70%|███████   | 140/200 [53:30<38:28, 38.48s/batch, loss=0.562]




Epoch 10:  72%|███████▏  | 144/200 [53:42<10:17, 11.04s/batch, loss=0.647]

Total iteration 1954, validation loss = 0.5786


Epoch 10:  72%|███████▎  | 145/200 [55:18<34:09, 37.27s/batch, loss=0.501]




Epoch 10:  74%|███████▍  | 149/200 [55:31<09:17, 10.92s/batch, loss=0.652]

Total iteration 1959, validation loss = 0.5801


Epoch 10:  75%|███████▌  | 150/200 [57:15<32:55, 39.51s/batch, loss=0.715]




Epoch 10:  77%|███████▋  | 154/200 [57:27<08:44, 11.41s/batch, loss=0.645]

Total iteration 1964, validation loss = 0.5799


Epoch 10:  78%|███████▊  | 155/200 [59:08<29:03, 38.74s/batch, loss=0.571]




Epoch 10:  80%|███████▉  | 159/200 [59:20<07:39, 11.22s/batch, loss=0.655]

Total iteration 1969, validation loss = 0.5722


Epoch 10:  80%|████████  | 160/200 [1:01:03<26:17, 39.43s/batch, loss=0.578]




Epoch 10:  82%|████████▏ | 164/200 [1:01:16<06:54, 11.50s/batch, loss=0.685]

Total iteration 1974, validation loss = 0.5692


Epoch 10:  82%|████████▎ | 165/200 [1:02:56<22:36, 38.75s/batch, loss=0.639]




Epoch 10:  84%|████████▍ | 169/200 [1:03:09<05:52, 11.38s/batch, loss=0.539]

Total iteration 1979, validation loss = 0.5676


Epoch 10:  85%|████████▌ | 170/200 [1:04:44<18:32, 37.09s/batch, loss=0.644]




Epoch 10:  87%|████████▋ | 174/200 [1:04:56<04:41, 10.83s/batch, loss=0.593]

Total iteration 1984, validation loss = 0.5658


Epoch 10:  88%|████████▊ | 175/200 [1:06:42<16:37, 39.88s/batch, loss=0.558]




Epoch 10:  90%|████████▉ | 179/200 [1:06:55<04:06, 11.72s/batch, loss=0.583]

Total iteration 1989, validation loss = 0.5655


Epoch 10:  90%|█████████ | 180/200 [1:08:38<13:11, 39.56s/batch, loss=0.519]




Epoch 10:  92%|█████████▏| 184/200 [1:08:49<03:01, 11.34s/batch, loss=0.501]

Total iteration 1994, validation loss = 0.5631


Epoch 10:  92%|█████████▎| 185/200 [1:10:35<10:02, 40.15s/batch, loss=0.525]




Epoch 10:  94%|█████████▍| 189/200 [1:10:47<02:07, 11.58s/batch, loss=0.649]

Total iteration 1999, validation loss = 0.5630


Epoch 10:  95%|█████████▌| 190/200 [1:12:34<06:47, 40.78s/batch, loss=0.658]




Epoch 10:  97%|█████████▋| 194/200 [1:12:46<01:10, 11.73s/batch, loss=0.586]

Total iteration 2004, validation loss = 0.5727


Epoch 10:  98%|█████████▊| 195/200 [1:14:34<03:25, 41.04s/batch, loss=0.627]




Epoch 10: 100%|█████████▉| 199/200 [1:14:38<00:10, 10.60s/batch, loss=0.746]

Total iteration 2009, validation loss = 0.5746


Epoch 10: 100%|██████████| 200/200 [1:16:19<00:00, 22.90s/batch, loss=0.508]







In [19]:
# Persist the final weights of `model3` (state_dict only, not the whole module).
# The same file is read back by the 'experiment_res' checkpoint-loading cell below.
final_ckpt_path = os.path.join('..', 'runs', 'experiment_res', 'experimentres_final_model.pt')
# torch.save does not create missing parent directories, so make sure the run
# directory exists before writing; otherwise the save fails after a long training run.
os.makedirs(os.path.dirname(final_ckpt_path), exist_ok=True)
torch.save(model3.state_dict(), final_ckpt_path)

### Test models on test set:

In [7]:
# Container for the restored networks; the checkpoint cells below append to it
# in the order baseline -> experiment_att -> experiment_res.
models = list()

In [8]:
# Restore the baseline 3D-CNN from its final checkpoint and queue it for evaluation.
ckpt_dir = os.path.join('..', 'runs', 'baseline')
ckpt_path = os.path.join(ckpt_dir, 'baseline_final_model.pt') # COLAB
# map_location='cpu' lets a checkpoint written from a CUDA run be loaded on a
# CPU-only machine and avoids holding a second copy of the weights on the GPU;
# load_state_dict below copies the tensors into the freshly built model.
ckpt = torch.load(ckpt_path, map_location='cpu')

ckpt_model = baseline_3DCNN(in_num_ch=1)
ckpt_model.load_state_dict(ckpt)

# NOTE: this cell is not idempotent -- re-running it appends the model again.
# Re-run the `models = []` cell first if the checkpoints need to be reloaded.
models.append(ckpt_model)

In [9]:
# Restore the self-attention 3D-CNN from its final checkpoint and queue it for evaluation.
ckpt_dir = os.path.join('..', 'runs', 'experiment_att')
ckpt_path = os.path.join(ckpt_dir, 'experiment_final_model.pt')
# map_location='cpu' lets a checkpoint written from a CUDA run be loaded on a
# CPU-only machine and avoids holding a second copy of the weights on the GPU;
# load_state_dict below copies the tensors into the freshly built model.
ckpt = torch.load(ckpt_path, map_location='cpu')

ckpt_model = selfattn_3DCNN(in_num_ch=1)
ckpt_model.load_state_dict(ckpt)

# NOTE: this cell is not idempotent -- re-running it appends the model again.
# Re-run the `models = []` cell first if the checkpoints need to be reloaded.
models.append(ckpt_model)

In [10]:
# Restore the residual-attention 3D-CNN from its final checkpoint and queue it for evaluation.
ckpt_dir = os.path.join('..', 'runs', 'experiment_res')
ckpt_path = os.path.join(ckpt_dir, 'experimentres_final_model.pt')
# map_location='cpu' lets a checkpoint written from a CUDA run be loaded on a
# CPU-only machine and avoids holding a second copy of the weights on the GPU;
# load_state_dict below copies the tensors into the freshly built model.
ckpt = torch.load(ckpt_path, map_location='cpu')

ckpt_model = resattn_3DCNN(in_num_ch=1)
ckpt_model.load_state_dict(ckpt)

# NOTE: this cell is not idempotent -- re-running it appends the model again.
# Re-run the `models = []` cell first if the checkpoints need to be reloaded.
models.append(ckpt_model)


In [15]:
# Loss object handed to test_model for every evaluated network.
criterion = torch.nn.BCEWithLogitsLoss()

# Run names, listed in the same order the checkpoints were appended to `models`;
# each name is also used to build that model's '<name>_test' log directory.
model_names = [
    'baseline',
    'experiment_att',
    'experiment_res',
]

# Force a garbage-collection pass before evaluation; kept as the last expression
# so the cell still displays the number of unreachable objects found.
gc.collect()

88

In [20]:
# Evaluate every restored model on `loader_test`, logging each run to its own
# '<name>_test' directory and collecting the returned losses in `test_losses`
# (same order as `model_names`).

# zip() silently truncates/mispairs when the lists differ in length, which happens
# if a checkpoint cell was re-run and appended a duplicate model. Fail loudly instead.
assert len(model_names) == len(models), (
    "model_names and models are out of sync: "
    f"{len(model_names)} names vs {len(models)} models"
)

test_losses = []
for modname, mod in zip(model_names, models):
    print("Testing start for model:", modname)

    log_dir = os.path.join('..', 'runs', modname+'_test')
    writer = SummaryWriter(log_dir)
    mod.eval()  # switch layers such as dropout/batch-norm to inference behaviour

    try:
        test_loss = test_model(loader_test, mod, criterion, writer, device)
    finally:
        # Flush pending events and release the log file handle even if testing fails.
        writer.close()
    test_losses.append(test_loss)

  0%|          | 0/140 [00:00<?, ?batch/s]

Testing start for model: baseline


100%|██████████| 140/140 [05:23<00:00,  2.31s/batch]
  0%|          | 0/140 [00:00<?, ?batch/s]

Test loss = 0.5364
Testing start for model: experiment_att


100%|██████████| 140/140 [05:27<00:00,  2.34s/batch]
  0%|          | 0/140 [00:00<?, ?batch/s]

Test loss = 0.5188
Testing start for model: experiment_res


100%|██████████| 140/140 [05:18<00:00,  2.27s/batch]

Test loss = 0.5729



