In [1]:
import numpy as np 
import pandas as pd 
# import necessary libraries for training Electra 
from transformers import * 
import torch 
import torch.nn as nn
import torch.nn.functional as F 
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler 
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
import random 
import time 
import datetime 
from tqdm import tqdm 
import gc 
import matplotlib.pyplot as plt 
import seaborn as sns 
# import necessary libraries for training LSTM 
import tensorflow as tf 
from tensorflow.keras.layers import TimeDistributed, Conv2D, Conv2DTranspose, MaxPooling2D, AveragePooling2D, BatchNormalization, concatenate, Input, ConvLSTM2D, Reshape, Conv3D, Flatten, LSTM, GRU, Dense,Dropout, Add
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Bidirectional, Conv1D, MaxPooling1D, GlobalMaxPooling1D, GlobalMaxPool1D
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences 
from tensorflow.keras.preprocessing.text import Tokenizer 
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam
import re 

## Load Dataset

In [74]:
# import dataframes from the 2018 research 
# train1 contains inconsistent news 
# train2 contains irrelevant news  
# we are not using mismatch news yet.  
train1 = pd.read_csv('mission1_train.csv')
train2 = pd.read_csv('mission2_train.csv')

train = pd.concat([train1,train2], axis = 0)
train = train.dropna() 
train = train.drop(columns=['seqid'])

train['title_length'] = train['title'].apply(lambda x : len(x)) 
train = train[train['title_length'] <= 120] 
train = train[train['title_length'] >= 5] 

print(train.shape)

(98093, 4)


In [75]:
test = pd.read_csv('mindslab_test.csv') 

In [78]:
test.shape

(200, 3)

## Segment data using sliding window method

In [79]:
def clean_text(s): 
    FILTERS = "([~.,!?\"':;(])"
    CHANGE_FILTER = re.compile(FILTERS)
    return re.sub(CHANGE_FILTER, " ", s) 

In [38]:
def split_text(s, overlap = 20, chunk_size = 50): 
    total = [] 
    partial = [] 
    if len(s.split()) // (chunk_size - overlap) > 0:  
        n = len(s.split()) // (chunk_size - overlap) 
    else: 
        n = 1 
    for w in range(n): 
        if w == 0: 
            partial = s.split()[:chunk_size] 
            total.append(" ".join(partial)) 
        else:  
            partial = s.split()[w*(chunk_size - overlap):w*(chunk_size - overlap) + chunk_size]
            total.append(" ".join(partial)) 
    return total 

In [39]:
contents = train['content'].values
titles = train['title'].values 
labels = train['Label'].values 

In [40]:
data = {'title':[], 'content':[], 'Label':[]} 
for i in range(len(contents)): 
    splitted = split_text(clean_text(contents[i]))
    for text in splitted: 
        data['title'].append(titles[i]) 
        data['content'].append(text) 
        data['Label'].append(labels[i]) 

In [41]:
splitted_train = pd.DataFrame(data) 

In [60]:
splitted_train.head()

Unnamed: 0,title,content,Label
0,"박상기 법무장관 후보자, 자동차세·과태료 미납으로 15차례 차량 압류",박상기 법무부장관 후보자가 교통법규 위반으로 부과된 과태료를 7차례 체납하고 자동차...,0.0
1,"박상기 법무장관 후보자, 자동차세·과태료 미납으로 15차례 차량 압류",해서 후보자 차량 압류가 오랜 기간 이뤄졌다’고 지적했다 주 의원에 따르면 박 후보...,0.0
2,"박상기 법무장관 후보자, 자동차세·과태료 미납으로 15차례 차량 압류",2015년 6월 8일 해당 과태료를 납부했다 2011년 4월 22일 부과된 ‘속도위...,0.0
3,"박상기 법무장관 후보자, 자동차세·과태료 미납으로 15차례 차량 압류",또 자동차세와 과태료 미납으로 차량 압류를 15차례 당했던 것으로 확인됐다 2008...,0.0
4,"박상기 법무장관 후보자, 자동차세·과태료 미납으로 15차례 차량 압류",압류 기록은 작년 10월 6일에 있었다 박 후보자는 이에 대해 청문회에서 “저는 대...,0.0


## Tokenize Data

In [48]:
tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-v3-discriminator")

In [61]:
def electra_tokenizer_simple(sent1, sent2, MAX_LEN):  
    encoded_dict = tokenizer.encode_plus(
        text = sent1, 
        text_pair = sent2,  
        add_special_tokens = True, # add [CLS] and [SEP]
        pad_to_max_length = False, 
        return_attention_mask = True # constructing attention_masks 
    )  
    
    input_id = encoded_dict['input_ids'] 
    attention_mask = encoded_dict['attention_mask'] # differentiate padding from non padding 
    token_type_id = encoded_dict['token_type_ids'] # differentiate two sentences    
    
    if len(input_id) > 512: 
        input_id = input_id[:129] + input_id[-383:] 
        attention_mask = attention_mask[:129] + attention_mask[-383:]  
        token_type_id = token_type_id[:129] + token_type_id[-383:]   
    elif len(input_id) < 512: 
        input_id = input_id + [0]*(512 - len(input_id)) 
        attention_mask = attention_mask + [0]*(512 - len(attention_mask))
        token_type_id = token_type_id + [0]*(512 - len(token_type_id))  
        
    return np.asarray(input_id), np.asarray(attention_mask), np.asarray(token_type_id) 

In [62]:
MAX_LEN = 512
input_ids = []
attention_masks = [] 
token_type_ids = [] 
cnt = 0
for sent1, sent2 in zip(splitted_train['title'], splitted_train['content']): 
    if cnt%1000 == 0 and cnt > 0: 
        print("Processed {} datapoints".format(cnt)) 
    cnt += 1
    try: 
        input_id, attention_mask, token_type_id = electra_tokenizer_simple(sent1, sent2, MAX_LEN)
        ## check if the number of tokens exceed 510 (excluding [cls] and [sep]) 
        ## if so empirically select the first 129 and the last 383 tokens 
        input_ids.append(input_id) 
        attention_masks.append(attention_mask) 
        token_type_ids.append(token_type_id) 
    except Exception as e:  
        print(e)         
        print(sent1, sent2) 
        pass

Processed 1000 datapoints
Processed 2000 datapoints
Processed 3000 datapoints
Processed 4000 datapoints
Processed 5000 datapoints
Processed 6000 datapoints
Processed 7000 datapoints
Processed 8000 datapoints
Processed 9000 datapoints
Processed 10000 datapoints
Processed 11000 datapoints
Processed 12000 datapoints
Processed 13000 datapoints
Processed 14000 datapoints
Processed 15000 datapoints
Processed 16000 datapoints
Processed 17000 datapoints
Processed 18000 datapoints
Processed 19000 datapoints
Processed 20000 datapoints
Processed 21000 datapoints
Processed 22000 datapoints
Processed 23000 datapoints
Processed 24000 datapoints
Processed 25000 datapoints
Processed 26000 datapoints
Processed 27000 datapoints
Processed 28000 datapoints
Processed 29000 datapoints
Processed 30000 datapoints
Processed 31000 datapoints
Processed 32000 datapoints
Processed 33000 datapoints
Processed 34000 datapoints
Processed 35000 datapoints
Processed 36000 datapoints
Processed 37000 datapoints
Processed 

Processed 298000 datapoints
Processed 299000 datapoints
Processed 300000 datapoints
Processed 301000 datapoints
Processed 302000 datapoints
Processed 303000 datapoints
Processed 304000 datapoints
Processed 305000 datapoints
Processed 306000 datapoints
Processed 307000 datapoints
Processed 308000 datapoints
Processed 309000 datapoints
Processed 310000 datapoints
Processed 311000 datapoints
Processed 312000 datapoints
Processed 313000 datapoints
Processed 314000 datapoints
Processed 315000 datapoints
Processed 316000 datapoints
Processed 317000 datapoints
Processed 318000 datapoints
Processed 319000 datapoints
Processed 320000 datapoints
Processed 321000 datapoints
Processed 322000 datapoints
Processed 323000 datapoints
Processed 324000 datapoints
Processed 325000 datapoints
Processed 326000 datapoints
Processed 327000 datapoints
Processed 328000 datapoints
Processed 329000 datapoints
Processed 330000 datapoints
Processed 331000 datapoints
Processed 332000 datapoints
Processed 333000 dat

Processed 591000 datapoints
Processed 592000 datapoints
Processed 593000 datapoints
Processed 594000 datapoints
Processed 595000 datapoints
Processed 596000 datapoints
Processed 597000 datapoints
Processed 598000 datapoints
Processed 599000 datapoints
Processed 600000 datapoints
Processed 601000 datapoints
Processed 602000 datapoints
Processed 603000 datapoints
Processed 604000 datapoints
Processed 605000 datapoints
Processed 606000 datapoints
Processed 607000 datapoints
Processed 608000 datapoints
Processed 609000 datapoints
Processed 610000 datapoints
Processed 611000 datapoints
Processed 612000 datapoints
Processed 613000 datapoints
Processed 614000 datapoints
Processed 615000 datapoints
Processed 616000 datapoints
Processed 617000 datapoints
Processed 618000 datapoints
Processed 619000 datapoints
Processed 620000 datapoints
Processed 621000 datapoints
Processed 622000 datapoints
Processed 623000 datapoints
Processed 624000 datapoints
Processed 625000 datapoints
Processed 626000 dat

In [63]:
input_ids = np.asarray(input_ids)
attention_masks = np.asarray(attention_masks) 
token_type_ids = np.asarray(token_type_ids)
y_train = splitted_train['Label'].values 

In [64]:
train_inputs, val_inputs, train_labels, val_labels = train_test_split(input_ids, y_train, random_state = 2021, test_size = 0.1, stratify = y_train)

train_masks, val_masks, _, _ = train_test_split(attention_masks, input_ids, random_state = 2021, test_size = 0.1)

train_token_type_ids, val_token_type_ids, _, _ = train_test_split(token_type_ids, input_ids, random_state = 2021, test_size = 0.1)


In [65]:
train_inputs = torch.tensor(train_inputs)
train_labels = torch.tensor(train_labels)
train_masks = torch.tensor(train_masks)
train_token_type_ids = torch.tensor(train_token_type_ids) 

In [66]:
val_inputs = torch.tensor(val_inputs) 
val_labels = torch.tensor(val_labels) 
val_masks = torch.tensor(val_masks) 
val_token_type_ids = torch.tensor(val_token_type_ids)

In [67]:
batch_size = 16

train_data = TensorDataset(train_inputs, train_masks, train_token_type_ids, train_labels) 
train_sampler = RandomSampler(train_data) 
train_dataloader = DataLoader(train_data, sampler = train_sampler, batch_size = batch_size) 

In [68]:
val_data = TensorDataset(val_inputs, val_masks, val_token_type_ids, val_labels) 
val_sampler = SequentialSampler(val_data) 
val_dataloader = DataLoader(val_data, sampler = val_sampler, batch_size = batch_size)  

## Define Model

In [69]:
model = ElectraForSequenceClassification.from_pretrained("monologg/koelectra-base-v3-discriminator")   
model.cuda() 

Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: 

ElectraForSequenceClassification(
  (electra): ElectraModel(
    (embeddings): ElectraEmbeddings(
      (word_embeddings): Embedding(35000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): ElectraEncoder(
      (layer): ModuleList(
        (0): ElectraLayer(
          (attention): ElectraAttention(
            (self): ElectraSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): ElectraSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm

In [70]:
def format_time(elapsed): 
    elapsed_rounded = int(round(elapsed)) 
    return str(datetime.timedelta(seconds = elapsed_rounded))

In [71]:
predictions = [] 
def compute_accuracy(model, dataloader, device):
    tqdm()
    model.eval()
    correct_preds, num_samples = 0,0
    with torch.no_grad():
        for i, batch in enumerate(tqdm(dataloader)):
            b_input_ids, b_input_masks, b_token_type_ids, b_labels = tuple(t.to(device) for t in batch) 
            loss, yhat = model(input_ids=b_input_ids, attention_mask=b_input_masks, 
                               token_type_ids = b_token_type_ids, labels=b_labels.long())
            prediction = (torch.sigmoid(yhat[:,1]) > 0.5).long() 
            predictions.append(prediction)
            num_samples += b_labels.size(0)
            correct_preds += (prediction==b_labels.long()).sum()
            del b_input_ids, b_input_masks, b_token_type_ids, b_labels #memory
        torch.cuda.empty_cache() #memory
        gc.collect() # memory 
        return correct_preds.float()/num_samples*100 


In [72]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 
device

device(type='cuda', index=0)

In [73]:
seed_val = 8888 
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

optimizer = AdamW(model.parameters(), lr = 2e-5, eps = 1e-8) 
epochs = 10 # 10 epochs for now. We may transfer learn later 
total_steps = len(train_dataloader) * epochs 
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps = 0, num_training_steps = total_steps) 

losses = [] 
val_losses = [] 
#model.zero_grad() 

for epoch_i in range(0, epochs): 
    print("")
    print("===== Epoch {:} / {:} =====".format(epoch_i + 1, epochs)) 
    print("Training ...") 
    t0 = time.time()
    running_loss = 0 
    train_accuracy = 0
    iteration = 0 
    model.train() 
    for step, batch in enumerate(train_dataloader):
        iteration += 1 
        batch = tuple(t.to(device) for t in batch) 
        b_input_ids, b_input_masks, b_token_type_ids, b_labels = batch 
        optimizer.zero_grad()
        loss, yhat = model(b_input_ids, attention_mask = b_input_masks, 
                        token_type_ids = b_token_type_ids, labels = b_labels.long()) 

        loss.backward()
        optimizer.step()
        scheduler.step()  

        running_loss += float(loss.item())
        del b_input_ids, b_input_masks, b_token_type_ids, b_labels #memory
        
        if not step%25:
            print(f'Epoch: {epoch_i+1:03d}/{epochs:03d} | '
                  f'Batch {step+1:03d}/{len(train_dataloader):03d} | '
                  f'Average Loss in last {iteration} iteration(s): {(running_loss/iteration):.4f} | '
                  f'Elapsed {format_time(time.time()-t0)}')
            running_loss = 0.0
            iteration = 0
        torch.cuda.empty_cache() #memory
        gc.collect() #memory
        losses.append(float(loss.item()))
    
    with torch.no_grad():
        print(f'\nTraining Accuracy: ' f'{compute_accuracy(model, train_dataloader, device):.2f}%')
        
        
    print("Calculating validation metrics...")
    model.eval() 
    eval_loss = 0
    eval_accuracy = 0
    for batch in val_dataloader: 
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_masks, b_token_type_ids, b_labels = batch 
        with torch.no_grad(): 
            loss, yhat = model(b_input_ids, attention_mask = b_input_masks, 
                        token_type_ids = b_token_type_ids, labels = b_labels.long())  
        eval_loss += float(loss.item())  
        del b_input_ids, b_input_masks, b_token_type_ids, b_labels # memory 
        val_losses.append(float(loss.item())) 
        
    avg_val_loss = eval_loss / len(val_dataloader) 
    # avg_val_accuracy = eval_accuracy / len(val_dataloader)   
    
    print("Average validation loss = {}".format(avg_val_loss)) 
    print("Average validation accuracy = {}".format(compute_accuracy(model, val_dataloader, device)))  
    
    # saving model 
    torch.save(model.state_dict(), 'electra_chunked_ver_2_' + str(epoch_i + 1))



===== Epoch 1 / 10 =====
Training ...
Epoch: 001/010 | Batch 001/45709 | Average Loss in last 1 iteration(s): 0.6811 | Elapsed 0:00:01
Epoch: 001/010 | Batch 026/45709 | Average Loss in last 25 iteration(s): 0.6957 | Elapsed 0:00:27
Epoch: 001/010 | Batch 051/45709 | Average Loss in last 25 iteration(s): 0.6914 | Elapsed 0:00:52
Epoch: 001/010 | Batch 076/45709 | Average Loss in last 25 iteration(s): 0.6942 | Elapsed 0:01:21
Epoch: 001/010 | Batch 101/45709 | Average Loss in last 25 iteration(s): 0.6903 | Elapsed 0:01:45
Epoch: 001/010 | Batch 126/45709 | Average Loss in last 25 iteration(s): 0.6901 | Elapsed 0:02:11
Epoch: 001/010 | Batch 151/45709 | Average Loss in last 25 iteration(s): 0.6810 | Elapsed 0:02:37
Epoch: 001/010 | Batch 176/45709 | Average Loss in last 25 iteration(s): 0.6581 | Elapsed 0:03:03
Epoch: 001/010 | Batch 201/45709 | Average Loss in last 25 iteration(s): 0.6332 | Elapsed 0:03:25
Epoch: 001/010 | Batch 226/45709 | Average Loss in last 25 iteration(s): 0.6422 

Epoch: 001/010 | Batch 2076/45709 | Average Loss in last 25 iteration(s): 0.4215 | Elapsed 0:35:49
Epoch: 001/010 | Batch 2101/45709 | Average Loss in last 25 iteration(s): 0.4809 | Elapsed 0:36:13
Epoch: 001/010 | Batch 2126/45709 | Average Loss in last 25 iteration(s): 0.4456 | Elapsed 0:36:40
Epoch: 001/010 | Batch 2151/45709 | Average Loss in last 25 iteration(s): 0.4777 | Elapsed 0:37:06
Epoch: 001/010 | Batch 2176/45709 | Average Loss in last 25 iteration(s): 0.4547 | Elapsed 0:37:32
Epoch: 001/010 | Batch 2201/45709 | Average Loss in last 25 iteration(s): 0.4735 | Elapsed 0:37:55
Epoch: 001/010 | Batch 2226/45709 | Average Loss in last 25 iteration(s): 0.4646 | Elapsed 0:38:23
Epoch: 001/010 | Batch 2251/45709 | Average Loss in last 25 iteration(s): 0.4722 | Elapsed 0:38:50
Epoch: 001/010 | Batch 2276/45709 | Average Loss in last 25 iteration(s): 0.4739 | Elapsed 0:39:17
Epoch: 001/010 | Batch 2301/45709 | Average Loss in last 25 iteration(s): 0.4378 | Elapsed 0:39:43
Epoch: 001

Epoch: 001/010 | Batch 4151/45709 | Average Loss in last 25 iteration(s): 0.4340 | Elapsed 1:11:14
Epoch: 001/010 | Batch 4176/45709 | Average Loss in last 25 iteration(s): 0.4702 | Elapsed 1:11:44
Epoch: 001/010 | Batch 4201/45709 | Average Loss in last 25 iteration(s): 0.4192 | Elapsed 1:12:08
Epoch: 001/010 | Batch 4226/45709 | Average Loss in last 25 iteration(s): 0.4261 | Elapsed 1:12:35
Epoch: 001/010 | Batch 4251/45709 | Average Loss in last 25 iteration(s): 0.4647 | Elapsed 1:13:00
Epoch: 001/010 | Batch 4276/45709 | Average Loss in last 25 iteration(s): 0.4102 | Elapsed 1:13:26
Epoch: 001/010 | Batch 4301/45709 | Average Loss in last 25 iteration(s): 0.4497 | Elapsed 1:13:48
Epoch: 001/010 | Batch 4326/45709 | Average Loss in last 25 iteration(s): 0.4548 | Elapsed 1:14:14
Epoch: 001/010 | Batch 4351/45709 | Average Loss in last 25 iteration(s): 0.4321 | Elapsed 1:14:43
Epoch: 001/010 | Batch 4376/45709 | Average Loss in last 25 iteration(s): 0.3959 | Elapsed 1:15:08
Epoch: 001

Epoch: 001/010 | Batch 6226/45709 | Average Loss in last 25 iteration(s): 0.3738 | Elapsed 1:47:06
Epoch: 001/010 | Batch 6251/45709 | Average Loss in last 25 iteration(s): 0.4278 | Elapsed 1:47:32
Epoch: 001/010 | Batch 6276/45709 | Average Loss in last 25 iteration(s): 0.3936 | Elapsed 1:48:00
Epoch: 001/010 | Batch 6301/45709 | Average Loss in last 25 iteration(s): 0.4023 | Elapsed 1:48:25
Epoch: 001/010 | Batch 6326/45709 | Average Loss in last 25 iteration(s): 0.3347 | Elapsed 1:48:50
Epoch: 001/010 | Batch 6351/45709 | Average Loss in last 25 iteration(s): 0.3689 | Elapsed 1:49:13
Epoch: 001/010 | Batch 6376/45709 | Average Loss in last 25 iteration(s): 0.3909 | Elapsed 1:49:42
Epoch: 001/010 | Batch 6401/45709 | Average Loss in last 25 iteration(s): 0.4338 | Elapsed 1:50:09
Epoch: 001/010 | Batch 6426/45709 | Average Loss in last 25 iteration(s): 0.3315 | Elapsed 1:50:35
Epoch: 001/010 | Batch 6451/45709 | Average Loss in last 25 iteration(s): 0.3833 | Elapsed 1:51:02
Epoch: 001

Epoch: 001/010 | Batch 8301/45709 | Average Loss in last 25 iteration(s): 0.3578 | Elapsed 2:23:04
Epoch: 001/010 | Batch 8326/45709 | Average Loss in last 25 iteration(s): 0.3059 | Elapsed 2:23:32
Epoch: 001/010 | Batch 8351/45709 | Average Loss in last 25 iteration(s): 0.3736 | Elapsed 2:23:57
Epoch: 001/010 | Batch 8376/45709 | Average Loss in last 25 iteration(s): 0.3586 | Elapsed 2:24:21
Epoch: 001/010 | Batch 8401/45709 | Average Loss in last 25 iteration(s): 0.3865 | Elapsed 2:24:50
Epoch: 001/010 | Batch 8426/45709 | Average Loss in last 25 iteration(s): 0.3553 | Elapsed 2:25:16
Epoch: 001/010 | Batch 8451/45709 | Average Loss in last 25 iteration(s): 0.3268 | Elapsed 2:25:42
Epoch: 001/010 | Batch 8476/45709 | Average Loss in last 25 iteration(s): 0.3819 | Elapsed 2:26:09
Epoch: 001/010 | Batch 8501/45709 | Average Loss in last 25 iteration(s): 0.3648 | Elapsed 2:26:33
Epoch: 001/010 | Batch 8526/45709 | Average Loss in last 25 iteration(s): 0.3608 | Elapsed 2:26:56
Epoch: 001

Epoch: 001/010 | Batch 10376/45709 | Average Loss in last 25 iteration(s): 0.3225 | Elapsed 2:59:05
Epoch: 001/010 | Batch 10401/45709 | Average Loss in last 25 iteration(s): 0.3169 | Elapsed 2:59:29
Epoch: 001/010 | Batch 10426/45709 | Average Loss in last 25 iteration(s): 0.3184 | Elapsed 2:59:57
Epoch: 001/010 | Batch 10451/45709 | Average Loss in last 25 iteration(s): 0.3421 | Elapsed 3:00:24
Epoch: 001/010 | Batch 10476/45709 | Average Loss in last 25 iteration(s): 0.3503 | Elapsed 3:00:50
Epoch: 001/010 | Batch 10501/45709 | Average Loss in last 25 iteration(s): 0.3148 | Elapsed 3:01:17
Epoch: 001/010 | Batch 10526/45709 | Average Loss in last 25 iteration(s): 0.3587 | Elapsed 3:01:40
Epoch: 001/010 | Batch 10551/45709 | Average Loss in last 25 iteration(s): 0.3171 | Elapsed 3:02:06
Epoch: 001/010 | Batch 10576/45709 | Average Loss in last 25 iteration(s): 0.2832 | Elapsed 3:02:29
Epoch: 001/010 | Batch 10601/45709 | Average Loss in last 25 iteration(s): 0.2740 | Elapsed 3:02:58


Epoch: 001/010 | Batch 12426/45709 | Average Loss in last 25 iteration(s): 0.3537 | Elapsed 3:34:24
Epoch: 001/010 | Batch 12451/45709 | Average Loss in last 25 iteration(s): 0.3261 | Elapsed 3:34:50
Epoch: 001/010 | Batch 12476/45709 | Average Loss in last 25 iteration(s): 0.2642 | Elapsed 3:35:17
Epoch: 001/010 | Batch 12501/45709 | Average Loss in last 25 iteration(s): 0.3112 | Elapsed 3:35:41
Epoch: 001/010 | Batch 12526/45709 | Average Loss in last 25 iteration(s): 0.2937 | Elapsed 3:36:07
Epoch: 001/010 | Batch 12551/45709 | Average Loss in last 25 iteration(s): 0.3314 | Elapsed 3:36:35
Epoch: 001/010 | Batch 12576/45709 | Average Loss in last 25 iteration(s): 0.2693 | Elapsed 3:37:02
Epoch: 001/010 | Batch 12601/45709 | Average Loss in last 25 iteration(s): 0.2535 | Elapsed 3:37:27
Epoch: 001/010 | Batch 12626/45709 | Average Loss in last 25 iteration(s): 0.2963 | Elapsed 3:37:54
Epoch: 001/010 | Batch 12651/45709 | Average Loss in last 25 iteration(s): 0.3382 | Elapsed 3:38:19


Epoch: 001/010 | Batch 14476/45709 | Average Loss in last 25 iteration(s): 0.2320 | Elapsed 4:09:54
Epoch: 001/010 | Batch 14501/45709 | Average Loss in last 25 iteration(s): 0.2508 | Elapsed 4:10:21
Epoch: 001/010 | Batch 14526/45709 | Average Loss in last 25 iteration(s): 0.2756 | Elapsed 4:10:46
Epoch: 001/010 | Batch 14551/45709 | Average Loss in last 25 iteration(s): 0.2625 | Elapsed 4:11:10
Epoch: 001/010 | Batch 14576/45709 | Average Loss in last 25 iteration(s): 0.3123 | Elapsed 4:11:36
Epoch: 001/010 | Batch 14601/45709 | Average Loss in last 25 iteration(s): 0.3045 | Elapsed 4:12:04
Epoch: 001/010 | Batch 14626/45709 | Average Loss in last 25 iteration(s): 0.3034 | Elapsed 4:12:30
Epoch: 001/010 | Batch 14651/45709 | Average Loss in last 25 iteration(s): 0.3010 | Elapsed 4:12:56
Epoch: 001/010 | Batch 14676/45709 | Average Loss in last 25 iteration(s): 0.3165 | Elapsed 4:13:24
Epoch: 001/010 | Batch 14701/45709 | Average Loss in last 25 iteration(s): 0.3040 | Elapsed 4:13:48


Epoch: 001/010 | Batch 16526/45709 | Average Loss in last 25 iteration(s): 0.2818 | Elapsed 4:45:01
Epoch: 001/010 | Batch 16551/45709 | Average Loss in last 25 iteration(s): 0.2554 | Elapsed 4:45:28
Epoch: 001/010 | Batch 16576/45709 | Average Loss in last 25 iteration(s): 0.2891 | Elapsed 4:45:55
Epoch: 001/010 | Batch 16601/45709 | Average Loss in last 25 iteration(s): 0.2535 | Elapsed 4:46:23
Epoch: 001/010 | Batch 16626/45709 | Average Loss in last 25 iteration(s): 0.3083 | Elapsed 4:46:48
Epoch: 001/010 | Batch 16651/45709 | Average Loss in last 25 iteration(s): 0.2918 | Elapsed 4:47:10
Epoch: 001/010 | Batch 16676/45709 | Average Loss in last 25 iteration(s): 0.2395 | Elapsed 4:47:36
Epoch: 001/010 | Batch 16701/45709 | Average Loss in last 25 iteration(s): 0.2913 | Elapsed 4:48:03
Epoch: 001/010 | Batch 16726/45709 | Average Loss in last 25 iteration(s): 0.2773 | Elapsed 4:48:32
Epoch: 001/010 | Batch 16751/45709 | Average Loss in last 25 iteration(s): 0.3022 | Elapsed 4:49:02


Epoch: 001/010 | Batch 18576/45709 | Average Loss in last 25 iteration(s): 0.2636 | Elapsed 5:20:53
Epoch: 001/010 | Batch 18601/45709 | Average Loss in last 25 iteration(s): 0.2450 | Elapsed 5:21:21
Epoch: 001/010 | Batch 18626/45709 | Average Loss in last 25 iteration(s): 0.2442 | Elapsed 5:21:45
Epoch: 001/010 | Batch 18651/45709 | Average Loss in last 25 iteration(s): 0.2520 | Elapsed 5:22:05
Epoch: 001/010 | Batch 18676/45709 | Average Loss in last 25 iteration(s): 0.2563 | Elapsed 5:22:28
Epoch: 001/010 | Batch 18701/45709 | Average Loss in last 25 iteration(s): 0.2447 | Elapsed 5:22:58
Epoch: 001/010 | Batch 18726/45709 | Average Loss in last 25 iteration(s): 0.2589 | Elapsed 5:23:25
Epoch: 001/010 | Batch 18751/45709 | Average Loss in last 25 iteration(s): 0.3043 | Elapsed 5:23:52
Epoch: 001/010 | Batch 18776/45709 | Average Loss in last 25 iteration(s): 0.2601 | Elapsed 5:24:18
Epoch: 001/010 | Batch 18801/45709 | Average Loss in last 25 iteration(s): 0.2661 | Elapsed 5:24:43


Epoch: 001/010 | Batch 20626/45709 | Average Loss in last 25 iteration(s): 0.2181 | Elapsed 5:55:56
Epoch: 001/010 | Batch 20651/45709 | Average Loss in last 25 iteration(s): 0.2307 | Elapsed 5:56:23
Epoch: 001/010 | Batch 20676/45709 | Average Loss in last 25 iteration(s): 0.2608 | Elapsed 5:56:44
Epoch: 001/010 | Batch 20701/45709 | Average Loss in last 25 iteration(s): 0.2662 | Elapsed 5:57:09
Epoch: 001/010 | Batch 20726/45709 | Average Loss in last 25 iteration(s): 0.2423 | Elapsed 5:57:40
Epoch: 001/010 | Batch 20751/45709 | Average Loss in last 25 iteration(s): 0.2348 | Elapsed 5:58:03
Epoch: 001/010 | Batch 20776/45709 | Average Loss in last 25 iteration(s): 0.2029 | Elapsed 5:58:33
Epoch: 001/010 | Batch 20801/45709 | Average Loss in last 25 iteration(s): 0.3016 | Elapsed 5:59:01
Epoch: 001/010 | Batch 20826/45709 | Average Loss in last 25 iteration(s): 0.2284 | Elapsed 5:59:24
Epoch: 001/010 | Batch 20851/45709 | Average Loss in last 25 iteration(s): 0.2738 | Elapsed 5:59:48


Epoch: 001/010 | Batch 22676/45709 | Average Loss in last 25 iteration(s): 0.2138 | Elapsed 6:31:15
Epoch: 001/010 | Batch 22701/45709 | Average Loss in last 25 iteration(s): 0.2136 | Elapsed 6:31:41
Epoch: 001/010 | Batch 22726/45709 | Average Loss in last 25 iteration(s): 0.2141 | Elapsed 6:32:07
Epoch: 001/010 | Batch 22751/45709 | Average Loss in last 25 iteration(s): 0.2501 | Elapsed 6:32:32
Epoch: 001/010 | Batch 22776/45709 | Average Loss in last 25 iteration(s): 0.2275 | Elapsed 6:32:54
Epoch: 001/010 | Batch 22801/45709 | Average Loss in last 25 iteration(s): 0.2434 | Elapsed 6:33:22
Epoch: 001/010 | Batch 22826/45709 | Average Loss in last 25 iteration(s): 0.2092 | Elapsed 6:33:51
Epoch: 001/010 | Batch 22851/45709 | Average Loss in last 25 iteration(s): 0.2052 | Elapsed 6:34:17
Epoch: 001/010 | Batch 22876/45709 | Average Loss in last 25 iteration(s): 0.2610 | Elapsed 6:34:43
Epoch: 001/010 | Batch 22901/45709 | Average Loss in last 25 iteration(s): 0.2061 | Elapsed 6:35:10


Epoch: 001/010 | Batch 24726/45709 | Average Loss in last 25 iteration(s): 0.2185 | Elapsed 7:06:45
Epoch: 001/010 | Batch 24751/45709 | Average Loss in last 25 iteration(s): 0.2244 | Elapsed 7:07:11
Epoch: 001/010 | Batch 24776/45709 | Average Loss in last 25 iteration(s): 0.2217 | Elapsed 7:07:38
Epoch: 001/010 | Batch 24801/45709 | Average Loss in last 25 iteration(s): 0.2005 | Elapsed 7:08:00
Epoch: 001/010 | Batch 24826/45709 | Average Loss in last 25 iteration(s): 0.2337 | Elapsed 7:08:29
Epoch: 001/010 | Batch 24851/45709 | Average Loss in last 25 iteration(s): 0.2252 | Elapsed 7:09:00
Epoch: 001/010 | Batch 24876/45709 | Average Loss in last 25 iteration(s): 0.2544 | Elapsed 7:09:27
Epoch: 001/010 | Batch 24901/45709 | Average Loss in last 25 iteration(s): 0.2257 | Elapsed 7:09:52
Epoch: 001/010 | Batch 24926/45709 | Average Loss in last 25 iteration(s): 0.1991 | Elapsed 7:10:16
Epoch: 001/010 | Batch 24951/45709 | Average Loss in last 25 iteration(s): 0.1917 | Elapsed 7:10:41


Epoch: 001/010 | Batch 26776/45709 | Average Loss in last 25 iteration(s): 0.1822 | Elapsed 7:41:41
Epoch: 001/010 | Batch 26801/45709 | Average Loss in last 25 iteration(s): 0.1809 | Elapsed 7:42:07
Epoch: 001/010 | Batch 26826/45709 | Average Loss in last 25 iteration(s): 0.2317 | Elapsed 7:42:32
Epoch: 001/010 | Batch 26851/45709 | Average Loss in last 25 iteration(s): 0.1703 | Elapsed 7:42:57
Epoch: 001/010 | Batch 26876/45709 | Average Loss in last 25 iteration(s): 0.2004 | Elapsed 7:43:20
Epoch: 001/010 | Batch 26901/45709 | Average Loss in last 25 iteration(s): 0.2231 | Elapsed 7:43:48
Epoch: 001/010 | Batch 26926/45709 | Average Loss in last 25 iteration(s): 0.1798 | Elapsed 7:44:15
Epoch: 001/010 | Batch 26951/45709 | Average Loss in last 25 iteration(s): 0.2670 | Elapsed 7:44:41
Epoch: 001/010 | Batch 26976/45709 | Average Loss in last 25 iteration(s): 0.2198 | Elapsed 7:45:07
Epoch: 001/010 | Batch 27001/45709 | Average Loss in last 25 iteration(s): 0.2243 | Elapsed 7:45:33


Epoch: 001/010 | Batch 28826/45709 | Average Loss in last 25 iteration(s): 0.2134 | Elapsed 8:17:00
Epoch: 001/010 | Batch 28851/45709 | Average Loss in last 25 iteration(s): 0.1762 | Elapsed 8:17:25
Epoch: 001/010 | Batch 28876/45709 | Average Loss in last 25 iteration(s): 0.1824 | Elapsed 8:17:51
Epoch: 001/010 | Batch 28901/45709 | Average Loss in last 25 iteration(s): 0.2131 | Elapsed 8:18:17
Epoch: 001/010 | Batch 28926/45709 | Average Loss in last 25 iteration(s): 0.1738 | Elapsed 8:18:43
Epoch: 001/010 | Batch 28951/45709 | Average Loss in last 25 iteration(s): 0.1861 | Elapsed 8:19:06
Epoch: 001/010 | Batch 28976/45709 | Average Loss in last 25 iteration(s): 0.1860 | Elapsed 8:19:32
Epoch: 001/010 | Batch 29001/45709 | Average Loss in last 25 iteration(s): 0.2000 | Elapsed 8:20:00
Epoch: 001/010 | Batch 29026/45709 | Average Loss in last 25 iteration(s): 0.2019 | Elapsed 8:20:27
Epoch: 001/010 | Batch 29051/45709 | Average Loss in last 25 iteration(s): 0.1831 | Elapsed 8:20:52


Epoch: 001/010 | Batch 30876/45709 | Average Loss in last 25 iteration(s): 0.1872 | Elapsed 8:53:55
Epoch: 001/010 | Batch 30901/45709 | Average Loss in last 25 iteration(s): 0.1692 | Elapsed 8:54:21
Epoch: 001/010 | Batch 30926/45709 | Average Loss in last 25 iteration(s): 0.1894 | Elapsed 8:54:44
Epoch: 001/010 | Batch 30951/45709 | Average Loss in last 25 iteration(s): 0.1848 | Elapsed 8:55:12
Epoch: 001/010 | Batch 30976/45709 | Average Loss in last 25 iteration(s): 0.1930 | Elapsed 8:55:40
Epoch: 001/010 | Batch 31001/45709 | Average Loss in last 25 iteration(s): 0.1294 | Elapsed 8:56:03
Epoch: 001/010 | Batch 31026/45709 | Average Loss in last 25 iteration(s): 0.2091 | Elapsed 8:56:30
Epoch: 001/010 | Batch 31051/45709 | Average Loss in last 25 iteration(s): 0.1752 | Elapsed 8:56:55
Epoch: 001/010 | Batch 31076/45709 | Average Loss in last 25 iteration(s): 0.1588 | Elapsed 8:57:19
Epoch: 001/010 | Batch 31101/45709 | Average Loss in last 25 iteration(s): 0.1896 | Elapsed 8:57:45


Epoch: 001/010 | Batch 32926/45709 | Average Loss in last 25 iteration(s): 0.2085 | Elapsed 9:29:08
Epoch: 001/010 | Batch 32951/45709 | Average Loss in last 25 iteration(s): 0.1798 | Elapsed 9:29:33
Epoch: 001/010 | Batch 32976/45709 | Average Loss in last 25 iteration(s): 0.1864 | Elapsed 9:29:59
Epoch: 001/010 | Batch 33001/45709 | Average Loss in last 25 iteration(s): 0.2097 | Elapsed 9:30:24
Epoch: 001/010 | Batch 33026/45709 | Average Loss in last 25 iteration(s): 0.1785 | Elapsed 9:30:47
Epoch: 001/010 | Batch 33051/45709 | Average Loss in last 25 iteration(s): 0.1792 | Elapsed 9:31:16
Epoch: 001/010 | Batch 33076/45709 | Average Loss in last 25 iteration(s): 0.1697 | Elapsed 9:31:43
Epoch: 001/010 | Batch 33101/45709 | Average Loss in last 25 iteration(s): 0.2271 | Elapsed 9:32:09
Epoch: 001/010 | Batch 33126/45709 | Average Loss in last 25 iteration(s): 0.1921 | Elapsed 9:32:35
Epoch: 001/010 | Batch 33151/45709 | Average Loss in last 25 iteration(s): 0.1935 | Elapsed 9:33:00


Epoch: 001/010 | Batch 34976/45709 | Average Loss in last 25 iteration(s): 0.1577 | Elapsed 10:04:37
Epoch: 001/010 | Batch 35001/45709 | Average Loss in last 25 iteration(s): 0.1499 | Elapsed 10:05:01
Epoch: 001/010 | Batch 35026/45709 | Average Loss in last 25 iteration(s): 0.1765 | Elapsed 10:05:30
Epoch: 001/010 | Batch 35051/45709 | Average Loss in last 25 iteration(s): 0.1524 | Elapsed 10:05:58
Epoch: 001/010 | Batch 35076/45709 | Average Loss in last 25 iteration(s): 0.1895 | Elapsed 10:06:25
Epoch: 001/010 | Batch 35101/45709 | Average Loss in last 25 iteration(s): 0.1533 | Elapsed 10:06:49
Epoch: 001/010 | Batch 35126/45709 | Average Loss in last 25 iteration(s): 0.1610 | Elapsed 10:07:14
Epoch: 001/010 | Batch 35151/45709 | Average Loss in last 25 iteration(s): 0.2077 | Elapsed 10:07:35
Epoch: 001/010 | Batch 35176/45709 | Average Loss in last 25 iteration(s): 0.1631 | Elapsed 10:07:57
Epoch: 001/010 | Batch 35201/45709 | Average Loss in last 25 iteration(s): 0.1758 | Elapsed

Epoch: 001/010 | Batch 37026/45709 | Average Loss in last 25 iteration(s): 0.1518 | Elapsed 10:39:51
Epoch: 001/010 | Batch 37051/45709 | Average Loss in last 25 iteration(s): 0.1934 | Elapsed 10:40:17
Epoch: 001/010 | Batch 37076/45709 | Average Loss in last 25 iteration(s): 0.1723 | Elapsed 10:40:43
Epoch: 001/010 | Batch 37101/45709 | Average Loss in last 25 iteration(s): 0.1602 | Elapsed 10:41:06
Epoch: 001/010 | Batch 37126/45709 | Average Loss in last 25 iteration(s): 0.1793 | Elapsed 10:41:35
Epoch: 001/010 | Batch 37151/45709 | Average Loss in last 25 iteration(s): 0.1473 | Elapsed 10:42:02
Epoch: 001/010 | Batch 37176/45709 | Average Loss in last 25 iteration(s): 0.1674 | Elapsed 10:42:28
Epoch: 001/010 | Batch 37201/45709 | Average Loss in last 25 iteration(s): 0.1802 | Elapsed 10:42:53
Epoch: 001/010 | Batch 37226/45709 | Average Loss in last 25 iteration(s): 0.1576 | Elapsed 10:43:19
Epoch: 001/010 | Batch 37251/45709 | Average Loss in last 25 iteration(s): 0.1437 | Elapsed

Epoch: 001/010 | Batch 39076/45709 | Average Loss in last 25 iteration(s): 0.1578 | Elapsed 11:15:10
Epoch: 001/010 | Batch 39101/45709 | Average Loss in last 25 iteration(s): 0.1601 | Elapsed 11:15:37
Epoch: 001/010 | Batch 39126/45709 | Average Loss in last 25 iteration(s): 0.2119 | Elapsed 11:16:03
Epoch: 001/010 | Batch 39151/45709 | Average Loss in last 25 iteration(s): 0.1809 | Elapsed 11:16:29
Epoch: 001/010 | Batch 39176/45709 | Average Loss in last 25 iteration(s): 0.1564 | Elapsed 11:16:52
Epoch: 001/010 | Batch 39201/45709 | Average Loss in last 25 iteration(s): 0.1608 | Elapsed 11:17:19
Epoch: 001/010 | Batch 39226/45709 | Average Loss in last 25 iteration(s): 0.1146 | Elapsed 11:17:48
Epoch: 001/010 | Batch 39251/45709 | Average Loss in last 25 iteration(s): 0.1751 | Elapsed 11:18:15
Epoch: 001/010 | Batch 39276/45709 | Average Loss in last 25 iteration(s): 0.1460 | Elapsed 11:18:41
Epoch: 001/010 | Batch 39301/45709 | Average Loss in last 25 iteration(s): 0.1780 | Elapsed

Epoch: 001/010 | Batch 41126/45709 | Average Loss in last 25 iteration(s): 0.1198 | Elapsed 11:50:20
Epoch: 001/010 | Batch 41151/45709 | Average Loss in last 25 iteration(s): 0.1265 | Elapsed 11:50:48
Epoch: 001/010 | Batch 41176/45709 | Average Loss in last 25 iteration(s): 0.1362 | Elapsed 11:51:15
Epoch: 001/010 | Batch 41201/45709 | Average Loss in last 25 iteration(s): 0.1732 | Elapsed 11:51:42
Epoch: 001/010 | Batch 41226/45709 | Average Loss in last 25 iteration(s): 0.1510 | Elapsed 11:52:08
Epoch: 001/010 | Batch 41251/45709 | Average Loss in last 25 iteration(s): 0.1357 | Elapsed 11:52:33
Epoch: 001/010 | Batch 41276/45709 | Average Loss in last 25 iteration(s): 0.1660 | Elapsed 11:52:55
Epoch: 001/010 | Batch 41301/45709 | Average Loss in last 25 iteration(s): 0.1442 | Elapsed 11:53:22
Epoch: 001/010 | Batch 41326/45709 | Average Loss in last 25 iteration(s): 0.1808 | Elapsed 11:53:50
Epoch: 001/010 | Batch 41351/45709 | Average Loss in last 25 iteration(s): 0.1464 | Elapsed

Epoch: 001/010 | Batch 43176/45709 | Average Loss in last 25 iteration(s): 0.1845 | Elapsed 12:25:41
Epoch: 001/010 | Batch 43201/45709 | Average Loss in last 25 iteration(s): 0.1341 | Elapsed 12:26:04
Epoch: 001/010 | Batch 43226/45709 | Average Loss in last 25 iteration(s): 0.1497 | Elapsed 12:26:31
Epoch: 001/010 | Batch 43251/45709 | Average Loss in last 25 iteration(s): 0.1278 | Elapsed 12:26:58
Epoch: 001/010 | Batch 43276/45709 | Average Loss in last 25 iteration(s): 0.1123 | Elapsed 12:27:23
Epoch: 001/010 | Batch 43301/45709 | Average Loss in last 25 iteration(s): 0.1663 | Elapsed 12:27:51
Epoch: 001/010 | Batch 43326/45709 | Average Loss in last 25 iteration(s): 0.1418 | Elapsed 12:28:18
Epoch: 001/010 | Batch 43351/45709 | Average Loss in last 25 iteration(s): 0.1258 | Elapsed 12:28:40
Epoch: 001/010 | Batch 43376/45709 | Average Loss in last 25 iteration(s): 0.1228 | Elapsed 12:29:07
Epoch: 001/010 | Batch 43401/45709 | Average Loss in last 25 iteration(s): 0.1132 | Elapsed

Epoch: 001/010 | Batch 45226/45709 | Average Loss in last 25 iteration(s): 0.2094 | Elapsed 13:01:00
Epoch: 001/010 | Batch 45251/45709 | Average Loss in last 25 iteration(s): 0.1379 | Elapsed 13:01:24
Epoch: 001/010 | Batch 45276/45709 | Average Loss in last 25 iteration(s): 0.1610 | Elapsed 13:01:47
Epoch: 001/010 | Batch 45301/45709 | Average Loss in last 25 iteration(s): 0.1300 | Elapsed 13:02:17
Epoch: 001/010 | Batch 45326/45709 | Average Loss in last 25 iteration(s): 0.1226 | Elapsed 13:02:43
Epoch: 001/010 | Batch 45351/45709 | Average Loss in last 25 iteration(s): 0.1409 | Elapsed 13:03:08
Epoch: 001/010 | Batch 45376/45709 | Average Loss in last 25 iteration(s): 0.1450 | Elapsed 13:03:34
Epoch: 001/010 | Batch 45401/45709 | Average Loss in last 25 iteration(s): 0.1551 | Elapsed 13:03:59
Epoch: 001/010 | Batch 45426/45709 | Average Loss in last 25 iteration(s): 0.1548 | Elapsed 13:04:22
Epoch: 001/010 | Batch 45451/45709 | Average Loss in last 25 iteration(s): 0.1353 | Elapsed

0it [00:00, ?it/s]
100%|██████████| 45709/45709 [2:06:31<00:00,  6.03it/s]  



Training Accuracy: 95.91%
Calculating validation metrics...


0it [00:00, ?it/s]
  0%|          | 2/5079 [00:00<07:40, 11.01it/s]

Average validation loss = 0.12423044380968974


100%|██████████| 5079/5079 [14:03<00:00,  6.04it/s]


Average validation accuracy = 94.69849395751953

===== Epoch 2 / 10 =====
Training ...
Epoch: 002/010 | Batch 001/45709 | Average Loss in last 1 iteration(s): 0.0490 | Elapsed 0:00:01
Epoch: 002/010 | Batch 026/45709 | Average Loss in last 25 iteration(s): 0.1369 | Elapsed 0:00:24
Epoch: 002/010 | Batch 051/45709 | Average Loss in last 25 iteration(s): 0.1145 | Elapsed 0:00:50
Epoch: 002/010 | Batch 076/45709 | Average Loss in last 25 iteration(s): 0.0979 | Elapsed 0:01:15
Epoch: 002/010 | Batch 101/45709 | Average Loss in last 25 iteration(s): 0.1035 | Elapsed 0:01:43
Epoch: 002/010 | Batch 126/45709 | Average Loss in last 25 iteration(s): 0.1414 | Elapsed 0:02:10
Epoch: 002/010 | Batch 151/45709 | Average Loss in last 25 iteration(s): 0.1076 | Elapsed 0:02:37
Epoch: 002/010 | Batch 176/45709 | Average Loss in last 25 iteration(s): 0.1016 | Elapsed 0:03:00
Epoch: 002/010 | Batch 201/45709 | Average Loss in last 25 iteration(s): 0.1229 | Elapsed 0:03:25
Epoch: 002/010 | Batch 226/45709

Epoch: 002/010 | Batch 2076/45709 | Average Loss in last 25 iteration(s): 0.1437 | Elapsed 0:36:19
Epoch: 002/010 | Batch 2101/45709 | Average Loss in last 25 iteration(s): 0.1074 | Elapsed 0:36:48
Epoch: 002/010 | Batch 2126/45709 | Average Loss in last 25 iteration(s): 0.1132 | Elapsed 0:37:15
Epoch: 002/010 | Batch 2151/45709 | Average Loss in last 25 iteration(s): 0.1161 | Elapsed 0:37:43
Epoch: 002/010 | Batch 2176/45709 | Average Loss in last 25 iteration(s): 0.1250 | Elapsed 0:38:10
Epoch: 002/010 | Batch 2201/45709 | Average Loss in last 25 iteration(s): 0.0974 | Elapsed 0:38:36
Epoch: 002/010 | Batch 2226/45709 | Average Loss in last 25 iteration(s): 0.1081 | Elapsed 0:38:58
Epoch: 002/010 | Batch 2251/45709 | Average Loss in last 25 iteration(s): 0.0970 | Elapsed 0:39:27
Epoch: 002/010 | Batch 2276/45709 | Average Loss in last 25 iteration(s): 0.1217 | Elapsed 0:39:53
Epoch: 002/010 | Batch 2301/45709 | Average Loss in last 25 iteration(s): 0.0987 | Elapsed 0:40:19
Epoch: 002

Epoch: 002/010 | Batch 4151/45709 | Average Loss in last 25 iteration(s): 0.0993 | Elapsed 1:12:26
Epoch: 002/010 | Batch 4176/45709 | Average Loss in last 25 iteration(s): 0.1477 | Elapsed 1:12:53
Epoch: 002/010 | Batch 4201/45709 | Average Loss in last 25 iteration(s): 0.1145 | Elapsed 1:13:18
Epoch: 002/010 | Batch 4226/45709 | Average Loss in last 25 iteration(s): 0.0778 | Elapsed 1:13:41
Epoch: 002/010 | Batch 4251/45709 | Average Loss in last 25 iteration(s): 0.1068 | Elapsed 1:14:12
Epoch: 002/010 | Batch 4276/45709 | Average Loss in last 25 iteration(s): 0.1319 | Elapsed 1:14:38
Epoch: 002/010 | Batch 4301/45709 | Average Loss in last 25 iteration(s): 0.1323 | Elapsed 1:15:04
Epoch: 002/010 | Batch 4326/45709 | Average Loss in last 25 iteration(s): 0.0922 | Elapsed 1:15:31
Epoch: 002/010 | Batch 4351/45709 | Average Loss in last 25 iteration(s): 0.1092 | Elapsed 1:15:56
Epoch: 002/010 | Batch 4376/45709 | Average Loss in last 25 iteration(s): 0.1178 | Elapsed 1:16:20
Epoch: 002

Epoch: 002/010 | Batch 6226/45709 | Average Loss in last 25 iteration(s): 0.0897 | Elapsed 1:48:51
Epoch: 002/010 | Batch 6251/45709 | Average Loss in last 25 iteration(s): 0.0999 | Elapsed 1:49:17
Epoch: 002/010 | Batch 6276/45709 | Average Loss in last 25 iteration(s): 0.1257 | Elapsed 1:49:40
Epoch: 002/010 | Batch 6301/45709 | Average Loss in last 25 iteration(s): 0.1096 | Elapsed 1:50:06
Epoch: 002/010 | Batch 6326/45709 | Average Loss in last 25 iteration(s): 0.1241 | Elapsed 1:50:34
Epoch: 002/010 | Batch 6351/45709 | Average Loss in last 25 iteration(s): 0.1221 | Elapsed 1:51:03
Epoch: 002/010 | Batch 6376/45709 | Average Loss in last 25 iteration(s): 0.1158 | Elapsed 1:51:29
Epoch: 002/010 | Batch 6401/45709 | Average Loss in last 25 iteration(s): 0.1201 | Elapsed 1:51:55
Epoch: 002/010 | Batch 6426/45709 | Average Loss in last 25 iteration(s): 0.1282 | Elapsed 1:52:19
Epoch: 002/010 | Batch 6451/45709 | Average Loss in last 25 iteration(s): 0.1584 | Elapsed 1:52:48
Epoch: 002

Epoch: 002/010 | Batch 8301/45709 | Average Loss in last 25 iteration(s): 0.1169 | Elapsed 2:25:25
Epoch: 002/010 | Batch 8326/45709 | Average Loss in last 25 iteration(s): 0.1230 | Elapsed 2:25:53
Epoch: 002/010 | Batch 8351/45709 | Average Loss in last 25 iteration(s): 0.0888 | Elapsed 2:26:19
Epoch: 002/010 | Batch 8376/45709 | Average Loss in last 25 iteration(s): 0.0848 | Elapsed 2:26:46
Epoch: 002/010 | Batch 8401/45709 | Average Loss in last 25 iteration(s): 0.0937 | Elapsed 2:27:13
Epoch: 002/010 | Batch 8426/45709 | Average Loss in last 25 iteration(s): 0.1168 | Elapsed 2:27:38
Epoch: 002/010 | Batch 8451/45709 | Average Loss in last 25 iteration(s): 0.0835 | Elapsed 2:28:04
Epoch: 002/010 | Batch 8476/45709 | Average Loss in last 25 iteration(s): 0.1242 | Elapsed 2:28:32
Epoch: 002/010 | Batch 8501/45709 | Average Loss in last 25 iteration(s): 0.1008 | Elapsed 2:28:58
Epoch: 002/010 | Batch 8526/45709 | Average Loss in last 25 iteration(s): 0.1025 | Elapsed 2:29:25
Epoch: 002

Epoch: 002/010 | Batch 10376/45709 | Average Loss in last 25 iteration(s): 0.0909 | Elapsed 3:01:58
Epoch: 002/010 | Batch 10401/45709 | Average Loss in last 25 iteration(s): 0.1307 | Elapsed 3:02:23
Epoch: 002/010 | Batch 10426/45709 | Average Loss in last 25 iteration(s): 0.1010 | Elapsed 3:02:47
Epoch: 002/010 | Batch 10451/45709 | Average Loss in last 25 iteration(s): 0.0883 | Elapsed 3:03:11
Epoch: 002/010 | Batch 10476/45709 | Average Loss in last 25 iteration(s): 0.1009 | Elapsed 3:03:39
Epoch: 002/010 | Batch 10501/45709 | Average Loss in last 25 iteration(s): 0.0799 | Elapsed 3:04:07
Epoch: 002/010 | Batch 10526/45709 | Average Loss in last 25 iteration(s): 0.1116 | Elapsed 3:04:31
Epoch: 002/010 | Batch 10551/45709 | Average Loss in last 25 iteration(s): 0.1021 | Elapsed 3:04:59
Epoch: 002/010 | Batch 10576/45709 | Average Loss in last 25 iteration(s): 0.0945 | Elapsed 3:05:26
Epoch: 002/010 | Batch 10601/45709 | Average Loss in last 25 iteration(s): 0.1641 | Elapsed 3:05:49


Epoch: 002/010 | Batch 12426/45709 | Average Loss in last 25 iteration(s): 0.1281 | Elapsed 3:37:53
Epoch: 002/010 | Batch 12451/45709 | Average Loss in last 25 iteration(s): 0.0965 | Elapsed 3:38:20
Epoch: 002/010 | Batch 12476/45709 | Average Loss in last 25 iteration(s): 0.1086 | Elapsed 3:38:45
Epoch: 002/010 | Batch 12501/45709 | Average Loss in last 25 iteration(s): 0.0850 | Elapsed 3:39:08
Epoch: 002/010 | Batch 12526/45709 | Average Loss in last 25 iteration(s): 0.0910 | Elapsed 3:39:35
Epoch: 002/010 | Batch 12551/45709 | Average Loss in last 25 iteration(s): 0.0865 | Elapsed 3:40:04
Epoch: 002/010 | Batch 12576/45709 | Average Loss in last 25 iteration(s): 0.0725 | Elapsed 3:40:31
Epoch: 002/010 | Batch 12601/45709 | Average Loss in last 25 iteration(s): 0.0779 | Elapsed 3:40:59
Epoch: 002/010 | Batch 12626/45709 | Average Loss in last 25 iteration(s): 0.1359 | Elapsed 3:41:26
Epoch: 002/010 | Batch 12651/45709 | Average Loss in last 25 iteration(s): 0.1127 | Elapsed 3:41:48


Epoch: 002/010 | Batch 14476/45709 | Average Loss in last 25 iteration(s): 0.1216 | Elapsed 4:13:52
Epoch: 002/010 | Batch 14501/45709 | Average Loss in last 25 iteration(s): 0.0952 | Elapsed 4:14:15
Epoch: 002/010 | Batch 14526/45709 | Average Loss in last 25 iteration(s): 0.1100 | Elapsed 4:14:41
Epoch: 002/010 | Batch 14551/45709 | Average Loss in last 25 iteration(s): 0.1422 | Elapsed 4:15:10
Epoch: 002/010 | Batch 14576/45709 | Average Loss in last 25 iteration(s): 0.1156 | Elapsed 4:15:36
Epoch: 002/010 | Batch 14601/45709 | Average Loss in last 25 iteration(s): 0.0921 | Elapsed 4:16:02
Epoch: 002/010 | Batch 14626/45709 | Average Loss in last 25 iteration(s): 0.1191 | Elapsed 4:16:27
Epoch: 002/010 | Batch 14651/45709 | Average Loss in last 25 iteration(s): 0.0782 | Elapsed 4:16:53
Epoch: 002/010 | Batch 14676/45709 | Average Loss in last 25 iteration(s): 0.1282 | Elapsed 4:17:16
Epoch: 002/010 | Batch 14701/45709 | Average Loss in last 25 iteration(s): 0.1158 | Elapsed 4:17:46


Epoch: 002/010 | Batch 16526/45709 | Average Loss in last 25 iteration(s): 0.1028 | Elapsed 4:50:04
Epoch: 002/010 | Batch 16551/45709 | Average Loss in last 25 iteration(s): 0.1112 | Elapsed 4:50:30
Epoch: 002/010 | Batch 16576/45709 | Average Loss in last 25 iteration(s): 0.0938 | Elapsed 4:50:58
Epoch: 002/010 | Batch 16601/45709 | Average Loss in last 25 iteration(s): 0.0944 | Elapsed 4:51:25
Epoch: 002/010 | Batch 16626/45709 | Average Loss in last 25 iteration(s): 0.1151 | Elapsed 4:51:47
Epoch: 002/010 | Batch 16651/45709 | Average Loss in last 25 iteration(s): 0.1211 | Elapsed 4:52:11
Epoch: 002/010 | Batch 16676/45709 | Average Loss in last 25 iteration(s): 0.0953 | Elapsed 4:52:41
Epoch: 002/010 | Batch 16701/45709 | Average Loss in last 25 iteration(s): 0.0925 | Elapsed 4:53:10
Epoch: 002/010 | Batch 16726/45709 | Average Loss in last 25 iteration(s): 0.0739 | Elapsed 4:53:33
Epoch: 002/010 | Batch 16751/45709 | Average Loss in last 25 iteration(s): 0.0911 | Elapsed 4:53:59


Epoch: 002/010 | Batch 18576/45709 | Average Loss in last 25 iteration(s): 0.0940 | Elapsed 5:26:19
Epoch: 002/010 | Batch 18601/45709 | Average Loss in last 25 iteration(s): 0.1103 | Elapsed 5:26:46
Epoch: 002/010 | Batch 18626/45709 | Average Loss in last 25 iteration(s): 0.0637 | Elapsed 5:27:09
Epoch: 002/010 | Batch 18651/45709 | Average Loss in last 25 iteration(s): 0.1441 | Elapsed 5:27:37
Epoch: 002/010 | Batch 18676/45709 | Average Loss in last 25 iteration(s): 0.1225 | Elapsed 5:27:59
Epoch: 002/010 | Batch 18701/45709 | Average Loss in last 25 iteration(s): 0.1115 | Elapsed 5:28:27
Epoch: 002/010 | Batch 18726/45709 | Average Loss in last 25 iteration(s): 0.0909 | Elapsed 5:28:58
Epoch: 002/010 | Batch 18751/45709 | Average Loss in last 25 iteration(s): 0.0836 | Elapsed 5:29:26
Epoch: 002/010 | Batch 18776/45709 | Average Loss in last 25 iteration(s): 0.0944 | Elapsed 5:29:50
Epoch: 002/010 | Batch 18801/45709 | Average Loss in last 25 iteration(s): 0.0869 | Elapsed 5:30:17


Epoch: 002/010 | Batch 20626/45709 | Average Loss in last 25 iteration(s): 0.1007 | Elapsed 6:02:40
Epoch: 002/010 | Batch 20651/45709 | Average Loss in last 25 iteration(s): 0.0937 | Elapsed 6:03:11
Epoch: 002/010 | Batch 20676/45709 | Average Loss in last 25 iteration(s): 0.1491 | Elapsed 6:03:38
Epoch: 002/010 | Batch 20701/45709 | Average Loss in last 25 iteration(s): 0.0663 | Elapsed 6:04:04
Epoch: 002/010 | Batch 20726/45709 | Average Loss in last 25 iteration(s): 0.1011 | Elapsed 6:04:27
Epoch: 002/010 | Batch 20751/45709 | Average Loss in last 25 iteration(s): 0.1180 | Elapsed 6:04:53
Epoch: 002/010 | Batch 20776/45709 | Average Loss in last 25 iteration(s): 0.0757 | Elapsed 6:05:25
Epoch: 002/010 | Batch 20801/45709 | Average Loss in last 25 iteration(s): 0.1226 | Elapsed 6:05:54
Epoch: 002/010 | Batch 20826/45709 | Average Loss in last 25 iteration(s): 0.1011 | Elapsed 6:06:22
Epoch: 002/010 | Batch 20851/45709 | Average Loss in last 25 iteration(s): 0.1197 | Elapsed 6:06:48


Epoch: 002/010 | Batch 22676/45709 | Average Loss in last 25 iteration(s): 0.1101 | Elapsed 6:38:49
Epoch: 002/010 | Batch 22701/45709 | Average Loss in last 25 iteration(s): 0.1035 | Elapsed 6:39:16
Epoch: 002/010 | Batch 22726/45709 | Average Loss in last 25 iteration(s): 0.1122 | Elapsed 6:39:43
Epoch: 002/010 | Batch 22751/45709 | Average Loss in last 25 iteration(s): 0.1109 | Elapsed 6:40:09
Epoch: 002/010 | Batch 22776/45709 | Average Loss in last 25 iteration(s): 0.1027 | Elapsed 6:40:34
Epoch: 002/010 | Batch 22801/45709 | Average Loss in last 25 iteration(s): 0.0919 | Elapsed 6:41:01
Epoch: 002/010 | Batch 22826/45709 | Average Loss in last 25 iteration(s): 0.1369 | Elapsed 6:41:28
Epoch: 002/010 | Batch 22851/45709 | Average Loss in last 25 iteration(s): 0.1244 | Elapsed 6:41:55
Epoch: 002/010 | Batch 22876/45709 | Average Loss in last 25 iteration(s): 0.0709 | Elapsed 6:42:23
Epoch: 002/010 | Batch 22901/45709 | Average Loss in last 25 iteration(s): 0.0800 | Elapsed 6:42:47


Epoch: 002/010 | Batch 24726/45709 | Average Loss in last 25 iteration(s): 0.1047 | Elapsed 7:14:53
Epoch: 002/010 | Batch 24751/45709 | Average Loss in last 25 iteration(s): 0.1128 | Elapsed 7:15:20
Epoch: 002/010 | Batch 24776/45709 | Average Loss in last 25 iteration(s): 0.1146 | Elapsed 7:15:45
Epoch: 002/010 | Batch 24801/45709 | Average Loss in last 25 iteration(s): 0.1242 | Elapsed 7:16:09
Epoch: 002/010 | Batch 24826/45709 | Average Loss in last 25 iteration(s): 0.1095 | Elapsed 7:16:36
Epoch: 002/010 | Batch 24851/45709 | Average Loss in last 25 iteration(s): 0.0831 | Elapsed 7:17:05
Epoch: 002/010 | Batch 24876/45709 | Average Loss in last 25 iteration(s): 0.1219 | Elapsed 7:17:32
Epoch: 002/010 | Batch 24901/45709 | Average Loss in last 25 iteration(s): 0.1005 | Elapsed 7:17:58
Epoch: 002/010 | Batch 24926/45709 | Average Loss in last 25 iteration(s): 0.0881 | Elapsed 7:18:24
Epoch: 002/010 | Batch 24951/45709 | Average Loss in last 25 iteration(s): 0.0839 | Elapsed 7:18:47


Epoch: 002/010 | Batch 26776/45709 | Average Loss in last 25 iteration(s): 0.0846 | Elapsed 7:50:55
Epoch: 002/010 | Batch 26801/45709 | Average Loss in last 25 iteration(s): 0.1089 | Elapsed 7:51:20
Epoch: 002/010 | Batch 26826/45709 | Average Loss in last 25 iteration(s): 0.0945 | Elapsed 7:51:43
Epoch: 002/010 | Batch 26851/45709 | Average Loss in last 25 iteration(s): 0.1072 | Elapsed 7:52:09
Epoch: 002/010 | Batch 26876/45709 | Average Loss in last 25 iteration(s): 0.1086 | Elapsed 7:52:40
Epoch: 002/010 | Batch 26901/45709 | Average Loss in last 25 iteration(s): 0.1297 | Elapsed 7:53:07
Epoch: 002/010 | Batch 26926/45709 | Average Loss in last 25 iteration(s): 0.0766 | Elapsed 7:53:34
Epoch: 002/010 | Batch 26951/45709 | Average Loss in last 25 iteration(s): 0.0833 | Elapsed 7:54:00
Epoch: 002/010 | Batch 26976/45709 | Average Loss in last 25 iteration(s): 0.1395 | Elapsed 7:54:25
Epoch: 002/010 | Batch 27001/45709 | Average Loss in last 25 iteration(s): 0.0847 | Elapsed 7:54:53


Epoch: 002/010 | Batch 28826/45709 | Average Loss in last 25 iteration(s): 0.1083 | Elapsed 8:27:09
Epoch: 002/010 | Batch 28851/45709 | Average Loss in last 25 iteration(s): 0.0994 | Elapsed 8:27:36
Epoch: 002/010 | Batch 28876/45709 | Average Loss in last 25 iteration(s): 0.1077 | Elapsed 8:28:03
Epoch: 002/010 | Batch 28901/45709 | Average Loss in last 25 iteration(s): 0.1024 | Elapsed 8:28:31
Epoch: 002/010 | Batch 28926/45709 | Average Loss in last 25 iteration(s): 0.1364 | Elapsed 8:28:57
Epoch: 002/010 | Batch 28951/45709 | Average Loss in last 25 iteration(s): 0.0896 | Elapsed 8:29:21
Epoch: 002/010 | Batch 28976/45709 | Average Loss in last 25 iteration(s): 0.0744 | Elapsed 8:29:46
Epoch: 002/010 | Batch 29001/45709 | Average Loss in last 25 iteration(s): 0.0764 | Elapsed 8:30:13
Epoch: 002/010 | Batch 29026/45709 | Average Loss in last 25 iteration(s): 0.0986 | Elapsed 8:30:39
Epoch: 002/010 | Batch 29051/45709 | Average Loss in last 25 iteration(s): 0.0948 | Elapsed 8:31:07


Epoch: 002/010 | Batch 30876/45709 | Average Loss in last 25 iteration(s): 0.1040 | Elapsed 9:03:04
Epoch: 002/010 | Batch 30901/45709 | Average Loss in last 25 iteration(s): 0.0887 | Elapsed 9:03:34
Epoch: 002/010 | Batch 30926/45709 | Average Loss in last 25 iteration(s): 0.0730 | Elapsed 9:04:02
Epoch: 002/010 | Batch 30951/45709 | Average Loss in last 25 iteration(s): 0.1146 | Elapsed 9:04:28
Epoch: 002/010 | Batch 30976/45709 | Average Loss in last 25 iteration(s): 0.0720 | Elapsed 9:04:54
Epoch: 002/010 | Batch 31001/45709 | Average Loss in last 25 iteration(s): 0.1075 | Elapsed 9:05:19
Epoch: 002/010 | Batch 31026/45709 | Average Loss in last 25 iteration(s): 0.1326 | Elapsed 9:05:43
Epoch: 002/010 | Batch 31051/45709 | Average Loss in last 25 iteration(s): 0.0753 | Elapsed 9:06:11
Epoch: 002/010 | Batch 31076/45709 | Average Loss in last 25 iteration(s): 0.1060 | Elapsed 9:06:40
Epoch: 002/010 | Batch 31101/45709 | Average Loss in last 25 iteration(s): 0.1127 | Elapsed 9:07:06


Epoch: 002/010 | Batch 32926/45709 | Average Loss in last 25 iteration(s): 0.0823 | Elapsed 9:39:18
Epoch: 002/010 | Batch 32951/45709 | Average Loss in last 25 iteration(s): 0.0917 | Elapsed 9:39:44
Epoch: 002/010 | Batch 32976/45709 | Average Loss in last 25 iteration(s): 0.1088 | Elapsed 9:40:10
Epoch: 002/010 | Batch 33001/45709 | Average Loss in last 25 iteration(s): 0.0870 | Elapsed 9:40:35
Epoch: 002/010 | Batch 33026/45709 | Average Loss in last 25 iteration(s): 0.0870 | Elapsed 9:41:00
Epoch: 002/010 | Batch 33051/45709 | Average Loss in last 25 iteration(s): 0.1144 | Elapsed 9:41:28
Epoch: 002/010 | Batch 33076/45709 | Average Loss in last 25 iteration(s): 0.0786 | Elapsed 9:41:55
Epoch: 002/010 | Batch 33101/45709 | Average Loss in last 25 iteration(s): 0.0582 | Elapsed 9:42:22
Epoch: 002/010 | Batch 33126/45709 | Average Loss in last 25 iteration(s): 0.0913 | Elapsed 9:42:47
Epoch: 002/010 | Batch 33151/45709 | Average Loss in last 25 iteration(s): 0.0962 | Elapsed 9:43:14


Epoch: 002/010 | Batch 34976/45709 | Average Loss in last 25 iteration(s): 0.0910 | Elapsed 10:15:20
Epoch: 002/010 | Batch 35001/45709 | Average Loss in last 25 iteration(s): 0.0940 | Elapsed 10:15:47
Epoch: 002/010 | Batch 35026/45709 | Average Loss in last 25 iteration(s): 0.0704 | Elapsed 10:16:12
Epoch: 002/010 | Batch 35051/45709 | Average Loss in last 25 iteration(s): 0.1228 | Elapsed 10:16:37
Epoch: 002/010 | Batch 35076/45709 | Average Loss in last 25 iteration(s): 0.0656 | Elapsed 10:17:04
Epoch: 002/010 | Batch 35101/45709 | Average Loss in last 25 iteration(s): 0.1087 | Elapsed 10:17:32
Epoch: 002/010 | Batch 35126/45709 | Average Loss in last 25 iteration(s): 0.0626 | Elapsed 10:17:58
Epoch: 002/010 | Batch 35151/45709 | Average Loss in last 25 iteration(s): 0.0611 | Elapsed 10:18:25
Epoch: 002/010 | Batch 35176/45709 | Average Loss in last 25 iteration(s): 0.1079 | Elapsed 10:18:50
Epoch: 002/010 | Batch 35201/45709 | Average Loss in last 25 iteration(s): 0.0832 | Elapsed

Epoch: 002/010 | Batch 37026/45709 | Average Loss in last 25 iteration(s): 0.0579 | Elapsed 10:51:35
Epoch: 002/010 | Batch 37051/45709 | Average Loss in last 25 iteration(s): 0.1027 | Elapsed 10:51:59
Epoch: 002/010 | Batch 37076/45709 | Average Loss in last 25 iteration(s): 0.1193 | Elapsed 10:52:28
Epoch: 002/010 | Batch 37101/45709 | Average Loss in last 25 iteration(s): 0.1052 | Elapsed 10:52:54
Epoch: 002/010 | Batch 37126/45709 | Average Loss in last 25 iteration(s): 0.0833 | Elapsed 10:53:20
Epoch: 002/010 | Batch 37151/45709 | Average Loss in last 25 iteration(s): 0.1325 | Elapsed 10:53:48
Epoch: 002/010 | Batch 37176/45709 | Average Loss in last 25 iteration(s): 0.0821 | Elapsed 10:54:15
Epoch: 002/010 | Batch 37201/45709 | Average Loss in last 25 iteration(s): 0.0848 | Elapsed 10:54:38
Epoch: 002/010 | Batch 37226/45709 | Average Loss in last 25 iteration(s): 0.0811 | Elapsed 10:55:06
Epoch: 002/010 | Batch 37251/45709 | Average Loss in last 25 iteration(s): 0.1224 | Elapsed

Epoch: 002/010 | Batch 39076/45709 | Average Loss in last 25 iteration(s): 0.0978 | Elapsed 11:27:29
Epoch: 002/010 | Batch 39101/45709 | Average Loss in last 25 iteration(s): 0.0785 | Elapsed 11:27:53
Epoch: 002/010 | Batch 39126/45709 | Average Loss in last 25 iteration(s): 0.0619 | Elapsed 11:28:18
Epoch: 002/010 | Batch 39151/45709 | Average Loss in last 25 iteration(s): 0.0662 | Elapsed 11:28:48
Epoch: 002/010 | Batch 39176/45709 | Average Loss in last 25 iteration(s): 0.0849 | Elapsed 11:29:14
Epoch: 002/010 | Batch 39201/45709 | Average Loss in last 25 iteration(s): 0.0903 | Elapsed 11:29:43
Epoch: 002/010 | Batch 39226/45709 | Average Loss in last 25 iteration(s): 0.0939 | Elapsed 11:30:09
Epoch: 002/010 | Batch 39251/45709 | Average Loss in last 25 iteration(s): 0.0899 | Elapsed 11:30:35
Epoch: 002/010 | Batch 39276/45709 | Average Loss in last 25 iteration(s): 0.0687 | Elapsed 11:30:58
Epoch: 002/010 | Batch 39301/45709 | Average Loss in last 25 iteration(s): 0.0971 | Elapsed

Epoch: 002/010 | Batch 41126/45709 | Average Loss in last 25 iteration(s): 0.0735 | Elapsed 12:03:27
Epoch: 002/010 | Batch 41151/45709 | Average Loss in last 25 iteration(s): 0.0884 | Elapsed 12:03:51
Epoch: 002/010 | Batch 41176/45709 | Average Loss in last 25 iteration(s): 0.0710 | Elapsed 12:04:16
Epoch: 002/010 | Batch 41201/45709 | Average Loss in last 25 iteration(s): 0.0797 | Elapsed 12:04:48
Epoch: 002/010 | Batch 41226/45709 | Average Loss in last 25 iteration(s): 0.0773 | Elapsed 12:05:15
Epoch: 002/010 | Batch 41251/45709 | Average Loss in last 25 iteration(s): 0.0626 | Elapsed 12:05:41
Epoch: 002/010 | Batch 41276/45709 | Average Loss in last 25 iteration(s): 0.0728 | Elapsed 12:06:06
Epoch: 002/010 | Batch 41301/45709 | Average Loss in last 25 iteration(s): 0.0687 | Elapsed 12:06:33
Epoch: 002/010 | Batch 41326/45709 | Average Loss in last 25 iteration(s): 0.0846 | Elapsed 12:06:59
Epoch: 002/010 | Batch 41351/45709 | Average Loss in last 25 iteration(s): 0.1116 | Elapsed

Epoch: 002/010 | Batch 43176/45709 | Average Loss in last 25 iteration(s): 0.0827 | Elapsed 12:39:41
Epoch: 002/010 | Batch 43201/45709 | Average Loss in last 25 iteration(s): 0.0977 | Elapsed 12:40:08
Epoch: 002/010 | Batch 43226/45709 | Average Loss in last 25 iteration(s): 0.0736 | Elapsed 12:40:34
Epoch: 002/010 | Batch 43251/45709 | Average Loss in last 25 iteration(s): 0.1153 | Elapsed 12:41:02
Epoch: 002/010 | Batch 43276/45709 | Average Loss in last 25 iteration(s): 0.1345 | Elapsed 12:41:29
Epoch: 002/010 | Batch 43301/45709 | Average Loss in last 25 iteration(s): 0.0847 | Elapsed 12:41:54
Epoch: 002/010 | Batch 43326/45709 | Average Loss in last 25 iteration(s): 0.0726 | Elapsed 12:42:18
Epoch: 002/010 | Batch 43351/45709 | Average Loss in last 25 iteration(s): 0.1062 | Elapsed 12:42:46
Epoch: 002/010 | Batch 43376/45709 | Average Loss in last 25 iteration(s): 0.0902 | Elapsed 12:43:13
Epoch: 002/010 | Batch 43401/45709 | Average Loss in last 25 iteration(s): 0.0769 | Elapsed

Epoch: 002/010 | Batch 45226/45709 | Average Loss in last 25 iteration(s): 0.0908 | Elapsed 13:15:32
Epoch: 002/010 | Batch 45251/45709 | Average Loss in last 25 iteration(s): 0.1026 | Elapsed 13:15:56
Epoch: 002/010 | Batch 45276/45709 | Average Loss in last 25 iteration(s): 0.0881 | Elapsed 13:16:22
Epoch: 002/010 | Batch 45301/45709 | Average Loss in last 25 iteration(s): 0.0696 | Elapsed 13:16:49
Epoch: 002/010 | Batch 45326/45709 | Average Loss in last 25 iteration(s): 0.0888 | Elapsed 13:17:14
Epoch: 002/010 | Batch 45351/45709 | Average Loss in last 25 iteration(s): 0.0785 | Elapsed 13:17:43
Epoch: 002/010 | Batch 45376/45709 | Average Loss in last 25 iteration(s): 0.0845 | Elapsed 13:18:08
Epoch: 002/010 | Batch 45401/45709 | Average Loss in last 25 iteration(s): 0.0857 | Elapsed 13:18:32
Epoch: 002/010 | Batch 45426/45709 | Average Loss in last 25 iteration(s): 0.0636 | Elapsed 13:18:57
Epoch: 002/010 | Batch 45451/45709 | Average Loss in last 25 iteration(s): 0.0837 | Elapsed

0it [00:00, ?it/s]
100%|██████████| 45709/45709 [2:06:27<00:00,  6.04it/s]  



Training Accuracy: 97.19%
Calculating validation metrics...


0it [00:00, ?it/s]
  0%|          | 2/5079 [00:00<07:41, 11.00it/s]

Average validation loss = 0.08135525368020856


100%|██████████| 5079/5079 [14:03<00:00,  6.03it/s]


Average validation accuracy = 96.27861022949219

===== Epoch 3 / 10 =====
Training ...
Epoch: 003/010 | Batch 001/45709 | Average Loss in last 1 iteration(s): 0.2035 | Elapsed 0:00:01
Epoch: 003/010 | Batch 026/45709 | Average Loss in last 25 iteration(s): 0.0720 | Elapsed 0:00:27
Epoch: 003/010 | Batch 051/45709 | Average Loss in last 25 iteration(s): 0.0936 | Elapsed 0:00:55
Epoch: 003/010 | Batch 076/45709 | Average Loss in last 25 iteration(s): 0.0570 | Elapsed 0:01:21
Epoch: 003/010 | Batch 101/45709 | Average Loss in last 25 iteration(s): 0.0836 | Elapsed 0:01:45
Epoch: 003/010 | Batch 126/45709 | Average Loss in last 25 iteration(s): 0.0631 | Elapsed 0:02:15
Epoch: 003/010 | Batch 151/45709 | Average Loss in last 25 iteration(s): 0.0622 | Elapsed 0:02:41
Epoch: 003/010 | Batch 176/45709 | Average Loss in last 25 iteration(s): 0.0799 | Elapsed 0:03:08
Epoch: 003/010 | Batch 201/45709 | Average Loss in last 25 iteration(s): 0.0712 | Elapsed 0:03:35
Epoch: 003/010 | Batch 226/45709

Epoch: 003/010 | Batch 2076/45709 | Average Loss in last 25 iteration(s): 0.0721 | Elapsed 0:37:13
Epoch: 003/010 | Batch 2101/45709 | Average Loss in last 25 iteration(s): 0.0734 | Elapsed 0:37:42
Epoch: 003/010 | Batch 2126/45709 | Average Loss in last 25 iteration(s): 0.1120 | Elapsed 0:38:08
Epoch: 003/010 | Batch 2151/45709 | Average Loss in last 25 iteration(s): 0.0574 | Elapsed 0:38:36
Epoch: 003/010 | Batch 2176/45709 | Average Loss in last 25 iteration(s): 0.0711 | Elapsed 0:39:00
Epoch: 003/010 | Batch 2201/45709 | Average Loss in last 25 iteration(s): 0.0536 | Elapsed 0:39:28
Epoch: 003/010 | Batch 2226/45709 | Average Loss in last 25 iteration(s): 0.0445 | Elapsed 0:39:52
Epoch: 003/010 | Batch 2251/45709 | Average Loss in last 25 iteration(s): 0.0474 | Elapsed 0:40:20
Epoch: 003/010 | Batch 2276/45709 | Average Loss in last 25 iteration(s): 0.0574 | Elapsed 0:40:46
Epoch: 003/010 | Batch 2301/45709 | Average Loss in last 25 iteration(s): 0.0784 | Elapsed 0:41:14
Epoch: 003

Epoch: 003/010 | Batch 4151/45709 | Average Loss in last 25 iteration(s): 0.0929 | Elapsed 1:13:50
Epoch: 003/010 | Batch 4176/45709 | Average Loss in last 25 iteration(s): 0.0549 | Elapsed 1:14:17
Epoch: 003/010 | Batch 4201/45709 | Average Loss in last 25 iteration(s): 0.0364 | Elapsed 1:14:45
Epoch: 003/010 | Batch 4226/45709 | Average Loss in last 25 iteration(s): 0.0728 | Elapsed 1:15:10
Epoch: 003/010 | Batch 4251/45709 | Average Loss in last 25 iteration(s): 0.0539 | Elapsed 1:15:37
Epoch: 003/010 | Batch 4276/45709 | Average Loss in last 25 iteration(s): 0.1237 | Elapsed 1:16:08
Epoch: 003/010 | Batch 4301/45709 | Average Loss in last 25 iteration(s): 0.0764 | Elapsed 1:16:32
Epoch: 003/010 | Batch 4326/45709 | Average Loss in last 25 iteration(s): 0.0575 | Elapsed 1:17:01
Epoch: 003/010 | Batch 4351/45709 | Average Loss in last 25 iteration(s): 0.1050 | Elapsed 1:17:28
Epoch: 003/010 | Batch 4376/45709 | Average Loss in last 25 iteration(s): 0.0545 | Elapsed 1:17:51
Epoch: 003

Epoch: 003/010 | Batch 6301/45709 | Average Loss in last 25 iteration(s): 0.0560 | Elapsed 1:52:37
Epoch: 003/010 | Batch 6326/45709 | Average Loss in last 25 iteration(s): 0.0628 | Elapsed 1:53:05
Epoch: 003/010 | Batch 6351/45709 | Average Loss in last 25 iteration(s): 0.0595 | Elapsed 1:53:37
Epoch: 003/010 | Batch 6376/45709 | Average Loss in last 25 iteration(s): 0.0680 | Elapsed 1:54:05
Epoch: 003/010 | Batch 6401/45709 | Average Loss in last 25 iteration(s): 0.0876 | Elapsed 1:54:32
Epoch: 003/010 | Batch 6426/45709 | Average Loss in last 25 iteration(s): 0.0963 | Elapsed 1:54:53
Epoch: 003/010 | Batch 6451/45709 | Average Loss in last 25 iteration(s): 0.0955 | Elapsed 1:55:18
Epoch: 003/010 | Batch 6476/45709 | Average Loss in last 25 iteration(s): 0.0558 | Elapsed 1:55:49
Epoch: 003/010 | Batch 6501/45709 | Average Loss in last 25 iteration(s): 0.0839 | Elapsed 1:56:20
Epoch: 003/010 | Batch 6526/45709 | Average Loss in last 25 iteration(s): 0.0727 | Elapsed 1:56:47
Epoch: 003

Epoch: 003/010 | Batch 8376/45709 | Average Loss in last 25 iteration(s): 0.0671 | Elapsed 2:30:07
Epoch: 003/010 | Batch 8401/45709 | Average Loss in last 25 iteration(s): 0.0760 | Elapsed 2:30:31
Epoch: 003/010 | Batch 8426/45709 | Average Loss in last 25 iteration(s): 0.0661 | Elapsed 2:30:58
Epoch: 003/010 | Batch 8451/45709 | Average Loss in last 25 iteration(s): 0.0865 | Elapsed 2:31:27
Epoch: 003/010 | Batch 8476/45709 | Average Loss in last 25 iteration(s): 0.0789 | Elapsed 2:31:54
Epoch: 003/010 | Batch 8501/45709 | Average Loss in last 25 iteration(s): 0.0782 | Elapsed 2:32:22
Epoch: 003/010 | Batch 8526/45709 | Average Loss in last 25 iteration(s): 0.0921 | Elapsed 2:32:49
Epoch: 003/010 | Batch 8551/45709 | Average Loss in last 25 iteration(s): 0.1076 | Elapsed 2:33:12
Epoch: 003/010 | Batch 8576/45709 | Average Loss in last 25 iteration(s): 0.0651 | Elapsed 2:33:39
Epoch: 003/010 | Batch 8601/45709 | Average Loss in last 25 iteration(s): 0.0851 | Elapsed 2:34:06
Epoch: 003

Epoch: 003/010 | Batch 10451/45709 | Average Loss in last 25 iteration(s): 0.0500 | Elapsed 3:07:06
Epoch: 003/010 | Batch 10476/45709 | Average Loss in last 25 iteration(s): 0.0482 | Elapsed 3:07:36
Epoch: 003/010 | Batch 10501/45709 | Average Loss in last 25 iteration(s): 0.0843 | Elapsed 3:08:03
Epoch: 003/010 | Batch 10526/45709 | Average Loss in last 25 iteration(s): 0.0692 | Elapsed 3:08:30
Epoch: 003/010 | Batch 10551/45709 | Average Loss in last 25 iteration(s): 0.0692 | Elapsed 3:08:58
Epoch: 003/010 | Batch 10576/45709 | Average Loss in last 25 iteration(s): 0.0448 | Elapsed 3:09:24
Epoch: 003/010 | Batch 10601/45709 | Average Loss in last 25 iteration(s): 0.0631 | Elapsed 3:09:50
Epoch: 003/010 | Batch 10626/45709 | Average Loss in last 25 iteration(s): 0.0835 | Elapsed 3:10:21
Epoch: 003/010 | Batch 10651/45709 | Average Loss in last 25 iteration(s): 0.0632 | Elapsed 3:10:48
Epoch: 003/010 | Batch 10676/45709 | Average Loss in last 25 iteration(s): 0.0604 | Elapsed 3:11:15


Epoch: 003/010 | Batch 12501/45709 | Average Loss in last 25 iteration(s): 0.0648 | Elapsed 3:44:00
Epoch: 003/010 | Batch 12526/45709 | Average Loss in last 25 iteration(s): 0.0507 | Elapsed 3:44:27
Epoch: 003/010 | Batch 12551/45709 | Average Loss in last 25 iteration(s): 0.0565 | Elapsed 3:44:54
Epoch: 003/010 | Batch 12576/45709 | Average Loss in last 25 iteration(s): 0.1377 | Elapsed 3:45:18
Epoch: 003/010 | Batch 12601/45709 | Average Loss in last 25 iteration(s): 0.0918 | Elapsed 3:45:44
Epoch: 003/010 | Batch 12626/45709 | Average Loss in last 25 iteration(s): 0.0655 | Elapsed 3:46:13
Epoch: 003/010 | Batch 12651/45709 | Average Loss in last 25 iteration(s): 0.1030 | Elapsed 3:46:40
Epoch: 003/010 | Batch 12676/45709 | Average Loss in last 25 iteration(s): 0.1011 | Elapsed 3:47:08
Epoch: 003/010 | Batch 12701/45709 | Average Loss in last 25 iteration(s): 0.0951 | Elapsed 3:47:35
Epoch: 003/010 | Batch 12726/45709 | Average Loss in last 25 iteration(s): 0.0842 | Elapsed 3:48:00


Epoch: 003/010 | Batch 14551/45709 | Average Loss in last 25 iteration(s): 0.0685 | Elapsed 4:21:14
Epoch: 003/010 | Batch 14576/45709 | Average Loss in last 25 iteration(s): 0.0917 | Elapsed 4:21:41
Epoch: 003/010 | Batch 14601/45709 | Average Loss in last 25 iteration(s): 0.0555 | Elapsed 4:22:08
Epoch: 003/010 | Batch 14626/45709 | Average Loss in last 25 iteration(s): 0.0780 | Elapsed 4:22:32
Epoch: 003/010 | Batch 14651/45709 | Average Loss in last 25 iteration(s): 0.0657 | Elapsed 4:23:00
Epoch: 003/010 | Batch 14676/45709 | Average Loss in last 25 iteration(s): 0.0580 | Elapsed 4:23:28
Epoch: 003/010 | Batch 14701/45709 | Average Loss in last 25 iteration(s): 0.0785 | Elapsed 4:23:55
Epoch: 003/010 | Batch 14726/45709 | Average Loss in last 25 iteration(s): 0.0616 | Elapsed 4:24:22
Epoch: 003/010 | Batch 14751/45709 | Average Loss in last 25 iteration(s): 0.0684 | Elapsed 4:24:50
Epoch: 003/010 | Batch 14776/45709 | Average Loss in last 25 iteration(s): 0.0561 | Elapsed 4:25:12


Epoch: 003/010 | Batch 16601/45709 | Average Loss in last 25 iteration(s): 0.0831 | Elapsed 4:58:05
Epoch: 003/010 | Batch 16626/45709 | Average Loss in last 25 iteration(s): 0.0530 | Elapsed 4:58:33
Epoch: 003/010 | Batch 16651/45709 | Average Loss in last 25 iteration(s): 0.0414 | Elapsed 4:59:02
Epoch: 003/010 | Batch 16676/45709 | Average Loss in last 25 iteration(s): 0.0677 | Elapsed 4:59:29
Epoch: 003/010 | Batch 16701/45709 | Average Loss in last 25 iteration(s): 0.0623 | Elapsed 4:59:58
Epoch: 003/010 | Batch 16726/45709 | Average Loss in last 25 iteration(s): 0.0662 | Elapsed 5:00:23
Epoch: 003/010 | Batch 16751/45709 | Average Loss in last 25 iteration(s): 0.0456 | Elapsed 5:00:47
Epoch: 003/010 | Batch 16776/45709 | Average Loss in last 25 iteration(s): 0.0919 | Elapsed 5:01:17
Epoch: 003/010 | Batch 16801/45709 | Average Loss in last 25 iteration(s): 0.0717 | Elapsed 5:01:45
Epoch: 003/010 | Batch 16826/45709 | Average Loss in last 25 iteration(s): 0.0720 | Elapsed 5:02:12


Epoch: 003/010 | Batch 18651/45709 | Average Loss in last 25 iteration(s): 0.1059 | Elapsed 5:35:00
Epoch: 003/010 | Batch 18676/45709 | Average Loss in last 25 iteration(s): 0.1069 | Elapsed 5:35:27
Epoch: 003/010 | Batch 18701/45709 | Average Loss in last 25 iteration(s): 0.0710 | Elapsed 5:35:55
Epoch: 003/010 | Batch 18726/45709 | Average Loss in last 25 iteration(s): 0.0689 | Elapsed 5:36:20
Epoch: 003/010 | Batch 18751/45709 | Average Loss in last 25 iteration(s): 0.0613 | Elapsed 5:36:45
Epoch: 003/010 | Batch 18776/45709 | Average Loss in last 25 iteration(s): 0.0974 | Elapsed 5:37:17
Epoch: 003/010 | Batch 18801/45709 | Average Loss in last 25 iteration(s): 0.0596 | Elapsed 5:37:42
Epoch: 003/010 | Batch 18826/45709 | Average Loss in last 25 iteration(s): 0.0537 | Elapsed 5:38:11
Epoch: 003/010 | Batch 18851/45709 | Average Loss in last 25 iteration(s): 0.0801 | Elapsed 5:38:37
Epoch: 003/010 | Batch 18876/45709 | Average Loss in last 25 iteration(s): 0.0723 | Elapsed 5:39:03


Epoch: 003/010 | Batch 20701/45709 | Average Loss in last 25 iteration(s): 0.0928 | Elapsed 6:11:47
Epoch: 003/010 | Batch 20726/45709 | Average Loss in last 25 iteration(s): 0.0366 | Elapsed 6:12:12
Epoch: 003/010 | Batch 20751/45709 | Average Loss in last 25 iteration(s): 0.0812 | Elapsed 6:12:41
Epoch: 003/010 | Batch 20776/45709 | Average Loss in last 25 iteration(s): 0.0766 | Elapsed 6:13:10
Epoch: 003/010 | Batch 20801/45709 | Average Loss in last 25 iteration(s): 0.0494 | Elapsed 6:13:35
Epoch: 003/010 | Batch 20826/45709 | Average Loss in last 25 iteration(s): 0.0650 | Elapsed 6:14:01
Epoch: 003/010 | Batch 20851/45709 | Average Loss in last 25 iteration(s): 0.0671 | Elapsed 6:14:27
Epoch: 003/010 | Batch 20876/45709 | Average Loss in last 25 iteration(s): 0.0717 | Elapsed 6:14:52
Epoch: 003/010 | Batch 20901/45709 | Average Loss in last 25 iteration(s): 0.0511 | Elapsed 6:15:19
Epoch: 003/010 | Batch 20926/45709 | Average Loss in last 25 iteration(s): 0.0624 | Elapsed 6:15:49


Epoch: 003/010 | Batch 22751/45709 | Average Loss in last 25 iteration(s): 0.0885 | Elapsed 6:48:40
Epoch: 003/010 | Batch 22776/45709 | Average Loss in last 25 iteration(s): 0.0793 | Elapsed 6:49:06
Epoch: 003/010 | Batch 22801/45709 | Average Loss in last 25 iteration(s): 0.0581 | Elapsed 6:49:34
Epoch: 003/010 | Batch 22826/45709 | Average Loss in last 25 iteration(s): 0.0697 | Elapsed 6:50:00
Epoch: 003/010 | Batch 22851/45709 | Average Loss in last 25 iteration(s): 0.0627 | Elapsed 6:50:24
Epoch: 003/010 | Batch 22876/45709 | Average Loss in last 25 iteration(s): 0.0647 | Elapsed 6:50:52
Epoch: 003/010 | Batch 22901/45709 | Average Loss in last 25 iteration(s): 0.0666 | Elapsed 6:51:20
Epoch: 003/010 | Batch 22926/45709 | Average Loss in last 25 iteration(s): 0.0720 | Elapsed 6:51:48
Epoch: 003/010 | Batch 22951/45709 | Average Loss in last 25 iteration(s): 0.0889 | Elapsed 6:52:15
Epoch: 003/010 | Batch 22976/45709 | Average Loss in last 25 iteration(s): 0.0703 | Elapsed 6:52:41


Epoch: 003/010 | Batch 24801/45709 | Average Loss in last 25 iteration(s): 0.0783 | Elapsed 7:25:33
Epoch: 003/010 | Batch 24826/45709 | Average Loss in last 25 iteration(s): 0.0502 | Elapsed 7:25:56
Epoch: 003/010 | Batch 24851/45709 | Average Loss in last 25 iteration(s): 0.0531 | Elapsed 7:26:22
Epoch: 003/010 | Batch 24876/45709 | Average Loss in last 25 iteration(s): 0.0557 | Elapsed 7:26:53
Epoch: 003/010 | Batch 24901/45709 | Average Loss in last 25 iteration(s): 0.1004 | Elapsed 7:27:19
Epoch: 003/010 | Batch 24926/45709 | Average Loss in last 25 iteration(s): 0.0697 | Elapsed 7:27:47
Epoch: 003/010 | Batch 24951/45709 | Average Loss in last 25 iteration(s): 0.0972 | Elapsed 7:28:15
Epoch: 003/010 | Batch 24976/45709 | Average Loss in last 25 iteration(s): 0.0731 | Elapsed 7:28:40
Epoch: 003/010 | Batch 25001/45709 | Average Loss in last 25 iteration(s): 0.0776 | Elapsed 7:29:08
Epoch: 003/010 | Batch 25026/45709 | Average Loss in last 25 iteration(s): 0.0360 | Elapsed 7:29:36


KeyboardInterrupt: 