In [1]:
# ====================================================
# Library
# ====================================================
import os
import gc
import sys
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

import albumentations as A
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM

sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
import timm

from torch.cuda.amp import autocast, GradScaler

import warnings
warnings.filterwarnings('ignore')

device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')

  warn(f"Failed to load image Python extension: {e}")


In [2]:
# ====================================================
# CFG
# ====================================================
class CFG:
    apex=False
    debug=False
    print_freq=10
    num_workers=0
    size=224
    model_name='vit_large_patch32_224'
    scheduler='CosineAnnealingLR' # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    epochs=40
    #factor=0.2 # ReduceLROnPlateau
    #patience=4 # ReduceLROnPlateau
    #eps=1e-6 # ReduceLROnPlateau
    T_max=3 # CosineAnnealingLR
    #T_0=3 # CosineAnnealingWarmRestarts
    lr=1e-4
    min_lr=1e-6
    batch_size=32
    weight_decay=1e-6
    gradient_accumulation_steps=1
    max_grad_norm=1000
    seed = [42] #[42, 10, 20, 51, 111]
    target_size=1
    target_col='KIc'
    n_fold=5
    kfold="Kfold" #or Kfold
    trn_fold = [i for i in range(n_fold)]
    train=True
    grad_cam=True
      
# ====================================================
# Directory settings
# ====================================================
import os

OUTPUT_DIR = './KIc/Model/vit/'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

In [3]:
# ====================================================
# Utils
# ====================================================
def get_score(y_true, y_pred):
    score = mean_squared_error(y_true, y_pred, squared=False) # RMSE
    return score


def init_logger(log_file=OUTPUT_DIR+'train.log'):
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()


def seed_torch(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


In [4]:
# ====================================================
# Dataset
# ====================================================
class TrainDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['file_path'].values
        self.labels = df[CFG.target_col].values
        self.transform = transform
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_path = self.file_names[idx]
        image = cv2.imread(file_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        label = torch.tensor(self.labels[idx]).float()
        return image, label

In [5]:
# ====================================================
# Transforms
# ====================================================
def get_transforms(*, data):
    
    if data == 'train':
        return A.Compose([
            # A.Resize(CFG.size, CFG.size),
            A.RandomResizedCrop(CFG.size, CFG.size, scale=(0.85, 1.0)),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])

    elif data == 'valid':
        return A.Compose([
            A.Resize(CFG.size, CFG.size),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])

In [6]:
# ====================================================
# MODEL
# ====================================================
class CustomModel(nn.Module):
    def __init__(self, cfg, pretrained=False):
        super().__init__()
        self.cfg = cfg
        self.model = timm.create_model(self.cfg.model_name, pretrained=pretrained)
        self.n_features = self.model.head.in_features
        self.model.head = nn.Identity()
        self.fc = nn.Linear(self.n_features, self.cfg.target_size)

    def feature(self, image):
        feature = self.model(image)
        return feature
        
    def forward(self, image):
        feature = self.feature(image)
        output = self.fc(feature)
        return output

In [7]:
# ====================================================
# Loss
# ====================================================
class RMSELoss(nn.Module):
    def __init__(self, eps=1e-6):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        loss = torch.sqrt(self.mse(yhat, y) + self.eps)
        return loss

In [8]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    model.train()
    if CFG.apex:#使わないところは消す
        scaler = GradScaler()
    losses = AverageMeter()
    start = end = time.time()
    global_step = 0
    for step, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        if CFG.apex:
            with autocast():
                y_preds = model(images)
                loss = criterion(y_preds.view(-1), labels)
        else:
            y_preds = model(images)
            loss = criterion(y_preds.view(-1), labels)
        # record loss
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        if CFG.apex:
            scaler.scale(loss).backward()
        else:
            loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            if CFG.apex:
                scaler.step(optimizer)
                scaler.update()
            else:
                optimizer.step()
            optimizer.zero_grad()
            global_step += 1
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.6f}  '
                  .format(epoch+1, step, len(train_loader), 
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          grad_norm=grad_norm,
                          lr=scheduler.get_lr()[0]))

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    model.eval()
    losses = AverageMeter()
    preds = []
    start = end = time.time()
    for step, (images, labels) in enumerate(valid_loader):
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        # compute loss
        with torch.no_grad():
            y_preds = model(images)
        loss = criterion(y_preds.view(-1), labels)
        losses.update(loss.item(), batch_size)
        # record accuracy
        preds.append(y_preds.to('cpu').numpy())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    predictions = np.concatenate(preds)
    return losses.avg, predictions

In [9]:
# ====================================================
# Train loop
# ====================================================
def train_loop(folds, fold, seed):
    
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)
    valid_labels = valid_folds[CFG.target_col].values

    train_dataset = TrainDataset(train_folds, transform=get_transforms(data='train'))
    valid_dataset = TrainDataset(valid_folds, transform=get_transforms(data='train'))

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size, 
                              shuffle=True, 
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, 
                              batch_size=CFG.batch_size * 2, 
                              shuffle=False, 
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
    
    # ====================================================
    # scheduler 
    # ====================================================
    def get_scheduler(optimizer):
        if CFG.scheduler=='ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        return scheduler

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, pretrained=True)
    model.to(device)

    optimizer = Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
    scheduler = get_scheduler(optimizer)

    # ====================================================
    # loop
    # ====================================================
    criterion = RMSELoss()

    best_score = np.inf
    best_loss = np.inf
    
    for epoch in range(CFG.epochs):
        
        start_time = time.time()
        
        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
        
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        elif isinstance(scheduler, CosineAnnealingLR):
            scheduler.step()
        elif isinstance(scheduler, CosineAnnealingWarmRestarts):
            scheduler.step()

        # scoring
        score = get_score(valid_labels, preds)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')
       
        if score < best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(), 'preds': preds}, OUTPUT_DIR+f'{CFG.model_name}_{CFG.kfold}_fold{fold}_seed{seed}_best.pth')
    
    valid_folds['preds'] = torch.load(OUTPUT_DIR+f'{CFG.model_name}_{CFG.kfold}_fold{fold}_seed{seed}_best.pth', 
                                      map_location=torch.device('cpu'))['preds']

    return valid_folds

In [10]:
# ====================================================
# main
# ====================================================
def main():

    """
    Prepare: 1.train 
    """

    def get_result(result_df):
        preds = result_df['preds'].values
        labels = result_df[CFG.target_col].values
        score = get_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}')
    
    for seed in CFG.seed:
        LOGGER.info(f"========== seed{seed} ==========")
        seed_torch()

        train = pd.read_csv('/home/yamanaka/Estimate_KIc_with_ViT/Mototake_Analysis/VGG+GP/inout_data.csv', header=None, names=['Id', 'KIc'])
        train['file_path'] = ['/home/yamanaka/Estimate_KIc_with_ViT/Mototake_Analysis/VGG+GP/imagedata/' + str(i) + '.jpg' for i in train['Id']]

        if CFG.debug:
            CFG.epochs = 1
            train = train.sample(n=100, random_state=seed).reset_index(drop=True)

        if CFG.kfold == 'Kfold':
            Fold = KFold(n_splits=CFG.n_fold, shuffle=True, random_state=seed)
            for n, (train_index, val_index) in enumerate(Fold.split(train)):
                train.loc[val_index, 'fold'] = int(n)
            train['fold'] = train['fold'].astype(int)
        elif CFG.kfold == "StratifiedKfold":
            num_bins = int(np.floor(1 + np.log2(len(train))))
            train["bins"] = pd.cut(train[CFG.target_col], bins=num_bins, labels=False)
            Fold = StratifiedKFold(n_splits=CFG.n_fold, shuffle=True, random_state=seed)
            for n, (train_index, val_index) in enumerate(Fold.split(train, train["bins"])):
                train.loc[val_index, 'fold'] = int(n)
            train['fold'] = train['fold'].astype(int)

        # # train出力 vgg 
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            # train.to_csv vggも
            _oof_df = train_loop(train, fold, seed)
            oof_df = pd.concat([oof_df, _oof_df])
            LOGGER.info(f"========== fold: {fold} result ==========")
            get_result(_oof_df)

        # CV result
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)

        # save result
        oof_df.to_csv(OUTPUT_DIR+f'{CFG.model_name}_{CFG.kfold}_seed{seed}_oof_df.csv', index=False)

In [11]:
if __name__ == '__main__':
    main()



Epoch: [1][0/19] Elapsed 0m 1s (remain 0m 28s) Loss: 3.3628(3.3628) Grad: 3.2838  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 4s (remain 0m 3s) Loss: 0.7283(1.6367) Grad: 1.6802  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 7s (remain 0m 0s) Loss: 0.5398(1.2228) Grad: 2.1676  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5998(0.5998) 


Epoch 1 - avg_train_loss: 1.2228  avg_val_loss: 0.5903  time: 8s
Epoch 1 - Score: 0.5936
Epoch 1 - Save Best Score: 0.5936 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.7116(0.5903) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.7481(0.7481) Grad: 3.1230  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5809(0.6492) Grad: 1.1197  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.4806(0.6010) Grad: 0.9472  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5146(0.5146) 


Epoch 2 - avg_train_loss: 0.6010  avg_val_loss: 0.4854  time: 7s
Epoch 2 - Score: 0.4879
Epoch 2 - Save Best Score: 0.4879 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5506(0.4854) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4125(0.4125) Grad: 2.9299  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5176(0.5031) Grad: 2.5650  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3940(0.4959) Grad: 1.9097  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4069(0.4069) 


Epoch 3 - avg_train_loss: 0.4959  avg_val_loss: 0.4240  time: 7s
Epoch 3 - Score: 0.4248
Epoch 3 - Save Best Score: 0.4248 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4825(0.4240) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3440(0.3440) Grad: 0.8918  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4470(0.4514) Grad: 1.2680  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.4707(0.4645) Grad: 1.6301  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3939(0.3939) 


Epoch 4 - avg_train_loss: 0.4645  avg_val_loss: 0.4135  time: 7s
Epoch 4 - Score: 0.4144
Epoch 4 - Save Best Score: 0.4144 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4714(0.4135) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3503(0.3503) Grad: 1.9642  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4554(0.4482) Grad: 3.3885  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.4637(0.4660) Grad: 1.1811  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3912(0.3912) 


Epoch 5 - avg_train_loss: 0.4660  avg_val_loss: 0.3981  time: 7s
Epoch 5 - Score: 0.3986
Epoch 5 - Save Best Score: 0.3986 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4430(0.3981) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3005(0.3005) Grad: 1.5180  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3694(0.4014) Grad: 1.5837  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5294(0.4189) Grad: 4.5858  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3433(0.3433) 


Epoch 6 - avg_train_loss: 0.4189  avg_val_loss: 0.3896  time: 7s
Epoch 6 - Score: 0.3915
Epoch 6 - Save Best Score: 0.3915 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4249(0.3896) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4153(0.4153) Grad: 1.0373  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2903(0.4528) Grad: 0.5711  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.3846(0.4202) Grad: 3.2553  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3267(0.3267) 


Epoch 7 - avg_train_loss: 0.4202  avg_val_loss: 0.3431  time: 7s
Epoch 7 - Score: 0.3440
Epoch 7 - Save Best Score: 0.3440 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3967(0.3431) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5053(0.5053) Grad: 1.5858  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2960(0.3660) Grad: 0.8546  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.4292(0.3495) Grad: 0.5660  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3001(0.3001) 


Epoch 8 - avg_train_loss: 0.3495  avg_val_loss: 0.3089  time: 7s
Epoch 8 - Score: 0.3090
Epoch 8 - Save Best Score: 0.3090 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3122(0.3089) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2789(0.2789) Grad: 2.0697  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2211(0.2819) Grad: 0.5172  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2872(0.2989) Grad: 2.2513  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2804(0.2804) 


Epoch 9 - avg_train_loss: 0.2989  avg_val_loss: 0.2942  time: 7s
Epoch 9 - Score: 0.2945
Epoch 9 - Save Best Score: 0.2945 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3011(0.2942) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3409(0.3409) Grad: 1.6976  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2247(0.2803) Grad: 0.7490  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1586(0.2834) Grad: 1.6951  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2730(0.2730) 


Epoch 10 - avg_train_loss: 0.2834  avg_val_loss: 0.2864  time: 7s
Epoch 10 - Score: 0.2866
Epoch 10 - Save Best Score: 0.2866 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3007(0.2864) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2069(0.2069) Grad: 1.1948  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2109(0.2610) Grad: 1.1206  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.2192(0.2680) Grad: 0.8302  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2815(0.2815) 


Epoch 11 - avg_train_loss: 0.2680  avg_val_loss: 0.2810  time: 7s
Epoch 11 - Score: 0.2812
Epoch 11 - Save Best Score: 0.2812 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3029(0.2810) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2538(0.2538) Grad: 1.8438  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3151(0.2649) Grad: 0.8356  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.3311(0.2792) Grad: 2.8031  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3053(0.3053) 


Epoch 12 - avg_train_loss: 0.2792  avg_val_loss: 0.3110  time: 7s
Epoch 12 - Score: 0.3113


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2926(0.3110) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2373(0.2373) Grad: 1.3709  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2009(0.2788) Grad: 0.7089  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.2810(0.2707) Grad: 0.9353  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2693(0.2693) 


Epoch 13 - avg_train_loss: 0.2707  avg_val_loss: 0.2885  time: 7s
Epoch 13 - Score: 0.2892


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2738(0.2885) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3516(0.3516) Grad: 2.7956  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2228(0.2652) Grad: 2.2940  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2232(0.2472) Grad: 3.2312  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2551(0.2551) 


Epoch 14 - avg_train_loss: 0.2472  avg_val_loss: 0.2730  time: 7s
Epoch 14 - Score: 0.2734
Epoch 14 - Save Best Score: 0.2734 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2777(0.2730) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2168(0.2168) Grad: 0.9399  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1381(0.1881) Grad: 0.6714  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2092(0.1894) Grad: 1.0486  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2570(0.2570) 


Epoch 15 - avg_train_loss: 0.1894  avg_val_loss: 0.2630  time: 7s
Epoch 15 - Score: 0.2630
Epoch 15 - Save Best Score: 0.2630 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2631(0.2630) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2217(0.2217) Grad: 1.6497  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1187(0.1865) Grad: 1.7954  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2061(0.1782) Grad: 1.3072  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2199(0.2199) 


Epoch 16 - avg_train_loss: 0.1782  avg_val_loss: 0.2428  time: 7s
Epoch 16 - Score: 0.2435
Epoch 16 - Save Best Score: 0.2435 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2601(0.2428) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1573(0.1573) Grad: 1.4563  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2655(0.1869) Grad: 3.0607  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1610(0.1878) Grad: 1.8911  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2457(0.2457) 


Epoch 17 - avg_train_loss: 0.1878  avg_val_loss: 0.2690  time: 7s
Epoch 17 - Score: 0.2703


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3233(0.2690) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1492(0.1492) Grad: 2.1280  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2185(0.2052) Grad: 2.5773  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1420(0.2041) Grad: 0.8490  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2501(0.2501) 


Epoch 18 - avg_train_loss: 0.2041  avg_val_loss: 0.2831  time: 7s
Epoch 18 - Score: 0.2850


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2708(0.2831) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2183(0.2183) Grad: 2.9760  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1560(0.1970) Grad: 3.1712  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1361(0.1840) Grad: 2.7275  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2637(0.2637) 


Epoch 19 - avg_train_loss: 0.1840  avg_val_loss: 0.2928  time: 7s
Epoch 19 - Score: 0.2942


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2826(0.2928) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1634(0.1634) Grad: 1.6708  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1527(0.1664) Grad: 1.1102  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1913(0.1657) Grad: 1.0079  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2211(0.2211) 


Epoch 20 - avg_train_loss: 0.1657  avg_val_loss: 0.2695  time: 7s
Epoch 20 - Score: 0.2727


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2866(0.2695) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1718(0.1718) Grad: 2.2565  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1001(0.1372) Grad: 0.9861  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1115(0.1367) Grad: 0.8097  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2226(0.2226) 


Epoch 21 - avg_train_loss: 0.1367  avg_val_loss: 0.2570  time: 7s
Epoch 21 - Score: 0.2587


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2895(0.2570) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1281(0.1281) Grad: 1.4645  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1016(0.1179) Grad: 1.0129  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1610(0.1200) Grad: 1.0489  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2220(0.2220) 


Epoch 22 - avg_train_loss: 0.1200  avg_val_loss: 0.2493  time: 7s
Epoch 22 - Score: 0.2504


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2819(0.2493) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0934(0.0934) Grad: 1.3553  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1377(0.1175) Grad: 2.0559  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1210(0.1193) Grad: 0.8001  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2103(0.2103) 


Epoch 23 - avg_train_loss: 0.1193  avg_val_loss: 0.2545  time: 7s
Epoch 23 - Score: 0.2572


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2854(0.2545) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1091(0.1091) Grad: 0.8216  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1365(0.1337) Grad: 1.5149  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1083(0.1395) Grad: 2.3543  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2280(0.2280) 


Epoch 24 - avg_train_loss: 0.1395  avg_val_loss: 0.2740  time: 7s
Epoch 24 - Score: 0.2769


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2862(0.2740) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1317(0.1317) Grad: 4.5010  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1095(0.1616) Grad: 1.9066  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1937(0.1544) Grad: 0.8660  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2205(0.2205) 


Epoch 25 - avg_train_loss: 0.1544  avg_val_loss: 0.2520  time: 7s
Epoch 25 - Score: 0.2535


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2926(0.2520) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1273(0.1273) Grad: 1.7468  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1572(0.1472) Grad: 0.6400  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1537(0.1444) Grad: 1.0351  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2340(0.2340) 


Epoch 26 - avg_train_loss: 0.1444  avg_val_loss: 0.2684  time: 7s
Epoch 26 - Score: 0.2703


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2661(0.2684) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1466(0.1466) Grad: 3.4089  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1176(0.1376) Grad: 3.2055  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1197(0.1288) Grad: 0.6833  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2124(0.2124) 


Epoch 27 - avg_train_loss: 0.1288  avg_val_loss: 0.2426  time: 7s
Epoch 27 - Score: 0.2445


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2988(0.2426) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0795(0.0795) Grad: 0.7212  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1066(0.0989) Grad: 1.4673  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0957(0.0983) Grad: 1.2207  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2174(0.2174) 


Epoch 28 - avg_train_loss: 0.0983  avg_val_loss: 0.2479  time: 7s
Epoch 28 - Score: 0.2494


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2914(0.2479) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0903(0.0903) Grad: 0.7461  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0831(0.1004) Grad: 1.4983  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0946(0.1022) Grad: 1.8564  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2325(0.2325) 


Epoch 29 - avg_train_loss: 0.1022  avg_val_loss: 0.2618  time: 7s
Epoch 29 - Score: 0.2630


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2968(0.2618) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0832(0.0832) Grad: 2.7453  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0835(0.0968) Grad: 2.3459  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0955(0.1043) Grad: 1.8686  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2258(0.2258) 


Epoch 30 - avg_train_loss: 0.1043  avg_val_loss: 0.2400  time: 7s
Epoch 30 - Score: 0.2408
Epoch 30 - Save Best Score: 0.2408 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2837(0.2400) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0986(0.0986) Grad: 2.1966  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1114(0.1148) Grad: 0.8788  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1244(0.1239) Grad: 2.7035  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2424(0.2424) 


Epoch 31 - avg_train_loss: 0.1239  avg_val_loss: 0.2749  time: 7s
Epoch 31 - Score: 0.2764


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2799(0.2749) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1222(0.1222) Grad: 1.1790  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1432(0.1241) Grad: 2.5109  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1103(0.1177) Grad: 2.7562  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2225(0.2225) 


Epoch 32 - avg_train_loss: 0.1177  avg_val_loss: 0.2698  time: 7s
Epoch 32 - Score: 0.2728


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3086(0.2698) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1324(0.1324) Grad: 0.6995  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0883(0.0981) Grad: 0.5673  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0877(0.0982) Grad: 0.7778  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2209(0.2209) 


Epoch 33 - avg_train_loss: 0.0982  avg_val_loss: 0.2558  time: 7s
Epoch 33 - Score: 0.2579


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3081(0.2558) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1115(0.1115) Grad: 2.4744  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0952(0.0931) Grad: 1.2966  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1138(0.0880) Grad: 1.2275  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2262(0.2262) 


Epoch 34 - avg_train_loss: 0.0880  avg_val_loss: 0.2545  time: 7s
Epoch 34 - Score: 0.2557


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2924(0.2545) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0896(0.0896) Grad: 0.7307  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0767(0.0980) Grad: 2.2975  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0780(0.0884) Grad: 2.5747  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2269(0.2269) 


Epoch 35 - avg_train_loss: 0.0884  avg_val_loss: 0.2568  time: 7s
Epoch 35 - Score: 0.2582


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3010(0.2568) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0603(0.0603) Grad: 2.5779  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1076(0.0934) Grad: 0.9556  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0995(0.0964) Grad: 0.6327  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2098(0.2098) 


Epoch 36 - avg_train_loss: 0.0964  avg_val_loss: 0.2541  time: 7s
Epoch 36 - Score: 0.2573


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3176(0.2541) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0679(0.0679) Grad: 0.7140  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1495(0.1157) Grad: 4.6553  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1234(0.1278) Grad: 3.3036  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2192(0.2192) 


Epoch 37 - avg_train_loss: 0.1278  avg_val_loss: 0.2489  time: 7s
Epoch 37 - Score: 0.2508


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3078(0.2489) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1533(0.1533) Grad: 2.8640  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0999(0.1289) Grad: 3.9271  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0985(0.1221) Grad: 1.1431  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2292(0.2292) 


Epoch 38 - avg_train_loss: 0.1221  avg_val_loss: 0.2575  time: 7s
Epoch 38 - Score: 0.2588


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2579(0.2575) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1205(0.1205) Grad: 2.6271  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0888(0.0994) Grad: 1.0881  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0698(0.0924) Grad: 2.5900  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2145(0.2145) 


Epoch 39 - avg_train_loss: 0.0924  avg_val_loss: 0.2410  time: 7s
Epoch 39 - Score: 0.2420


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2633(0.2410) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0559(0.0559) Grad: 1.0943  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0596(0.0695) Grad: 1.2432  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0761(0.0723) Grad: 0.7788  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2161(0.2161) 


Epoch 40 - avg_train_loss: 0.0723  avg_val_loss: 0.2475  time: 7s
Epoch 40 - Score: 0.2490


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2775(0.2475) 


Score: 0.2408


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.1775(3.1775) Grad: 3.4634  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7576(1.5838) Grad: 3.2584  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5699(1.1988) Grad: 1.0747  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.7176(0.7176) 


Epoch 1 - avg_train_loss: 1.1988  avg_val_loss: 0.7087  time: 7s
Epoch 1 - Score: 0.7095
Epoch 1 - Save Best Score: 0.7095 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6340(0.7087) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.7073(0.7073) Grad: 0.6116  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7010(0.6271) Grad: 1.3940  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4667(0.5809) Grad: 2.7013  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6932(0.6932) 


Epoch 2 - avg_train_loss: 0.5809  avg_val_loss: 0.6858  time: 7s
Epoch 2 - Score: 0.6865
Epoch 2 - Save Best Score: 0.6865 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6172(0.6858) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5880(0.5880) Grad: 4.8604  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4889(0.5149) Grad: 0.7710  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3191(0.4929) Grad: 1.1308  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5366(0.5366) 


Epoch 3 - avg_train_loss: 0.4929  avg_val_loss: 0.5335  time: 7s
Epoch 3 - Score: 0.5353
Epoch 3 - Save Best Score: 0.5353 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4416(0.5335) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5588(0.5588) Grad: 0.8139  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4052(0.4401) Grad: 1.4354  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5077(0.4375) Grad: 1.6174  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5257(0.5257) 


Epoch 4 - avg_train_loss: 0.4375  avg_val_loss: 0.5201  time: 7s
Epoch 4 - Score: 0.5222
Epoch 4 - Save Best Score: 0.5222 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4218(0.5201) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4041(0.4041) Grad: 1.3401  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4665(0.4285) Grad: 1.1161  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4275(0.4220) Grad: 2.5000  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4799(0.4799) 


Epoch 5 - avg_train_loss: 0.4220  avg_val_loss: 0.4742  time: 7s
Epoch 5 - Score: 0.4758
Epoch 5 - Save Best Score: 0.4758 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3907(0.4742) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3021(0.3021) Grad: 2.8193  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3667(0.3806) Grad: 2.4850  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4530(0.4086) Grad: 3.3311  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4419(0.4419) 


Epoch 6 - avg_train_loss: 0.4086  avg_val_loss: 0.4514  time: 7s
Epoch 6 - Score: 0.4531
Epoch 6 - Save Best Score: 0.4531 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3788(0.4514) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5191(0.5191) Grad: 4.0153  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4430(0.4231) Grad: 3.1461  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5398(0.4340) Grad: 2.3418  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4705(0.4705) 


Epoch 7 - avg_train_loss: 0.4340  avg_val_loss: 0.4585  time: 7s
Epoch 7 - Score: 0.4626


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3264(0.4585) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2928(0.2928) Grad: 2.0730  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3107(0.3303) Grad: 1.2581  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.3237(0.3372) Grad: 0.9874  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4233(0.4233) 


Epoch 8 - avg_train_loss: 0.3372  avg_val_loss: 0.4026  time: 7s
Epoch 8 - Score: 0.4098
Epoch 8 - Save Best Score: 0.4098 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2354(0.4026) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3467(0.3467) Grad: 1.6353  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2780(0.2766) Grad: 1.2324  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2169(0.2946) Grad: 0.7412  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4243(0.4243) 


Epoch 9 - avg_train_loss: 0.2946  avg_val_loss: 0.3902  time: 7s
Epoch 9 - Score: 0.3976
Epoch 9 - Save Best Score: 0.3976 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2211(0.3902) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3238(0.3238) Grad: 0.6181  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2589(0.2923) Grad: 0.5621  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.2861(0.2865) Grad: 1.2507  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3919(0.3919) 


Epoch 10 - avg_train_loss: 0.2865  avg_val_loss: 0.3747  time: 7s
Epoch 10 - Score: 0.3807
Epoch 10 - Save Best Score: 0.3807 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2283(0.3747) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3357(0.3357) Grad: 0.6314  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3758(0.2991) Grad: 3.5070  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3028(0.2881) Grad: 1.3574  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4027(0.4027) 


Epoch 11 - avg_train_loss: 0.2881  avg_val_loss: 0.3637  time: 7s
Epoch 11 - Score: 0.3702
Epoch 11 - Save Best Score: 0.3702 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2110(0.3637) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2666(0.2666) Grad: 0.8617  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2928(0.2738) Grad: 2.3068  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1888(0.2751) Grad: 0.6967  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3872(0.3872) 


Epoch 12 - avg_train_loss: 0.2751  avg_val_loss: 0.3590  time: 7s
Epoch 12 - Score: 0.3654
Epoch 12 - Save Best Score: 0.3654 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2079(0.3590) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2663(0.2663) Grad: 0.7929  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3614(0.2514) Grad: 3.0236  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3300(0.2595) Grad: 2.1471  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4879(0.4879) 


Epoch 13 - avg_train_loss: 0.2595  avg_val_loss: 0.4152  time: 7s
Epoch 13 - Score: 0.4227


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2606(0.4152) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2574(0.2574) Grad: 2.3541  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2080(0.2563) Grad: 1.3525  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1953(0.2392) Grad: 0.6082  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4198(0.4198) 


Epoch 14 - avg_train_loss: 0.2392  avg_val_loss: 0.3703  time: 7s
Epoch 14 - Score: 0.3798


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1849(0.3703) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2046(0.2046) Grad: 1.1546  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1920(0.1990) Grad: 1.5734  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2050(0.2005) Grad: 1.1775  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3843(0.3843) 


Epoch 15 - avg_train_loss: 0.2005  avg_val_loss: 0.3375  time: 7s
Epoch 15 - Score: 0.3446
Epoch 15 - Save Best Score: 0.3446 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1863(0.3375) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1841(0.1841) Grad: 2.1164  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2739(0.1926) Grad: 0.8247  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1932(0.1872) Grad: 0.6004  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3644(0.3644) 


Epoch 16 - avg_train_loss: 0.1872  avg_val_loss: 0.3191  time: 7s
Epoch 16 - Score: 0.3250
Epoch 16 - Save Best Score: 0.3250 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1878(0.3191) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1630(0.1630) Grad: 1.0465  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1755(0.1928) Grad: 1.1052  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1966(0.1860) Grad: 0.5802  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3785(0.3785) 


Epoch 17 - avg_train_loss: 0.1860  avg_val_loss: 0.3219  time: 7s
Epoch 17 - Score: 0.3292


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1792(0.3219) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1845(0.1845) Grad: 1.6099  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1780(0.1605) Grad: 1.0753  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2195(0.1805) Grad: 2.1874  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3740(0.3740) 


Epoch 18 - avg_train_loss: 0.1805  avg_val_loss: 0.3190  time: 7s
Epoch 18 - Score: 0.3275


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1618(0.3190) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2604(0.2604) Grad: 2.0435  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2308(0.2073) Grad: 4.3874  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2203(0.2056) Grad: 1.5184  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3917(0.3917) 


Epoch 19 - avg_train_loss: 0.2056  avg_val_loss: 0.3801  time: 7s
Epoch 19 - Score: 0.3902


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1923(0.3801) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1780(0.1780) Grad: 2.1418  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1960(0.1767) Grad: 1.7796  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1407(0.1781) Grad: 0.8083  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3985(0.3985) 


Epoch 20 - avg_train_loss: 0.1781  avg_val_loss: 0.3552  time: 7s
Epoch 20 - Score: 0.3591


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2451(0.3552) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1919(0.1919) Grad: 1.5641  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1801(0.1576) Grad: 1.2329  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1623(0.1513) Grad: 1.2029  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3924(0.3924) 


Epoch 21 - avg_train_loss: 0.1513  avg_val_loss: 0.3315  time: 7s
Epoch 21 - Score: 0.3438


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1341(0.3315) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1793(0.1793) Grad: 0.5465  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1232(0.1428) Grad: 0.9900  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1455(0.1311) Grad: 0.4579  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3908(0.3908) 


Epoch 22 - avg_train_loss: 0.1311  avg_val_loss: 0.3243  time: 7s
Epoch 22 - Score: 0.3354


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1440(0.3243) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1444(0.1444) Grad: 1.1059  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1020(0.1130) Grad: 2.3233  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2049(0.1261) Grad: 0.8237  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3810(0.3810) 


Epoch 23 - avg_train_loss: 0.1261  avg_val_loss: 0.3247  time: 7s
Epoch 23 - Score: 0.3351


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1454(0.3247) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0788(0.0788) Grad: 2.0532  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1411(0.1210) Grad: 1.0833  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1551(0.1256) Grad: 1.6440  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3787(0.3787) 


Epoch 24 - avg_train_loss: 0.1256  avg_val_loss: 0.3457  time: 7s
Epoch 24 - Score: 0.3496


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2313(0.3457) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1708(0.1708) Grad: 3.6407  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1567(0.1524) Grad: 1.0918  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1298(0.1582) Grad: 1.2790  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3984(0.3984) 


Epoch 25 - avg_train_loss: 0.1582  avg_val_loss: 0.3554  time: 7s
Epoch 25 - Score: 0.3644


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1779(0.3554) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1139(0.1139) Grad: 2.0948  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1500(0.1401) Grad: 0.7518  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1316(0.1401) Grad: 0.6777  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3602(0.3602) 


Epoch 26 - avg_train_loss: 0.1401  avg_val_loss: 0.3039  time: 7s
Epoch 26 - Score: 0.3105
Epoch 26 - Save Best Score: 0.3105 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1776(0.3039) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1528(0.1528) Grad: 0.8232  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1080(0.1144) Grad: 3.6493  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1205(0.1160) Grad: 1.2402  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3896(0.3896) 


Epoch 27 - avg_train_loss: 0.1160  avg_val_loss: 0.3262  time: 7s
Epoch 27 - Score: 0.3351


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1682(0.3262) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0672(0.0672) Grad: 0.9646  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1053(0.1025) Grad: 1.1010  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1138(0.1046) Grad: 0.5522  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3882(0.3882) 


Epoch 28 - avg_train_loss: 0.1046  avg_val_loss: 0.3205  time: 7s
Epoch 28 - Score: 0.3297


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1664(0.3205) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1339(0.1339) Grad: 1.0981  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0894(0.1025) Grad: 0.5485  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0957(0.1035) Grad: 2.1894  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3738(0.3738) 


Epoch 29 - avg_train_loss: 0.1035  avg_val_loss: 0.3229  time: 7s
Epoch 29 - Score: 0.3315


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1605(0.3229) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0594(0.0594) Grad: 1.7967  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0665(0.0995) Grad: 1.2576  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1003(0.1093) Grad: 1.3161  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3730(0.3730) 


Epoch 30 - avg_train_loss: 0.1093  avg_val_loss: 0.3177  time: 7s
Epoch 30 - Score: 0.3225


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2152(0.3177) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0726(0.0726) Grad: 0.6915  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1168(0.1125) Grad: 1.3361  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1055(0.1134) Grad: 1.7902  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3938(0.3938) 


Epoch 31 - avg_train_loss: 0.1134  avg_val_loss: 0.3317  time: 7s
Epoch 31 - Score: 0.3410


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1661(0.3317) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1086(0.1086) Grad: 1.7926  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1002(0.1169) Grad: 2.0544  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1005(0.1134) Grad: 1.6579  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3992(0.3992) 


Epoch 32 - avg_train_loss: 0.1134  avg_val_loss: 0.3387  time: 7s
Epoch 32 - Score: 0.3468


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1837(0.3387) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1477(0.1477) Grad: 1.0338  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0756(0.1057) Grad: 0.6842  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0714(0.0973) Grad: 0.7162  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3777(0.3777) 


Epoch 33 - avg_train_loss: 0.0973  avg_val_loss: 0.3153  time: 7s
Epoch 33 - Score: 0.3247


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1544(0.3153) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0737(0.0737) Grad: 0.5781  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0671(0.0811) Grad: 1.8445  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0932(0.0832) Grad: 0.6268  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3762(0.3762) 


Epoch 34 - avg_train_loss: 0.0832  avg_val_loss: 0.3096  time: 7s
Epoch 34 - Score: 0.3179


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1691(0.3096) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0907(0.0907) Grad: 1.2420  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0793(0.0799) Grad: 3.4608  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0912(0.0813) Grad: 1.4328  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3826(0.3826) 


Epoch 35 - avg_train_loss: 0.0813  avg_val_loss: 0.3261  time: 7s
Epoch 35 - Score: 0.3329


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1891(0.3261) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1094(0.1094) Grad: 0.6574  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1194(0.0887) Grad: 3.3778  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1070(0.0933) Grad: 2.0263  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3627(0.3627) 


Epoch 36 - avg_train_loss: 0.0933  avg_val_loss: 0.3319  time: 7s
Epoch 36 - Score: 0.3394


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1752(0.3319) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0940(0.0940) Grad: 1.1874  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1054(0.1011) Grad: 0.8311  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1110(0.1032) Grad: 1.4267  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3712(0.3712) 


Epoch 37 - avg_train_loss: 0.1032  avg_val_loss: 0.3380  time: 7s
Epoch 37 - Score: 0.3430


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2090(0.3380) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0970(0.0970) Grad: 2.7070  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1096(0.1014) Grad: 0.8251  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1042(0.0997) Grad: 1.9166  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3730(0.3730) 


Epoch 38 - avg_train_loss: 0.0997  avg_val_loss: 0.3247  time: 7s
Epoch 38 - Score: 0.3310


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1883(0.3247) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1138(0.1138) Grad: 3.7687  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1008(0.0973) Grad: 0.5492  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0959(0.0909) Grad: 0.7748  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3928(0.3928) 


Epoch 39 - avg_train_loss: 0.0909  avg_val_loss: 0.3242  time: 7s
Epoch 39 - Score: 0.3338


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1641(0.3242) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0762(0.0762) Grad: 1.3090  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0579(0.0673) Grad: 0.8461  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0660(0.0702) Grad: 0.6150  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3845(0.3845) 


Epoch 40 - avg_train_loss: 0.0702  avg_val_loss: 0.3215  time: 7s
Epoch 40 - Score: 0.3309


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1577(0.3215) 


Score: 0.3105


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.3937(3.3937) Grad: 3.4221  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.8390(1.5557) Grad: 2.9689  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6461(1.1764) Grad: 0.9529  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6386(0.6386) 


Epoch 1 - avg_train_loss: 1.1764  avg_val_loss: 0.6559  time: 7s
Epoch 1 - Score: 0.6576
Epoch 1 - Save Best Score: 0.6576 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5765(0.6559) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5931(0.5931) Grad: 0.8820  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4881(0.6006) Grad: 1.3672  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5305(0.5430) Grad: 2.7008  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6076(0.6076) 


Epoch 2 - avg_train_loss: 0.5430  avg_val_loss: 0.6224  time: 7s
Epoch 2 - Score: 0.6234
Epoch 2 - Save Best Score: 0.6234 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.7006(0.6224) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.6167(0.6167) Grad: 4.1453  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4270(0.4965) Grad: 0.9358  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4076(0.4606) Grad: 1.2732  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4661(0.4661) 


Epoch 3 - avg_train_loss: 0.4606  avg_val_loss: 0.4794  time: 7s
Epoch 3 - Score: 0.4817
Epoch 3 - Save Best Score: 0.4817 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3966(0.4794) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4496(0.4496) Grad: 1.2390  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5082(0.4298) Grad: 1.2174  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4134(0.4170) Grad: 1.9952  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4666(0.4666) 


Epoch 4 - avg_train_loss: 0.4170  avg_val_loss: 0.4616  time: 7s
Epoch 4 - Score: 0.4636
Epoch 4 - Save Best Score: 0.4636 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3711(0.4616) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4185(0.4185) Grad: 3.2548  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5008(0.3957) Grad: 1.7863  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5085(0.4187) Grad: 1.6682  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4573(0.4573) 


Epoch 5 - avg_train_loss: 0.4187  avg_val_loss: 0.4341  time: 7s
Epoch 5 - Score: 0.4353
Epoch 5 - Save Best Score: 0.4353 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3651(0.4341) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3835(0.3835) Grad: 1.3488  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2741(0.3836) Grad: 0.8587  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3509(0.3797) Grad: 1.0190  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4553(0.4553) 


Epoch 6 - avg_train_loss: 0.3797  avg_val_loss: 0.4124  time: 7s
Epoch 6 - Score: 0.4148
Epoch 6 - Save Best Score: 0.4148 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3279(0.4124) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3487(0.3487) Grad: 1.5266  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2731(0.3656) Grad: 1.9582  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3990(0.3383) Grad: 1.6213  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4732(0.4732) 


Epoch 7 - avg_train_loss: 0.3383  avg_val_loss: 0.4256  time: 7s
Epoch 7 - Score: 0.4303


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2897(0.4256) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3119(0.3119) Grad: 0.9350  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4589(0.3080) Grad: 1.0823  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2572(0.3097) Grad: 0.6324  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5004(0.5004) 


Epoch 8 - avg_train_loss: 0.3097  avg_val_loss: 0.4139  time: 7s
Epoch 8 - Score: 0.4230


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2545(0.4139) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2185(0.2185) Grad: 0.9064  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1762(0.2609) Grad: 1.1962  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2510(0.2585) Grad: 1.1685  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4792(0.4792) 


Epoch 9 - avg_train_loss: 0.2585  avg_val_loss: 0.3966  time: 7s
Epoch 9 - Score: 0.4044
Epoch 9 - Save Best Score: 0.4044 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2605(0.3966) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2656(0.2656) Grad: 1.6245  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2121(0.2457) Grad: 0.4007  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2378(0.2402) Grad: 0.9003  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4635(0.4635) 


Epoch 10 - avg_train_loss: 0.2402  avg_val_loss: 0.3787  time: 7s
Epoch 10 - Score: 0.3864
Epoch 10 - Save Best Score: 0.3864 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2605(0.3787) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1953(0.1953) Grad: 0.4806  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3670(0.2424) Grad: 0.5997  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1832(0.2357) Grad: 0.8315  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4639(0.4639) 


Epoch 11 - avg_train_loss: 0.2357  avg_val_loss: 0.3791  time: 7s
Epoch 11 - Score: 0.3867


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2652(0.3791) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2519(0.2519) Grad: 1.1121  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2467(0.2436) Grad: 0.9950  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2264(0.2433) Grad: 1.5819  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5028(0.5028) 


Epoch 12 - avg_train_loss: 0.2433  avg_val_loss: 0.3946  time: 7s
Epoch 12 - Score: 0.4058


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2647(0.3946) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2076(0.2076) Grad: 1.9781  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2568(0.2279) Grad: 2.7902  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3039(0.2319) Grad: 2.6591  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4874(0.4874) 


Epoch 13 - avg_train_loss: 0.2319  avg_val_loss: 0.4010  time: 7s
Epoch 13 - Score: 0.4088


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2736(0.4010) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1786(0.1786) Grad: 1.9097  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2136(0.2310) Grad: 1.1541  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2604(0.2245) Grad: 1.9708  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4413(0.4413) 


Epoch 14 - avg_train_loss: 0.2245  avg_val_loss: 0.3808  time: 7s
Epoch 14 - Score: 0.3849
Epoch 14 - Save Best Score: 0.3849 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2881(0.3808) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1679(0.1679) Grad: 0.7435  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1780(0.1651) Grad: 1.3076  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2315(0.1767) Grad: 0.7734  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4709(0.4709) 


Epoch 15 - avg_train_loss: 0.1767  avg_val_loss: 0.3680  time: 7s
Epoch 15 - Score: 0.3784
Epoch 15 - Save Best Score: 0.3784 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2614(0.3680) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1358(0.1358) Grad: 0.7103  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1594(0.1735) Grad: 0.7910  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1326(0.1630) Grad: 1.0217  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4595(0.4595) 


Epoch 16 - avg_train_loss: 0.1630  avg_val_loss: 0.3663  time: 7s
Epoch 16 - Score: 0.3752
Epoch 16 - Save Best Score: 0.3752 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2573(0.3663) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1577(0.1577) Grad: 0.8811  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2011(0.1478) Grad: 3.4722  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2428(0.1497) Grad: 0.9997  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4580(0.4580) 


Epoch 17 - avg_train_loss: 0.1497  avg_val_loss: 0.3560  time: 7s
Epoch 17 - Score: 0.3665
Epoch 17 - Save Best Score: 0.3665 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2546(0.3560) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1455(0.1455) Grad: 3.0059  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1693(0.1476) Grad: 0.9795  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2132(0.1651) Grad: 1.0440  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4723(0.4723) 


Epoch 18 - avg_train_loss: 0.1651  avg_val_loss: 0.3813  time: 7s
Epoch 18 - Score: 0.3892


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2854(0.3813) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1736(0.1736) Grad: 4.2079  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1856(0.1739) Grad: 1.2941  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1882(0.1770) Grad: 1.0761  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4724(0.4724) 


Epoch 19 - avg_train_loss: 0.1770  avg_val_loss: 0.3976  time: 7s
Epoch 19 - Score: 0.4032


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2976(0.3976) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1704(0.1704) Grad: 3.9632  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1495(0.1746) Grad: 2.8857  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1667(0.1740) Grad: 2.4338  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4321(0.4321) 


Epoch 20 - avg_train_loss: 0.1740  avg_val_loss: 0.3951  time: 7s
Epoch 20 - Score: 0.3981


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2923(0.3951) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1823(0.1823) Grad: 3.6555  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1091(0.1440) Grad: 1.6238  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1093(0.1341) Grad: 0.7338  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4279(0.4279) 


Epoch 21 - avg_train_loss: 0.1341  avg_val_loss: 0.3578  time: 7s
Epoch 21 - Score: 0.3633
Epoch 21 - Save Best Score: 0.3633 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2619(0.3578) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0891(0.0891) Grad: 2.2293  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0888(0.1089) Grad: 1.7669  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1217(0.1082) Grad: 0.7164  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4545(0.4545) 


Epoch 22 - avg_train_loss: 0.1082  avg_val_loss: 0.3668  time: 7s
Epoch 22 - Score: 0.3748


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2572(0.3668) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1002(0.1002) Grad: 1.2108  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1110(0.1162) Grad: 0.6270  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0807(0.1124) Grad: 2.6551  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4690(0.4690) 


Epoch 23 - avg_train_loss: 0.1124  avg_val_loss: 0.3723  time: 7s
Epoch 23 - Score: 0.3815


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2708(0.3723) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0847(0.0847) Grad: 1.8909  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2035(0.1469) Grad: 2.0135  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1376(0.1400) Grad: 3.9674  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4501(0.4501) 


Epoch 24 - avg_train_loss: 0.1400  avg_val_loss: 0.3758  time: 7s
Epoch 24 - Score: 0.3822


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2619(0.3758) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1379(0.1379) Grad: 2.3032  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1593(0.1594) Grad: 0.9116  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1161(0.1491) Grad: 3.2215  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4321(0.4321) 


Epoch 25 - avg_train_loss: 0.1491  avg_val_loss: 0.3597  time: 7s
Epoch 25 - Score: 0.3653


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2695(0.3597) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1194(0.1194) Grad: 0.7244  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1769(0.1470) Grad: 1.2536  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1387(0.1398) Grad: 1.4723  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4732(0.4732) 


Epoch 26 - avg_train_loss: 0.1398  avg_val_loss: 0.3704  time: 7s
Epoch 26 - Score: 0.3806


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2678(0.3704) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1274(0.1274) Grad: 0.8262  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1038(0.1251) Grad: 1.8147  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0678(0.1147) Grad: 1.0009  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4739(0.4739) 


Epoch 27 - avg_train_loss: 0.1147  avg_val_loss: 0.3722  time: 7s
Epoch 27 - Score: 0.3822


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2704(0.3722) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0709(0.0709) Grad: 1.4304  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0793(0.0873) Grad: 1.6765  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0985(0.0887) Grad: 2.5346  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4460(0.4460) 


Epoch 28 - avg_train_loss: 0.0887  avg_val_loss: 0.3572  time: 7s
Epoch 28 - Score: 0.3662


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2320(0.3572) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0894(0.0894) Grad: 0.8134  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0821(0.0848) Grad: 2.0691  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0936(0.0866) Grad: 2.2662  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4483(0.4483) 


Epoch 29 - avg_train_loss: 0.0866  avg_val_loss: 0.3585  time: 7s
Epoch 29 - Score: 0.3674


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2401(0.3585) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0666(0.0666) Grad: 0.7158  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1140(0.0952) Grad: 2.2513  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0893(0.0978) Grad: 2.4764  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4582(0.4582) 


Epoch 30 - avg_train_loss: 0.0978  avg_val_loss: 0.3697  time: 7s
Epoch 30 - Score: 0.3787


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2366(0.3697) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0847(0.0847) Grad: 0.8114  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1369(0.1063) Grad: 2.5152  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1634(0.1149) Grad: 4.4403  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4865(0.4865) 


Epoch 31 - avg_train_loss: 0.1149  avg_val_loss: 0.3798  time: 7s
Epoch 31 - Score: 0.3911


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2552(0.3798) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1142(0.1142) Grad: 0.7299  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0940(0.1095) Grad: 1.1036  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1009(0.1037) Grad: 0.6069  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4570(0.4570) 


Epoch 32 - avg_train_loss: 0.1037  avg_val_loss: 0.3708  time: 7s
Epoch 32 - Score: 0.3785


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2636(0.3708) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0993(0.0993) Grad: 2.5530  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0716(0.0924) Grad: 0.9586  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0925(0.0894) Grad: 1.1956  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4721(0.4721) 


Epoch 33 - avg_train_loss: 0.0894  avg_val_loss: 0.3727  time: 7s
Epoch 33 - Score: 0.3825


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2613(0.3727) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0926(0.0926) Grad: 1.4599  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0742(0.0834) Grad: 0.7167  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0552(0.0774) Grad: 2.4362  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4739(0.4739) 


Epoch 34 - avg_train_loss: 0.0774  avg_val_loss: 0.3706  time: 7s
Epoch 34 - Score: 0.3820


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2323(0.3706) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0622(0.0622) Grad: 2.0856  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0677(0.0805) Grad: 0.7483  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0813(0.0828) Grad: 2.0270  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4644(0.4644) 


Epoch 35 - avg_train_loss: 0.0828  avg_val_loss: 0.3717  time: 7s
Epoch 35 - Score: 0.3802


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2719(0.3717) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0829(0.0829) Grad: 0.9828  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0930(0.0761) Grad: 0.8928  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0775(0.0809) Grad: 0.8217  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4633(0.4633) 


Epoch 36 - avg_train_loss: 0.0809  avg_val_loss: 0.3864  time: 7s
Epoch 36 - Score: 0.3935


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2563(0.3864) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1309(0.1309) Grad: 1.6990  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0983(0.0972) Grad: 0.9369  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0921(0.0978) Grad: 1.2076  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4657(0.4657) 


Epoch 37 - avg_train_loss: 0.0978  avg_val_loss: 0.3716  time: 7s
Epoch 37 - Score: 0.3805


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2653(0.3716) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1101(0.1101) Grad: 1.2748  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0793(0.0923) Grad: 2.0640  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0851(0.0956) Grad: 2.7106  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4751(0.4751) 


Epoch 38 - avg_train_loss: 0.0956  avg_val_loss: 0.3666  time: 7s
Epoch 38 - Score: 0.3780


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2661(0.3666) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0665(0.0665) Grad: 1.8992  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1078(0.0817) Grad: 0.9600  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0625(0.0761) Grad: 2.1268  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4486(0.4486) 


Epoch 39 - avg_train_loss: 0.0761  avg_val_loss: 0.3569  time: 7s
Epoch 39 - Score: 0.3657


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2518(0.3569) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0885(0.0885) Grad: 3.5893  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0570(0.0772) Grad: 1.6505  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0543(0.0714) Grad: 0.5635  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4778(0.4778) 


Epoch 40 - avg_train_loss: 0.0714  avg_val_loss: 0.3729  time: 7s
Epoch 40 - Score: 0.3841


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2469(0.3729) 


Score: 0.3633


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.4528(3.4528) Grad: 3.4342  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7245(1.6745) Grad: 2.9253  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5480(1.2408) Grad: 1.1790  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6713(0.6713) 


Epoch 1 - avg_train_loss: 1.2408  avg_val_loss: 0.6548  time: 7s
Epoch 1 - Score: 0.6551
Epoch 1 - Save Best Score: 0.6551 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6118(0.6548) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4650(0.4650) Grad: 1.2410  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5990(0.6018) Grad: 2.6523  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5870(0.6015) Grad: 3.5337  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5683(0.5683) 


Epoch 2 - avg_train_loss: 0.6015  avg_val_loss: 0.5529  time: 7s
Epoch 2 - Score: 0.5532
Epoch 2 - Save Best Score: 0.5532 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5157(0.5529) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6218(0.6218) Grad: 1.3524  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4324(0.5387) Grad: 1.0786  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3509(0.5028) Grad: 0.6423  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5047(0.5047) 


Epoch 3 - avg_train_loss: 0.5028  avg_val_loss: 0.4783  time: 7s
Epoch 3 - Score: 0.4793
Epoch 3 - Save Best Score: 0.4793 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4170(0.4783) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4386(0.4386) Grad: 2.1094  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5076(0.4523) Grad: 1.2667  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4671(0.4550) Grad: 2.3236  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4751(0.4751) 


Epoch 4 - avg_train_loss: 0.4550  avg_val_loss: 0.4676  time: 7s
Epoch 4 - Score: 0.4686
Epoch 4 - Save Best Score: 0.4686 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3991(0.4676) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4650(0.4650) Grad: 1.9478  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4481(0.4705) Grad: 1.0487  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4274(0.4570) Grad: 2.4988  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4236(0.4236) 


Epoch 5 - avg_train_loss: 0.4570  avg_val_loss: 0.4300  time: 7s
Epoch 5 - Score: 0.4308
Epoch 5 - Save Best Score: 0.4308 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3840(0.4300) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4399(0.4399) Grad: 0.7987  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3044(0.4203) Grad: 2.6905  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4581(0.4134) Grad: 2.2878  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3983(0.3983) 


Epoch 6 - avg_train_loss: 0.4134  avg_val_loss: 0.4015  time: 7s
Epoch 6 - Score: 0.4025
Epoch 6 - Save Best Score: 0.4025 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3451(0.4015) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4587(0.4587) Grad: 0.4733  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2712(0.3607) Grad: 2.0285  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3440(0.3762) Grad: 1.2015  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3854(0.3854) 


Epoch 7 - avg_train_loss: 0.3762  avg_val_loss: 0.3822  time: 7s
Epoch 7 - Score: 0.3837
Epoch 7 - Save Best Score: 0.3837 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3103(0.3822) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4960(0.4960) Grad: 0.9111  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4114(0.3677) Grad: 1.9945  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3146(0.3529) Grad: 2.7105  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3359(0.3359) 


Epoch 8 - avg_train_loss: 0.3529  avg_val_loss: 0.3480  time: 7s
Epoch 8 - Score: 0.3483
Epoch 8 - Save Best Score: 0.3483 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3324(0.3480) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2603(0.2603) Grad: 3.5915  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2699(0.2982) Grad: 1.0592  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3357(0.2958) Grad: 0.3939  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3276(0.3276) 


Epoch 9 - avg_train_loss: 0.2958  avg_val_loss: 0.3256  time: 7s
Epoch 9 - Score: 0.3264
Epoch 9 - Save Best Score: 0.3264 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2759(0.3256) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2606(0.2606) Grad: 0.5056  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2959(0.2845) Grad: 0.4571  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2636(0.2711) Grad: 1.2984  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3187(0.3187) 


Epoch 10 - avg_train_loss: 0.2711  avg_val_loss: 0.3150  time: 7s
Epoch 10 - Score: 0.3165
Epoch 10 - Save Best Score: 0.3165 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2479(0.3150) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1803(0.1803) Grad: 0.5689  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2852(0.2877) Grad: 1.7515  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3093(0.2634) Grad: 2.5870  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3059(0.3059) 


Epoch 11 - avg_train_loss: 0.2634  avg_val_loss: 0.3018  time: 7s
Epoch 11 - Score: 0.3022
Epoch 11 - Save Best Score: 0.3022 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2673(0.3018) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2280(0.2280) Grad: 1.2359  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3845(0.2873) Grad: 4.3498  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2326(0.2858) Grad: 3.8703  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3270(0.3270) 


Epoch 12 - avg_train_loss: 0.2858  avg_val_loss: 0.3251  time: 7s
Epoch 12 - Score: 0.3303


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2037(0.3251) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3053(0.3053) Grad: 0.8483  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2856(0.2638) Grad: 1.8695  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3986(0.2671) Grad: 4.8178  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3492(0.3492) 


Epoch 13 - avg_train_loss: 0.2671  avg_val_loss: 0.3369  time: 7s
Epoch 13 - Score: 0.3379


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2809(0.3369) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2264(0.2264) Grad: 4.7924  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1579(0.2247) Grad: 0.8783  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2160(0.2172) Grad: 1.1273  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3707(0.3707) 


Epoch 14 - avg_train_loss: 0.2172  avg_val_loss: 0.3384  time: 7s
Epoch 14 - Score: 0.3401


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2738(0.3384) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2305(0.2305) Grad: 0.8045  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1499(0.1934) Grad: 0.5652  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2033(0.1953) Grad: 1.8827  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3043(0.3043) 


Epoch 15 - avg_train_loss: 0.1953  avg_val_loss: 0.2982  time: 7s
Epoch 15 - Score: 0.2983
Epoch 15 - Save Best Score: 0.2983 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2854(0.2982) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1464(0.1464) Grad: 1.0452  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2335(0.1744) Grad: 0.7055  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1768(0.1660) Grad: 1.7927  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3068(0.3068) 


Epoch 16 - avg_train_loss: 0.1660  avg_val_loss: 0.2919  time: 7s
Epoch 16 - Score: 0.2922
Epoch 16 - Save Best Score: 0.2922 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2676(0.2919) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2184(0.2184) Grad: 1.7058  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1286(0.1797) Grad: 1.1317  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1687(0.1708) Grad: 1.7710  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3327(0.3327) 


Epoch 17 - avg_train_loss: 0.1708  avg_val_loss: 0.3105  time: 7s
Epoch 17 - Score: 0.3116


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2595(0.3105) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1742(0.1742) Grad: 1.2479  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2095(0.1926) Grad: 0.9240  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1697(0.1895) Grad: 3.1535  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3522(0.3522) 


Epoch 18 - avg_train_loss: 0.1895  avg_val_loss: 0.3198  time: 7s
Epoch 18 - Score: 0.3222


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2394(0.3198) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2273(0.2273) Grad: 3.0619  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1941(0.2094) Grad: 3.3804  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1915(0.1969) Grad: 0.9498  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3061(0.3061) 


Epoch 19 - avg_train_loss: 0.1969  avg_val_loss: 0.3021  time: 7s
Epoch 19 - Score: 0.3028


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2596(0.3021) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1708(0.1708) Grad: 3.3345  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1318(0.1595) Grad: 0.8716  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0990(0.1583) Grad: 0.9488  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3259(0.3259) 


Epoch 20 - avg_train_loss: 0.1583  avg_val_loss: 0.2994  time: 7s
Epoch 20 - Score: 0.3024


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2063(0.2994) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2005(0.2005) Grad: 2.3574  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1246(0.1503) Grad: 1.6006  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1386(0.1414) Grad: 2.1402  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2931(0.2931) 


Epoch 21 - avg_train_loss: 0.1414  avg_val_loss: 0.2721  time: 7s
Epoch 21 - Score: 0.2730
Epoch 21 - Save Best Score: 0.2730 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2288(0.2721) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1044(0.1044) Grad: 0.5849  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1095(0.1134) Grad: 1.4249  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1645(0.1221) Grad: 0.8246  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2853(0.2853) 


Epoch 22 - avg_train_loss: 0.1221  avg_val_loss: 0.2871  time: 7s
Epoch 22 - Score: 0.2876


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2546(0.2871) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0908(0.0908) Grad: 2.5154  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1211(0.1364) Grad: 1.5763  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1249(0.1270) Grad: 3.7931  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3090(0.3090) 


Epoch 23 - avg_train_loss: 0.1270  avg_val_loss: 0.2935  time: 7s
Epoch 23 - Score: 0.2960


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2085(0.2935) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1393(0.1393) Grad: 1.1777  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1298(0.1175) Grad: 3.2925  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0932(0.1195) Grad: 2.1043  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3058(0.3058) 


Epoch 24 - avg_train_loss: 0.1195  avg_val_loss: 0.2879  time: 7s
Epoch 24 - Score: 0.2889


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2367(0.2879) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1724(0.1724) Grad: 1.3930  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1552(0.1363) Grad: 0.8741  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0828(0.1295) Grad: 3.2032  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3056(0.3056) 


Epoch 25 - avg_train_loss: 0.1295  avg_val_loss: 0.2692  time: 7s
Epoch 25 - Score: 0.2748


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1487(0.2692) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1373(0.1373) Grad: 0.8519  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1093(0.1220) Grad: 2.6682  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1814(0.1265) Grad: 1.8525  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3035(0.3035) 


Epoch 26 - avg_train_loss: 0.1265  avg_val_loss: 0.2886  time: 7s
Epoch 26 - Score: 0.2898


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2310(0.2886) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0895(0.0895) Grad: 0.5998  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1605(0.1029) Grad: 2.3735  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0907(0.0992) Grad: 2.1095  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3148(0.3148) 


Epoch 27 - avg_train_loss: 0.0992  avg_val_loss: 0.2877  time: 7s
Epoch 27 - Score: 0.2887


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2479(0.2877) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1371(0.1371) Grad: 1.8587  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0626(0.0995) Grad: 1.7075  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0971(0.0942) Grad: 1.0098  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3291(0.3291) 


Epoch 28 - avg_train_loss: 0.0942  avg_val_loss: 0.2938  time: 7s
Epoch 28 - Score: 0.2955


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2453(0.2938) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0943(0.0943) Grad: 0.7189  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0757(0.0994) Grad: 0.8166  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0899(0.0955) Grad: 1.4304  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3049(0.3049) 


Epoch 29 - avg_train_loss: 0.0955  avg_val_loss: 0.2928  time: 7s
Epoch 29 - Score: 0.2937


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2437(0.2928) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1361(0.1361) Grad: 1.8395  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1330(0.1049) Grad: 4.3422  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1510(0.1139) Grad: 4.0787  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3393(0.3393) 


Epoch 30 - avg_train_loss: 0.1139  avg_val_loss: 0.3038  time: 7s
Epoch 30 - Score: 0.3055


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2518(0.3038) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1442(0.1442) Grad: 2.9586  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1628(0.1466) Grad: 3.0650  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1330(0.1527) Grad: 1.9617  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3417(0.3417) 


Epoch 31 - avg_train_loss: 0.1527  avg_val_loss: 0.3090  time: 7s
Epoch 31 - Score: 0.3104


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2636(0.3090) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1280(0.1280) Grad: 2.7449  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1224(0.1189) Grad: 2.3012  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1126(0.1177) Grad: 1.8308  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3357(0.3357) 


Epoch 32 - avg_train_loss: 0.1177  avg_val_loss: 0.3045  time: 7s
Epoch 32 - Score: 0.3070


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2231(0.3045) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1150(0.1150) Grad: 0.7925  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0797(0.0962) Grad: 1.8662  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1053(0.0938) Grad: 0.9712  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3095(0.3095) 


Epoch 33 - avg_train_loss: 0.0938  avg_val_loss: 0.2845  time: 7s
Epoch 33 - Score: 0.2854


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2474(0.2845) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0655(0.0655) Grad: 1.1963  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0559(0.0753) Grad: 1.1545  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0672(0.0755) Grad: 1.1287  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2987(0.2987) 


Epoch 34 - avg_train_loss: 0.0755  avg_val_loss: 0.2855  time: 7s
Epoch 34 - Score: 0.2861


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2458(0.2855) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0870(0.0870) Grad: 0.5893  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0739(0.0773) Grad: 1.2154  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0714(0.0818) Grad: 2.5429  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3196(0.3196) 


Epoch 35 - avg_train_loss: 0.0818  avg_val_loss: 0.2832  time: 7s
Epoch 35 - Score: 0.2857


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2133(0.2832) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0712(0.0712) Grad: 2.2652  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1148(0.1048) Grad: 1.5767  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0698(0.1052) Grad: 1.6341  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3156(0.3156) 


Epoch 36 - avg_train_loss: 0.1052  avg_val_loss: 0.2850  time: 7s
Epoch 36 - Score: 0.2868


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2239(0.2850) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1065(0.1065) Grad: 2.0058  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1129(0.1132) Grad: 3.3746  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0866(0.1136) Grad: 2.1749  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3088(0.3088) 


Epoch 37 - avg_train_loss: 0.1136  avg_val_loss: 0.2884  time: 7s
Epoch 37 - Score: 0.2899


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2247(0.2884) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0779(0.0779) Grad: 2.5175  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1068(0.1007) Grad: 3.1163  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0929(0.1077) Grad: 0.9134  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2935(0.2935) 


Epoch 38 - avg_train_loss: 0.1077  avg_val_loss: 0.2907  time: 7s
Epoch 38 - Score: 0.2912


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2554(0.2907) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0698(0.0698) Grad: 0.5463  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0756(0.0847) Grad: 0.5570  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0810(0.0820) Grad: 0.8945  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2875(0.2875) 


Epoch 39 - avg_train_loss: 0.0820  avg_val_loss: 0.2727  time: 7s
Epoch 39 - Score: 0.2735


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2277(0.2727) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0530(0.0530) Grad: 0.5984  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0627(0.0663) Grad: 1.3790  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0697(0.0696) Grad: 1.2960  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3049(0.3049) 


Epoch 40 - avg_train_loss: 0.0696  avg_val_loss: 0.2834  time: 7s
Epoch 40 - Score: 0.2843


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2393(0.2834) 


Score: 0.2730


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.3852(3.3852) Grad: 3.2534  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6945(1.6378) Grad: 3.9148  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6058(1.2173) Grad: 0.1516  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.7216(0.7216) 


Epoch 1 - avg_train_loss: 1.2173  avg_val_loss: 0.6024  time: 7s
Epoch 1 - Score: 0.6112
Epoch 1 - Save Best Score: 0.6112 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5670(0.6024) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.7345(0.7345) Grad: 0.2905  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6590(0.6226) Grad: 0.5929  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6362(0.6166) Grad: 1.3275  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6212(0.6212) 


Epoch 2 - avg_train_loss: 0.6166  avg_val_loss: 0.5036  time: 7s
Epoch 2 - Score: 0.5144
Epoch 2 - Save Best Score: 0.5144 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4893(0.5036) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5076(0.5076) Grad: 1.2602  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5625(0.5476) Grad: 2.3956  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4172(0.5205) Grad: 0.5972  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5917(0.5917) 


Epoch 3 - avg_train_loss: 0.5205  avg_val_loss: 0.4822  time: 7s
Epoch 3 - Score: 0.4912
Epoch 3 - Save Best Score: 0.4912 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4369(0.4822) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5196(0.5196) Grad: 2.8716  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4688(0.4992) Grad: 1.5114  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4377(0.4887) Grad: 0.9677  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6006(0.6006) 


Epoch 4 - avg_train_loss: 0.4887  avg_val_loss: 0.4619  time: 7s
Epoch 4 - Score: 0.4779
Epoch 4 - Save Best Score: 0.4779 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4375(0.4619) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3582(0.3582) Grad: 2.9652  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6233(0.4920) Grad: 0.9224  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4279(0.4994) Grad: 2.3854  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5875(0.5875) 


Epoch 5 - avg_train_loss: 0.4994  avg_val_loss: 0.4363  time: 7s
Epoch 5 - Score: 0.4552
Epoch 5 - Save Best Score: 0.4552 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3773(0.4363) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3706(0.3706) Grad: 1.9985  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5336(0.4692) Grad: 0.8066  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.4279(0.4600) Grad: 1.1902  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4972(0.4972) 


Epoch 6 - avg_train_loss: 0.4600  avg_val_loss: 0.3807  time: 7s
Epoch 6 - Score: 0.3932
Epoch 6 - Save Best Score: 0.3932 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3093(0.3807) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3814(0.3814) Grad: 0.6107  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3223(0.3938) Grad: 1.6364  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.3073(0.4115) Grad: 0.3484  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5260(0.5260) 


Epoch 7 - avg_train_loss: 0.4115  avg_val_loss: 0.4165  time: 7s
Epoch 7 - Score: 0.4267


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3540(0.4165) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4072(0.4072) Grad: 3.0062  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3368(0.3602) Grad: 2.2143  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.3190(0.3805) Grad: 2.2744  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4553(0.4553) 


Epoch 8 - avg_train_loss: 0.3805  avg_val_loss: 0.3649  time: 7s
Epoch 8 - Score: 0.3730
Epoch 8 - Save Best Score: 0.3730 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2774(0.3649) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2207(0.2207) Grad: 1.4738  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2601(0.3115) Grad: 0.6339  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3384(0.3064) Grad: 1.9724  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4400(0.4400) 


Epoch 9 - avg_train_loss: 0.3064  avg_val_loss: 0.3289  time: 7s
Epoch 9 - Score: 0.3424
Epoch 9 - Save Best Score: 0.3424 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2142(0.3289) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2143(0.2143) Grad: 1.7309  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2281(0.2719) Grad: 0.7863  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2841(0.2870) Grad: 1.5382  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4472(0.4472) 


Epoch 10 - avg_train_loss: 0.2870  avg_val_loss: 0.3311  time: 7s
Epoch 10 - Score: 0.3461


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2010(0.3311) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3117(0.3117) Grad: 0.8938  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2163(0.2688) Grad: 0.6892  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2948(0.2701) Grad: 1.4166  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4261(0.4261) 


Epoch 11 - avg_train_loss: 0.2701  avg_val_loss: 0.3135  time: 7s
Epoch 11 - Score: 0.3284
Epoch 11 - Save Best Score: 0.3284 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1863(0.3135) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4441(0.4441) Grad: 1.4157  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2036(0.2826) Grad: 0.9801  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2491(0.2817) Grad: 3.1437  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4045(0.4045) 


Epoch 12 - avg_train_loss: 0.2817  avg_val_loss: 0.3154  time: 7s
Epoch 12 - Score: 0.3253
Epoch 12 - Save Best Score: 0.3253 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1980(0.3154) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2182(0.2182) Grad: 1.1274  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2566(0.2684) Grad: 2.6602  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4229(0.2863) Grad: 3.4011  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4260(0.4260) 


Epoch 13 - avg_train_loss: 0.2863  avg_val_loss: 0.3051  time: 7s
Epoch 13 - Score: 0.3229
Epoch 13 - Save Best Score: 0.3229 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1627(0.3051) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2828(0.2828) Grad: 0.8458  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2151(0.2379) Grad: 1.6235  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2218(0.2372) Grad: 1.4186  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4296(0.4296) 


Epoch 14 - avg_train_loss: 0.2372  avg_val_loss: 0.3222  time: 7s
Epoch 14 - Score: 0.3347


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2360(0.3222) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2208(0.2208) Grad: 1.2779  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1286(0.2020) Grad: 1.0208  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1570(0.1980) Grad: 1.9973  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3979(0.3979) 


Epoch 15 - avg_train_loss: 0.1980  avg_val_loss: 0.3097  time: 7s
Epoch 15 - Score: 0.3196
Epoch 15 - Save Best Score: 0.3196 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1922(0.3097) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2351(0.2351) Grad: 2.2786  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1712(0.1808) Grad: 1.0952  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1176(0.1744) Grad: 1.4240  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3906(0.3906) 


Epoch 16 - avg_train_loss: 0.1744  avg_val_loss: 0.2998  time: 7s
Epoch 16 - Score: 0.3100
Epoch 16 - Save Best Score: 0.3100 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1958(0.2998) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2005(0.2005) Grad: 1.5480  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2091(0.1766) Grad: 0.4165  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1718(0.1742) Grad: 1.1369  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3822(0.3822) 


Epoch 17 - avg_train_loss: 0.1742  avg_val_loss: 0.2852  time: 7s
Epoch 17 - Score: 0.2968
Epoch 17 - Save Best Score: 0.2968 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1997(0.2852) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1275(0.1275) Grad: 0.7533  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1789(0.1744) Grad: 1.2900  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1715(0.1757) Grad: 1.8177  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4118(0.4118) 


Epoch 18 - avg_train_loss: 0.1757  avg_val_loss: 0.2937  time: 7s
Epoch 18 - Score: 0.3105


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1802(0.2937) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1283(0.1283) Grad: 3.4264  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2088(0.1942) Grad: 1.3936  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2040(0.1905) Grad: 1.0913  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3972(0.3972) 


Epoch 19 - avg_train_loss: 0.1905  avg_val_loss: 0.3042  time: 7s
Epoch 19 - Score: 0.3158


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1712(0.3042) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2046(0.2046) Grad: 1.3075  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2905(0.1941) Grad: 4.5959  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1410(0.1848) Grad: 2.4272  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3872(0.3872) 


Epoch 20 - avg_train_loss: 0.1848  avg_val_loss: 0.3138  time: 7s
Epoch 20 - Score: 0.3199


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2547(0.3138) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1561(0.1561) Grad: 2.7036  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1472(0.1346) Grad: 0.8446  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1173(0.1369) Grad: 1.5322  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3784(0.3784) 


Epoch 21 - avg_train_loss: 0.1369  avg_val_loss: 0.2877  time: 7s
Epoch 21 - Score: 0.2976


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2219(0.2877) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1273(0.1273) Grad: 0.5725  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1312(0.1229) Grad: 0.6804  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1226(0.1183) Grad: 0.8860  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3874(0.3874) 


Epoch 22 - avg_train_loss: 0.1183  avg_val_loss: 0.2862  time: 7s
Epoch 22 - Score: 0.2986


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2114(0.2862) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0723(0.0723) Grad: 0.6129  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0982(0.1209) Grad: 3.0341  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1014(0.1158) Grad: 1.4386  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3892(0.3892) 


Epoch 23 - avg_train_loss: 0.1158  avg_val_loss: 0.2949  time: 7s
Epoch 23 - Score: 0.3056


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2071(0.2949) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1070(0.1070) Grad: 2.5180  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1139(0.1178) Grad: 0.6481  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1777(0.1198) Grad: 2.5887  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3876(0.3876) 


Epoch 24 - avg_train_loss: 0.1198  avg_val_loss: 0.3006  time: 7s
Epoch 24 - Score: 0.3102


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1938(0.3006) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1213(0.1213) Grad: 2.3858  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1585(0.1268) Grad: 2.3178  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1105(0.1340) Grad: 2.6834  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3952(0.3952) 


Epoch 25 - avg_train_loss: 0.1340  avg_val_loss: 0.3023  time: 7s
Epoch 25 - Score: 0.3123


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2362(0.3023) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1698(0.1698) Grad: 0.7806  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1512(0.1420) Grad: 2.5756  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1166(0.1304) Grad: 1.3680  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3948(0.3948) 


Epoch 26 - avg_train_loss: 0.1304  avg_val_loss: 0.2992  time: 7s
Epoch 26 - Score: 0.3100


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2509(0.2992) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1397(0.1397) Grad: 1.1496  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1125(0.0965) Grad: 1.2767  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1294(0.1065) Grad: 2.1258  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3803(0.3803) 


Epoch 27 - avg_train_loss: 0.1065  avg_val_loss: 0.2729  time: 7s
Epoch 27 - Score: 0.2876
Epoch 27 - Save Best Score: 0.2876 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1826(0.2729) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0901(0.0901) Grad: 1.2660  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0715(0.0917) Grad: 1.7866  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1237(0.0856) Grad: 1.6402  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3651(0.3651) 


Epoch 28 - avg_train_loss: 0.0856  avg_val_loss: 0.2745  time: 7s
Epoch 28 - Score: 0.2855
Epoch 28 - Save Best Score: 0.2855 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1734(0.2745) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0795(0.0795) Grad: 1.1452  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1014(0.0833) Grad: 0.6298  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0964(0.0873) Grad: 1.1890  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3823(0.3823) 


Epoch 29 - avg_train_loss: 0.0873  avg_val_loss: 0.2781  time: 7s
Epoch 29 - Score: 0.2917


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1911(0.2781) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0585(0.0585) Grad: 3.3199  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1105(0.0815) Grad: 1.1182  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1739(0.0942) Grad: 2.9897  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3929(0.3929) 


Epoch 30 - avg_train_loss: 0.0942  avg_val_loss: 0.2981  time: 7s
Epoch 30 - Score: 0.3087


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2312(0.2981) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0976(0.0976) Grad: 3.7271  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1381(0.1104) Grad: 1.8857  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1249(0.1161) Grad: 1.2474  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4041(0.4041) 


Epoch 31 - avg_train_loss: 0.1161  avg_val_loss: 0.3001  time: 7s
Epoch 31 - Score: 0.3127


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2331(0.3001) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1630(0.1630) Grad: 1.3881  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1044(0.1152) Grad: 1.0507  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1285(0.1097) Grad: 0.7426  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3645(0.3645) 


Epoch 32 - avg_train_loss: 0.1097  avg_val_loss: 0.2763  time: 7s
Epoch 32 - Score: 0.2861


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2108(0.2763) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0820(0.0820) Grad: 1.6644  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1285(0.0934) Grad: 1.1076  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0768(0.0917) Grad: 0.7698  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3864(0.3864) 


Epoch 33 - avg_train_loss: 0.0917  avg_val_loss: 0.2888  time: 7s
Epoch 33 - Score: 0.3003


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2190(0.2888) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0705(0.0705) Grad: 2.7372  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1008(0.0787) Grad: 2.0934  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0828(0.0792) Grad: 0.7934  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3877(0.3877) 


Epoch 34 - avg_train_loss: 0.0792  avg_val_loss: 0.2872  time: 7s
Epoch 34 - Score: 0.3001


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1751(0.2872) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0619(0.0619) Grad: 1.4165  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0647(0.0742) Grad: 0.6427  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0696(0.0746) Grad: 0.6317  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3832(0.3832) 


Epoch 35 - avg_train_loss: 0.0746  avg_val_loss: 0.2765  time: 7s
Epoch 35 - Score: 0.2908


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1945(0.2765) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0560(0.0560) Grad: 1.5840  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0999(0.0807) Grad: 1.8033  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0842(0.0819) Grad: 0.9887  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3728(0.3728) 


Epoch 36 - avg_train_loss: 0.0819  avg_val_loss: 0.2835  time: 7s
Epoch 36 - Score: 0.2936


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1936(0.2835) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0718(0.0718) Grad: 1.8584  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1171(0.0995) Grad: 3.0352  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1402(0.0989) Grad: 0.4852  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3619(0.3619) 


Epoch 37 - avg_train_loss: 0.0989  avg_val_loss: 0.2770  time: 7s
Epoch 37 - Score: 0.2861


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2101(0.2770) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0974(0.0974) Grad: 2.9575  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0823(0.0867) Grad: 1.8543  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0940(0.0908) Grad: 2.9006  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3672(0.3672) 


Epoch 38 - avg_train_loss: 0.0908  avg_val_loss: 0.2741  time: 7s
Epoch 38 - Score: 0.2851
Epoch 38 - Save Best Score: 0.2851 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2135(0.2741) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0829(0.0829) Grad: 2.3503  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0847(0.0846) Grad: 1.1811  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0785(0.0807) Grad: 1.1935  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3639(0.3639) 


Epoch 39 - avg_train_loss: 0.0807  avg_val_loss: 0.2792  time: 7s
Epoch 39 - Score: 0.2882


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2197(0.2792) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0771(0.0771) Grad: 3.5962  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0701(0.0702) Grad: 1.9279  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0894(0.0686) Grad: 0.9451  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3576(0.3576) 


Epoch 40 - avg_train_loss: 0.0686  avg_val_loss: 0.2680  time: 7s
Epoch 40 - Score: 0.2785
Epoch 40 - Save Best Score: 0.2785 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2081(0.2680) 


Score: 0.2785
Score: 0.2961
