In [1]:
# ====================================================
# Library
# ====================================================
import os
import gc
import sys
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

import albumentations as A
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM

sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
import timm

from torch.cuda.amp import autocast, GradScaler

import warnings
warnings.filterwarnings('ignore')

device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')

  warn(f"Failed to load image Python extension: {e}")


In [2]:
# ====================================================
# CFG
# ====================================================
class CFG:
    apex=False
    debug=False
    print_freq=10
    num_workers=0
    size=224
    model_name='vit_large_patch32_224'
    scheduler='CosineAnnealingLR' # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    epochs=40
    #factor=0.2 # ReduceLROnPlateau
    #patience=4 # ReduceLROnPlateau
    #eps=1e-6 # ReduceLROnPlateau
    T_max=3 # CosineAnnealingLR
    #T_0=3 # CosineAnnealingWarmRestarts
    lr=1e-4
    min_lr=1e-6
    batch_size=32
    weight_decay=1e-6
    gradient_accumulation_steps=1
    max_grad_norm=1000
    seed= [42, 10, 20, 51, 111]
    # seed = [19, 23, 34, 40, 56, 77, 88, 99, 12, 45, 67, 78, 89]
    # seed = [18, 24]
    target_size=1
    target_col='KIc'
    n_fold=5
    kfold="Kfold" #or Kfold
    trn_fold = [i for i in range(n_fold)]
    train=True
    grad_cam=True
      
# ====================================================
# Directory settings
# ====================================================
import os

OUTPUT_DIR = './KIc/Model/vit/'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

In [3]:
# ====================================================
# Utils
# ====================================================
def get_score(y_true, y_pred):
    score = mean_squared_error(y_true, y_pred, squared=False) # RMSE
    return score


def init_logger(log_file=OUTPUT_DIR+'train.log'):
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()


def seed_torch(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


In [4]:
# ====================================================
# Dataset
# ====================================================
class TrainDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['file_path'].values
        self.labels = df[CFG.target_col].values
        self.transform = transform
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_path = self.file_names[idx]
        image = cv2.imread(file_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        label = torch.tensor(self.labels[idx]).float()
        return image, label

In [5]:
# ====================================================
# Transforms
# ====================================================
def get_transforms(*, data):
    
    if data == 'train':
        return A.Compose([
            # A.Resize(CFG.size, CFG.size),
            A.RandomResizedCrop(CFG.size, CFG.size, scale=(0.85, 1.0)),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])

    elif data == 'valid':
        return A.Compose([
            A.Resize(CFG.size, CFG.size),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])

In [6]:
# ====================================================
# MODEL
# ====================================================
class CustomModel(nn.Module):
    def __init__(self, cfg, pretrained=False):
        super().__init__()
        self.cfg = cfg
        self.model = timm.create_model(self.cfg.model_name, pretrained=pretrained)
        self.n_features = self.model.head.in_features
        self.model.head = nn.Identity()
        self.fc = nn.Linear(self.n_features, self.cfg.target_size)

    def feature(self, image):
        feature = self.model(image)
        return feature
        
    def forward(self, image):
        feature = self.feature(image)
        output = self.fc(feature)
        return output

In [7]:
# ====================================================
# Loss
# ====================================================
class RMSELoss(nn.Module):
    def __init__(self, eps=1e-6):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        loss = torch.sqrt(self.mse(yhat, y) + self.eps)
        return loss

In [8]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    model.train()
    if CFG.apex:#使わないところは消す
        scaler = GradScaler()
    losses = AverageMeter()
    start = end = time.time()
    global_step = 0
    for step, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        if CFG.apex:
            with autocast():
                y_preds = model(images)
                loss = criterion(y_preds.view(-1), labels)
        else:
            y_preds = model(images)
            loss = criterion(y_preds.view(-1), labels)
        # record loss
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        if CFG.apex:
            scaler.scale(loss).backward()
        else:
            loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            if CFG.apex:
                scaler.step(optimizer)
                scaler.update()
            else:
                optimizer.step()
            optimizer.zero_grad()
            global_step += 1
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.6f}  '
                  .format(epoch+1, step, len(train_loader), 
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          grad_norm=grad_norm,
                          lr=scheduler.get_lr()[0]))

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    model.eval()
    losses = AverageMeter()
    preds = []
    start = end = time.time()
    for step, (images, labels) in enumerate(valid_loader):
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        # compute loss
        with torch.no_grad():
            y_preds = model(images)
        loss = criterion(y_preds.view(-1), labels)
        losses.update(loss.item(), batch_size)
        # record accuracy
        preds.append(y_preds.to('cpu').numpy())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    predictions = np.concatenate(preds)
    return losses.avg, predictions

In [9]:
# ====================================================
# Train loop
# ====================================================
def train_loop(folds, fold, seed):
    
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)
    valid_labels = valid_folds[CFG.target_col].values

    train_dataset = TrainDataset(train_folds, transform=get_transforms(data='train'))
    valid_dataset = TrainDataset(valid_folds, transform=get_transforms(data='train'))

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size, 
                              shuffle=True, 
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, 
                              batch_size=CFG.batch_size * 2, 
                              shuffle=False, 
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
    
    # ====================================================
    # scheduler 
    # ====================================================
    def get_scheduler(optimizer):
        if CFG.scheduler=='ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        return scheduler

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, pretrained=True)
    model.to(device)

    optimizer = Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
    scheduler = get_scheduler(optimizer)

    # ====================================================
    # loop
    # ====================================================
    criterion = RMSELoss()

    best_score = np.inf
    best_loss = np.inf
    
    for epoch in range(CFG.epochs):
        
        start_time = time.time()
        
        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
        
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        elif isinstance(scheduler, CosineAnnealingLR):
            scheduler.step()
        elif isinstance(scheduler, CosineAnnealingWarmRestarts):
            scheduler.step()

        # scoring
        score = get_score(valid_labels, preds)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')
       
        if score < best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(), 'preds': preds}, OUTPUT_DIR+f'{CFG.model_name}_{CFG.kfold}_fold{fold}_seed{seed}_bootstrap.pth')
    
    valid_folds['preds'] = torch.load(OUTPUT_DIR+f'{CFG.model_name}_{CFG.kfold}_fold{fold}_seed{seed}_bootstrap.pth', 
                                      map_location=torch.device('cpu'))['preds']

    return valid_folds

In [15]:
# ====================================================
# main
# ====================================================
def main():

    """
    Prepare: 1.train 
    """

    def get_result(result_df):
        preds = result_df['preds'].values
        labels = result_df[CFG.target_col].values
        score = get_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}')
    
    for seed in CFG.seed:
        LOGGER.info(f"========== seed{seed} ==========")
        seed_torch()

        data = pd.read_csv('/home/yamanaka/Estimate_KIc_with_ViT/Mototake_Analysis/VGG+GP/inout_data.csv', header=None, names=['Id', 'KIc'])
        data['file_path'] = ['/home/yamanaka/Estimate_KIc_with_ViT/Mototake_Analysis/VGG+GP/imagedata/' + str(i) + '.jpg' for i in data['Id']]

        train = pd.DataFrame()
        data_id = np.random.choice(data['Id'], size=len(data), replace=True)
        for i in data_id:
            train = pd.concat([train, data[data['Id']==i]])
        train = train.reset_index()
            
        if CFG.debug:
            CFG.epochs = 1
            train = train.sample(n=100, random_state=seed).reset_index(drop=True)


        if CFG.kfold == 'Kfold':
            Fold = KFold(n_splits=CFG.n_fold, shuffle=True, random_state=seed)
            for n, (train_index, val_index) in enumerate(Fold.split(train)):
                train.loc[val_index, 'fold'] = int(n)
            train['fold'] = train['fold'].astype(int)
        elif CFG.kfold == "StratifiedKfold":
            num_bins = int(np.floor(1 + np.log2(len(train))))
            train["bins"] = pd.cut(train[CFG.target_col], bins=num_bins, labels=False)
            Fold = StratifiedKFold(n_splits=CFG.n_fold, shuffle=True, random_state=seed)
            for n, (train_index, val_index) in enumerate(Fold.split(train, train["bins"])):
                train.loc[val_index, 'fold'] = int(n)
            train['fold'] = train['fold'].astype(int)

        # # train出力 vgg 
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            # train.to_csv vggも
            _oof_df = train_loop(train, fold, seed)
            oof_df = pd.concat([oof_df, _oof_df])
            LOGGER.info(f"========== fold: {fold} result ==========")
            get_result(_oof_df)

        # CV result
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)

        # save result
        oof_df.to_csv(OUTPUT_DIR+f'{CFG.model_name}_{CFG.kfold}_seed{seed}_bootstrap_oof_df.csv', index=False)

In [16]:
if __name__ == '__main__':
    main()



Epoch: [1][0/19] Elapsed 0m 1s (remain 0m 28s) Loss: 3.5265(3.5265) Grad: 3.3219  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 4s (remain 0m 3s) Loss: 0.7392(1.6405) Grad: 3.3097  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 7s (remain 0m 0s) Loss: 0.5391(1.2327) Grad: 1.2772  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6494(0.6494) 


Epoch 1 - avg_train_loss: 1.2327  avg_val_loss: 0.6200  time: 8s
Epoch 1 - Score: 0.6206
Epoch 1 - Save Best Score: 0.6206 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6096(0.6200) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.6534(0.6534) Grad: 3.0645  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6339(0.6040) Grad: 1.2726  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.7082(0.5781) Grad: 4.1957  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5633(0.5633) 


Epoch 2 - avg_train_loss: 0.5781  avg_val_loss: 0.5181  time: 7s
Epoch 2 - Score: 0.5195
Epoch 2 - Save Best Score: 0.5195 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4783(0.5181) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4911(0.4911) Grad: 1.3849  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3847(0.4774) Grad: 0.9483  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4196(0.4805) Grad: 1.3226  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5196(0.5196) 


Epoch 3 - avg_train_loss: 0.4805  avg_val_loss: 0.4617  time: 7s
Epoch 3 - Score: 0.4645
Epoch 3 - Save Best Score: 0.4645 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4514(0.4617) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3988(0.3988) Grad: 1.3441  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5215(0.4159) Grad: 1.8955  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6234(0.4301) Grad: 2.4390  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5029(0.5029) 


Epoch 4 - avg_train_loss: 0.4301  avg_val_loss: 0.4531  time: 7s
Epoch 4 - Score: 0.4553
Epoch 4 - Save Best Score: 0.4553 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4528(0.4531) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3936(0.3936) Grad: 1.9918  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4651(0.4278) Grad: 1.6902  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3826(0.4148) Grad: 2.2787  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4494(0.4494) 


Epoch 5 - avg_train_loss: 0.4148  avg_val_loss: 0.4035  time: 7s
Epoch 5 - Score: 0.4064
Epoch 5 - Save Best Score: 0.4064 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4288(0.4035) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4482(0.4482) Grad: 3.9798  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3200(0.3955) Grad: 0.5379  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2714(0.3942) Grad: 2.1047  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4316(0.4316) 


Epoch 6 - avg_train_loss: 0.3942  avg_val_loss: 0.3867  time: 7s
Epoch 6 - Score: 0.3887
Epoch 6 - Save Best Score: 0.3887 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3779(0.3867) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3410(0.3410) Grad: 1.9975  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4099(0.3722) Grad: 0.7803  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3280(0.3801) Grad: 1.6032  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3810(0.3810) 


Epoch 7 - avg_train_loss: 0.3801  avg_val_loss: 0.3350  time: 7s
Epoch 7 - Score: 0.3379
Epoch 7 - Save Best Score: 0.3379 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3443(0.3350) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2205(0.2205) Grad: 1.4947  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3277(0.3123) Grad: 1.0641  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3572(0.3218) Grad: 3.4504  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3919(0.3919) 


Epoch 8 - avg_train_loss: 0.3218  avg_val_loss: 0.3511  time: 7s
Epoch 8 - Score: 0.3528


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3266(0.3511) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2676(0.2676) Grad: 2.9315  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2560(0.2655) Grad: 1.1296  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2719(0.2748) Grad: 0.8434  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3077(0.3077) 


Epoch 9 - avg_train_loss: 0.2748  avg_val_loss: 0.2762  time: 7s
Epoch 9 - Score: 0.2775
Epoch 9 - Save Best Score: 0.2775 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2587(0.2762) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2182(0.2182) Grad: 1.5914  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1890(0.2588) Grad: 1.3387  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2652(0.2542) Grad: 1.0561  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3086(0.3086) 


Epoch 10 - avg_train_loss: 0.2542  avg_val_loss: 0.2750  time: 7s
Epoch 10 - Score: 0.2767
Epoch 10 - Save Best Score: 0.2767 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2745(0.2750) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2513(0.2513) Grad: 0.7661  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2162(0.2536) Grad: 2.6033  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2220(0.2512) Grad: 1.9576  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2953(0.2953) 


Epoch 11 - avg_train_loss: 0.2512  avg_val_loss: 0.2714  time: 7s
Epoch 11 - Score: 0.2721
Epoch 11 - Save Best Score: 0.2721 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2620(0.2714) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1898(0.1898) Grad: 2.7616  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2334(0.2495) Grad: 2.1135  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2767(0.2495) Grad: 4.1778  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3303(0.3303) 


Epoch 12 - avg_train_loss: 0.2495  avg_val_loss: 0.3195  time: 7s
Epoch 12 - Score: 0.3198


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3330(0.3195) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2539(0.2539) Grad: 5.1300  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3085(0.2894) Grad: 3.0541  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2821(0.2935) Grad: 0.4991  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2803(0.2803) 


Epoch 13 - avg_train_loss: 0.2935  avg_val_loss: 0.2508  time: 7s
Epoch 13 - Score: 0.2521
Epoch 13 - Save Best Score: 0.2521 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2360(0.2508) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2907(0.2907) Grad: 3.6658  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1543(0.1887) Grad: 0.9170  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2234(0.1997) Grad: 1.9238  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2582(0.2582) 


Epoch 14 - avg_train_loss: 0.1997  avg_val_loss: 0.2275  time: 7s
Epoch 14 - Score: 0.2291
Epoch 14 - Save Best Score: 0.2291 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1887(0.2275) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1817(0.1817) Grad: 0.6929  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2215(0.1689) Grad: 2.3958  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1809(0.1666) Grad: 2.4142  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2539(0.2539) 


Epoch 15 - avg_train_loss: 0.1666  avg_val_loss: 0.2269  time: 7s
Epoch 15 - Score: 0.2281
Epoch 15 - Save Best Score: 0.2281 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1963(0.2269) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1891(0.1891) Grad: 1.1009  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2394(0.1626) Grad: 0.5861  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1555(0.1570) Grad: 1.3470  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2525(0.2525) 


Epoch 16 - avg_train_loss: 0.1570  avg_val_loss: 0.2159  time: 7s
Epoch 16 - Score: 0.2183
Epoch 16 - Save Best Score: 0.2183 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1725(0.2159) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1840(0.1840) Grad: 1.4614  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1432(0.1631) Grad: 0.7729  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1313(0.1528) Grad: 2.6248  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2476(0.2476) 


Epoch 17 - avg_train_loss: 0.1528  avg_val_loss: 0.2149  time: 7s
Epoch 17 - Score: 0.2168
Epoch 17 - Save Best Score: 0.2168 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1740(0.2149) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2508(0.2508) Grad: 2.4110  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1783(0.1589) Grad: 1.0523  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1309(0.1535) Grad: 0.8579  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2795(0.2795) 


Epoch 18 - avg_train_loss: 0.1535  avg_val_loss: 0.2387  time: 7s
Epoch 18 - Score: 0.2425


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1587(0.2387) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1755(0.1755) Grad: 0.7843  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1795(0.1838) Grad: 3.1389  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1852(0.1840) Grad: 3.1289  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2999(0.2999) 


Epoch 19 - avg_train_loss: 0.1840  avg_val_loss: 0.2746  time: 7s
Epoch 19 - Score: 0.2755


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2549(0.2746) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2200(0.2200) Grad: 4.8540  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1863(0.1726) Grad: 1.5811  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1495(0.1635) Grad: 3.0624  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2893(0.2893) 


Epoch 20 - avg_train_loss: 0.1635  avg_val_loss: 0.2425  time: 7s
Epoch 20 - Score: 0.2458


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1963(0.2425) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1895(0.1895) Grad: 3.4499  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1434(0.1440) Grad: 1.5021  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0874(0.1263) Grad: 2.9605  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2453(0.2453) 


Epoch 21 - avg_train_loss: 0.1263  avg_val_loss: 0.2189  time: 7s
Epoch 21 - Score: 0.2205


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1734(0.2189) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1089(0.1089) Grad: 1.3994  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0733(0.1082) Grad: 1.2187  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1338(0.1119) Grad: 0.8409  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2401(0.2401) 


Epoch 22 - avg_train_loss: 0.1119  avg_val_loss: 0.2040  time: 7s
Epoch 22 - Score: 0.2073
Epoch 22 - Save Best Score: 0.2073 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1370(0.2040) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0695(0.0695) Grad: 0.4859  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1180(0.1116) Grad: 0.6392  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1494(0.1097) Grad: 3.2839  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2428(0.2428) 


Epoch 23 - avg_train_loss: 0.1097  avg_val_loss: 0.2106  time: 7s
Epoch 23 - Score: 0.2134


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1438(0.2106) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1237(0.1237) Grad: 1.7762  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1338(0.1077) Grad: 0.8174  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1287(0.1072) Grad: 1.6735  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2590(0.2590) 


Epoch 24 - avg_train_loss: 0.1072  avg_val_loss: 0.2140  time: 7s
Epoch 24 - Score: 0.2178


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1528(0.2140) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0925(0.0925) Grad: 1.3927  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1866(0.1229) Grad: 1.9408  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1305(0.1303) Grad: 3.1453  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2222(0.2222) 


Epoch 25 - avg_train_loss: 0.1303  avg_val_loss: 0.1958  time: 7s
Epoch 25 - Score: 0.1976
Epoch 25 - Save Best Score: 0.1976 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1469(0.1958) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0816(0.0816) Grad: 0.9922  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0895(0.1154) Grad: 0.5820  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0887(0.1148) Grad: 0.5402  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2430(0.2430) 


Epoch 26 - avg_train_loss: 0.1148  avg_val_loss: 0.2110  time: 7s
Epoch 26 - Score: 0.2139


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1413(0.2110) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0982(0.0982) Grad: 3.3028  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0688(0.0900) Grad: 1.1414  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0772(0.0886) Grad: 1.3260  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2495(0.2495) 


Epoch 27 - avg_train_loss: 0.0886  avg_val_loss: 0.2049  time: 7s
Epoch 27 - Score: 0.2090


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1395(0.2049) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0507(0.0507) Grad: 2.6751  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0643(0.0718) Grad: 2.6718  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0762(0.0703) Grad: 0.9927  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2510(0.2510) 


Epoch 28 - avg_train_loss: 0.0703  avg_val_loss: 0.2029  time: 7s
Epoch 28 - Score: 0.2077


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1310(0.2029) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0551(0.0551) Grad: 0.6171  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0728(0.0672) Grad: 2.2208  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0932(0.0751) Grad: 0.6629  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2242(0.2242) 


Epoch 29 - avg_train_loss: 0.0751  avg_val_loss: 0.1903  time: 7s
Epoch 29 - Score: 0.1930
Epoch 29 - Save Best Score: 0.1930 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1348(0.1903) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0580(0.0580) Grad: 2.9988  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0737(0.0851) Grad: 3.1515  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1077(0.0894) Grad: 2.3582  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2700(0.2700) 


Epoch 30 - avg_train_loss: 0.0894  avg_val_loss: 0.2223  time: 7s
Epoch 30 - Score: 0.2273


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1373(0.2223) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1195(0.1195) Grad: 3.0988  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0960(0.1033) Grad: 3.1336  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0783(0.1081) Grad: 2.4370  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2488(0.2488) 


Epoch 31 - avg_train_loss: 0.1081  avg_val_loss: 0.2047  time: 7s
Epoch 31 - Score: 0.2098


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1178(0.2047) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0806(0.0806) Grad: 1.9112  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0762(0.1037) Grad: 1.3598  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0879(0.1008) Grad: 0.9029  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2307(0.2307) 


Epoch 32 - avg_train_loss: 0.1008  avg_val_loss: 0.1973  time: 7s
Epoch 32 - Score: 0.2000


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1408(0.1973) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0786(0.0786) Grad: 2.2895  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0764(0.0811) Grad: 1.2277  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0582(0.0750) Grad: 1.1072  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2371(0.2371) 


Epoch 33 - avg_train_loss: 0.0750  avg_val_loss: 0.1989  time: 7s
Epoch 33 - Score: 0.2027


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1255(0.1989) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0828(0.0828) Grad: 1.5413  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0457(0.0578) Grad: 1.6899  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0687(0.0588) Grad: 1.0264  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2382(0.2382) 


Epoch 34 - avg_train_loss: 0.0588  avg_val_loss: 0.2002  time: 7s
Epoch 34 - Score: 0.2050


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1097(0.2002) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0602(0.0602) Grad: 0.6044  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0652(0.0653) Grad: 2.0285  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0522(0.0614) Grad: 2.9373  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2486(0.2486) 


Epoch 35 - avg_train_loss: 0.0614  avg_val_loss: 0.2020  time: 7s
Epoch 35 - Score: 0.2071


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1221(0.2020) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0820(0.0820) Grad: 0.8271  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0797(0.0684) Grad: 2.8598  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0595(0.0694) Grad: 2.2374  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2291(0.2291) 


Epoch 36 - avg_train_loss: 0.0694  avg_val_loss: 0.1952  time: 7s
Epoch 36 - Score: 0.1980


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1350(0.1952) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0643(0.0643) Grad: 1.5334  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0875(0.0915) Grad: 1.0666  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0968(0.0984) Grad: 3.4711  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2438(0.2438) 


Epoch 37 - avg_train_loss: 0.0984  avg_val_loss: 0.2178  time: 7s
Epoch 37 - Score: 0.2193


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1725(0.2178) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0742(0.0742) Grad: 1.1090  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1100(0.0899) Grad: 2.0912  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0829(0.0863) Grad: 2.5602  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2524(0.2524) 


Epoch 38 - avg_train_loss: 0.0863  avg_val_loss: 0.2103  time: 7s
Epoch 38 - Score: 0.2138


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1500(0.2103) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0645(0.0645) Grad: 1.7962  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0662(0.0668) Grad: 3.1631  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0643(0.0693) Grad: 1.4673  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2350(0.2350) 


Epoch 39 - avg_train_loss: 0.0693  avg_val_loss: 0.1960  time: 7s
Epoch 39 - Score: 0.1998


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1272(0.1960) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0544(0.0544) Grad: 0.6950  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0566(0.0540) Grad: 1.5538  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0509(0.0536) Grad: 2.3260  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2251(0.2251) 


Epoch 40 - avg_train_loss: 0.0536  avg_val_loss: 0.1988  time: 7s
Epoch 40 - Score: 0.2014


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1312(0.1988) 


Score: 0.1930


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.2480(3.2480) Grad: 3.4100  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7375(1.5668) Grad: 4.2644  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6202(1.1912) Grad: 0.3900  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6786(0.6786) 


Epoch 1 - avg_train_loss: 1.1912  avg_val_loss: 0.6091  time: 7s
Epoch 1 - Score: 0.6119
Epoch 1 - Save Best Score: 0.6119 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5505(0.6091) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.7567(0.7567) Grad: 4.0682  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6240(0.6250) Grad: 1.8307  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4101(0.5963) Grad: 3.1411  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5671(0.5671) 


Epoch 2 - avg_train_loss: 0.5963  avg_val_loss: 0.5135  time: 7s
Epoch 2 - Score: 0.5158
Epoch 2 - Save Best Score: 0.5158 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5087(0.5135) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4640(0.4640) Grad: 1.3547  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5310(0.4993) Grad: 1.3352  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3698(0.4933) Grad: 1.5745  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5304(0.5304) 


Epoch 3 - avg_train_loss: 0.4933  avg_val_loss: 0.4734  time: 7s
Epoch 3 - Score: 0.4759
Epoch 3 - Save Best Score: 0.4759 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4531(0.4734) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5441(0.5441) Grad: 2.0848  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4994(0.4496) Grad: 3.1056  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3405(0.4405) Grad: 1.1541  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4816(0.4816) 


Epoch 4 - avg_train_loss: 0.4405  avg_val_loss: 0.4433  time: 7s
Epoch 4 - Score: 0.4449
Epoch 4 - Save Best Score: 0.4449 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4552(0.4433) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3012(0.3012) Grad: 0.8923  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4222(0.4239) Grad: 2.3738  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4463(0.4222) Grad: 2.1178  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4434(0.4434) 


Epoch 5 - avg_train_loss: 0.4222  avg_val_loss: 0.4088  time: 7s
Epoch 5 - Score: 0.4101
Epoch 5 - Save Best Score: 0.4101 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4122(0.4088) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3450(0.3450) Grad: 1.2635  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4049(0.3921) Grad: 2.7890  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2353(0.3703) Grad: 0.5891  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4114(0.4114) 


Epoch 6 - avg_train_loss: 0.3703  avg_val_loss: 0.3779  time: 7s
Epoch 6 - Score: 0.3790
Epoch 6 - Save Best Score: 0.3790 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3478(0.3779) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3003(0.3003) Grad: 0.9887  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4179(0.3643) Grad: 1.0394  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3877(0.3693) Grad: 2.1296  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3761(0.3761) 


Epoch 7 - avg_train_loss: 0.3693  avg_val_loss: 0.3478  time: 7s
Epoch 7 - Score: 0.3487
Epoch 7 - Save Best Score: 0.3487 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3176(0.3478) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2925(0.2925) Grad: 0.6890  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3942(0.3138) Grad: 1.1165  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3033(0.3036) Grad: 4.2240  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3317(0.3317) 


Epoch 8 - avg_train_loss: 0.3036  avg_val_loss: 0.3356  time: 7s
Epoch 8 - Score: 0.3363
Epoch 8 - Save Best Score: 0.3363 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2936(0.3356) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3410(0.3410) Grad: 4.1453  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2778(0.2751) Grad: 1.4776  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2799(0.2645) Grad: 0.9756  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3003(0.3003) 


Epoch 9 - avg_train_loss: 0.2645  avg_val_loss: 0.3125  time: 7s
Epoch 9 - Score: 0.3139
Epoch 9 - Save Best Score: 0.3139 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2638(0.3125) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2099(0.2099) Grad: 3.2054  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2363(0.2366) Grad: 1.6158  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3276(0.2452) Grad: 1.5272  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3111(0.3111) 


Epoch 10 - avg_train_loss: 0.2452  avg_val_loss: 0.3090  time: 7s
Epoch 10 - Score: 0.3096
Epoch 10 - Save Best Score: 0.3096 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2688(0.3090) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1749(0.1749) Grad: 1.1070  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1373(0.2343) Grad: 0.8837  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2750(0.2364) Grad: 1.2289  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2744(0.2744) 


Epoch 11 - avg_train_loss: 0.2364  avg_val_loss: 0.2879  time: 7s
Epoch 11 - Score: 0.2889
Epoch 11 - Save Best Score: 0.2889 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2524(0.2879) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2338(0.2338) Grad: 0.7688  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2064(0.2369) Grad: 0.9997  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2840(0.2300) Grad: 1.7960  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3391(0.3391) 


Epoch 12 - avg_train_loss: 0.2300  avg_val_loss: 0.3427  time: 7s
Epoch 12 - Score: 0.3431


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3121(0.3427) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2721(0.2721) Grad: 3.8062  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2249(0.2526) Grad: 2.4998  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1818(0.2423) Grad: 1.3552  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2502(0.2502) 


Epoch 13 - avg_train_loss: 0.2423  avg_val_loss: 0.2571  time: 7s
Epoch 13 - Score: 0.2583
Epoch 13 - Save Best Score: 0.2583 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2132(0.2571) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2665(0.2665) Grad: 0.9594  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2157(0.2199) Grad: 4.2102  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2174(0.2100) Grad: 3.5279  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2589(0.2589) 


Epoch 14 - avg_train_loss: 0.2100  avg_val_loss: 0.2491  time: 7s
Epoch 14 - Score: 0.2498
Epoch 14 - Save Best Score: 0.2498 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2080(0.2491) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2059(0.2059) Grad: 2.2462  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1879(0.1849) Grad: 1.6716  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1571(0.1804) Grad: 2.2681  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2435(0.2435) 


Epoch 15 - avg_train_loss: 0.1804  avg_val_loss: 0.2456  time: 7s
Epoch 15 - Score: 0.2470
Epoch 15 - Save Best Score: 0.2470 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1931(0.2456) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2178(0.2178) Grad: 2.1747  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2143(0.1664) Grad: 2.1832  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1746(0.1566) Grad: 1.7088  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2189(0.2189) 


Epoch 16 - avg_train_loss: 0.1566  avg_val_loss: 0.2336  time: 7s
Epoch 16 - Score: 0.2353
Epoch 16 - Save Best Score: 0.2353 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1908(0.2336) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1843(0.1843) Grad: 1.9191  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1368(0.1809) Grad: 2.0496  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1674(0.1704) Grad: 1.9307  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2147(0.2147) 


Epoch 17 - avg_train_loss: 0.1704  avg_val_loss: 0.2306  time: 7s
Epoch 17 - Score: 0.2335
Epoch 17 - Save Best Score: 0.2335 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1718(0.2306) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1348(0.1348) Grad: 2.6350  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1611(0.1541) Grad: 1.5474  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1271(0.1608) Grad: 1.4691  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2070(0.2070) 


Epoch 18 - avg_train_loss: 0.1608  avg_val_loss: 0.2162  time: 7s
Epoch 18 - Score: 0.2170
Epoch 18 - Save Best Score: 0.2170 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1876(0.2162) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1371(0.1371) Grad: 0.7140  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1421(0.1526) Grad: 3.2466  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1673(0.1581) Grad: 3.8055  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2216(0.2216) 


Epoch 19 - avg_train_loss: 0.1581  avg_val_loss: 0.2231  time: 7s
Epoch 19 - Score: 0.2235


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1968(0.2231) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1487(0.1487) Grad: 1.2378  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1303(0.1513) Grad: 1.4483  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1431(0.1403) Grad: 1.8667  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2018(0.2018) 


Epoch 20 - avg_train_loss: 0.1403  avg_val_loss: 0.2285  time: 7s
Epoch 20 - Score: 0.2311


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1929(0.2285) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1762(0.1762) Grad: 3.3272  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1140(0.1309) Grad: 3.0182  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1718(0.1246) Grad: 2.9142  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1757(0.1757) 


Epoch 21 - avg_train_loss: 0.1246  avg_val_loss: 0.2174  time: 7s
Epoch 21 - Score: 0.2215


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1968(0.2174) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1153(0.1153) Grad: 4.8178  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0807(0.1024) Grad: 1.9707  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0830(0.1125) Grad: 1.1478  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1739(0.1739) 


Epoch 22 - avg_train_loss: 0.1125  avg_val_loss: 0.2005  time: 7s
Epoch 22 - Score: 0.2034
Epoch 22 - Save Best Score: 0.2034 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1658(0.2005) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1311(0.1311) Grad: 0.4672  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0775(0.1283) Grad: 1.3275  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1163(0.1234) Grad: 1.8103  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1592(0.1592) 


Epoch 23 - avg_train_loss: 0.1234  avg_val_loss: 0.1941  time: 7s
Epoch 23 - Score: 0.1986
Epoch 23 - Save Best Score: 0.1986 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1567(0.1941) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1042(0.1042) Grad: 0.9563  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1454(0.1358) Grad: 4.3153  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0967(0.1294) Grad: 0.7327  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1838(0.1838) 


Epoch 24 - avg_train_loss: 0.1294  avg_val_loss: 0.2002  time: 7s
Epoch 24 - Score: 0.2019


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1671(0.2002) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0995(0.0995) Grad: 2.4340  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1075(0.1142) Grad: 3.9948  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1736(0.1266) Grad: 3.4125  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1942(0.1942) 


Epoch 25 - avg_train_loss: 0.1266  avg_val_loss: 0.2109  time: 7s
Epoch 25 - Score: 0.2122


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1832(0.2109) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1405(0.1405) Grad: 1.8135  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1011(0.1362) Grad: 1.6898  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1369(0.1260) Grad: 1.1255  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1649(0.1649) 


Epoch 26 - avg_train_loss: 0.1260  avg_val_loss: 0.2000  time: 7s
Epoch 26 - Score: 0.2030


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1886(0.2000) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1058(0.1058) Grad: 0.9644  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0830(0.0949) Grad: 2.1042  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1294(0.1001) Grad: 3.4335  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1542(0.1542) 


Epoch 27 - avg_train_loss: 0.1001  avg_val_loss: 0.1947  time: 7s
Epoch 27 - Score: 0.1987


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1816(0.1947) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0645(0.0645) Grad: 1.2511  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0637(0.0831) Grad: 0.6472  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0769(0.0864) Grad: 2.1932  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1472(0.1472) 


Epoch 28 - avg_train_loss: 0.0864  avg_val_loss: 0.1851  time: 7s
Epoch 28 - Score: 0.1883
Epoch 28 - Save Best Score: 0.1883 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1844(0.1851) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1139(0.1139) Grad: 1.0247  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1183(0.0950) Grad: 3.0255  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0895(0.0905) Grad: 3.8298  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1387(0.1387) 


Epoch 29 - avg_train_loss: 0.0905  avg_val_loss: 0.1871  time: 7s
Epoch 29 - Score: 0.1935


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1633(0.1871) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0977(0.0977) Grad: 1.6777  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0566(0.0999) Grad: 0.5966  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0688(0.0947) Grad: 1.8851  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1512(0.1512) 


Epoch 30 - avg_train_loss: 0.0947  avg_val_loss: 0.1839  time: 7s
Epoch 30 - Score: 0.1866
Epoch 30 - Save Best Score: 0.1866 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1745(0.1839) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0893(0.0893) Grad: 3.5522  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0934(0.1038) Grad: 1.3911  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0934(0.1028) Grad: 2.6253  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1552(0.1552) 


Epoch 31 - avg_train_loss: 0.1028  avg_val_loss: 0.1931  time: 7s
Epoch 31 - Score: 0.1958


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2061(0.1931) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1475(0.1475) Grad: 2.7033  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1002(0.1062) Grad: 0.8439  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1395(0.1098) Grad: 2.0085  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1372(0.1372) 


Epoch 32 - avg_train_loss: 0.1098  avg_val_loss: 0.1799  time: 7s
Epoch 32 - Score: 0.1846
Epoch 32 - Save Best Score: 0.1846 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1681(0.1799) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0850(0.0850) Grad: 1.4331  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0873(0.0851) Grad: 1.6715  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0584(0.0850) Grad: 1.1232  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1335(0.1335) 


Epoch 33 - avg_train_loss: 0.0850  avg_val_loss: 0.1816  time: 7s
Epoch 33 - Score: 0.1877


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1655(0.1816) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0540(0.0540) Grad: 1.9960  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0835(0.0709) Grad: 1.6066  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0663(0.0741) Grad: 0.8615  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1398(0.1398) 


Epoch 34 - avg_train_loss: 0.0741  avg_val_loss: 0.1821  time: 7s
Epoch 34 - Score: 0.1861


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1821(0.1821) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0558(0.0558) Grad: 1.4125  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0495(0.0674) Grad: 2.5621  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0935(0.0724) Grad: 1.8566  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1245(0.1245) 


Epoch 35 - avg_train_loss: 0.0724  avg_val_loss: 0.1755  time: 7s
Epoch 35 - Score: 0.1809
Epoch 35 - Save Best Score: 0.1809 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1925(0.1755) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0650(0.0650) Grad: 1.2795  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0615(0.0785) Grad: 0.8941  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0558(0.0785) Grad: 1.3880  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1255(0.1255) 


Epoch 36 - avg_train_loss: 0.0785  avg_val_loss: 0.1813  time: 7s
Epoch 36 - Score: 0.1883


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1805(0.1813) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0880(0.0880) Grad: 0.9570  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1568(0.1043) Grad: 4.4565  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0992(0.1048) Grad: 3.3947  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1323(0.1323) 


Epoch 37 - avg_train_loss: 0.1048  avg_val_loss: 0.1814  time: 7s
Epoch 37 - Score: 0.1873


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1719(0.1814) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1024(0.1024) Grad: 2.0094  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1123(0.1071) Grad: 3.5596  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0702(0.0990) Grad: 1.8610  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1309(0.1309) 


Epoch 38 - avg_train_loss: 0.0990  avg_val_loss: 0.1644  time: 7s
Epoch 38 - Score: 0.1679
Epoch 38 - Save Best Score: 0.1679 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1480(0.1644) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0858(0.0858) Grad: 3.6637  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0893(0.0869) Grad: 1.1640  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0786(0.0828) Grad: 3.6117  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1300(0.1300) 


Epoch 39 - avg_train_loss: 0.0828  avg_val_loss: 0.1742  time: 7s
Epoch 39 - Score: 0.1795


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1614(0.1742) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0571(0.0571) Grad: 3.1563  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0735(0.0731) Grad: 3.0248  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0604(0.0706) Grad: 1.8419  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1186(0.1186) 


Epoch 40 - avg_train_loss: 0.0706  avg_val_loss: 0.1631  time: 7s
Epoch 40 - Score: 0.1683


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1571(0.1631) 


Score: 0.1679


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.2907(3.2907) Grad: 3.5126  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7410(1.5531) Grad: 2.6236  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5414(1.1606) Grad: 0.7135  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6808(0.6808) 


Epoch 1 - avg_train_loss: 1.1606  avg_val_loss: 0.6975  time: 7s
Epoch 1 - Score: 0.6981
Epoch 1 - Save Best Score: 0.6981 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.7599(0.6975) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5540(0.5540) Grad: 1.2085  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4523(0.5731) Grad: 3.4981  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4323(0.5486) Grad: 2.6294  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6059(0.6059) 


Epoch 2 - avg_train_loss: 0.5486  avg_val_loss: 0.5551  time: 7s
Epoch 2 - Score: 0.5568
Epoch 2 - Save Best Score: 0.5568 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5301(0.5551) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5175(0.5175) Grad: 2.3809  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3533(0.4620) Grad: 1.1441  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4209(0.4413) Grad: 0.9355  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5074(0.5074) 


Epoch 3 - avg_train_loss: 0.4413  avg_val_loss: 0.4608  time: 7s
Epoch 3 - Score: 0.4629
Epoch 3 - Save Best Score: 0.4629 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3874(0.4608) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3930(0.3930) Grad: 1.7501  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4626(0.3913) Grad: 1.0107  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3287(0.3907) Grad: 1.6065  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5012(0.5012) 


Epoch 4 - avg_train_loss: 0.3907  avg_val_loss: 0.4507  time: 7s
Epoch 4 - Score: 0.4535
Epoch 4 - Save Best Score: 0.4535 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3578(0.4507) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3687(0.3687) Grad: 2.3433  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4561(0.4006) Grad: 2.6066  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3764(0.4011) Grad: 2.4093  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5026(0.5026) 


Epoch 5 - avg_train_loss: 0.4011  avg_val_loss: 0.4461  time: 7s
Epoch 5 - Score: 0.4495
Epoch 5 - Save Best Score: 0.4495 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3500(0.4461) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4013(0.4013) Grad: 1.4554  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4104(0.4787) Grad: 1.6896  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3334(0.4246) Grad: 0.8388  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4651(0.4651) 


Epoch 6 - avg_train_loss: 0.4246  avg_val_loss: 0.4190  time: 7s
Epoch 6 - Score: 0.4232
Epoch 6 - Save Best Score: 0.4232 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2932(0.4190) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4114(0.4114) Grad: 1.8420  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3382(0.3517) Grad: 0.7609  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3654(0.3454) Grad: 0.7216  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4536(0.4536) 


Epoch 7 - avg_train_loss: 0.3454  avg_val_loss: 0.4247  time: 7s
Epoch 7 - Score: 0.4261


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3540(0.4247) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3108(0.3108) Grad: 2.5566  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3243(0.3035) Grad: 1.2239  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2393(0.3015) Grad: 3.0800  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3934(0.3934) 


Epoch 8 - avg_train_loss: 0.3015  avg_val_loss: 0.3490  time: 7s
Epoch 8 - Score: 0.3555
Epoch 8 - Save Best Score: 0.3555 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2015(0.3490) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3685(0.3685) Grad: 1.1112  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3408(0.2552) Grad: 1.6364  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2399(0.2453) Grad: 0.7951  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3675(0.3675) 


Epoch 9 - avg_train_loss: 0.2453  avg_val_loss: 0.3297  time: 7s
Epoch 9 - Score: 0.3337
Epoch 9 - Save Best Score: 0.3337 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2204(0.3297) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1763(0.1763) Grad: 0.8852  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2504(0.2198) Grad: 1.5923  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1788(0.2336) Grad: 0.8084  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3762(0.3762) 


Epoch 10 - avg_train_loss: 0.2336  avg_val_loss: 0.3437  time: 7s
Epoch 10 - Score: 0.3472


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2362(0.3437) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2348(0.2348) Grad: 1.0694  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2051(0.2029) Grad: 1.7657  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2182(0.2238) Grad: 1.5627  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3738(0.3738) 


Epoch 11 - avg_train_loss: 0.2238  avg_val_loss: 0.3164  time: 7s
Epoch 11 - Score: 0.3234
Epoch 11 - Save Best Score: 0.3234 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1810(0.3164) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2663(0.2663) Grad: 0.6456  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3412(0.2524) Grad: 4.8261  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2084(0.2494) Grad: 3.6108  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3658(0.3658) 


Epoch 12 - avg_train_loss: 0.2494  avg_val_loss: 0.3337  time: 7s
Epoch 12 - Score: 0.3379


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2179(0.3337) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2430(0.2430) Grad: 1.5135  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2253(0.2150) Grad: 1.3134  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1804(0.2055) Grad: 1.4976  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3381(0.3381) 


Epoch 13 - avg_train_loss: 0.2055  avg_val_loss: 0.3053  time: 7s
Epoch 13 - Score: 0.3066
Epoch 13 - Save Best Score: 0.3066 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2725(0.3053) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2552(0.2552) Grad: 1.1020  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1464(0.1772) Grad: 1.1873  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1913(0.1886) Grad: 2.3857  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3107(0.3107) 


Epoch 14 - avg_train_loss: 0.1886  avg_val_loss: 0.2905  time: 7s
Epoch 14 - Score: 0.2945
Epoch 14 - Save Best Score: 0.2945 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1840(0.2905) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1932(0.1932) Grad: 1.5409  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1778(0.1554) Grad: 0.8568  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1695(0.1533) Grad: 0.9230  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3044(0.3044) 


Epoch 15 - avg_train_loss: 0.1533  avg_val_loss: 0.2905  time: 7s
Epoch 15 - Score: 0.2927
Epoch 15 - Save Best Score: 0.2927 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2119(0.2905) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1683(0.1683) Grad: 1.1412  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1619(0.1495) Grad: 0.5418  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1548(0.1397) Grad: 1.9410  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3197(0.3197) 


Epoch 16 - avg_train_loss: 0.1397  avg_val_loss: 0.2884  time: 7s
Epoch 16 - Score: 0.2898
Epoch 16 - Save Best Score: 0.2898 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2433(0.2884) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1070(0.1070) Grad: 0.5425  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1553(0.1355) Grad: 1.4048  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2032(0.1381) Grad: 0.9328  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2993(0.2993) 


Epoch 17 - avg_train_loss: 0.1381  avg_val_loss: 0.2710  time: 7s
Epoch 17 - Score: 0.2726
Epoch 17 - Save Best Score: 0.2726 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2133(0.2710) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1280(0.1280) Grad: 0.6670  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1545(0.1471) Grad: 2.2602  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1518(0.1479) Grad: 1.1830  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2943(0.2943) 


Epoch 18 - avg_train_loss: 0.1479  avg_val_loss: 0.2848  time: 7s
Epoch 18 - Score: 0.2864


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2195(0.2848) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1729(0.1729) Grad: 1.0768  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1970(0.1257) Grad: 0.9677  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1529(0.1442) Grad: 2.0341  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3650(0.3650) 


Epoch 19 - avg_train_loss: 0.1442  avg_val_loss: 0.3052  time: 7s
Epoch 19 - Score: 0.3099


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2226(0.3052) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1869(0.1869) Grad: 4.3681  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1654(0.1658) Grad: 4.0093  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2155(0.1690) Grad: 4.3059  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3119(0.3119) 


Epoch 20 - avg_train_loss: 0.1690  avg_val_loss: 0.3000  time: 7s
Epoch 20 - Score: 0.3011


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2414(0.3000) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1155(0.1155) Grad: 4.0494  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1491(0.1332) Grad: 3.1273  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1386(0.1305) Grad: 3.4722  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3078(0.3078) 


Epoch 21 - avg_train_loss: 0.1305  avg_val_loss: 0.2865  time: 7s
Epoch 21 - Score: 0.2874


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2426(0.2865) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1420(0.1420) Grad: 2.6290  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0811(0.1205) Grad: 2.4411  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0957(0.1114) Grad: 0.7590  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2956(0.2956) 


Epoch 22 - avg_train_loss: 0.1114  avg_val_loss: 0.2786  time: 7s
Epoch 22 - Score: 0.2795


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2311(0.2786) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1335(0.1335) Grad: 1.2951  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0923(0.1046) Grad: 1.0581  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0758(0.1048) Grad: 3.0288  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2953(0.2953) 


Epoch 23 - avg_train_loss: 0.1048  avg_val_loss: 0.2712  time: 7s
Epoch 23 - Score: 0.2743


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1811(0.2712) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0644(0.0644) Grad: 1.8576  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1266(0.1124) Grad: 1.3421  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2007(0.1130) Grad: 2.4472  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3133(0.3133) 


Epoch 24 - avg_train_loss: 0.1130  avg_val_loss: 0.2949  time: 7s
Epoch 24 - Score: 0.2959


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2407(0.2949) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1292(0.1292) Grad: 4.0267  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1363(0.1353) Grad: 3.4812  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1104(0.1339) Grad: 1.2326  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3189(0.3189) 


Epoch 25 - avg_train_loss: 0.1339  avg_val_loss: 0.2695  time: 7s
Epoch 25 - Score: 0.2729


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2109(0.2695) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1302(0.1302) Grad: 3.1393  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1509(0.1054) Grad: 0.8289  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1104(0.1149) Grad: 1.4557  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2887(0.2887) 


Epoch 26 - avg_train_loss: 0.1149  avg_val_loss: 0.2610  time: 7s
Epoch 26 - Score: 0.2630
Epoch 26 - Save Best Score: 0.2630 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1956(0.2610) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0861(0.0861) Grad: 2.4585  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0818(0.0979) Grad: 1.2849  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0761(0.0884) Grad: 2.6713  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2732(0.2732) 


Epoch 27 - avg_train_loss: 0.0884  avg_val_loss: 0.2492  time: 7s
Epoch 27 - Score: 0.2504
Epoch 27 - Save Best Score: 0.2504 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2065(0.2492) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0980(0.0980) Grad: 1.3233  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0600(0.0856) Grad: 2.3484  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0508(0.0772) Grad: 1.6479  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2964(0.2964) 


Epoch 28 - avg_train_loss: 0.0772  avg_val_loss: 0.2647  time: 7s
Epoch 28 - Score: 0.2661


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2291(0.2647) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1020(0.1020) Grad: 0.6265  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0703(0.0814) Grad: 1.2722  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0547(0.0781) Grad: 1.6655  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2740(0.2740) 


Epoch 29 - avg_train_loss: 0.0781  avg_val_loss: 0.2602  time: 7s
Epoch 29 - Score: 0.2611


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2123(0.2602) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0748(0.0748) Grad: 0.6131  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1068(0.0825) Grad: 3.9942  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1150(0.0882) Grad: 1.8816  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2920(0.2920) 


Epoch 30 - avg_train_loss: 0.0882  avg_val_loss: 0.2645  time: 7s
Epoch 30 - Score: 0.2666


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1968(0.2645) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1175(0.1175) Grad: 2.6116  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0772(0.1053) Grad: 0.8958  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0707(0.0999) Grad: 2.9118  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3217(0.3217) 


Epoch 31 - avg_train_loss: 0.0999  avg_val_loss: 0.2879  time: 7s
Epoch 31 - Score: 0.2897


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2316(0.2879) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1204(0.1204) Grad: 3.5816  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1099(0.1306) Grad: 3.2808  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1298(0.1200) Grad: 0.7403  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2676(0.2676) 


Epoch 32 - avg_train_loss: 0.1200  avg_val_loss: 0.2755  time: 7s
Epoch 32 - Score: 0.2769


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2252(0.2755) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1395(0.1395) Grad: 0.9530  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0799(0.0921) Grad: 1.3250  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0683(0.0877) Grad: 2.6094  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2866(0.2866) 


Epoch 33 - avg_train_loss: 0.0877  avg_val_loss: 0.2561  time: 7s
Epoch 33 - Score: 0.2580


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1968(0.2561) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0668(0.0668) Grad: 0.6684  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0753(0.0668) Grad: 0.9964  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0514(0.0699) Grad: 0.7990  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3037(0.3037) 


Epoch 34 - avg_train_loss: 0.0699  avg_val_loss: 0.2765  time: 7s
Epoch 34 - Score: 0.2795


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1885(0.2765) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0619(0.0619) Grad: 0.5137  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0682(0.0651) Grad: 1.1335  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0499(0.0673) Grad: 2.2442  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2735(0.2735) 


Epoch 35 - avg_train_loss: 0.0673  avg_val_loss: 0.2599  time: 7s
Epoch 35 - Score: 0.2608


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2124(0.2599) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0449(0.0449) Grad: 1.3000  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0603(0.0768) Grad: 2.6568  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0490(0.0713) Grad: 1.6007  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2756(0.2756) 


Epoch 36 - avg_train_loss: 0.0713  avg_val_loss: 0.2613  time: 7s
Epoch 36 - Score: 0.2630


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1935(0.2613) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0665(0.0665) Grad: 1.3585  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0913(0.0808) Grad: 1.1535  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0691(0.0799) Grad: 1.9027  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2740(0.2740) 


Epoch 37 - avg_train_loss: 0.0799  avg_val_loss: 0.2449  time: 7s
Epoch 37 - Score: 0.2463
Epoch 37 - Save Best Score: 0.2463 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2058(0.2449) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0655(0.0655) Grad: 1.0548  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0765(0.0788) Grad: 2.5776  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0689(0.0803) Grad: 2.5364  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2829(0.2829) 


Epoch 38 - avg_train_loss: 0.0803  avg_val_loss: 0.2553  time: 7s
Epoch 38 - Score: 0.2580


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1772(0.2553) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0783(0.0783) Grad: 2.9431  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0792(0.0780) Grad: 3.8791  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0386(0.0724) Grad: 0.8998  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2951(0.2951) 


Epoch 39 - avg_train_loss: 0.0724  avg_val_loss: 0.2605  time: 7s
Epoch 39 - Score: 0.2635


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1814(0.2605) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0650(0.0650) Grad: 2.1031  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0464(0.0518) Grad: 1.9940  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0565(0.0557) Grad: 2.4421  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3168(0.3168) 


Epoch 40 - avg_train_loss: 0.0557  avg_val_loss: 0.2828  time: 7s
Epoch 40 - Score: 0.2849


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2186(0.2828) 


Score: 0.2463


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.2931(3.2931) Grad: 3.2703  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7843(1.6837) Grad: 2.2456  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6317(1.2738) Grad: 0.3763  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5826(0.5826) 


Epoch 1 - avg_train_loss: 1.2738  avg_val_loss: 0.5653  time: 7s
Epoch 1 - Score: 0.5654
Epoch 1 - Save Best Score: 0.5654 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5536(0.5653) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.6213(0.6213) Grad: 1.6406  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7135(0.6115) Grad: 1.6172  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.7592(0.5965) Grad: 1.0226  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4829(0.4829) 


Epoch 2 - avg_train_loss: 0.5965  avg_val_loss: 0.4642  time: 7s
Epoch 2 - Score: 0.4650
Epoch 2 - Save Best Score: 0.4650 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4056(0.4642) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.7000(0.7000) Grad: 1.0063  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6496(0.5805) Grad: 3.9120  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4768(0.5410) Grad: 1.0885  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4151(0.4151) 


Epoch 3 - avg_train_loss: 0.5410  avg_val_loss: 0.4138  time: 7s
Epoch 3 - Score: 0.4146
Epoch 3 - Save Best Score: 0.4146 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3607(0.4138) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3170(0.3170) Grad: 1.6156  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5128(0.4916) Grad: 1.2406  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4875(0.4952) Grad: 1.1946  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4222(0.4222) 


Epoch 4 - avg_train_loss: 0.4952  avg_val_loss: 0.4156  time: 7s
Epoch 4 - Score: 0.4164


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3608(0.4156) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4286(0.4286) Grad: 0.7552  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5199(0.4994) Grad: 1.0837  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5588(0.4727) Grad: 2.1422  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5121(0.5121) 


Epoch 5 - avg_train_loss: 0.4727  avg_val_loss: 0.5079  time: 7s
Epoch 5 - Score: 0.5081


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4722(0.5079) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4441(0.4441) Grad: 4.1172  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4050(0.4322) Grad: 0.7750  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3592(0.4317) Grad: 1.6964  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3542(0.3542) 


Epoch 6 - avg_train_loss: 0.4317  avg_val_loss: 0.3414  time: 7s
Epoch 6 - Score: 0.3464
Epoch 6 - Save Best Score: 0.3464 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2141(0.3414) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4513(0.4513) Grad: 1.0021  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3406(0.3742) Grad: 0.6393  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3070(0.3517) Grad: 2.7324  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3057(0.3057) 


Epoch 7 - avg_train_loss: 0.3517  avg_val_loss: 0.2839  time: 7s
Epoch 7 - Score: 0.2866
Epoch 7 - Save Best Score: 0.2866 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1955(0.2839) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2234(0.2234) Grad: 2.8211  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2494(0.3108) Grad: 1.1555  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3325(0.3077) Grad: 2.3937  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3400(0.3400) 


Epoch 8 - avg_train_loss: 0.3077  avg_val_loss: 0.3172  time: 7s
Epoch 8 - Score: 0.3179


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2798(0.3172) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2881(0.2881) Grad: 2.1324  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2558(0.2676) Grad: 0.7188  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2909(0.2652) Grad: 2.1446  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2786(0.2786) 


Epoch 9 - avg_train_loss: 0.2652  avg_val_loss: 0.2545  time: 7s
Epoch 9 - Score: 0.2564
Epoch 9 - Save Best Score: 0.2564 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1880(0.2545) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2243(0.2243) Grad: 0.8524  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2677(0.2561) Grad: 1.4219  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2223(0.2441) Grad: 2.5196  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2860(0.2860) 


Epoch 10 - avg_train_loss: 0.2441  avg_val_loss: 0.2541  time: 7s
Epoch 10 - Score: 0.2570


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1750(0.2541) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3203(0.3203) Grad: 0.6073  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2955(0.2290) Grad: 2.4097  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2254(0.2379) Grad: 0.6889  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2768(0.2768) 


Epoch 11 - avg_train_loss: 0.2379  avg_val_loss: 0.2568  time: 7s
Epoch 11 - Score: 0.2590


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1832(0.2568) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2263(0.2263) Grad: 1.3166  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2559(0.2316) Grad: 1.3091  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1887(0.2374) Grad: 1.7053  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2577(0.2577) 


Epoch 12 - avg_train_loss: 0.2374  avg_val_loss: 0.2563  time: 7s
Epoch 12 - Score: 0.2573


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2082(0.2563) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2734(0.2734) Grad: 0.4641  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3059(0.2228) Grad: 2.6834  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1985(0.2335) Grad: 1.6837  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2428(0.2428) 


Epoch 13 - avg_train_loss: 0.2335  avg_val_loss: 0.2446  time: 7s
Epoch 13 - Score: 0.2454
Epoch 13 - Save Best Score: 0.2454 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2058(0.2446) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1780(0.1780) Grad: 1.2142  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1751(0.1972) Grad: 0.9792  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1688(0.1913) Grad: 0.6287  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2378(0.2378) 


Epoch 14 - avg_train_loss: 0.1913  avg_val_loss: 0.2298  time: 7s
Epoch 14 - Score: 0.2311
Epoch 14 - Save Best Score: 0.2311 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1754(0.2298) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2082(0.2082) Grad: 2.3750  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1803(0.1531) Grad: 3.3936  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1224(0.1589) Grad: 2.5636  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2383(0.2383) 


Epoch 15 - avg_train_loss: 0.1589  avg_val_loss: 0.2105  time: 7s
Epoch 15 - Score: 0.2137
Epoch 15 - Save Best Score: 0.2137 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1331(0.2105) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1134(0.1134) Grad: 2.6788  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1763(0.1361) Grad: 1.4168  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0992(0.1335) Grad: 1.5367  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2220(0.2220) 


Epoch 16 - avg_train_loss: 0.1335  avg_val_loss: 0.2021  time: 7s
Epoch 16 - Score: 0.2046
Epoch 16 - Save Best Score: 0.2046 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1335(0.2021) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1883(0.1883) Grad: 2.3821  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1104(0.1469) Grad: 1.6307  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0577(0.1359) Grad: 0.8236  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2052(0.2052) 


Epoch 17 - avg_train_loss: 0.1359  avg_val_loss: 0.1955  time: 7s
Epoch 17 - Score: 0.1983
Epoch 17 - Save Best Score: 0.1983 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1233(0.1955) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1497(0.1497) Grad: 1.0346  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1264(0.1356) Grad: 3.6309  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1849(0.1395) Grad: 2.6204  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2361(0.2361) 


Epoch 18 - avg_train_loss: 0.1395  avg_val_loss: 0.2255  time: 7s
Epoch 18 - Score: 0.2266


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1762(0.2255) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1682(0.1682) Grad: 3.0300  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1838(0.1442) Grad: 4.1050  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1260(0.1522) Grad: 1.7378  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2361(0.2361) 


Epoch 19 - avg_train_loss: 0.1522  avg_val_loss: 0.2310  time: 7s
Epoch 19 - Score: 0.2362


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1262(0.2310) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1657(0.1657) Grad: 3.2844  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1477(0.1280) Grad: 3.1758  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1245(0.1432) Grad: 1.8337  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1870(0.1870) 


Epoch 20 - avg_train_loss: 0.1432  avg_val_loss: 0.1853  time: 7s
Epoch 20 - Score: 0.1870
Epoch 20 - Save Best Score: 0.1870 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1317(0.1853) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1025(0.1025) Grad: 0.9895  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1002(0.1148) Grad: 1.0682  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1308(0.1075) Grad: 1.1002  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2016(0.2016) 


Epoch 21 - avg_train_loss: 0.1075  avg_val_loss: 0.1955  time: 7s
Epoch 21 - Score: 0.1985


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1220(0.1955) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0980(0.0980) Grad: 1.5823  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1221(0.0922) Grad: 0.8299  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0901(0.0971) Grad: 1.0205  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1949(0.1949) 


Epoch 22 - avg_train_loss: 0.0971  avg_val_loss: 0.1837  time: 7s
Epoch 22 - Score: 0.1857
Epoch 22 - Save Best Score: 0.1857 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1229(0.1837) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0897(0.0897) Grad: 1.1029  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1124(0.0928) Grad: 0.6157  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0785(0.0893) Grad: 2.2508  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1862(0.1862) 


Epoch 23 - avg_train_loss: 0.0893  avg_val_loss: 0.1813  time: 7s
Epoch 23 - Score: 0.1848
Epoch 23 - Save Best Score: 0.1848 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1056(0.1813) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0785(0.0785) Grad: 1.1227  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0932(0.1068) Grad: 4.1247  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1071(0.1054) Grad: 2.0731  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2047(0.2047) 


Epoch 24 - avg_train_loss: 0.1054  avg_val_loss: 0.2021  time: 7s
Epoch 24 - Score: 0.2034


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1532(0.2021) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1203(0.1203) Grad: 1.8409  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1031(0.1095) Grad: 2.0321  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1069(0.1179) Grad: 1.3342  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 1s) Loss: 0.2337(0.2337) 


Epoch 25 - avg_train_loss: 0.1179  avg_val_loss: 0.2283  time: 7s
Epoch 25 - Score: 0.2307


EVAL: [2/3] Elapsed 0m 1s (remain 0m 0s) Loss: 0.1560(0.2283) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1491(0.1491) Grad: 3.8787  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0781(0.1216) Grad: 1.6109  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1139(0.1159) Grad: 0.7390  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2022(0.2022) 


Epoch 26 - avg_train_loss: 0.1159  avg_val_loss: 0.1936  time: 7s
Epoch 26 - Score: 0.1958


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1280(0.1936) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0979(0.0979) Grad: 0.7446  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0710(0.0823) Grad: 1.5183  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0815(0.0879) Grad: 0.8933  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1870(0.1870) 


Epoch 27 - avg_train_loss: 0.0879  avg_val_loss: 0.1795  time: 7s
Epoch 27 - Score: 0.1828
Epoch 27 - Save Best Score: 0.1828 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1048(0.1795) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0744(0.0744) Grad: 1.0823  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0757(0.0774) Grad: 0.7862  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0750(0.0781) Grad: 0.7767  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1899(0.1899) 


Epoch 28 - avg_train_loss: 0.0781  avg_val_loss: 0.1789  time: 7s
Epoch 28 - Score: 0.1823
Epoch 28 - Save Best Score: 0.1823 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1015(0.1789) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0813(0.0813) Grad: 1.1183  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0717(0.0713) Grad: 0.8219  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1271(0.0760) Grad: 1.1025  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1870(0.1870) 


Epoch 29 - avg_train_loss: 0.0760  avg_val_loss: 0.1811  time: 7s
Epoch 29 - Score: 0.1840


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1104(0.1811) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1124(0.1124) Grad: 0.7383  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0849(0.0927) Grad: 2.4075  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0977(0.0906) Grad: 2.2473  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1979(0.1979) 


Epoch 30 - avg_train_loss: 0.0906  avg_val_loss: 0.1970  time: 7s
Epoch 30 - Score: 0.2005


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1189(0.1970) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1124(0.1124) Grad: 1.6770  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1291(0.1057) Grad: 2.1422  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1085(0.1036) Grad: 3.3457  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1933(0.1933) 


Epoch 31 - avg_train_loss: 0.1036  avg_val_loss: 0.2002  time: 7s
Epoch 31 - Score: 0.2034


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1326(0.2002) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1195(0.1195) Grad: 1.4060  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0942(0.1168) Grad: 1.7584  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0958(0.1079) Grad: 2.9601  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2050(0.2050) 


Epoch 32 - avg_train_loss: 0.1079  avg_val_loss: 0.1941  time: 7s
Epoch 32 - Score: 0.1988


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.0997(0.1941) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1178(0.1178) Grad: 1.2975  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0677(0.0891) Grad: 1.8522  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0812(0.0827) Grad: 3.2170  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1658(0.1658) 


Epoch 33 - avg_train_loss: 0.0827  avg_val_loss: 0.1615  time: 7s
Epoch 33 - Score: 0.1648
Epoch 33 - Save Best Score: 0.1648 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.0915(0.1615) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0857(0.0857) Grad: 0.8871  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0695(0.0718) Grad: 1.4490  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0613(0.0672) Grad: 1.5442  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1595(0.1595) 


Epoch 34 - avg_train_loss: 0.0672  avg_val_loss: 0.1606  time: 7s
Epoch 34 - Score: 0.1632
Epoch 34 - Save Best Score: 0.1632 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1024(0.1606) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0629(0.0629) Grad: 1.2488  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0585(0.0638) Grad: 0.8930  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0526(0.0612) Grad: 1.2533  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1773(0.1773) 


Epoch 35 - avg_train_loss: 0.0612  avg_val_loss: 0.1738  time: 7s
Epoch 35 - Score: 0.1777


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.0961(0.1738) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0538(0.0538) Grad: 1.2759  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0654(0.0746) Grad: 2.7105  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0781(0.0728) Grad: 4.3122  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1820(0.1820) 


Epoch 36 - avg_train_loss: 0.0728  avg_val_loss: 0.1706  time: 7s
Epoch 36 - Score: 0.1738


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.0984(0.1706) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0692(0.0692) Grad: 0.6881  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1045(0.0959) Grad: 2.8987  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1083(0.1006) Grad: 4.4210  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1957(0.1957) 


Epoch 37 - avg_train_loss: 0.1006  avg_val_loss: 0.1868  time: 7s
Epoch 37 - Score: 0.1891


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1229(0.1868) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1206(0.1206) Grad: 4.3552  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0667(0.1089) Grad: 2.1449  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0831(0.1073) Grad: 1.6724  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2213(0.2213) 


Epoch 38 - avg_train_loss: 0.1073  avg_val_loss: 0.1964  time: 7s
Epoch 38 - Score: 0.2018


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.0932(0.1964) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1040(0.1040) Grad: 2.3621  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1292(0.0855) Grad: 0.9764  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0514(0.0751) Grad: 1.7334  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1684(0.1684) 


Epoch 39 - avg_train_loss: 0.0751  avg_val_loss: 0.1685  time: 7s
Epoch 39 - Score: 0.1701


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1207(0.1685) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0539(0.0539) Grad: 1.6983  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0501(0.0547) Grad: 0.8335  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0485(0.0570) Grad: 1.4073  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1809(0.1809) 


Epoch 40 - avg_train_loss: 0.0570  avg_val_loss: 0.1702  time: 7s
Epoch 40 - Score: 0.1728


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1048(0.1702) 


Score: 0.1632


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.3771(3.3771) Grad: 3.2795  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6721(1.6065) Grad: 3.2297  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5455(1.1962) Grad: 1.8839  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6508(0.6508) 


Epoch 1 - avg_train_loss: 1.1962  avg_val_loss: 0.6381  time: 7s
Epoch 1 - Score: 0.6409
Epoch 1 - Save Best Score: 0.6409 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.7482(0.6381) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.6617(0.6617) Grad: 1.4600  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5500(0.5727) Grad: 1.1310  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5453(0.5575) Grad: 3.7970  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5968(0.5968) 


Epoch 2 - avg_train_loss: 0.5575  avg_val_loss: 0.5652  time: 7s
Epoch 2 - Score: 0.5666
Epoch 2 - Save Best Score: 0.5666 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6022(0.5652) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6280(0.6280) Grad: 4.8583  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3982(0.5172) Grad: 0.9784  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3750(0.4984) Grad: 0.7902  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5303(0.5303) 


Epoch 3 - avg_train_loss: 0.4984  avg_val_loss: 0.4990  time: 7s
Epoch 3 - Score: 0.5001
Epoch 3 - Save Best Score: 0.5001 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5178(0.4990) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5160(0.5160) Grad: 1.4370  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4001(0.4692) Grad: 1.0861  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5531(0.4533) Grad: 0.6926  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5138(0.5138) 


Epoch 4 - avg_train_loss: 0.4533  avg_val_loss: 0.4847  time: 7s
Epoch 4 - Score: 0.4856
Epoch 4 - Save Best Score: 0.4856 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4999(0.4847) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4583(0.4583) Grad: 1.1449  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3752(0.4512) Grad: 1.2387  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3438(0.4401) Grad: 1.3156  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4345(0.4345) 


Epoch 5 - avg_train_loss: 0.4401  avg_val_loss: 0.4193  time: 7s
Epoch 5 - Score: 0.4195
Epoch 5 - Save Best Score: 0.4195 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4103(0.4193) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3969(0.3969) Grad: 1.0554  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4111(0.4566) Grad: 0.7785  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3440(0.4235) Grad: 1.9184  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4065(0.4065) 


Epoch 6 - avg_train_loss: 0.4235  avg_val_loss: 0.3755  time: 7s
Epoch 6 - Score: 0.3774
Epoch 6 - Save Best Score: 0.3774 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2981(0.3755) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2734(0.2734) Grad: 0.9764  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3159(0.3450) Grad: 3.0324  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3369(0.3374) Grad: 2.1558  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3958(0.3958) 


Epoch 7 - avg_train_loss: 0.3374  avg_val_loss: 0.3912  time: 7s
Epoch 7 - Score: 0.3946


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2824(0.3912) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4144(0.4144) Grad: 2.5326  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3557(0.3219) Grad: 3.0384  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2260(0.3003) Grad: 0.9105  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3012(0.3012) 


Epoch 8 - avg_train_loss: 0.3003  avg_val_loss: 0.3220  time: 7s
Epoch 8 - Score: 0.3235
Epoch 8 - Save Best Score: 0.3235 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2834(0.3220) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2938(0.2938) Grad: 2.0643  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2846(0.2506) Grad: 1.8296  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2876(0.2558) Grad: 1.1384  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2898(0.2898) 


Epoch 9 - avg_train_loss: 0.2558  avg_val_loss: 0.2966  time: 7s
Epoch 9 - Score: 0.2977
Epoch 9 - Save Best Score: 0.2977 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2494(0.2966) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2189(0.2189) Grad: 2.5533  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2800(0.2545) Grad: 1.6856  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1518(0.2367) Grad: 0.7185  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2889(0.2889) 


Epoch 10 - avg_train_loss: 0.2367  avg_val_loss: 0.2874  time: 7s
Epoch 10 - Score: 0.2904
Epoch 10 - Save Best Score: 0.2904 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1995(0.2874) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1759(0.1759) Grad: 1.0788  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3212(0.2504) Grad: 3.7272  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2836(0.2573) Grad: 1.8189  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2860(0.2860) 


Epoch 11 - avg_train_loss: 0.2573  avg_val_loss: 0.2858  time: 7s
Epoch 11 - Score: 0.2887
Epoch 11 - Save Best Score: 0.2887 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2017(0.2858) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3174(0.3174) Grad: 1.3667  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2956(0.2637) Grad: 2.7666  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2900(0.2550) Grad: 2.5108  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2599(0.2599) 


Epoch 12 - avg_train_loss: 0.2550  avg_val_loss: 0.2800  time: 7s
Epoch 12 - Score: 0.2850
Epoch 12 - Save Best Score: 0.2850 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1901(0.2800) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2237(0.2237) Grad: 0.6439  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2590(0.2363) Grad: 0.8629  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2436(0.2256) Grad: 2.4268  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2365(0.2365) 


Epoch 13 - avg_train_loss: 0.2256  avg_val_loss: 0.2593  time: 7s
Epoch 13 - Score: 0.2631
Epoch 13 - Save Best Score: 0.2631 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1931(0.2593) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2452(0.2452) Grad: 2.4326  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1248(0.1885) Grad: 0.9744  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1889(0.1868) Grad: 0.5427  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1985(0.1985) 


Epoch 14 - avg_train_loss: 0.1868  avg_val_loss: 0.2276  time: 7s
Epoch 14 - Score: 0.2348
Epoch 14 - Save Best Score: 0.2348 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1414(0.2276) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1492(0.1492) Grad: 0.6642  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1730(0.1510) Grad: 0.6099  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1921(0.1543) Grad: 0.6204  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2193(0.2193) 


Epoch 15 - avg_train_loss: 0.1543  avg_val_loss: 0.2356  time: 7s
Epoch 15 - Score: 0.2414


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1431(0.2356) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2099(0.2099) Grad: 0.9613  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1847(0.1578) Grad: 0.8696  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1416(0.1451) Grad: 0.8879  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1953(0.1953) 


Epoch 16 - avg_train_loss: 0.1451  avg_val_loss: 0.2295  time: 7s
Epoch 16 - Score: 0.2374


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1438(0.2295) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1060(0.1060) Grad: 0.8373  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1419(0.1447) Grad: 2.2231  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1803(0.1457) Grad: 0.8271  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2159(0.2159) 


Epoch 17 - avg_train_loss: 0.1457  avg_val_loss: 0.2492  time: 7s
Epoch 17 - Score: 0.2586


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1427(0.2492) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1804(0.1804) Grad: 1.0111  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1557(0.1560) Grad: 3.6120  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1761(0.1586) Grad: 2.6621  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2196(0.2196) 


Epoch 18 - avg_train_loss: 0.1586  avg_val_loss: 0.2655  time: 7s
Epoch 18 - Score: 0.2740


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1842(0.2655) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1117(0.1117) Grad: 1.0917  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1593(0.1402) Grad: 3.2138  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1989(0.1499) Grad: 1.8315  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2157(0.2157) 


Epoch 19 - avg_train_loss: 0.1499  avg_val_loss: 0.2663  time: 7s
Epoch 19 - Score: 0.2753


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1893(0.2663) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1234(0.1234) Grad: 0.8796  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1412(0.1312) Grad: 1.1831  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0937(0.1357) Grad: 1.1856  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2185(0.2185) 


Epoch 20 - avg_train_loss: 0.1357  avg_val_loss: 0.2636  time: 7s
Epoch 20 - Score: 0.2748


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1567(0.2636) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1283(0.1283) Grad: 0.8159  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1163(0.1119) Grad: 2.2639  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1420(0.1139) Grad: 1.0808  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2325(0.2325) 


Epoch 21 - avg_train_loss: 0.1139  avg_val_loss: 0.2657  time: 7s
Epoch 21 - Score: 0.2719


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1858(0.2657) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0887(0.0887) Grad: 2.0103  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0846(0.1019) Grad: 0.8333  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1278(0.1010) Grad: 0.6175  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2044(0.2044) 


Epoch 22 - avg_train_loss: 0.1010  avg_val_loss: 0.2415  time: 7s
Epoch 22 - Score: 0.2493


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1584(0.2415) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1053(0.1053) Grad: 0.9427  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1105(0.1027) Grad: 0.8949  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1051(0.1033) Grad: 1.3250  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1913(0.1913) 


Epoch 23 - avg_train_loss: 0.1033  avg_val_loss: 0.2433  time: 7s
Epoch 23 - Score: 0.2522


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1776(0.2433) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0764(0.0764) Grad: 1.0246  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1143(0.1169) Grad: 0.8029  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0767(0.1130) Grad: 1.9321  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2112(0.2112) 


Epoch 24 - avg_train_loss: 0.1130  avg_val_loss: 0.2461  time: 7s
Epoch 24 - Score: 0.2529


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1677(0.2461) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0935(0.0935) Grad: 0.7604  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0866(0.1104) Grad: 0.6762  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1540(0.1161) Grad: 1.7163  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2307(0.2307) 


Epoch 25 - avg_train_loss: 0.1161  avg_val_loss: 0.2541  time: 7s
Epoch 25 - Score: 0.2662


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1135(0.2541) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0767(0.0767) Grad: 1.3011  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0889(0.1077) Grad: 1.8863  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0934(0.1115) Grad: 1.2688  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2392(0.2392) 


Epoch 26 - avg_train_loss: 0.1115  avg_val_loss: 0.2541  time: 7s
Epoch 26 - Score: 0.2623


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1334(0.2541) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1045(0.1045) Grad: 1.6607  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0691(0.1024) Grad: 1.6571  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0806(0.0990) Grad: 1.1395  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2234(0.2234) 


Epoch 27 - avg_train_loss: 0.0990  avg_val_loss: 0.2356  time: 7s
Epoch 27 - Score: 0.2447


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1102(0.2356) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0559(0.0559) Grad: 1.4775  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0948(0.0822) Grad: 1.6917  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0852(0.0827) Grad: 1.5296  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1941(0.1941) 


Epoch 28 - avg_train_loss: 0.0827  avg_val_loss: 0.2237  time: 7s
Epoch 28 - Score: 0.2330
Epoch 28 - Save Best Score: 0.2330 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1215(0.2237) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0972(0.0972) Grad: 1.8584  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0872(0.0946) Grad: 2.3927  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0803(0.0830) Grad: 0.7508  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2121(0.2121) 


Epoch 29 - avg_train_loss: 0.0830  avg_val_loss: 0.2340  time: 7s
Epoch 29 - Score: 0.2449


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1064(0.2340) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0660(0.0660) Grad: 2.2436  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1045(0.0837) Grad: 0.6025  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0952(0.0871) Grad: 3.2254  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1976(0.1976) 


Epoch 30 - avg_train_loss: 0.0871  avg_val_loss: 0.2263  time: 7s
Epoch 30 - Score: 0.2365


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1149(0.2263) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0881(0.0881) Grad: 2.7841  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1112(0.1025) Grad: 3.0129  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1367(0.1083) Grad: 4.2764  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2101(0.2101) 


Epoch 31 - avg_train_loss: 0.1083  avg_val_loss: 0.2467  time: 7s
Epoch 31 - Score: 0.2614


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1068(0.2467) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0863(0.0863) Grad: 1.1709  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0920(0.1071) Grad: 1.2145  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0769(0.1027) Grad: 0.9973  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1778(0.1778) 


Epoch 32 - avg_train_loss: 0.1027  avg_val_loss: 0.2176  time: 7s
Epoch 32 - Score: 0.2274
Epoch 32 - Save Best Score: 0.2274 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1288(0.2176) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0841(0.0841) Grad: 2.8766  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0890(0.0854) Grad: 1.7298  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0873(0.0835) Grad: 0.6714  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1902(0.1902) 


Epoch 33 - avg_train_loss: 0.0835  avg_val_loss: 0.2240  time: 7s
Epoch 33 - Score: 0.2352


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1124(0.2240) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0897(0.0897) Grad: 1.9800  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0765(0.0674) Grad: 1.5645  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0481(0.0662) Grad: 0.6616  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2084(0.2084) 


Epoch 34 - avg_train_loss: 0.0662  avg_val_loss: 0.2309  time: 7s
Epoch 34 - Score: 0.2415


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1071(0.2309) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0519(0.0519) Grad: 1.5749  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0639(0.0684) Grad: 2.1914  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0644(0.0682) Grad: 0.6819  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2205(0.2205) 


Epoch 35 - avg_train_loss: 0.0682  avg_val_loss: 0.2353  time: 7s
Epoch 35 - Score: 0.2497


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.0762(0.2353) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0563(0.0563) Grad: 1.9923  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0566(0.0739) Grad: 0.9419  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0962(0.0790) Grad: 1.4641  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1952(0.1952) 


Epoch 36 - avg_train_loss: 0.0790  avg_val_loss: 0.2263  time: 7s
Epoch 36 - Score: 0.2368


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1153(0.2263) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0735(0.0735) Grad: 2.0410  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0957(0.0905) Grad: 2.1009  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0774(0.0904) Grad: 1.5306  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2159(0.2159) 


Epoch 37 - avg_train_loss: 0.0904  avg_val_loss: 0.2340  time: 7s
Epoch 37 - Score: 0.2488


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.0762(0.2340) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0997(0.0997) Grad: 0.9954  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0930(0.1000) Grad: 1.4791  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1150(0.0944) Grad: 0.9229  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2065(0.2065) 


Epoch 38 - avg_train_loss: 0.0944  avg_val_loss: 0.2480  time: 7s
Epoch 38 - Score: 0.2561


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1673(0.2480) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0955(0.0955) Grad: 4.3347  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0543(0.0796) Grad: 1.1519  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0504(0.0741) Grad: 2.0091  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2162(0.2162) 


Epoch 39 - avg_train_loss: 0.0741  avg_val_loss: 0.2378  time: 7s
Epoch 39 - Score: 0.2448


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1400(0.2378) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0583(0.0583) Grad: 1.2845  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0543(0.0536) Grad: 1.5534  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0882(0.0587) Grad: 1.0556  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2025(0.2025) 


Epoch 40 - avg_train_loss: 0.0587  avg_val_loss: 0.2330  time: 7s
Epoch 40 - Score: 0.2465


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.0985(0.2330) 


Score: 0.2274
Score: 0.2022


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.3750(3.3750) Grad: 3.1892  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.8027(1.5839) Grad: 3.7007  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5940(1.2059) Grad: 0.1434  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5994(0.5994) 


Epoch 1 - avg_train_loss: 1.2059  avg_val_loss: 0.6042  time: 7s
Epoch 1 - Score: 0.6061
Epoch 1 - Save Best Score: 0.6061 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.7032(0.6042) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6012(0.6012) Grad: 1.2514  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6253(0.6132) Grad: 0.8835  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6179(0.5960) Grad: 1.8457  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5240(0.5240) 


Epoch 2 - avg_train_loss: 0.5960  avg_val_loss: 0.5335  time: 7s
Epoch 2 - Score: 0.5345
Epoch 2 - Save Best Score: 0.5345 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6068(0.5335) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6029(0.6029) Grad: 4.2531  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5373(0.4860) Grad: 2.0839  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4456(0.4863) Grad: 3.5908  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4157(0.4157) 


Epoch 3 - avg_train_loss: 0.4863  avg_val_loss: 0.4201  time: 7s
Epoch 3 - Score: 0.4233
Epoch 3 - Save Best Score: 0.4233 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5308(0.4201) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4092(0.4092) Grad: 3.1207  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4809(0.4317) Grad: 2.1031  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4777(0.4378) Grad: 1.1762  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3910(0.3910) 


Epoch 4 - avg_train_loss: 0.4378  avg_val_loss: 0.4122  time: 8s
Epoch 4 - Score: 0.4151
Epoch 4 - Save Best Score: 0.4151 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5217(0.4122) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4231(0.4231) Grad: 2.5743  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3561(0.4082) Grad: 1.1252  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3466(0.4164) Grad: 1.1400  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3689(0.3689) 


Epoch 5 - avg_train_loss: 0.4164  avg_val_loss: 0.3804  time: 7s
Epoch 5 - Score: 0.3827
Epoch 5 - Save Best Score: 0.3827 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4726(0.3804) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4266(0.4266) Grad: 1.4556  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4881(0.4024) Grad: 4.0782  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4671(0.3896) Grad: 2.3108  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3918(0.3918) 


Epoch 6 - avg_train_loss: 0.3896  avg_val_loss: 0.4166  time: 7s
Epoch 6 - Score: 0.4175


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4734(0.4166) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4234(0.4234) Grad: 1.6595  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4143(0.3662) Grad: 3.7817  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5346(0.3924) Grad: 4.0868  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4511(0.4511) 


Epoch 7 - avg_train_loss: 0.3924  avg_val_loss: 0.4438  time: 7s
Epoch 7 - Score: 0.4446


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4921(0.4438) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5197(0.5197) Grad: 2.8320  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4237(0.4076) Grad: 3.7734  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3420(0.3809) Grad: 1.2293  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3602(0.3602) 


Epoch 8 - avg_train_loss: 0.3809  avg_val_loss: 0.3603  time: 7s
Epoch 8 - Score: 0.3629
Epoch 8 - Save Best Score: 0.3629 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4497(0.3603) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2951(0.2951) Grad: 2.1128  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2987(0.2997) Grad: 1.1490  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2478(0.3040) Grad: 0.5362  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3236(0.3236) 


Epoch 9 - avg_train_loss: 0.3040  avg_val_loss: 0.3013  time: 7s
Epoch 9 - Score: 0.3029
Epoch 9 - Save Best Score: 0.3029 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3372(0.3013) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2409(0.2409) Grad: 1.1610  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2613(0.2886) Grad: 1.5653  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2759(0.2790) Grad: 0.9099  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3189(0.3189) 


Epoch 10 - avg_train_loss: 0.2790  avg_val_loss: 0.2963  time: 7s
Epoch 10 - Score: 0.2980
Epoch 10 - Save Best Score: 0.2980 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3312(0.2963) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3000(0.3000) Grad: 0.7941  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2570(0.2730) Grad: 1.2076  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2366(0.2683) Grad: 0.6664  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3145(0.3145) 


Epoch 11 - avg_train_loss: 0.2683  avg_val_loss: 0.2896  time: 7s
Epoch 11 - Score: 0.2909
Epoch 11 - Save Best Score: 0.2909 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3070(0.2896) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2660(0.2660) Grad: 1.6302  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2900(0.2581) Grad: 0.5834  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2073(0.2568) Grad: 1.3677  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3113(0.3113) 


Epoch 12 - avg_train_loss: 0.2568  avg_val_loss: 0.2879  time: 7s
Epoch 12 - Score: 0.2889
Epoch 12 - Save Best Score: 0.2889 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2968(0.2879) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1774(0.1774) Grad: 1.6541  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1680(0.2263) Grad: 0.7831  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2966(0.2471) Grad: 1.6616  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2401(0.2401) 


Epoch 13 - avg_train_loss: 0.2471  avg_val_loss: 0.2502  time: 7s
Epoch 13 - Score: 0.2504
Epoch 13 - Save Best Score: 0.2504 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2656(0.2502) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2153(0.2153) Grad: 1.3718  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1790(0.2108) Grad: 0.9331  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2266(0.2034) Grad: 2.1123  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2249(0.2249) 


Epoch 14 - avg_train_loss: 0.2034  avg_val_loss: 0.2330  time: 7s
Epoch 14 - Score: 0.2331
Epoch 14 - Save Best Score: 0.2331 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2382(0.2330) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1254(0.1254) Grad: 1.9796  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1331(0.1580) Grad: 1.2219  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0769(0.1680) Grad: 1.1474  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2093(0.2093) 


Epoch 15 - avg_train_loss: 0.1680  avg_val_loss: 0.2182  time: 7s
Epoch 15 - Score: 0.2184
Epoch 15 - Save Best Score: 0.2184 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2107(0.2182) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1846(0.1846) Grad: 0.9370  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1513(0.1612) Grad: 1.3653  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0906(0.1498) Grad: 0.6331  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 1s) Loss: 0.2012(0.2012) 


Epoch 16 - avg_train_loss: 0.1498  avg_val_loss: 0.2129  time: 8s
Epoch 16 - Score: 0.2132
Epoch 16 - Save Best Score: 0.2132 Model


EVAL: [2/3] Elapsed 0m 1s (remain 0m 0s) Loss: 0.2109(0.2129) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1144(0.1144) Grad: 0.7919  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1283(0.1644) Grad: 1.0244  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0967(0.1499) Grad: 1.6225  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2045(0.2045) 


Epoch 17 - avg_train_loss: 0.1499  avg_val_loss: 0.2173  time: 7s
Epoch 17 - Score: 0.2179


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2052(0.2173) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1464(0.1464) Grad: 1.0105  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1499(0.1546) Grad: 1.9198  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1670(0.1616) Grad: 3.9827  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2088(0.2088) 


Epoch 18 - avg_train_loss: 0.1616  avg_val_loss: 0.2177  time: 7s
Epoch 18 - Score: 0.2179


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2110(0.2177) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1169(0.1169) Grad: 3.5223  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1441(0.1652) Grad: 1.5158  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1920(0.1622) Grad: 4.2067  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2043(0.2043) 


Epoch 19 - avg_train_loss: 0.1622  avg_val_loss: 0.2288  time: 7s
Epoch 19 - Score: 0.2298


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2404(0.2288) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 7s) Loss: 0.1223(0.1223) Grad: 3.2854  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1446(0.1645) Grad: 1.1240  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1376(0.1538) Grad: 3.5499  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1824(0.1824) 


Epoch 20 - avg_train_loss: 0.1538  avg_val_loss: 0.2007  time: 7s
Epoch 20 - Score: 0.2015
Epoch 20 - Save Best Score: 0.2015 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1929(0.2007) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1075(0.1075) Grad: 2.0291  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1418(0.1319) Grad: 0.8125  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1149(0.1271) Grad: 0.6460  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1875(0.1875) 


Epoch 21 - avg_train_loss: 0.1271  avg_val_loss: 0.2019  time: 7s
Epoch 21 - Score: 0.2025


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1900(0.2019) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1511(0.1511) Grad: 0.7363  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0801(0.1095) Grad: 1.7154  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1484(0.1083) Grad: 1.0327  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1793(0.1793) 


Epoch 22 - avg_train_loss: 0.1083  avg_val_loss: 0.1942  time: 7s
Epoch 22 - Score: 0.1953
Epoch 22 - Save Best Score: 0.1953 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1729(0.1942) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1361(0.1361) Grad: 1.3246  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1030(0.1089) Grad: 1.8023  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1036(0.1041) Grad: 0.7756  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1580(0.1580) 


Epoch 23 - avg_train_loss: 0.1041  avg_val_loss: 0.1886  time: 7s
Epoch 23 - Score: 0.1906
Epoch 23 - Save Best Score: 0.1906 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1904(0.1886) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0637(0.0637) Grad: 0.9644  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1667(0.1114) Grad: 1.2969  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1415(0.1118) Grad: 1.9126  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1812(0.1812) 


Epoch 24 - avg_train_loss: 0.1118  avg_val_loss: 0.1902  time: 7s
Epoch 24 - Score: 0.1916


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1507(0.1902) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0881(0.0881) Grad: 2.5333  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1154(0.1098) Grad: 1.1797  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1707(0.1108) Grad: 1.4629  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2013(0.2013) 


Epoch 25 - avg_train_loss: 0.1108  avg_val_loss: 0.1986  time: 7s
Epoch 25 - Score: 0.1993


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1642(0.1986) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0907(0.0907) Grad: 1.2961  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1208(0.1074) Grad: 3.3839  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1083(0.1058) Grad: 2.6275  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1662(0.1662) 


Epoch 26 - avg_train_loss: 0.1058  avg_val_loss: 0.1846  time: 7s
Epoch 26 - Score: 0.1855
Epoch 26 - Save Best Score: 0.1855 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1775(0.1846) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0694(0.0694) Grad: 1.4168  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0743(0.0944) Grad: 1.0613  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1644(0.0943) Grad: 1.1997  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1948(0.1948) 


Epoch 27 - avg_train_loss: 0.0943  avg_val_loss: 0.2064  time: 7s
Epoch 27 - Score: 0.2070


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1911(0.2064) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0906(0.0906) Grad: 2.4232  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0724(0.0810) Grad: 2.2340  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0743(0.0810) Grad: 1.0510  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1739(0.1739) 


Epoch 28 - avg_train_loss: 0.0810  avg_val_loss: 0.1856  time: 7s
Epoch 28 - Score: 0.1863


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1684(0.1856) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0687(0.0687) Grad: 0.6439  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0619(0.0760) Grad: 2.5548  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0764(0.0798) Grad: 0.8978  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1842(0.1842) 


Epoch 29 - avg_train_loss: 0.0798  avg_val_loss: 0.1957  time: 7s
Epoch 29 - Score: 0.1960


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2050(0.1957) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0691(0.0691) Grad: 0.7876  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0510(0.0809) Grad: 2.4384  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0752(0.0866) Grad: 1.1176  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2027(0.2027) 


Epoch 30 - avg_train_loss: 0.0866  avg_val_loss: 0.1958  time: 7s
Epoch 30 - Score: 0.1965


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1589(0.1958) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0661(0.0661) Grad: 3.6367  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1457(0.1233) Grad: 2.8828  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1472(0.1302) Grad: 3.2927  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1521(0.1521) 


Epoch 31 - avg_train_loss: 0.1302  avg_val_loss: 0.1932  time: 7s
Epoch 31 - Score: 0.1974


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1794(0.1932) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1346(0.1346) Grad: 1.4366  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1207(0.1338) Grad: 2.7542  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0741(0.1211) Grad: 0.5536  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1960(0.1960) 


Epoch 32 - avg_train_loss: 0.1211  avg_val_loss: 0.2105  time: 7s
Epoch 32 - Score: 0.2115


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1862(0.2105) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1012(0.1012) Grad: 2.7969  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0452(0.0857) Grad: 1.9262  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0865(0.0855) Grad: 1.2678  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1605(0.1605) 


Epoch 33 - avg_train_loss: 0.0855  avg_val_loss: 0.1781  time: 7s
Epoch 33 - Score: 0.1810
Epoch 33 - Save Best Score: 0.1810 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1317(0.1781) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0673(0.0673) Grad: 1.5546  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0683(0.0674) Grad: 0.8312  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0610(0.0685) Grad: 0.8649  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1463(0.1463) 


Epoch 34 - avg_train_loss: 0.0685  avg_val_loss: 0.1745  time: 7s
Epoch 34 - Score: 0.1778
Epoch 34 - Save Best Score: 0.1778 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1452(0.1745) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0627(0.0627) Grad: 0.9235  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0617(0.0743) Grad: 1.4693  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0705(0.0715) Grad: 0.6971  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1602(0.1602) 


Epoch 35 - avg_train_loss: 0.0715  avg_val_loss: 0.1817  time: 7s
Epoch 35 - Score: 0.1837


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1549(0.1817) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1073(0.1073) Grad: 1.1908  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0704(0.0743) Grad: 0.8832  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0562(0.0718) Grad: 1.0810  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1717(0.1717) 


Epoch 36 - avg_train_loss: 0.0718  avg_val_loss: 0.1830  time: 7s
Epoch 36 - Score: 0.1849


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1405(0.1830) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0697(0.0697) Grad: 3.6818  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0843(0.0748) Grad: 2.8474  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0753(0.0820) Grad: 1.7900  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1787(0.1787) 


Epoch 37 - avg_train_loss: 0.0820  avg_val_loss: 0.2095  time: 7s
Epoch 37 - Score: 0.2111


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2310(0.2095) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0932(0.0932) Grad: 3.0037  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0843(0.0983) Grad: 1.8274  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0603(0.0974) Grad: 1.7070  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1701(0.1701) 


Epoch 38 - avg_train_loss: 0.0974  avg_val_loss: 0.1820  time: 7s
Epoch 38 - Score: 0.1828


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1626(0.1820) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0584(0.0584) Grad: 1.8276  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0634(0.0590) Grad: 1.1736  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0548(0.0625) Grad: 1.0913  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1677(0.1677) 


Epoch 39 - avg_train_loss: 0.0625  avg_val_loss: 0.1827  time: 7s
Epoch 39 - Score: 0.1835


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1705(0.1827) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0664(0.0664) Grad: 1.8255  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0588(0.0566) Grad: 0.5557  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1154(0.0582) Grad: 1.2595  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1662(0.1662) 


Epoch 40 - avg_train_loss: 0.0582  avg_val_loss: 0.1735  time: 7s
Epoch 40 - Score: 0.1743
Epoch 40 - Save Best Score: 0.1743 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1469(0.1735) 


Score: 0.1743


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.2128(3.2128) Grad: 3.5351  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7870(1.5989) Grad: 3.3611  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5016(1.1911) Grad: 0.4392  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6573(0.6573) 


Epoch 1 - avg_train_loss: 1.1911  avg_val_loss: 0.5817  time: 7s
Epoch 1 - Score: 0.5853
Epoch 1 - Save Best Score: 0.5853 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5129(0.5817) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6866(0.6866) Grad: 1.1293  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5656(0.5851) Grad: 1.0671  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5800(0.5727) Grad: 1.6616  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5676(0.5676) 


Epoch 2 - avg_train_loss: 0.5727  avg_val_loss: 0.4669  time: 7s
Epoch 2 - Score: 0.4750
Epoch 2 - Save Best Score: 0.4750 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3527(0.4669) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.6010(0.6010) Grad: 1.0335  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5985(0.4553) Grad: 1.3922  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3916(0.4579) Grad: 1.5641  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4719(0.4719) 


Epoch 3 - avg_train_loss: 0.4579  avg_val_loss: 0.3888  time: 7s
Epoch 3 - Score: 0.3952
Epoch 3 - Save Best Score: 0.3952 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3557(0.3888) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4275(0.4275) Grad: 0.9028  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4008(0.4222) Grad: 1.3843  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3045(0.4080) Grad: 1.6667  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4669(0.4669) 


Epoch 4 - avg_train_loss: 0.4080  avg_val_loss: 0.3869  time: 7s
Epoch 4 - Score: 0.3928
Epoch 4 - Save Best Score: 0.3928 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3420(0.3869) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4547(0.4547) Grad: 3.0646  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4201(0.4041) Grad: 2.3548  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4412(0.4109) Grad: 1.4630  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4284(0.4284) 


Epoch 5 - avg_train_loss: 0.4109  avg_val_loss: 0.3642  time: 7s
Epoch 5 - Score: 0.3685
Epoch 5 - Save Best Score: 0.3685 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3492(0.3642) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3099(0.3099) Grad: 1.4474  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4141(0.3988) Grad: 3.6184  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4640(0.4194) Grad: 2.2431  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4363(0.4363) 


Epoch 6 - avg_train_loss: 0.4194  avg_val_loss: 0.3943  time: 7s
Epoch 6 - Score: 0.3979


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4469(0.3943) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3911(0.3911) Grad: 3.9388  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3203(0.4041) Grad: 1.6978  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3435(0.3731) Grad: 1.9316  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3820(0.3820) 


Epoch 7 - avg_train_loss: 0.3731  avg_val_loss: 0.3309  time: 7s
Epoch 7 - Score: 0.3358
Epoch 7 - Save Best Score: 0.3358 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3718(0.3309) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4171(0.4171) Grad: 1.0446  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3053(0.3326) Grad: 3.6338  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3395(0.3319) Grad: 2.6980  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3659(0.3659) 


Epoch 8 - avg_train_loss: 0.3319  avg_val_loss: 0.3099  time: 7s
Epoch 8 - Score: 0.3159
Epoch 8 - Save Best Score: 0.3159 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3507(0.3099) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3927(0.3927) Grad: 1.2701  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2108(0.2805) Grad: 2.5864  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3435(0.2780) Grad: 0.6249  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3458(0.3458) 


Epoch 9 - avg_train_loss: 0.2780  avg_val_loss: 0.2955  time: 7s
Epoch 9 - Score: 0.3000
Epoch 9 - Save Best Score: 0.3000 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3205(0.2955) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3501(0.3501) Grad: 1.4392  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2545(0.2511) Grad: 0.7593  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2875(0.2604) Grad: 2.3247  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3456(0.3456) 


Epoch 10 - avg_train_loss: 0.2604  avg_val_loss: 0.2975  time: 7s
Epoch 10 - Score: 0.3022


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3340(0.2975) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1718(0.1718) Grad: 1.7400  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2598(0.2674) Grad: 2.0340  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2242(0.2594) Grad: 1.6583  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3267(0.3267) 


Epoch 11 - avg_train_loss: 0.2594  avg_val_loss: 0.2880  time: 7s
Epoch 11 - Score: 0.2906
Epoch 11 - Save Best Score: 0.2906 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3059(0.2880) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2387(0.2387) Grad: 0.9038  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2629(0.2801) Grad: 1.9113  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2233(0.2585) Grad: 3.7177  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3858(0.3858) 


Epoch 12 - avg_train_loss: 0.2585  avg_val_loss: 0.3225  time: 7s
Epoch 12 - Score: 0.3273


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3087(0.3225) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2942(0.2942) Grad: 2.4499  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2214(0.2674) Grad: 2.1691  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2335(0.2584) Grad: 1.8223  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3175(0.3175) 


Epoch 13 - avg_train_loss: 0.2584  avg_val_loss: 0.2914  time: 7s
Epoch 13 - Score: 0.2951


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3574(0.2914) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3119(0.3119) Grad: 2.7304  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1791(0.2237) Grad: 1.6905  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2420(0.2091) Grad: 2.9549  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3041(0.3041) 


Epoch 14 - avg_train_loss: 0.2091  avg_val_loss: 0.2760  time: 7s
Epoch 14 - Score: 0.2785
Epoch 14 - Save Best Score: 0.2785 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3139(0.2760) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2114(0.2114) Grad: 2.6657  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2137(0.1878) Grad: 1.2964  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1463(0.1904) Grad: 1.1269  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2659(0.2659) 


Epoch 15 - avg_train_loss: 0.1904  avg_val_loss: 0.2425  time: 7s
Epoch 15 - Score: 0.2458
Epoch 15 - Save Best Score: 0.2458 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2967(0.2425) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1865(0.1865) Grad: 0.5835  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1291(0.1666) Grad: 1.2545  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1258(0.1681) Grad: 0.8198  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2659(0.2659) 


Epoch 16 - avg_train_loss: 0.1681  avg_val_loss: 0.2421  time: 7s
Epoch 16 - Score: 0.2464


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3090(0.2421) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1349(0.1349) Grad: 0.8073  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1839(0.1728) Grad: 1.2137  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1201(0.1626) Grad: 2.6054  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2638(0.2638) 


Epoch 17 - avg_train_loss: 0.1626  avg_val_loss: 0.2366  time: 7s
Epoch 17 - Score: 0.2389
Epoch 17 - Save Best Score: 0.2389 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2674(0.2366) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1061(0.1061) Grad: 1.4530  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2062(0.1678) Grad: 1.9628  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2578(0.1744) Grad: 2.8194  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2523(0.2523) 


Epoch 18 - avg_train_loss: 0.1744  avg_val_loss: 0.2338  time: 7s
Epoch 18 - Score: 0.2350
Epoch 18 - Save Best Score: 0.2350 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2571(0.2338) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1137(0.1137) Grad: 0.8918  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1261(0.1765) Grad: 1.5030  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1842(0.1707) Grad: 1.6853  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2580(0.2580) 


Epoch 19 - avg_train_loss: 0.1707  avg_val_loss: 0.2481  time: 7s
Epoch 19 - Score: 0.2497


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2965(0.2481) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1156(0.1156) Grad: 1.5230  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1439(0.1497) Grad: 2.7376  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1278(0.1542) Grad: 0.8055  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2314(0.2314) 


Epoch 20 - avg_train_loss: 0.1542  avg_val_loss: 0.2256  time: 7s
Epoch 20 - Score: 0.2270
Epoch 20 - Save Best Score: 0.2270 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2732(0.2256) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1524(0.1524) Grad: 2.8241  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1242(0.1333) Grad: 1.5059  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0949(0.1326) Grad: 2.1363  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2289(0.2289) 


Epoch 21 - avg_train_loss: 0.1326  avg_val_loss: 0.2210  time: 7s
Epoch 21 - Score: 0.2236
Epoch 21 - Save Best Score: 0.2236 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2837(0.2210) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0890(0.0890) Grad: 2.5215  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1190(0.1170) Grad: 0.8651  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0802(0.1099) Grad: 1.3770  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2328(0.2328) 


Epoch 22 - avg_train_loss: 0.1099  avg_val_loss: 0.2251  time: 7s
Epoch 22 - Score: 0.2269


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2754(0.2251) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0881(0.0881) Grad: 1.4427  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0844(0.1087) Grad: 1.5437  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0762(0.1122) Grad: 2.3911  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2091(0.2091) 


Epoch 23 - avg_train_loss: 0.1122  avg_val_loss: 0.2115  time: 7s
Epoch 23 - Score: 0.2141
Epoch 23 - Save Best Score: 0.2141 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2814(0.2115) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0683(0.0683) Grad: 1.0281  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1123(0.1169) Grad: 1.3583  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1328(0.1212) Grad: 2.8435  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2241(0.2241) 


Epoch 24 - avg_train_loss: 0.1212  avg_val_loss: 0.2348  time: 7s
Epoch 24 - Score: 0.2385


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3265(0.2348) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1382(0.1382) Grad: 4.1286  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1360(0.1251) Grad: 0.8256  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1235(0.1329) Grad: 2.2551  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2294(0.2294) 


Epoch 25 - avg_train_loss: 0.1329  avg_val_loss: 0.2246  time: 7s
Epoch 25 - Score: 0.2247


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2161(0.2246) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1293(0.1293) Grad: 1.9396  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0980(0.1164) Grad: 2.1350  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1465(0.1299) Grad: 2.6323  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2399(0.2399) 


Epoch 26 - avg_train_loss: 0.1299  avg_val_loss: 0.2281  time: 7s
Epoch 26 - Score: 0.2287


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2467(0.2281) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1234(0.1234) Grad: 2.4063  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0833(0.1195) Grad: 1.4125  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1054(0.1121) Grad: 3.5554  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2043(0.2043) 


Epoch 27 - avg_train_loss: 0.1121  avg_val_loss: 0.2079  time: 7s
Epoch 27 - Score: 0.2096
Epoch 27 - Save Best Score: 0.2096 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2646(0.2079) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0838(0.0838) Grad: 1.1122  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0947(0.0924) Grad: 0.6883  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0728(0.0880) Grad: 1.9413  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2011(0.2011) 


Epoch 28 - avg_train_loss: 0.0880  avg_val_loss: 0.2073  time: 7s
Epoch 28 - Score: 0.2088
Epoch 28 - Save Best Score: 0.2088 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2619(0.2073) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0903(0.0903) Grad: 1.1440  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1005(0.0907) Grad: 1.3482  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0636(0.0868) Grad: 2.0014  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1945(0.1945) 


Epoch 29 - avg_train_loss: 0.0868  avg_val_loss: 0.2006  time: 7s
Epoch 29 - Score: 0.2017
Epoch 29 - Save Best Score: 0.2017 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2471(0.2006) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0742(0.0742) Grad: 0.6931  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0775(0.0892) Grad: 2.1268  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1095(0.0897) Grad: 1.2457  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1994(0.1994) 


Epoch 30 - avg_train_loss: 0.0897  avg_val_loss: 0.2068  time: 7s
Epoch 30 - Score: 0.2096


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2813(0.2068) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1108(0.1108) Grad: 0.7227  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1201(0.1093) Grad: 2.2393  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1292(0.1101) Grad: 0.7588  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1984(0.1984) 


Epoch 31 - avg_train_loss: 0.1101  avg_val_loss: 0.2212  time: 7s
Epoch 31 - Score: 0.2239


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2969(0.2212) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1318(0.1318) Grad: 2.4337  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0838(0.0955) Grad: 1.5839  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1339(0.0976) Grad: 1.0590  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1940(0.1940) 


Epoch 32 - avg_train_loss: 0.0976  avg_val_loss: 0.2295  time: 7s
Epoch 32 - Score: 0.2333


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3153(0.2295) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0873(0.0873) Grad: 3.0993  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0914(0.1000) Grad: 3.3253  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0580(0.0937) Grad: 0.7889  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1901(0.1901) 


Epoch 33 - avg_train_loss: 0.0937  avg_val_loss: 0.1941  time: 7s
Epoch 33 - Score: 0.1965
Epoch 33 - Save Best Score: 0.1965 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2596(0.1941) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0711(0.0711) Grad: 0.8962  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0792(0.0710) Grad: 1.1199  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0643(0.0693) Grad: 0.8856  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1974(0.1974) 


Epoch 34 - avg_train_loss: 0.0693  avg_val_loss: 0.1960  time: 7s
Epoch 34 - Score: 0.1993


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2694(0.1960) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0573(0.0573) Grad: 1.2053  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0548(0.0678) Grad: 3.4393  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0616(0.0657) Grad: 2.7103  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1896(0.1896) 


Epoch 35 - avg_train_loss: 0.0657  avg_val_loss: 0.2039  time: 7s
Epoch 35 - Score: 0.2077


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2911(0.2039) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0685(0.0685) Grad: 1.9461  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0744(0.0678) Grad: 2.4610  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0816(0.0761) Grad: 0.6369  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1911(0.1911) 


Epoch 36 - avg_train_loss: 0.0761  avg_val_loss: 0.2077  time: 7s
Epoch 36 - Score: 0.2109


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2887(0.2077) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0898(0.0898) Grad: 2.4983  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0845(0.0933) Grad: 1.0808  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1108(0.0945) Grad: 2.9997  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1905(0.1905) 


Epoch 37 - avg_train_loss: 0.0945  avg_val_loss: 0.1980  time: 7s
Epoch 37 - Score: 0.2003


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2642(0.1980) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1309(0.1309) Grad: 2.4227  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1041(0.1048) Grad: 4.0909  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0924(0.0945) Grad: 3.6761  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2016(0.2016) 


Epoch 38 - avg_train_loss: 0.0945  avg_val_loss: 0.2096  time: 8s
Epoch 38 - Score: 0.2100


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2356(0.2096) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0904(0.0904) Grad: 3.4301  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0800(0.0739) Grad: 2.6719  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0491(0.0705) Grad: 1.9877  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1887(0.1887) 


Epoch 39 - avg_train_loss: 0.0705  avg_val_loss: 0.1985  time: 7s
Epoch 39 - Score: 0.1999


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2501(0.1985) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0566(0.0566) Grad: 3.2247  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0758(0.0613) Grad: 1.4953  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0638(0.0583) Grad: 1.8961  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1766(0.1766) 


Epoch 40 - avg_train_loss: 0.0583  avg_val_loss: 0.1923  time: 7s
Epoch 40 - Score: 0.1941
Epoch 40 - Save Best Score: 0.1941 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2506(0.1923) 


Score: 0.1941


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 3.2907(3.2907) Grad: 3.4877  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7239(1.5096) Grad: 3.0190  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6816(1.1510) Grad: 2.0480  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6261(0.6261) 


Epoch 1 - avg_train_loss: 1.1510  avg_val_loss: 0.6471  time: 7s
Epoch 1 - Score: 0.6497
Epoch 1 - Save Best Score: 0.6497 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5498(0.6471) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7441(0.7441) Grad: 1.0952  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4416(0.5708) Grad: 1.5379  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3347(0.5381) Grad: 0.9884  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5435(0.5435) 


Epoch 2 - avg_train_loss: 0.5381  avg_val_loss: 0.5395  time: 7s
Epoch 2 - Score: 0.5422
Epoch 2 - Save Best Score: 0.5422 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4248(0.5395) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3529(0.3529) Grad: 1.9870  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4484(0.4498) Grad: 2.0986  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4095(0.4336) Grad: 1.6208  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4742(0.4742) 


Epoch 3 - avg_train_loss: 0.4336  avg_val_loss: 0.4485  time: 7s
Epoch 3 - Score: 0.4528
Epoch 3 - Save Best Score: 0.4528 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3106(0.4485) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4022(0.4022) Grad: 1.4453  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3274(0.4071) Grad: 1.3785  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2770(0.3872) Grad: 1.2947  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4582(0.4582) 


Epoch 4 - avg_train_loss: 0.3872  avg_val_loss: 0.4405  time: 7s
Epoch 4 - Score: 0.4454
Epoch 4 - Save Best Score: 0.4454 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2956(0.4405) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3136(0.3136) Grad: 1.7699  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4794(0.3927) Grad: 1.0828  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3453(0.3810) Grad: 1.7043  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4587(0.4587) 


Epoch 5 - avg_train_loss: 0.3810  avg_val_loss: 0.4249  time: 7s
Epoch 5 - Score: 0.4288
Epoch 5 - Save Best Score: 0.4288 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2986(0.4249) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4336(0.4336) Grad: 1.2294  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2965(0.3848) Grad: 1.3179  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3259(0.3768) Grad: 1.2702  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4329(0.4329) 


Epoch 6 - avg_train_loss: 0.3768  avg_val_loss: 0.4081  time: 7s
Epoch 6 - Score: 0.4107
Epoch 6 - Save Best Score: 0.4107 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3061(0.4081) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3964(0.3964) Grad: 1.7142  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3406(0.3424) Grad: 2.0220  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3616(0.3322) Grad: 1.0306  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3591(0.3591) 


Epoch 7 - avg_train_loss: 0.3322  avg_val_loss: 0.3448  time: 7s
Epoch 7 - Score: 0.3489
Epoch 7 - Save Best Score: 0.3489 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2290(0.3448) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3162(0.3162) Grad: 1.0045  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2167(0.3090) Grad: 1.3140  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2315(0.2992) Grad: 0.9462  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3643(0.3643) 


Epoch 8 - avg_train_loss: 0.2992  avg_val_loss: 0.3463  time: 7s
Epoch 8 - Score: 0.3487
Epoch 8 - Save Best Score: 0.3487 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2563(0.3463) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4440(0.4440) Grad: 1.1003  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3158(0.2660) Grad: 1.2285  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1997(0.2526) Grad: 1.6447  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2971(0.2971) 


Epoch 9 - avg_train_loss: 0.2526  avg_val_loss: 0.3026  time: 7s
Epoch 9 - Score: 0.3041
Epoch 9 - Save Best Score: 0.3041 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2456(0.3026) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2395(0.2395) Grad: 3.1398  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2461(0.2416) Grad: 2.2821  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2836(0.2362) Grad: 0.6492  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2960(0.2960) 


Epoch 10 - avg_train_loss: 0.2362  avg_val_loss: 0.2884  time: 7s
Epoch 10 - Score: 0.2897
Epoch 10 - Save Best Score: 0.2897 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2291(0.2884) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3244(0.3244) Grad: 0.6267  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2239(0.2416) Grad: 2.0030  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1834(0.2331) Grad: 1.8411  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2751(0.2751) 


Epoch 11 - avg_train_loss: 0.2331  avg_val_loss: 0.2740  time: 7s
Epoch 11 - Score: 0.2767
Epoch 11 - Save Best Score: 0.2767 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1945(0.2740) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1857(0.1857) Grad: 0.9362  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3219(0.2184) Grad: 3.7839  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1724(0.2284) Grad: 2.5062  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3429(0.3429) 


Epoch 12 - avg_train_loss: 0.2284  avg_val_loss: 0.3240  time: 7s
Epoch 12 - Score: 0.3254


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2591(0.3240) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2466(0.2466) Grad: 2.8367  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2402(0.2273) Grad: 0.6546  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1748(0.2150) Grad: 2.3808  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2887(0.2887) 


Epoch 13 - avg_train_loss: 0.2150  avg_val_loss: 0.2750  time: 7s
Epoch 13 - Score: 0.2769


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2031(0.2750) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2346(0.2346) Grad: 0.9454  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2099(0.1968) Grad: 2.1304  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2388(0.1940) Grad: 1.1436  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2679(0.2679) 


Epoch 14 - avg_train_loss: 0.1940  avg_val_loss: 0.2485  time: 7s
Epoch 14 - Score: 0.2509
Epoch 14 - Save Best Score: 0.2509 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1730(0.2485) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1482(0.1482) Grad: 2.5433  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1480(0.1604) Grad: 0.6868  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1252(0.1607) Grad: 1.2424  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2434(0.2434) 


Epoch 15 - avg_train_loss: 0.1607  avg_val_loss: 0.2319  time: 7s
Epoch 15 - Score: 0.2323
Epoch 15 - Save Best Score: 0.2323 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2064(0.2319) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1161(0.1161) Grad: 0.6034  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0885(0.1394) Grad: 0.8452  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1512(0.1353) Grad: 0.9203  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2423(0.2423) 


Epoch 16 - avg_train_loss: 0.1353  avg_val_loss: 0.2199  time: 7s
Epoch 16 - Score: 0.2212
Epoch 16 - Save Best Score: 0.2212 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1736(0.2199) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0840(0.0840) Grad: 0.6241  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1631(0.1261) Grad: 1.8636  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1964(0.1380) Grad: 0.6450  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2543(0.2543) 


Epoch 17 - avg_train_loss: 0.1380  avg_val_loss: 0.2358  time: 7s
Epoch 17 - Score: 0.2363


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2137(0.2358) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1152(0.1152) Grad: 3.5239  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1016(0.1311) Grad: 1.4153  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1332(0.1472) Grad: 2.6251  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2399(0.2399) 


Epoch 18 - avg_train_loss: 0.1472  avg_val_loss: 0.2254  time: 7s
Epoch 18 - Score: 0.2266


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1747(0.2254) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1210(0.1210) Grad: 2.2028  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2364(0.1388) Grad: 2.0388  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1829(0.1584) Grad: 1.0390  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2595(0.2595) 


Epoch 19 - avg_train_loss: 0.1584  avg_val_loss: 0.2383  time: 7s
Epoch 19 - Score: 0.2391


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2065(0.2383) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1627(0.1627) Grad: 0.9990  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1414(0.1487) Grad: 1.9692  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1239(0.1406) Grad: 2.0954  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2283(0.2283) 


Epoch 20 - avg_train_loss: 0.1406  avg_val_loss: 0.2183  time: 7s
Epoch 20 - Score: 0.2190
Epoch 20 - Save Best Score: 0.2190 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1800(0.2183) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1069(0.1069) Grad: 1.7487  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0789(0.1199) Grad: 2.2514  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1323(0.1196) Grad: 1.3166  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2114(0.2114) 


Epoch 21 - avg_train_loss: 0.1196  avg_val_loss: 0.2000  time: 7s
Epoch 21 - Score: 0.2010
Epoch 21 - Save Best Score: 0.2010 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1547(0.2000) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0940(0.0940) Grad: 1.7652  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1249(0.1064) Grad: 1.5383  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0964(0.1021) Grad: 0.6396  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2270(0.2270) 


Epoch 22 - avg_train_loss: 0.1021  avg_val_loss: 0.2070  time: 7s
Epoch 22 - Score: 0.2085


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1551(0.2070) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1507(0.1507) Grad: 1.2235  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1397(0.1044) Grad: 0.5101  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0754(0.1033) Grad: 0.8774  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2162(0.2162) 


Epoch 23 - avg_train_loss: 0.1033  avg_val_loss: 0.2045  time: 7s
Epoch 23 - Score: 0.2049


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1804(0.2045) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0830(0.0830) Grad: 2.5785  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1367(0.1101) Grad: 0.8764  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1289(0.1099) Grad: 2.4478  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2540(0.2540) 


Epoch 24 - avg_train_loss: 0.1099  avg_val_loss: 0.2259  time: 7s
Epoch 24 - Score: 0.2279


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1671(0.2259) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1408(0.1408) Grad: 1.7474  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1591(0.1380) Grad: 3.0712  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1179(0.1313) Grad: 1.4873  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2240(0.2240) 


Epoch 25 - avg_train_loss: 0.1313  avg_val_loss: 0.2085  time: 7s
Epoch 25 - Score: 0.2091


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1796(0.2085) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0977(0.0977) Grad: 2.5812  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1295(0.1098) Grad: 2.0867  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1484(0.1175) Grad: 2.1572  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2287(0.2287) 


Epoch 26 - avg_train_loss: 0.1175  avg_val_loss: 0.2149  time: 7s
Epoch 26 - Score: 0.2156


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1793(0.2149) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1109(0.1109) Grad: 1.4629  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1525(0.1129) Grad: 2.0930  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0574(0.1014) Grad: 2.7558  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2113(0.2113) 


Epoch 27 - avg_train_loss: 0.1014  avg_val_loss: 0.1974  time: 7s
Epoch 27 - Score: 0.1984
Epoch 27 - Save Best Score: 0.1984 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1541(0.1974) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1317(0.1317) Grad: 0.5017  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1131(0.0891) Grad: 1.0815  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0550(0.0856) Grad: 0.6442  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2048(0.2048) 


Epoch 28 - avg_train_loss: 0.0856  avg_val_loss: 0.1921  time: 7s
Epoch 28 - Score: 0.1928
Epoch 28 - Save Best Score: 0.1928 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1576(0.1921) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1115(0.1115) Grad: 0.6623  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1050(0.0784) Grad: 1.2823  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0669(0.0819) Grad: 1.4293  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1955(0.1955) 


Epoch 29 - avg_train_loss: 0.0819  avg_val_loss: 0.1840  time: 7s
Epoch 29 - Score: 0.1849
Epoch 29 - Save Best Score: 0.1849 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1435(0.1840) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0998(0.0998) Grad: 1.4414  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1002(0.1029) Grad: 3.2817  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1058(0.0913) Grad: 3.6422  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2264(0.2264) 


Epoch 30 - avg_train_loss: 0.0913  avg_val_loss: 0.2076  time: 7s
Epoch 30 - Score: 0.2094


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1472(0.2076) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1219(0.1219) Grad: 1.3455  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1086(0.1063) Grad: 1.2129  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1234(0.1134) Grad: 1.3547  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2356(0.2356) 


Epoch 31 - avg_train_loss: 0.1134  avg_val_loss: 0.2158  time: 7s
Epoch 31 - Score: 0.2168


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1790(0.2158) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1087(0.1087) Grad: 2.3019  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1175(0.1094) Grad: 1.0764  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0796(0.1059) Grad: 0.7708  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1997(0.1997) 


Epoch 32 - avg_train_loss: 0.1059  avg_val_loss: 0.1880  time: 7s
Epoch 32 - Score: 0.1884


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1651(0.1880) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0665(0.0665) Grad: 2.4066  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0569(0.0762) Grad: 1.1837  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0588(0.0744) Grad: 0.8674  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1932(0.1932) 


Epoch 33 - avg_train_loss: 0.0744  avg_val_loss: 0.1832  time: 7s
Epoch 33 - Score: 0.1845
Epoch 33 - Save Best Score: 0.1845 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1354(0.1832) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0642(0.0642) Grad: 1.1928  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0523(0.0694) Grad: 0.3756  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0669(0.0682) Grad: 1.7765  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2018(0.2018) 


Epoch 34 - avg_train_loss: 0.0682  avg_val_loss: 0.1874  time: 7s
Epoch 34 - Score: 0.1896


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1239(0.1874) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0667(0.0667) Grad: 1.4176  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0492(0.0624) Grad: 0.4896  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0597(0.0640) Grad: 0.9661  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1926(0.1926) 


Epoch 35 - avg_train_loss: 0.0640  avg_val_loss: 0.1819  time: 7s
Epoch 35 - Score: 0.1829
Epoch 35 - Save Best Score: 0.1829 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1400(0.1819) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0899(0.0899) Grad: 0.4778  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0503(0.0671) Grad: 0.8731  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0687(0.0671) Grad: 1.5425  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2071(0.2071) 


Epoch 36 - avg_train_loss: 0.0671  avg_val_loss: 0.1914  time: 7s
Epoch 36 - Score: 0.1926


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1455(0.1914) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0780(0.0780) Grad: 2.5831  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1311(0.0940) Grad: 0.6331  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1204(0.1099) Grad: 3.0885  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2031(0.2031) 


Epoch 37 - avg_train_loss: 0.1099  avg_val_loss: 0.1937  time: 7s
Epoch 37 - Score: 0.1940


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1717(0.1937) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0904(0.0904) Grad: 1.6296  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1049(0.0933) Grad: 0.5203  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0759(0.0903) Grad: 0.9101  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2038(0.2038) 


Epoch 38 - avg_train_loss: 0.0903  avg_val_loss: 0.2095  time: 7s
Epoch 38 - Score: 0.2115


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1554(0.2095) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1055(0.1055) Grad: 2.5573  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0593(0.0815) Grad: 0.5353  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0607(0.0727) Grad: 1.6352  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1849(0.1849) 


Epoch 39 - avg_train_loss: 0.0727  avg_val_loss: 0.1800  time: 7s
Epoch 39 - Score: 0.1807
Epoch 39 - Save Best Score: 0.1807 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1434(0.1800) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0672(0.0672) Grad: 2.6654  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0560(0.0602) Grad: 0.4917  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0528(0.0613) Grad: 0.5163  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2027(0.2027) 


Epoch 40 - avg_train_loss: 0.0613  avg_val_loss: 0.1905  time: 7s
Epoch 40 - Score: 0.1922


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1344(0.1905) 


Score: 0.1807


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.4568(3.4568) Grad: 3.4859  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6743(1.6441) Grad: 4.1234  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6840(1.2266) Grad: 1.9321  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6339(0.6339) 


Epoch 1 - avg_train_loss: 1.2266  avg_val_loss: 0.5963  time: 7s
Epoch 1 - Score: 0.6003
Epoch 1 - Save Best Score: 0.6003 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6961(0.5963) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6920(0.6920) Grad: 2.4542  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6368(0.5770) Grad: 0.7258  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4383(0.5500) Grad: 0.4927  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4590(0.4590) 


Epoch 2 - avg_train_loss: 0.5500  avg_val_loss: 0.4493  time: 7s
Epoch 2 - Score: 0.4498
Epoch 2 - Save Best Score: 0.4498 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4819(0.4493) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3526(0.3526) Grad: 1.1251  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5163(0.4415) Grad: 2.3747  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3483(0.4526) Grad: 0.9495  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4662(0.4662) 


Epoch 3 - avg_train_loss: 0.4526  avg_val_loss: 0.4393  time: 7s
Epoch 3 - Score: 0.4399
Epoch 3 - Save Best Score: 0.4399 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4302(0.4393) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5282(0.5282) Grad: 3.1242  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3998(0.4502) Grad: 3.3607  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3209(0.4271) Grad: 1.0568  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4316(0.4316) 


Epoch 4 - avg_train_loss: 0.4271  avg_val_loss: 0.4088  time: 7s
Epoch 4 - Score: 0.4093
Epoch 4 - Save Best Score: 0.4093 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4063(0.4088) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4784(0.4784) Grad: 0.9826  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3834(0.4869) Grad: 3.7288  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3554(0.4511) Grad: 1.8728  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4107(0.4107) 


Epoch 5 - avg_train_loss: 0.4511  avg_val_loss: 0.3814  time: 7s
Epoch 5 - Score: 0.3823
Epoch 5 - Save Best Score: 0.3823 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3441(0.3814) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4600(0.4600) Grad: 2.0841  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3021(0.4210) Grad: 2.5306  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3497(0.4054) Grad: 0.8458  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3977(0.3977) 


Epoch 6 - avg_train_loss: 0.4054  avg_val_loss: 0.3707  time: 7s
Epoch 6 - Score: 0.3719
Epoch 6 - Save Best Score: 0.3719 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3117(0.3707) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3202(0.3202) Grad: 1.4129  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3617(0.3388) Grad: 3.5351  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2401(0.3402) Grad: 0.8986  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3089(0.3089) 


Epoch 7 - avg_train_loss: 0.3402  avg_val_loss: 0.2901  time: 7s
Epoch 7 - Score: 0.2936
Epoch 7 - Save Best Score: 0.2936 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1890(0.2901) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2394(0.2394) Grad: 0.6438  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3745(0.2957) Grad: 1.7645  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2940(0.2831) Grad: 1.7150  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2760(0.2760) 


Epoch 8 - avg_train_loss: 0.2831  avg_val_loss: 0.2682  time: 7s
Epoch 8 - Score: 0.2742
Epoch 8 - Save Best Score: 0.2742 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1460(0.2682) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2323(0.2323) Grad: 1.3993  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1870(0.2538) Grad: 2.0422  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2554(0.2448) Grad: 2.0575  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2692(0.2692) 


Epoch 9 - avg_train_loss: 0.2448  avg_val_loss: 0.2719  time: 7s
Epoch 9 - Score: 0.2761


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1764(0.2719) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1916(0.1916) Grad: 2.4546  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2725(0.2388) Grad: 0.7180  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1671(0.2314) Grad: 1.0656  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2526(0.2526) 


Epoch 10 - avg_train_loss: 0.2314  avg_val_loss: 0.2693  time: 7s
Epoch 10 - Score: 0.2754


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1665(0.2693) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1939(0.1939) Grad: 1.2872  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1590(0.2416) Grad: 2.7303  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1864(0.2307) Grad: 0.8094  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2569(0.2569) 


Epoch 11 - avg_train_loss: 0.2307  avg_val_loss: 0.2712  time: 7s
Epoch 11 - Score: 0.2784


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1553(0.2712) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1784(0.1784) Grad: 1.3388  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2258(0.2206) Grad: 1.2773  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2269(0.2207) Grad: 1.3056  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2510(0.2510) 


Epoch 12 - avg_train_loss: 0.2207  avg_val_loss: 0.2634  time: 7s
Epoch 12 - Score: 0.2675
Epoch 12 - Save Best Score: 0.2675 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1787(0.2634) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2322(0.2322) Grad: 2.5470  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2127(0.2086) Grad: 2.0008  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2246(0.2347) Grad: 1.3749  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2788(0.2788) 


Epoch 13 - avg_train_loss: 0.2347  avg_val_loss: 0.3111  time: 7s
Epoch 13 - Score: 0.3173


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2182(0.3111) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2408(0.2408) Grad: 3.6864  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2788(0.2092) Grad: 4.8207  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2819(0.2175) Grad: 2.1877  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2281(0.2281) 


Epoch 14 - avg_train_loss: 0.2175  avg_val_loss: 0.2445  time: 7s
Epoch 14 - Score: 0.2493
Epoch 14 - Save Best Score: 0.2493 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1606(0.2445) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1737(0.1737) Grad: 1.3545  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1582(0.1691) Grad: 0.6268  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1996(0.1675) Grad: 0.4734  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1946(0.1946) 


Epoch 15 - avg_train_loss: 0.1675  avg_val_loss: 0.2315  time: 7s
Epoch 15 - Score: 0.2379
Epoch 15 - Save Best Score: 0.2379 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1648(0.2315) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1691(0.1691) Grad: 2.4591  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1560(0.1521) Grad: 0.7898  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1068(0.1544) Grad: 1.1678  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1955(0.1955) 


Epoch 16 - avg_train_loss: 0.1544  avg_val_loss: 0.2285  time: 7s
Epoch 16 - Score: 0.2360
Epoch 16 - Save Best Score: 0.2360 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1453(0.2285) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1884(0.1884) Grad: 0.6755  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1234(0.1689) Grad: 1.1263  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1287(0.1469) Grad: 0.7121  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1991(0.1991) 


Epoch 17 - avg_train_loss: 0.1469  avg_val_loss: 0.2353  time: 7s
Epoch 17 - Score: 0.2420


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1634(0.2353) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0946(0.0946) Grad: 1.6823  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1151(0.1418) Grad: 0.8824  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1419(0.1433) Grad: 1.0111  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2372(0.2372) 


Epoch 18 - avg_train_loss: 0.1433  avg_val_loss: 0.2538  time: 7s
Epoch 18 - Score: 0.2606


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1480(0.2538) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1520(0.1520) Grad: 3.8511  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1747(0.1611) Grad: 1.2420  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1352(0.1619) Grad: 3.2473  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2237(0.2237) 


Epoch 19 - avg_train_loss: 0.1619  avg_val_loss: 0.2688  time: 7s
Epoch 19 - Score: 0.2791


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1676(0.2688) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1623(0.1623) Grad: 0.8528  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1732(0.1521) Grad: 3.0805  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1932(0.1576) Grad: 2.2704  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2071(0.2071) 


Epoch 20 - avg_train_loss: 0.1576  avg_val_loss: 0.2340  time: 7s
Epoch 20 - Score: 0.2402


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1528(0.2340) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1514(0.1514) Grad: 3.0539  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1516(0.1459) Grad: 2.4168  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1309(0.1284) Grad: 0.8511  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1789(0.1789) 


Epoch 21 - avg_train_loss: 0.1284  avg_val_loss: 0.2243  time: 7s
Epoch 21 - Score: 0.2364


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1248(0.2243) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0994(0.0994) Grad: 2.2497  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1227(0.1071) Grad: 1.1753  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1304(0.1084) Grad: 0.5422  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1713(0.1713) 


Epoch 22 - avg_train_loss: 0.1084  avg_val_loss: 0.2203  time: 7s
Epoch 22 - Score: 0.2322
Epoch 22 - Save Best Score: 0.2322 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1307(0.2203) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1486(0.1486) Grad: 0.8316  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1662(0.1226) Grad: 0.8763  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0566(0.1070) Grad: 0.6411  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1756(0.1756) 


Epoch 23 - avg_train_loss: 0.1070  avg_val_loss: 0.2190  time: 7s
Epoch 23 - Score: 0.2305
Epoch 23 - Save Best Score: 0.2305 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1230(0.2190) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0729(0.0729) Grad: 2.1141  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1109(0.1015) Grad: 3.7591  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1155(0.1136) Grad: 4.2662  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2067(0.2067) 


Epoch 24 - avg_train_loss: 0.1136  avg_val_loss: 0.2333  time: 7s
Epoch 24 - Score: 0.2390


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1563(0.2333) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1352(0.1352) Grad: 3.4110  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1533(0.1367) Grad: 1.2783  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0907(0.1260) Grad: 3.2476  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1668(0.1668) 


Epoch 25 - avg_train_loss: 0.1260  avg_val_loss: 0.2082  time: 7s
Epoch 25 - Score: 0.2184
Epoch 25 - Save Best Score: 0.2184 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1223(0.2082) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0925(0.0925) Grad: 0.5943  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1320(0.1256) Grad: 2.4594  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1432(0.1229) Grad: 1.7033  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1750(0.1750) 


Epoch 26 - avg_train_loss: 0.1229  avg_val_loss: 0.2202  time: 7s
Epoch 26 - Score: 0.2313


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1300(0.2202) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1216(0.1216) Grad: 0.5657  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0784(0.0953) Grad: 3.6011  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0796(0.0948) Grad: 0.8045  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1726(0.1726) 


Epoch 27 - avg_train_loss: 0.0948  avg_val_loss: 0.2294  time: 7s
Epoch 27 - Score: 0.2439


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1306(0.2294) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0834(0.0834) Grad: 1.5859  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0831(0.0778) Grad: 1.7968  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0876(0.0797) Grad: 0.8547  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1623(0.1623) 


Epoch 28 - avg_train_loss: 0.0797  avg_val_loss: 0.2086  time: 7s
Epoch 28 - Score: 0.2199


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1238(0.2086) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0924(0.0924) Grad: 1.7669  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0764(0.0822) Grad: 2.0934  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0714(0.0798) Grad: 2.6447  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1504(0.1504) 


Epoch 29 - avg_train_loss: 0.0798  avg_val_loss: 0.2007  time: 7s
Epoch 29 - Score: 0.2121
Epoch 29 - Save Best Score: 0.2121 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1263(0.2007) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0472(0.0472) Grad: 1.4052  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0699(0.0754) Grad: 1.6565  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0728(0.0847) Grad: 0.8770  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1639(0.1639) 


Epoch 30 - avg_train_loss: 0.0847  avg_val_loss: 0.2133  time: 7s
Epoch 30 - Score: 0.2206


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1701(0.2133) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0774(0.0774) Grad: 0.8615  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0685(0.0880) Grad: 2.9126  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1142(0.0964) Grad: 1.2193  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1836(0.1836) 


Epoch 31 - avg_train_loss: 0.0964  avg_val_loss: 0.2418  time: 7s
Epoch 31 - Score: 0.2549


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1511(0.2418) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1144(0.1144) Grad: 2.6433  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0993(0.0979) Grad: 2.2595  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1034(0.0930) Grad: 1.9983  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1471(0.1471) 


Epoch 32 - avg_train_loss: 0.0930  avg_val_loss: 0.2186  time: 7s
Epoch 32 - Score: 0.2354


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1399(0.2186) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0909(0.0909) Grad: 0.6948  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0839(0.0837) Grad: 0.6477  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0727(0.0802) Grad: 1.2417  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1573(0.1573) 


Epoch 33 - avg_train_loss: 0.0802  avg_val_loss: 0.2140  time: 7s
Epoch 33 - Score: 0.2297


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1141(0.2140) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1218(0.1218) Grad: 1.9853  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0676(0.0708) Grad: 1.2488  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0530(0.0643) Grad: 0.4820  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1500(0.1500) 


Epoch 34 - avg_train_loss: 0.0643  avg_val_loss: 0.2138  time: 7s
Epoch 34 - Score: 0.2314


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1151(0.2138) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0464(0.0464) Grad: 0.8479  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0702(0.0622) Grad: 0.5243  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1250(0.0688) Grad: 1.0507  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1524(0.1524) 


Epoch 35 - avg_train_loss: 0.0688  avg_val_loss: 0.2175  time: 7s
Epoch 35 - Score: 0.2310


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1507(0.2175) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0616(0.0616) Grad: 2.5757  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1299(0.0820) Grad: 0.8891  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0696(0.0822) Grad: 1.7908  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1611(0.1611) 


Epoch 36 - avg_train_loss: 0.0822  avg_val_loss: 0.2049  time: 7s
Epoch 36 - Score: 0.2155


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1216(0.2049) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0947(0.0947) Grad: 2.3164  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0932(0.0835) Grad: 0.6046  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0892(0.0890) Grad: 2.0224  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1635(0.1635) 


Epoch 37 - avg_train_loss: 0.0890  avg_val_loss: 0.2040  time: 7s
Epoch 37 - Score: 0.2132


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1270(0.2040) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0969(0.0969) Grad: 3.6847  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0773(0.0949) Grad: 2.7393  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0777(0.0902) Grad: 1.8756  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1576(0.1576) 


Epoch 38 - avg_train_loss: 0.0902  avg_val_loss: 0.2205  time: 7s
Epoch 38 - Score: 0.2337


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1497(0.2205) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0975(0.0975) Grad: 2.8914  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0615(0.0698) Grad: 0.8005  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0560(0.0743) Grad: 2.4906  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1419(0.1419) 


Epoch 39 - avg_train_loss: 0.0743  avg_val_loss: 0.2009  time: 7s
Epoch 39 - Score: 0.2124


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1445(0.2009) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0590(0.0590) Grad: 1.6144  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0526(0.0706) Grad: 0.8377  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0566(0.0628) Grad: 1.2588  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1369(0.1369) 


Epoch 40 - avg_train_loss: 0.0628  avg_val_loss: 0.2084  time: 7s
Epoch 40 - Score: 0.2243


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1421(0.2084) 


Score: 0.2121


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 3.2334(3.2334) Grad: 3.1831  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7982(1.5908) Grad: 3.1249  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5437(1.1992) Grad: 1.5750  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6527(0.6527) 


Epoch 1 - avg_train_loss: 1.1992  avg_val_loss: 0.6640  time: 7s
Epoch 1 - Score: 0.6648
Epoch 1 - Save Best Score: 0.6648 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6063(0.6640) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.6083(0.6083) Grad: 1.2044  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6594(0.5597) Grad: 0.8498  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5724(0.5670) Grad: 1.4136  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6391(0.6391) 


Epoch 2 - avg_train_loss: 0.5670  avg_val_loss: 0.5868  time: 7s
Epoch 2 - Score: 0.5927
Epoch 2 - Save Best Score: 0.5927 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4032(0.5868) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5651(0.5651) Grad: 1.3052  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5605(0.5547) Grad: 1.2316  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3983(0.5162) Grad: 1.6765  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6163(0.6163) 


Epoch 3 - avg_train_loss: 0.5162  avg_val_loss: 0.5745  time: 7s
Epoch 3 - Score: 0.5790
Epoch 3 - Save Best Score: 0.5790 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4168(0.5745) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4924(0.4924) Grad: 4.0784  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4353(0.4942) Grad: 3.8244  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5012(0.4941) Grad: 3.2953  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5906(0.5906) 


Epoch 4 - avg_train_loss: 0.4941  avg_val_loss: 0.5376  time: 7s
Epoch 4 - Score: 0.5436
Epoch 4 - Save Best Score: 0.5436 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3614(0.5376) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4939(0.4939) Grad: 2.4484  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5980(0.5258) Grad: 1.9809  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4398(0.5051) Grad: 2.2580  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5707(0.5707) 


Epoch 5 - avg_train_loss: 0.5051  avg_val_loss: 0.4967  time: 7s
Epoch 5 - Score: 0.5068
Epoch 5 - Save Best Score: 0.5068 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2816(0.4967) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4397(0.4397) Grad: 2.0111  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4134(0.4586) Grad: 3.4106  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3473(0.4430) Grad: 1.5794  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4846(0.4846) 


Epoch 6 - avg_train_loss: 0.4430  avg_val_loss: 0.4425  time: 7s
Epoch 6 - Score: 0.4488
Epoch 6 - Save Best Score: 0.4488 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2782(0.4425) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3538(0.3538) Grad: 2.6262  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3348(0.3531) Grad: 1.6542  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4446(0.3604) Grad: 2.2222  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4393(0.4393) 


Epoch 7 - avg_train_loss: 0.3604  avg_val_loss: 0.3878  time: 7s
Epoch 7 - Score: 0.3947
Epoch 7 - Save Best Score: 0.3947 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2292(0.3878) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2729(0.2729) Grad: 1.4862  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2844(0.2975) Grad: 0.5787  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2304(0.2948) Grad: 2.1734  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4106(0.4106) 


Epoch 8 - avg_train_loss: 0.2948  avg_val_loss: 0.3560  time: 7s
Epoch 8 - Score: 0.3611
Epoch 8 - Save Best Score: 0.3611 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2377(0.3560) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1880(0.1880) Grad: 0.8198  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2469(0.2530) Grad: 2.0746  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1673(0.2502) Grad: 1.5043  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4037(0.4037) 


Epoch 9 - avg_train_loss: 0.2502  avg_val_loss: 0.3459  time: 7s
Epoch 9 - Score: 0.3518
Epoch 9 - Save Best Score: 0.3518 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2194(0.3459) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3169(0.3169) Grad: 1.1189  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2066(0.2399) Grad: 0.9492  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2616(0.2360) Grad: 0.7677  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4094(0.4094) 


Epoch 10 - avg_train_loss: 0.2360  avg_val_loss: 0.3446  time: 7s
Epoch 10 - Score: 0.3519


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2039(0.3446) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2329(0.2329) Grad: 0.5558  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2096(0.2345) Grad: 2.0698  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3133(0.2421) Grad: 1.4323  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3852(0.3852) 


Epoch 11 - avg_train_loss: 0.2421  avg_val_loss: 0.3360  time: 7s
Epoch 11 - Score: 0.3421
Epoch 11 - Save Best Score: 0.3421 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2003(0.3360) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1688(0.1688) Grad: 2.3809  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1815(0.2169) Grad: 1.7148  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2027(0.2128) Grad: 2.1555  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3453(0.3453) 


Epoch 12 - avg_train_loss: 0.2128  avg_val_loss: 0.3006  time: 7s
Epoch 12 - Score: 0.3048
Epoch 12 - Save Best Score: 0.3048 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1990(0.3006) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1942(0.1942) Grad: 1.3054  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1893(0.2252) Grad: 1.0298  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1720(0.2075) Grad: 2.3457  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3298(0.3298) 


Epoch 13 - avg_train_loss: 0.2075  avg_val_loss: 0.2786  time: 7s
Epoch 13 - Score: 0.2834
Epoch 13 - Save Best Score: 0.2834 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1831(0.2786) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1723(0.1723) Grad: 0.9768  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2037(0.1796) Grad: 4.3495  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1833(0.1805) Grad: 3.8868  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3458(0.3458) 


Epoch 14 - avg_train_loss: 0.1805  avg_val_loss: 0.2778  time: 7s
Epoch 14 - Score: 0.2862


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1517(0.2778) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1312(0.1312) Grad: 1.8215  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2048(0.1435) Grad: 1.1908  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1881(0.1447) Grad: 1.2407  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3165(0.3165) 


Epoch 15 - avg_train_loss: 0.1447  avg_val_loss: 0.2666  time: 7s
Epoch 15 - Score: 0.2735
Epoch 15 - Save Best Score: 0.2735 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1397(0.2666) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1202(0.1202) Grad: 2.1142  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1030(0.1358) Grad: 0.6426  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1534(0.1361) Grad: 0.8920  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3187(0.3187) 


Epoch 16 - avg_train_loss: 0.1361  avg_val_loss: 0.2551  time: 7s
Epoch 16 - Score: 0.2660
Epoch 16 - Save Best Score: 0.2660 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1017(0.2551) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1341(0.1341) Grad: 0.8030  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1404(0.1277) Grad: 0.8598  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1960(0.1284) Grad: 1.4882  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3497(0.3497) 


Epoch 17 - avg_train_loss: 0.1284  avg_val_loss: 0.2781  time: 7s
Epoch 17 - Score: 0.2882


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1330(0.2781) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0992(0.0992) Grad: 0.5345  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1577(0.1367) Grad: 4.1515  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1605(0.1526) Grad: 4.4386  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3928(0.3928) 


Epoch 18 - avg_train_loss: 0.1526  avg_val_loss: 0.3358  time: 7s
Epoch 18 - Score: 0.3402


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2422(0.3358) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2334(0.2334) Grad: 5.0276  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1981(0.2114) Grad: 1.5466  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1089(0.1856) Grad: 1.9456  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3280(0.3280) 


Epoch 19 - avg_train_loss: 0.1856  avg_val_loss: 0.2652  time: 7s
Epoch 19 - Score: 0.2724


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1543(0.2652) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1930(0.1930) Grad: 2.1836  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1191(0.1415) Grad: 1.2998  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0912(0.1290) Grad: 1.1779  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3564(0.3564) 


Epoch 20 - avg_train_loss: 0.1290  avg_val_loss: 0.2790  time: 7s
Epoch 20 - Score: 0.2890


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1465(0.2790) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0973(0.0973) Grad: 2.8824  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0940(0.1092) Grad: 1.5521  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1364(0.1134) Grad: 0.9309  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3407(0.3407) 


Epoch 21 - avg_train_loss: 0.1134  avg_val_loss: 0.2476  time: 7s
Epoch 21 - Score: 0.2614
Epoch 21 - Save Best Score: 0.2614 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1207(0.2476) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1506(0.1506) Grad: 0.7859  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1011(0.1115) Grad: 2.6078  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1118(0.1042) Grad: 1.7198  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3596(0.3596) 


Epoch 22 - avg_train_loss: 0.1042  avg_val_loss: 0.2681  time: 7s
Epoch 22 - Score: 0.2813


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1292(0.2681) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1274(0.1274) Grad: 0.8267  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0945(0.1186) Grad: 3.8225  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1410(0.1059) Grad: 0.6000  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3504(0.3504) 


Epoch 23 - avg_train_loss: 0.1059  avg_val_loss: 0.2533  time: 7s
Epoch 23 - Score: 0.2697


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.0950(0.2533) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1166(0.1166) Grad: 0.7242  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1192(0.1125) Grad: 1.4876  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1000(0.1123) Grad: 3.9263  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3590(0.3590) 


Epoch 24 - avg_train_loss: 0.1123  avg_val_loss: 0.2852  time: 7s
Epoch 24 - Score: 0.2937


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1657(0.2852) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1308(0.1308) Grad: 3.1820  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1358(0.1178) Grad: 3.7082  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1423(0.1214) Grad: 2.0376  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3477(0.3477) 


Epoch 25 - avg_train_loss: 0.1214  avg_val_loss: 0.2672  time: 7s
Epoch 25 - Score: 0.2770


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1535(0.2672) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1386(0.1386) Grad: 2.9555  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0870(0.1196) Grad: 2.3073  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1102(0.1192) Grad: 2.1183  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3190(0.3190) 


Epoch 26 - avg_train_loss: 0.1192  avg_val_loss: 0.2575  time: 7s
Epoch 26 - Score: 0.2657


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1306(0.2575) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1053(0.1053) Grad: 0.8812  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1044(0.0962) Grad: 1.9613  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0980(0.0917) Grad: 1.5426  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3353(0.3353) 


Epoch 27 - avg_train_loss: 0.0917  avg_val_loss: 0.2478  time: 7s
Epoch 27 - Score: 0.2599
Epoch 27 - Save Best Score: 0.2599 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1316(0.2478) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0714(0.0714) Grad: 0.4848  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0550(0.0815) Grad: 0.5996  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0592(0.0745) Grad: 1.5254  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3319(0.3319) 


Epoch 28 - avg_train_loss: 0.0745  avg_val_loss: 0.2411  time: 7s
Epoch 28 - Score: 0.2540
Epoch 28 - Save Best Score: 0.2540 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1290(0.2411) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1028(0.1028) Grad: 0.7578  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0975(0.0832) Grad: 0.7882  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0647(0.0802) Grad: 0.9394  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3176(0.3176) 


Epoch 29 - avg_train_loss: 0.0802  avg_val_loss: 0.2413  time: 7s
Epoch 29 - Score: 0.2511
Epoch 29 - Save Best Score: 0.2511 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1339(0.2413) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0824(0.0824) Grad: 1.1910  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0975(0.0800) Grad: 1.9928  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0524(0.0817) Grad: 1.3393  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3602(0.3602) 


Epoch 30 - avg_train_loss: 0.0817  avg_val_loss: 0.2686  time: 7s
Epoch 30 - Score: 0.2810


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1440(0.2686) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0733(0.0733) Grad: 2.4744  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1011(0.0861) Grad: 3.6486  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1251(0.0898) Grad: 1.3215  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3419(0.3419) 


Epoch 31 - avg_train_loss: 0.0898  avg_val_loss: 0.2480  time: 7s
Epoch 31 - Score: 0.2618


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1237(0.2480) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0942(0.0942) Grad: 0.6380  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0756(0.0871) Grad: 1.6038  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0890(0.0876) Grad: 3.9321  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3257(0.3257) 


Epoch 32 - avg_train_loss: 0.0876  avg_val_loss: 0.2507  time: 7s
Epoch 32 - Score: 0.2589


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1691(0.2507) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0754(0.0754) Grad: 0.9645  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0633(0.0803) Grad: 1.2892  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0586(0.0772) Grad: 0.9961  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3349(0.3349) 


Epoch 33 - avg_train_loss: 0.0772  avg_val_loss: 0.2534  time: 7s
Epoch 33 - Score: 0.2639


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1422(0.2534) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0479(0.0479) Grad: 2.6953  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0605(0.0704) Grad: 2.5093  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0584(0.0673) Grad: 0.7481  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3328(0.3328) 


Epoch 34 - avg_train_loss: 0.0673  avg_val_loss: 0.2430  time: 7s
Epoch 34 - Score: 0.2558


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1270(0.2430) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0500(0.0500) Grad: 0.9003  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0586(0.0827) Grad: 4.4857  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0748(0.0746) Grad: 2.8029  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3395(0.3395) 


Epoch 35 - avg_train_loss: 0.0746  avg_val_loss: 0.2595  time: 7s
Epoch 35 - Score: 0.2696


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1441(0.2595) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0563(0.0563) Grad: 3.3163  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0695(0.0587) Grad: 1.5002  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0679(0.0682) Grad: 2.4704  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3269(0.3269) 


Epoch 36 - avg_train_loss: 0.0682  avg_val_loss: 0.2486  time: 7s
Epoch 36 - Score: 0.2575


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1652(0.2486) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0958(0.0958) Grad: 2.9293  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1047(0.0839) Grad: 1.6936  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0842(0.0894) Grad: 3.3614  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3302(0.3302) 


Epoch 37 - avg_train_loss: 0.0894  avg_val_loss: 0.2362  time: 7s
Epoch 37 - Score: 0.2496
Epoch 37 - Save Best Score: 0.2496 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1385(0.2362) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1142(0.1142) Grad: 3.4162  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0741(0.0885) Grad: 2.8837  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0615(0.0881) Grad: 1.9444  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3531(0.3531) 


Epoch 38 - avg_train_loss: 0.0881  avg_val_loss: 0.2546  time: 7s
Epoch 38 - Score: 0.2685


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1451(0.2546) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0702(0.0702) Grad: 1.1107  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0532(0.0641) Grad: 1.0233  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0675(0.0654) Grad: 3.1166  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3187(0.3187) 


Epoch 39 - avg_train_loss: 0.0654  avg_val_loss: 0.2336  time: 7s
Epoch 39 - Score: 0.2455
Epoch 39 - Save Best Score: 0.2455 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1249(0.2336) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0516(0.0516) Grad: 1.0027  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0491(0.0522) Grad: 2.8297  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0409(0.0560) Grad: 1.3452  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3410(0.3410) 


Epoch 40 - avg_train_loss: 0.0560  avg_val_loss: 0.2452  time: 7s
Epoch 40 - Score: 0.2593


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1288(0.2452) 


Score: 0.2455
Score: 0.2030


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 3.3776(3.3776) Grad: 3.2629  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7759(1.6079) Grad: 2.7453  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5932(1.2053) Grad: 0.3577  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6675(0.6675) 


Epoch 1 - avg_train_loss: 1.2053  avg_val_loss: 0.6291  time: 7s
Epoch 1 - Score: 0.6303
Epoch 1 - Save Best Score: 0.6303 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5566(0.6291) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6212(0.6212) Grad: 0.9696  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5472(0.5909) Grad: 0.4775  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6086(0.5954) Grad: 1.0187  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.7064(0.7064) 


Epoch 2 - avg_train_loss: 0.5954  avg_val_loss: 0.6913  time: 7s
Epoch 2 - Score: 0.6915


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6606(0.6913) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5222(0.5222) Grad: 3.3712  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6366(0.5742) Grad: 1.1196  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5202(0.5512) Grad: 1.7941  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5020(0.5020) 


Epoch 3 - avg_train_loss: 0.5512  avg_val_loss: 0.4875  time: 7s
Epoch 3 - Score: 0.4882
Epoch 3 - Save Best Score: 0.4882 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4276(0.4875) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5710(0.5710) Grad: 2.7276  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4854(0.4967) Grad: 0.6083  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5257(0.4925) Grad: 1.1447  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4759(0.4759) 


Epoch 4 - avg_train_loss: 0.4925  avg_val_loss: 0.4634  time: 7s
Epoch 4 - Score: 0.4641
Epoch 4 - Save Best Score: 0.4641 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4072(0.4634) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3491(0.3491) Grad: 1.5348  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3572(0.4533) Grad: 1.8405  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5260(0.4603) Grad: 1.4643  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4170(0.4170) 


Epoch 5 - avg_train_loss: 0.4603  avg_val_loss: 0.4080  time: 7s
Epoch 5 - Score: 0.4086
Epoch 5 - Save Best Score: 0.4086 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3596(0.4080) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3282(0.3282) Grad: 1.2975  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3707(0.3959) Grad: 0.9684  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3200(0.3843) Grad: 1.3980  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4455(0.4455) 


Epoch 6 - avg_train_loss: 0.3843  avg_val_loss: 0.4442  time: 7s
Epoch 6 - Score: 0.4443


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4251(0.4442) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4281(0.4281) Grad: 3.7427  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3933(0.3814) Grad: 3.4860  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3168(0.3575) Grad: 1.7424  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3732(0.3732) 


Epoch 7 - avg_train_loss: 0.3575  avg_val_loss: 0.3983  time: 7s
Epoch 7 - Score: 0.3995
Epoch 7 - Save Best Score: 0.3995 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3704(0.3983) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2720(0.2720) Grad: 2.3311  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2578(0.3004) Grad: 2.5348  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1946(0.2865) Grad: 1.1590  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2817(0.2817) 


Epoch 8 - avg_train_loss: 0.2865  avg_val_loss: 0.2837  time: 7s
Epoch 8 - Score: 0.2855
Epoch 8 - Save Best Score: 0.2855 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2194(0.2837) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2120(0.2120) Grad: 1.8753  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3103(0.2262) Grad: 1.4385  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1952(0.2356) Grad: 1.5129  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2829(0.2829) 


Epoch 9 - avg_train_loss: 0.2356  avg_val_loss: 0.2861  time: 7s
Epoch 9 - Score: 0.2868


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2488(0.2861) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2070(0.2070) Grad: 2.3489  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2802(0.2331) Grad: 0.5742  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2607(0.2185) Grad: 1.6135  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2757(0.2757) 


Epoch 10 - avg_train_loss: 0.2185  avg_val_loss: 0.2848  time: 7s
Epoch 10 - Score: 0.2861


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2376(0.2848) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1988(0.1988) Grad: 1.3618  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2041(0.2248) Grad: 1.5710  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2143(0.2178) Grad: 1.1337  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2758(0.2758) 


Epoch 11 - avg_train_loss: 0.2178  avg_val_loss: 0.2903  time: 7s
Epoch 11 - Score: 0.2927


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2284(0.2903) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1898(0.1898) Grad: 2.7281  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1243(0.2074) Grad: 2.5369  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2625(0.2139) Grad: 2.1275  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2467(0.2467) 


Epoch 12 - avg_train_loss: 0.2139  avg_val_loss: 0.2474  time: 7s
Epoch 12 - Score: 0.2483
Epoch 12 - Save Best Score: 0.2483 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2040(0.2474) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1517(0.1517) Grad: 1.4595  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2320(0.2231) Grad: 2.8663  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3140(0.2314) Grad: 3.1945  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2830(0.2830) 


Epoch 13 - avg_train_loss: 0.2314  avg_val_loss: 0.3014  time: 7s
Epoch 13 - Score: 0.3022


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2812(0.3014) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3267(0.3267) Grad: 3.6489  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2509(0.2161) Grad: 2.8833  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1286(0.2040) Grad: 2.4222  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2454(0.2454) 


Epoch 14 - avg_train_loss: 0.2040  avg_val_loss: 0.2559  time: 7s
Epoch 14 - Score: 0.2571


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2165(0.2559) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1504(0.1504) Grad: 2.3894  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1753(0.1768) Grad: 0.9940  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1468(0.1641) Grad: 3.5275  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2215(0.2215) 


Epoch 15 - avg_train_loss: 0.1641  avg_val_loss: 0.2403  time: 7s
Epoch 15 - Score: 0.2423
Epoch 15 - Save Best Score: 0.2423 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1991(0.2403) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1512(0.1512) Grad: 0.9600  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1646(0.1352) Grad: 0.6536  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1524(0.1436) Grad: 1.3069  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1957(0.1957) 


Epoch 16 - avg_train_loss: 0.1436  avg_val_loss: 0.2163  time: 7s
Epoch 16 - Score: 0.2182
Epoch 16 - Save Best Score: 0.2182 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1846(0.2163) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2406(0.2406) Grad: 1.8193  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1301(0.1606) Grad: 0.9617  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1080(0.1547) Grad: 1.6098  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2009(0.2009) 


Epoch 17 - avg_train_loss: 0.1547  avg_val_loss: 0.2149  time: 7s
Epoch 17 - Score: 0.2174
Epoch 17 - Save Best Score: 0.2174 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1615(0.2149) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1469(0.1469) Grad: 1.9343  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1396(0.1585) Grad: 4.1934  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1429(0.1567) Grad: 0.9170  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2205(0.2205) 


Epoch 18 - avg_train_loss: 0.1567  avg_val_loss: 0.2365  time: 7s
Epoch 18 - Score: 0.2374


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2162(0.2365) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1755(0.1755) Grad: 1.5356  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1569(0.1568) Grad: 1.2657  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1203(0.1535) Grad: 2.5838  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1696(0.1696) 


Epoch 19 - avg_train_loss: 0.1535  avg_val_loss: 0.1964  time: 7s
Epoch 19 - Score: 0.1980
Epoch 19 - Save Best Score: 0.1980 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1941(0.1964) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1011(0.1011) Grad: 3.8435  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1008(0.1373) Grad: 1.2785  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1812(0.1371) Grad: 0.6696  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1821(0.1821) 


Epoch 20 - avg_train_loss: 0.1371  avg_val_loss: 0.2057  time: 7s
Epoch 20 - Score: 0.2089


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1599(0.2057) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1124(0.1124) Grad: 3.5760  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0839(0.1215) Grad: 0.6313  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0949(0.1142) Grad: 2.4654  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1801(0.1801) 


Epoch 21 - avg_train_loss: 0.1142  avg_val_loss: 0.1834  time: 7s
Epoch 21 - Score: 0.1858
Epoch 21 - Save Best Score: 0.1858 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1248(0.1834) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0803(0.0803) Grad: 0.9079  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0825(0.0989) Grad: 1.6345  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1354(0.1022) Grad: 1.6127  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1723(0.1723) 


Epoch 22 - avg_train_loss: 0.1022  avg_val_loss: 0.1896  time: 7s
Epoch 22 - Score: 0.1914


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1565(0.1896) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0821(0.0821) Grad: 1.4455  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1489(0.1056) Grad: 1.7709  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1203(0.1017) Grad: 0.8433  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1736(0.1736) 


Epoch 23 - avg_train_loss: 0.1017  avg_val_loss: 0.1884  time: 7s
Epoch 23 - Score: 0.1900


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1553(0.1884) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1155(0.1155) Grad: 1.3071  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1079(0.1091) Grad: 1.8430  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1251(0.1116) Grad: 2.8402  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1713(0.1713) 


Epoch 24 - avg_train_loss: 0.1116  avg_val_loss: 0.1867  time: 7s
Epoch 24 - Score: 0.1883


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1555(0.1867) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0959(0.0959) Grad: 1.9254  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1249(0.1495) Grad: 2.9142  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1387(0.1419) Grad: 1.3157  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2133(0.2133) 


Epoch 25 - avg_train_loss: 0.1419  avg_val_loss: 0.2368  time: 7s
Epoch 25 - Score: 0.2376


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2581(0.2368) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1504(0.1504) Grad: 1.0156  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1169(0.1447) Grad: 1.8362  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1173(0.1350) Grad: 0.8095  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1682(0.1682) 


Epoch 26 - avg_train_loss: 0.1350  avg_val_loss: 0.2150  time: 7s
Epoch 26 - Score: 0.2186


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2396(0.2150) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0949(0.0949) Grad: 2.4571  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0684(0.1093) Grad: 2.6966  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0862(0.0964) Grad: 0.7395  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1498(0.1498) 


Epoch 27 - avg_train_loss: 0.0964  avg_val_loss: 0.1793  time: 7s
Epoch 27 - Score: 0.1821
Epoch 27 - Save Best Score: 0.1821 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1595(0.1793) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1191(0.1191) Grad: 1.2229  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0745(0.0900) Grad: 1.9047  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0676(0.0827) Grad: 1.2334  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1581(0.1581) 


Epoch 28 - avg_train_loss: 0.0827  avg_val_loss: 0.1818  time: 7s
Epoch 28 - Score: 0.1837


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1649(0.1818) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0778(0.0778) Grad: 0.8030  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0699(0.0792) Grad: 1.0077  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0690(0.0868) Grad: 2.0379  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1452(0.1452) 


Epoch 29 - avg_train_loss: 0.0868  avg_val_loss: 0.1795  time: 7s
Epoch 29 - Score: 0.1821
Epoch 29 - Save Best Score: 0.1821 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1831(0.1795) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0872(0.0872) Grad: 0.9018  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0728(0.0846) Grad: 1.4947  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1534(0.0861) Grad: 2.1766  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1758(0.1758) 


Epoch 30 - avg_train_loss: 0.0861  avg_val_loss: 0.1857  time: 7s
Epoch 30 - Score: 0.1861


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1722(0.1857) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0941(0.0941) Grad: 1.5048  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1121(0.1033) Grad: 4.8717  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1416(0.1125) Grad: 4.6496  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2413(0.2413) 


Epoch 31 - avg_train_loss: 0.1125  avg_val_loss: 0.2506  time: 7s
Epoch 31 - Score: 0.2515


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2145(0.2506) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1615(0.1615) Grad: 4.4674  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1615(0.1135) Grad: 2.4457  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0907(0.1057) Grad: 3.0210  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1736(0.1736) 


Epoch 32 - avg_train_loss: 0.1057  avg_val_loss: 0.1926  time: 7s
Epoch 32 - Score: 0.1935


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1858(0.1926) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0704(0.0704) Grad: 1.0117  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0874(0.0937) Grad: 0.9807  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0765(0.0875) Grad: 2.8217  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1502(0.1502) 


Epoch 33 - avg_train_loss: 0.0875  avg_val_loss: 0.1748  time: 7s
Epoch 33 - Score: 0.1765
Epoch 33 - Save Best Score: 0.1765 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1669(0.1748) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0977(0.0977) Grad: 0.8136  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0604(0.0757) Grad: 1.2708  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0639(0.0680) Grad: 0.6494  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1570(0.1570) 


Epoch 34 - avg_train_loss: 0.0680  avg_val_loss: 0.1785  time: 7s
Epoch 34 - Score: 0.1803


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1577(0.1785) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0664(0.0664) Grad: 0.6383  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0578(0.0694) Grad: 2.5123  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0702(0.0711) Grad: 1.6375  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1558(0.1558) 


Epoch 35 - avg_train_loss: 0.0711  avg_val_loss: 0.1749  time: 7s
Epoch 35 - Score: 0.1758
Epoch 35 - Save Best Score: 0.1758 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1715(0.1749) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0597(0.0597) Grad: 0.5704  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0660(0.0744) Grad: 1.3097  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0661(0.0711) Grad: 2.6816  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1546(0.1546) 


Epoch 36 - avg_train_loss: 0.0711  avg_val_loss: 0.1784  time: 7s
Epoch 36 - Score: 0.1801


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1660(0.1784) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0833(0.0833) Grad: 1.2471  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0819(0.0834) Grad: 4.1243  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0898(0.0836) Grad: 1.1753  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1610(0.1610) 


Epoch 37 - avg_train_loss: 0.0836  avg_val_loss: 0.1761  time: 7s
Epoch 37 - Score: 0.1776


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1484(0.1761) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0728(0.0728) Grad: 3.3542  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1163(0.0867) Grad: 1.9210  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0659(0.0852) Grad: 2.3610  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1506(0.1506) 


Epoch 38 - avg_train_loss: 0.0852  avg_val_loss: 0.1756  time: 8s
Epoch 38 - Score: 0.1769


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1873(0.1756) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0689(0.0689) Grad: 1.7668  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0679(0.0707) Grad: 3.3758  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0547(0.0662) Grad: 0.9805  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1506(0.1506) 


Epoch 39 - avg_train_loss: 0.0662  avg_val_loss: 0.1741  time: 7s
Epoch 39 - Score: 0.1755
Epoch 39 - Save Best Score: 0.1755 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1680(0.1741) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0516(0.0516) Grad: 1.1185  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0617(0.0608) Grad: 1.1401  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0490(0.0582) Grad: 2.2825  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1540(0.1540) 


Epoch 40 - avg_train_loss: 0.0582  avg_val_loss: 0.1723  time: 7s
Epoch 40 - Score: 0.1739
Epoch 40 - Save Best Score: 0.1739 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1483(0.1723) 


Score: 0.1739


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.3665(3.3665) Grad: 3.2485  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.8477(1.5727) Grad: 2.2656  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5196(1.1640) Grad: 1.0256  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6503(0.6503) 


Epoch 1 - avg_train_loss: 1.1640  avg_val_loss: 0.6266  time: 7s
Epoch 1 - Score: 0.6277
Epoch 1 - Save Best Score: 0.6277 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6753(0.6266) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4964(0.4964) Grad: 0.2085  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4617(0.5908) Grad: 0.3393  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5728(0.5862) Grad: 2.1879  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5760(0.5760) 


Epoch 2 - avg_train_loss: 0.5862  avg_val_loss: 0.5803  time: 7s
Epoch 2 - Score: 0.5817
Epoch 2 - Save Best Score: 0.5817 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6658(0.5803) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4851(0.4851) Grad: 2.9598  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4620(0.5137) Grad: 2.5621  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3820(0.4927) Grad: 0.9389  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4755(0.4755) 


Epoch 3 - avg_train_loss: 0.4927  avg_val_loss: 0.4534  time: 7s
Epoch 3 - Score: 0.4539
Epoch 3 - Save Best Score: 0.4539 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4612(0.4534) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4388(0.4388) Grad: 1.5659  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4076(0.4257) Grad: 0.6649  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5352(0.4357) Grad: 2.1383  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4728(0.4728) 


Epoch 4 - avg_train_loss: 0.4357  avg_val_loss: 0.4474  time: 7s
Epoch 4 - Score: 0.4483
Epoch 4 - Save Best Score: 0.4483 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4664(0.4474) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5556(0.5556) Grad: 1.7508  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3997(0.4359) Grad: 1.8776  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4650(0.4405) Grad: 1.3025  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4170(0.4170) 


Epoch 5 - avg_train_loss: 0.4405  avg_val_loss: 0.3959  time: 7s
Epoch 5 - Score: 0.3963
Epoch 5 - Save Best Score: 0.3963 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3949(0.3959) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4820(0.4820) Grad: 1.2770  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5097(0.3961) Grad: 1.6622  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3032(0.3755) Grad: 0.9127  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3665(0.3665) 


Epoch 6 - avg_train_loss: 0.3755  avg_val_loss: 0.3737  time: 7s
Epoch 6 - Score: 0.3739
Epoch 6 - Save Best Score: 0.3739 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4020(0.3737) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3152(0.3152) Grad: 2.2008  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3952(0.3722) Grad: 0.8549  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3416(0.3750) Grad: 0.6741  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3785(0.3785) 


Epoch 7 - avg_train_loss: 0.3750  avg_val_loss: 0.3518  time: 7s
Epoch 7 - Score: 0.3526
Epoch 7 - Save Best Score: 0.3526 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3130(0.3518) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3613(0.3613) Grad: 0.8711  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3353(0.3239) Grad: 1.5447  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3001(0.3213) Grad: 1.2995  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3149(0.3149) 


Epoch 8 - avg_train_loss: 0.3213  avg_val_loss: 0.3127  time: 7s
Epoch 8 - Score: 0.3128
Epoch 8 - Save Best Score: 0.3128 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3289(0.3127) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3545(0.3545) Grad: 1.2062  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2772(0.2759) Grad: 1.0681  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2377(0.2728) Grad: 2.7435  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2675(0.2675) 


Epoch 9 - avg_train_loss: 0.2728  avg_val_loss: 0.2748  time: 7s
Epoch 9 - Score: 0.2749
Epoch 9 - Save Best Score: 0.2749 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2914(0.2748) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2896(0.2896) Grad: 1.8659  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2855(0.2688) Grad: 1.0619  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2516(0.2520) Grad: 0.8766  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2706(0.2706) 


Epoch 10 - avg_train_loss: 0.2520  avg_val_loss: 0.2664  time: 7s
Epoch 10 - Score: 0.2665
Epoch 10 - Save Best Score: 0.2665 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2591(0.2664) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2023(0.2023) Grad: 0.4952  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2354(0.2649) Grad: 1.0531  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2675(0.2489) Grad: 0.5408  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2546(0.2546) 


Epoch 11 - avg_train_loss: 0.2489  avg_val_loss: 0.2557  time: 7s
Epoch 11 - Score: 0.2558
Epoch 11 - Save Best Score: 0.2558 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2501(0.2557) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2375(0.2375) Grad: 0.6695  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2191(0.2357) Grad: 0.8821  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2283(0.2225) Grad: 2.7433  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2590(0.2590) 


Epoch 12 - avg_train_loss: 0.2225  avg_val_loss: 0.2663  time: 7s
Epoch 12 - Score: 0.2666


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2946(0.2663) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2570(0.2570) Grad: 1.4970  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2673(0.2389) Grad: 0.8828  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2330(0.2279) Grad: 0.9220  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2371(0.2371) 


Epoch 13 - avg_train_loss: 0.2279  avg_val_loss: 0.2498  time: 7s
Epoch 13 - Score: 0.2500
Epoch 13 - Save Best Score: 0.2500 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2473(0.2498) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1956(0.1956) Grad: 2.1136  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1728(0.1957) Grad: 1.2915  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1430(0.1834) Grad: 1.9429  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2293(0.2293) 


Epoch 14 - avg_train_loss: 0.1834  avg_val_loss: 0.2350  time: 7s
Epoch 14 - Score: 0.2351
Epoch 14 - Save Best Score: 0.2351 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2256(0.2350) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1365(0.1365) Grad: 1.6234  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1604(0.1668) Grad: 3.1977  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1558(0.1615) Grad: 0.8182  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1997(0.1997) 


Epoch 15 - avg_train_loss: 0.1615  avg_val_loss: 0.2152  time: 7s
Epoch 15 - Score: 0.2158
Epoch 15 - Save Best Score: 0.2158 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2048(0.2152) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1139(0.1139) Grad: 2.4441  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1841(0.1402) Grad: 1.1464  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1663(0.1394) Grad: 2.9511  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1931(0.1931) 


Epoch 16 - avg_train_loss: 0.1394  avg_val_loss: 0.2135  time: 7s
Epoch 16 - Score: 0.2145
Epoch 16 - Save Best Score: 0.2145 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2046(0.2135) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1202(0.1202) Grad: 0.9486  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1627(0.1354) Grad: 1.4258  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0817(0.1352) Grad: 2.3781  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1846(0.1846) 


Epoch 17 - avg_train_loss: 0.1352  avg_val_loss: 0.2030  time: 7s
Epoch 17 - Score: 0.2036
Epoch 17 - Save Best Score: 0.2036 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2090(0.2030) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1885(0.1885) Grad: 1.1727  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1194(0.1607) Grad: 2.0769  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1402(0.1590) Grad: 3.5123  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1920(0.1920) 


Epoch 18 - avg_train_loss: 0.1590  avg_val_loss: 0.1970  time: 7s
Epoch 18 - Score: 0.1972
Epoch 18 - Save Best Score: 0.1972 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2156(0.1970) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1336(0.1336) Grad: 0.5490  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1056(0.1678) Grad: 2.8011  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1489(0.1644) Grad: 1.5167  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2037(0.2037) 


Epoch 19 - avg_train_loss: 0.1644  avg_val_loss: 0.2224  time: 7s
Epoch 19 - Score: 0.2237


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2724(0.2224) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1638(0.1638) Grad: 1.6598  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1909(0.1383) Grad: 1.1307  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1637(0.1368) Grad: 2.1420  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1882(0.1882) 


Epoch 20 - avg_train_loss: 0.1368  avg_val_loss: 0.1983  time: 7s
Epoch 20 - Score: 0.1986


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2162(0.1983) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1155(0.1155) Grad: 1.1350  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1512(0.1233) Grad: 1.0079  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1069(0.1198) Grad: 2.5100  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1879(0.1879) 


Epoch 21 - avg_train_loss: 0.1198  avg_val_loss: 0.1976  time: 7s
Epoch 21 - Score: 0.1980


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1856(0.1976) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0686(0.0686) Grad: 2.2306  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0854(0.1102) Grad: 0.8548  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1245(0.1090) Grad: 0.9345  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1744(0.1744) 


Epoch 22 - avg_train_loss: 0.1090  avg_val_loss: 0.1864  time: 7s
Epoch 22 - Score: 0.1867
Epoch 22 - Save Best Score: 0.1867 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1936(0.1864) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1090(0.1090) Grad: 1.1836  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0814(0.1048) Grad: 1.1618  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0833(0.1034) Grad: 1.2283  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1774(0.1774) 


Epoch 23 - avg_train_loss: 0.1034  avg_val_loss: 0.1917  time: 7s
Epoch 23 - Score: 0.1920


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2029(0.1917) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0816(0.0816) Grad: 0.8179  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0979(0.0989) Grad: 1.8326  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0992(0.1056) Grad: 1.4502  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1664(0.1664) 


Epoch 24 - avg_train_loss: 0.1056  avg_val_loss: 0.1885  time: 7s
Epoch 24 - Score: 0.1895


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1896(0.1885) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1387(0.1387) Grad: 1.7449  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1274(0.1263) Grad: 2.8243  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1344(0.1397) Grad: 2.5952  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1841(0.1841) 


Epoch 25 - avg_train_loss: 0.1397  avg_val_loss: 0.1936  time: 7s
Epoch 25 - Score: 0.1938


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1929(0.1936) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1269(0.1269) Grad: 2.0695  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1071(0.1033) Grad: 2.6541  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1316(0.1142) Grad: 1.1182  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1808(0.1808) 


Epoch 26 - avg_train_loss: 0.1142  avg_val_loss: 0.2148  time: 7s
Epoch 26 - Score: 0.2170


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2618(0.2148) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1374(0.1374) Grad: 2.5017  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1028(0.1040) Grad: 1.4380  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0505(0.0956) Grad: 1.8408  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1731(0.1731) 


Epoch 27 - avg_train_loss: 0.0956  avg_val_loss: 0.1825  time: 7s
Epoch 27 - Score: 0.1827
Epoch 27 - Save Best Score: 0.1827 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1868(0.1825) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0530(0.0530) Grad: 1.2605  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0590(0.0777) Grad: 2.3174  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0571(0.0738) Grad: 1.6326  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1558(0.1558) 


Epoch 28 - avg_train_loss: 0.0738  avg_val_loss: 0.1875  time: 7s
Epoch 28 - Score: 0.1894


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2196(0.1875) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0739(0.0739) Grad: 0.6167  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0583(0.0820) Grad: 1.6299  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1002(0.0816) Grad: 1.0710  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1625(0.1625) 


Epoch 29 - avg_train_loss: 0.0816  avg_val_loss: 0.1902  time: 7s
Epoch 29 - Score: 0.1916


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2166(0.1902) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0832(0.0832) Grad: 2.2792  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0888(0.0919) Grad: 1.1562  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0687(0.0886) Grad: 0.8447  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1586(0.1586) 


Epoch 30 - avg_train_loss: 0.0886  avg_val_loss: 0.1920  time: 7s
Epoch 30 - Score: 0.1942


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2329(0.1920) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0474(0.0474) Grad: 1.0530  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0741(0.0910) Grad: 1.5392  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0857(0.0911) Grad: 1.1886  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1550(0.1550) 


Epoch 31 - avg_train_loss: 0.0911  avg_val_loss: 0.1790  time: 7s
Epoch 31 - Score: 0.1805
Epoch 31 - Save Best Score: 0.1805 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2180(0.1790) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1185(0.1185) Grad: 1.4771  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1367(0.1105) Grad: 3.3078  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0985(0.1039) Grad: 2.6313  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1598(0.1598) 


Epoch 32 - avg_train_loss: 0.1039  avg_val_loss: 0.1821  time: 7s
Epoch 32 - Score: 0.1838


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2324(0.1821) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0931(0.0931) Grad: 1.5988  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0865(0.0772) Grad: 2.2598  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0599(0.0752) Grad: 2.0296  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1527(0.1527) 


Epoch 33 - avg_train_loss: 0.0752  avg_val_loss: 0.1786  time: 7s
Epoch 33 - Score: 0.1802
Epoch 33 - Save Best Score: 0.1802 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2186(0.1786) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0608(0.0608) Grad: 1.1576  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0690(0.0642) Grad: 0.8620  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0575(0.0613) Grad: 0.7678  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1560(0.1560) 


Epoch 34 - avg_train_loss: 0.0613  avg_val_loss: 0.1737  time: 7s
Epoch 34 - Score: 0.1746
Epoch 34 - Save Best Score: 0.1746 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2041(0.1737) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0731(0.0731) Grad: 0.6507  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0626(0.0682) Grad: 3.7534  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0461(0.0605) Grad: 3.6062  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1524(0.1524) 


Epoch 35 - avg_train_loss: 0.0605  avg_val_loss: 0.1772  time: 7s
Epoch 35 - Score: 0.1789


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2208(0.1772) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0580(0.0580) Grad: 0.6367  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0811(0.0717) Grad: 2.7689  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1013(0.0750) Grad: 0.5632  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1606(0.1606) 


Epoch 36 - avg_train_loss: 0.0750  avg_val_loss: 0.1832  time: 7s
Epoch 36 - Score: 0.1842


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1892(0.1832) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1039(0.1039) Grad: 0.9299  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0757(0.0814) Grad: 0.8698  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1150(0.0890) Grad: 4.4164  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1895(0.1895) 


Epoch 37 - avg_train_loss: 0.0890  avg_val_loss: 0.1973  time: 7s
Epoch 37 - Score: 0.1979


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2316(0.1973) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1124(0.1124) Grad: 4.4877  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0723(0.0919) Grad: 1.4546  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0822(0.0848) Grad: 3.2838  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1666(0.1666) 


Epoch 38 - avg_train_loss: 0.0848  avg_val_loss: 0.1864  time: 7s
Epoch 38 - Score: 0.1875


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2230(0.1864) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1135(0.1135) Grad: 3.6892  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0484(0.0704) Grad: 1.5547  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0806(0.0674) Grad: 1.0071  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1709(0.1709) 


Epoch 39 - avg_train_loss: 0.0674  avg_val_loss: 0.1717  time: 7s
Epoch 39 - Score: 0.1717
Epoch 39 - Save Best Score: 0.1717 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1823(0.1717) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0427(0.0427) Grad: 1.6958  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0385(0.0520) Grad: 0.4466  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0529(0.0578) Grad: 0.7568  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1544(0.1544) 


Epoch 40 - avg_train_loss: 0.0578  avg_val_loss: 0.1710  time: 7s
Epoch 40 - Score: 0.1716
Epoch 40 - Save Best Score: 0.1716 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1837(0.1710) 


Score: 0.1716


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.3298(3.3298) Grad: 3.1129  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6347(1.6106) Grad: 3.6415  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5374(1.1889) Grad: 0.9663  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6299(0.6299) 


Epoch 1 - avg_train_loss: 1.1889  avg_val_loss: 0.5932  time: 7s
Epoch 1 - Score: 0.5940
Epoch 1 - Save Best Score: 0.5940 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5723(0.5932) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.6752(0.6752) Grad: 0.4316  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6055(0.5911) Grad: 2.1861  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4806(0.5665) Grad: 1.9733  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5886(0.5886) 


Epoch 2 - avg_train_loss: 0.5665  avg_val_loss: 0.4933  time: 7s
Epoch 2 - Score: 0.5001
Epoch 2 - Save Best Score: 0.5001 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3926(0.4933) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4843(0.4843) Grad: 1.9532  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3963(0.4700) Grad: 2.6643  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3599(0.4450) Grad: 1.4200  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5279(0.5279) 


Epoch 3 - avg_train_loss: 0.4450  avg_val_loss: 0.4397  time: 7s
Epoch 3 - Score: 0.4487
Epoch 3 - Save Best Score: 0.4487 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2750(0.4397) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4948(0.4948) Grad: 1.3989  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3351(0.4124) Grad: 1.6965  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3290(0.3949) Grad: 1.8343  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5173(0.5173) 


Epoch 4 - avg_train_loss: 0.3949  avg_val_loss: 0.4312  time: 7s
Epoch 4 - Score: 0.4399
Epoch 4 - Save Best Score: 0.4399 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2722(0.4312) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4152(0.4152) Grad: 1.7565  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5182(0.4099) Grad: 4.2922  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3377(0.3940) Grad: 3.4635  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4821(0.4821) 


Epoch 5 - avg_train_loss: 0.3940  avg_val_loss: 0.4484  time: 7s
Epoch 5 - Score: 0.4525


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3126(0.4484) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3596(0.3596) Grad: 3.9753  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3063(0.3710) Grad: 1.3387  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2297(0.3592) Grad: 0.4646  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4957(0.4957) 


Epoch 6 - avg_train_loss: 0.3592  avg_val_loss: 0.4694  time: 7s
Epoch 6 - Score: 0.4725


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3488(0.4694) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3777(0.3777) Grad: 4.1049  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3966(0.3541) Grad: 2.9472  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3914(0.3440) Grad: 0.9159  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4550(0.4550) 


Epoch 7 - avg_train_loss: 0.3440  avg_val_loss: 0.3787  time: 7s
Epoch 7 - Score: 0.3848
Epoch 7 - Save Best Score: 0.3848 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2795(0.3787) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3231(0.3231) Grad: 0.8469  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2439(0.3001) Grad: 0.9806  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2807(0.2837) Grad: 1.3470  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3789(0.3789) 


Epoch 8 - avg_train_loss: 0.2837  avg_val_loss: 0.3351  time: 7s
Epoch 8 - Score: 0.3389
Epoch 8 - Save Best Score: 0.3389 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2333(0.3351) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2601(0.2601) Grad: 0.8644  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3292(0.2577) Grad: 1.2188  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2582(0.2451) Grad: 1.1852  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3632(0.3632) 


Epoch 9 - avg_train_loss: 0.2451  avg_val_loss: 0.3240  time: 7s
Epoch 9 - Score: 0.3265
Epoch 9 - Save Best Score: 0.3265 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2484(0.3240) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2997(0.2997) Grad: 1.1034  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2186(0.2253) Grad: 0.8460  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2481(0.2245) Grad: 1.3374  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3609(0.3609) 


Epoch 10 - avg_train_loss: 0.2245  avg_val_loss: 0.3175  time: 7s
Epoch 10 - Score: 0.3210
Epoch 10 - Save Best Score: 0.3210 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2250(0.3175) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2771(0.2771) Grad: 0.6832  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2569(0.2409) Grad: 1.8326  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2133(0.2279) Grad: 2.7411  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3299(0.3299) 


Epoch 11 - avg_train_loss: 0.2279  avg_val_loss: 0.2944  time: 7s
Epoch 11 - Score: 0.2983
Epoch 11 - Save Best Score: 0.2983 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1918(0.2944) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2524(0.2524) Grad: 0.9150  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2011(0.2454) Grad: 2.2654  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2307(0.2288) Grad: 2.5111  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3115(0.3115) 


Epoch 12 - avg_train_loss: 0.2288  avg_val_loss: 0.2825  time: 7s
Epoch 12 - Score: 0.2861
Epoch 12 - Save Best Score: 0.2861 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1837(0.2825) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1577(0.1577) Grad: 1.2904  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2569(0.2077) Grad: 1.3348  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2959(0.2163) Grad: 5.0092  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3358(0.3358) 


Epoch 13 - avg_train_loss: 0.2163  avg_val_loss: 0.3207  time: 7s
Epoch 13 - Score: 0.3214


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2748(0.3207) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3295(0.3295) Grad: 1.1974  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1581(0.2126) Grad: 0.9615  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1792(0.1903) Grad: 3.3654  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3065(0.3065) 


Epoch 14 - avg_train_loss: 0.1903  avg_val_loss: 0.2773  time: 7s
Epoch 14 - Score: 0.2807
Epoch 14 - Save Best Score: 0.2807 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1824(0.2773) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2536(0.2536) Grad: 2.4015  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1183(0.1530) Grad: 2.7573  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1687(0.1515) Grad: 1.3412  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2741(0.2741) 


Epoch 15 - avg_train_loss: 0.1515  avg_val_loss: 0.2631  time: 7s
Epoch 15 - Score: 0.2649
Epoch 15 - Save Best Score: 0.2649 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1940(0.2631) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1576(0.1576) Grad: 2.8619  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1626(0.1482) Grad: 0.8045  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0951(0.1412) Grad: 1.3995  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2794(0.2794) 


Epoch 16 - avg_train_loss: 0.1412  avg_val_loss: 0.2539  time: 7s
Epoch 16 - Score: 0.2563
Epoch 16 - Save Best Score: 0.2563 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1777(0.2539) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1745(0.1745) Grad: 1.4698  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1249(0.1436) Grad: 1.3282  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1901(0.1488) Grad: 2.2300  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2938(0.2938) 


Epoch 17 - avg_train_loss: 0.1488  avg_val_loss: 0.2666  time: 7s
Epoch 17 - Score: 0.2695


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1818(0.2666) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1579(0.1579) Grad: 1.7504  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1179(0.1366) Grad: 1.6977  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1443(0.1375) Grad: 1.8685  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2623(0.2623) 


Epoch 18 - avg_train_loss: 0.1375  avg_val_loss: 0.2489  time: 7s
Epoch 18 - Score: 0.2511
Epoch 18 - Save Best Score: 0.2511 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1759(0.2489) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0910(0.0910) Grad: 2.1815  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1802(0.1328) Grad: 2.8977  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1532(0.1411) Grad: 3.5204  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2931(0.2931) 


Epoch 19 - avg_train_loss: 0.1411  avg_val_loss: 0.2526  time: 7s
Epoch 19 - Score: 0.2567


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1613(0.2526) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1317(0.1317) Grad: 0.9968  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1276(0.1335) Grad: 1.1583  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1151(0.1361) Grad: 1.3005  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2846(0.2846) 


Epoch 20 - avg_train_loss: 0.1361  avg_val_loss: 0.2546  time: 7s
Epoch 20 - Score: 0.2579


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1678(0.2546) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1183(0.1183) Grad: 0.6920  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0912(0.1160) Grad: 2.1758  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1267(0.1136) Grad: 1.1395  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2730(0.2730) 


Epoch 21 - avg_train_loss: 0.1136  avg_val_loss: 0.2537  time: 7s
Epoch 21 - Score: 0.2560


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1772(0.2537) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1423(0.1423) Grad: 2.2899  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0937(0.1127) Grad: 0.5679  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0736(0.1011) Grad: 0.7424  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2710(0.2710) 


Epoch 22 - avg_train_loss: 0.1011  avg_val_loss: 0.2435  time: 7s
Epoch 22 - Score: 0.2467
Epoch 22 - Save Best Score: 0.2467 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1584(0.2435) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1060(0.1060) Grad: 0.6185  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0752(0.1160) Grad: 0.9820  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0845(0.1083) Grad: 0.8600  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2617(0.2617) 


Epoch 23 - avg_train_loss: 0.1083  avg_val_loss: 0.2377  time: 7s
Epoch 23 - Score: 0.2407
Epoch 23 - Save Best Score: 0.2407 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1558(0.2377) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0779(0.0779) Grad: 0.7772  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1009(0.0943) Grad: 1.6674  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1404(0.1056) Grad: 0.6761  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2787(0.2787) 


Epoch 24 - avg_train_loss: 0.1056  avg_val_loss: 0.2509  time: 7s
Epoch 24 - Score: 0.2538


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1690(0.2509) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1068(0.1068) Grad: 1.9281  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1235(0.1184) Grad: 3.4847  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1164(0.1236) Grad: 1.8030  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2864(0.2864) 


Epoch 25 - avg_train_loss: 0.1236  avg_val_loss: 0.2566  time: 7s
Epoch 25 - Score: 0.2619


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1413(0.2566) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1517(0.1517) Grad: 0.9014  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0909(0.1155) Grad: 1.1938  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1076(0.1103) Grad: 1.2638  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2569(0.2569) 


Epoch 26 - avg_train_loss: 0.1103  avg_val_loss: 0.2531  time: 7s
Epoch 26 - Score: 0.2546


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1922(0.2531) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1061(0.1061) Grad: 1.7851  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0962(0.0913) Grad: 0.6763  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1227(0.0904) Grad: 0.4949  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2580(0.2580) 


Epoch 27 - avg_train_loss: 0.0904  avg_val_loss: 0.2388  time: 7s
Epoch 27 - Score: 0.2416


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1566(0.2388) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0824(0.0824) Grad: 1.4679  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0672(0.0819) Grad: 0.9116  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0802(0.0794) Grad: 0.7308  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2514(0.2514) 


Epoch 28 - avg_train_loss: 0.0794  avg_val_loss: 0.2442  time: 7s
Epoch 28 - Score: 0.2460


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1797(0.2442) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0668(0.0668) Grad: 1.0055  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0961(0.0758) Grad: 2.4743  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0849(0.0869) Grad: 3.2851  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2593(0.2593) 


Epoch 29 - avg_train_loss: 0.0869  avg_val_loss: 0.2436  time: 7s
Epoch 29 - Score: 0.2476


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1461(0.2436) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0656(0.0656) Grad: 2.9204  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0814(0.0853) Grad: 2.8117  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0685(0.0803) Grad: 1.4085  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2741(0.2741) 


Epoch 30 - avg_train_loss: 0.0803  avg_val_loss: 0.2453  time: 7s
Epoch 30 - Score: 0.2497


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1430(0.2453) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0887(0.0887) Grad: 2.7241  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0907(0.0883) Grad: 0.5748  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0861(0.0940) Grad: 2.0019  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2832(0.2832) 


Epoch 31 - avg_train_loss: 0.0940  avg_val_loss: 0.2497  time: 7s
Epoch 31 - Score: 0.2547


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1407(0.2497) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0834(0.0834) Grad: 2.3849  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1287(0.0976) Grad: 1.0262  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0790(0.0940) Grad: 1.6127  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2674(0.2674) 


Epoch 32 - avg_train_loss: 0.0940  avg_val_loss: 0.2469  time: 7s
Epoch 32 - Score: 0.2511


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1454(0.2469) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0724(0.0724) Grad: 0.7748  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0662(0.0756) Grad: 3.2224  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0632(0.0717) Grad: 2.3672  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2647(0.2647) 


Epoch 33 - avg_train_loss: 0.0717  avg_val_loss: 0.2428  time: 7s
Epoch 33 - Score: 0.2458


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1583(0.2428) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1112(0.1112) Grad: 3.0427  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0597(0.0728) Grad: 0.8568  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0695(0.0679) Grad: 1.6818  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2557(0.2557) 


Epoch 34 - avg_train_loss: 0.0679  avg_val_loss: 0.2309  time: 7s
Epoch 34 - Score: 0.2358
Epoch 34 - Save Best Score: 0.2358 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1248(0.2309) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0696(0.0696) Grad: 2.5092  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0661(0.0728) Grad: 2.5920  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0710(0.0699) Grad: 3.0783  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2600(0.2600) 


Epoch 35 - avg_train_loss: 0.0699  avg_val_loss: 0.2301  time: 7s
Epoch 35 - Score: 0.2335
Epoch 35 - Save Best Score: 0.2335 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1466(0.2301) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0686(0.0686) Grad: 1.3681  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0989(0.0785) Grad: 3.3523  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0584(0.0726) Grad: 3.0589  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2592(0.2592) 


Epoch 36 - avg_train_loss: 0.0726  avg_val_loss: 0.2401  time: 7s
Epoch 36 - Score: 0.2430


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1564(0.2401) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0964(0.0964) Grad: 1.8735  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0869(0.0948) Grad: 3.8086  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0626(0.0934) Grad: 0.7513  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2713(0.2713) 


Epoch 37 - avg_train_loss: 0.0934  avg_val_loss: 0.2411  time: 7s
Epoch 37 - Score: 0.2445


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1539(0.2411) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1199(0.1199) Grad: 1.8702  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0598(0.0894) Grad: 3.0477  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1110(0.0904) Grad: 1.1961  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2553(0.2553) 


Epoch 38 - avg_train_loss: 0.0904  avg_val_loss: 0.2346  time: 7s
Epoch 38 - Score: 0.2381


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1453(0.2346) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0973(0.0973) Grad: 0.5286  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0648(0.0726) Grad: 2.4120  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0584(0.0710) Grad: 2.2073  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2661(0.2661) 


Epoch 39 - avg_train_loss: 0.0710  avg_val_loss: 0.2297  time: 7s
Epoch 39 - Score: 0.2346


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1296(0.2297) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0572(0.0572) Grad: 1.6217  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0522(0.0615) Grad: 1.0139  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0418(0.0605) Grad: 0.4375  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2629(0.2629) 


Epoch 40 - avg_train_loss: 0.0605  avg_val_loss: 0.2305  time: 7s
Epoch 40 - Score: 0.2351


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1313(0.2305) 


Score: 0.2335


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.2605(3.2605) Grad: 3.2791  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6357(1.6211) Grad: 3.0963  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5373(1.2166) Grad: 1.5036  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6788(0.6788) 


Epoch 1 - avg_train_loss: 1.2166  avg_val_loss: 0.6800  time: 7s
Epoch 1 - Score: 0.6805
Epoch 1 - Save Best Score: 0.6805 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.7339(0.6800) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.6963(0.6963) Grad: 0.3672  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5674(0.6113) Grad: 1.6055  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5275(0.5942) Grad: 0.6386  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6324(0.6324) 


Epoch 2 - avg_train_loss: 0.5942  avg_val_loss: 0.6476  time: 7s
Epoch 2 - Score: 0.6481
Epoch 2 - Save Best Score: 0.6481 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6989(0.6476) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5026(0.5026) Grad: 0.7173  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4866(0.5338) Grad: 0.3444  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3810(0.5458) Grad: 0.4152  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5449(0.5449) 


Epoch 3 - avg_train_loss: 0.5458  avg_val_loss: 0.5744  time: 7s
Epoch 3 - Score: 0.5750
Epoch 3 - Save Best Score: 0.5750 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6079(0.5744) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5097(0.5097) Grad: 1.2750  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5972(0.5065) Grad: 0.7079  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3605(0.5041) Grad: 0.7849  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5371(0.5371) 


Epoch 4 - avg_train_loss: 0.5041  avg_val_loss: 0.5555  time: 7s
Epoch 4 - Score: 0.5557
Epoch 4 - Save Best Score: 0.5557 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5794(0.5555) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4258(0.4258) Grad: 1.1028  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4608(0.4904) Grad: 2.1992  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5556(0.5046) Grad: 3.1668  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4810(0.4810) 


Epoch 5 - avg_train_loss: 0.5046  avg_val_loss: 0.4864  time: 7s
Epoch 5 - Score: 0.4864
Epoch 5 - Save Best Score: 0.4864 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4948(0.4864) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6420(0.6420) Grad: 1.1129  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4631(0.4623) Grad: 1.5755  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3182(0.4355) Grad: 0.8940  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4310(0.4310) 


Epoch 6 - avg_train_loss: 0.4355  avg_val_loss: 0.4052  time: 7s
Epoch 6 - Score: 0.4060
Epoch 6 - Save Best Score: 0.4060 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3567(0.4052) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3764(0.3764) Grad: 1.5798  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4708(0.3807) Grad: 3.1202  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2980(0.3648) Grad: 1.5254  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3920(0.3920) 


Epoch 7 - avg_train_loss: 0.3648  avg_val_loss: 0.3636  time: 7s
Epoch 7 - Score: 0.3644
Epoch 7 - Save Best Score: 0.3644 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3336(0.3636) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2303(0.2303) Grad: 1.0700  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2685(0.3146) Grad: 2.2168  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2421(0.3052) Grad: 1.0207  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3825(0.3825) 


Epoch 8 - avg_train_loss: 0.3052  avg_val_loss: 0.3534  time: 7s
Epoch 8 - Score: 0.3544
Epoch 8 - Save Best Score: 0.3544 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3537(0.3534) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2737(0.2737) Grad: 1.0737  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2508(0.2544) Grad: 0.4646  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2088(0.2652) Grad: 0.5972  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3383(0.3383) 


Epoch 9 - avg_train_loss: 0.2652  avg_val_loss: 0.3165  time: 7s
Epoch 9 - Score: 0.3170
Epoch 9 - Save Best Score: 0.3170 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2929(0.3165) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2067(0.2067) Grad: 2.8677  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2539(0.2643) Grad: 1.0676  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1931(0.2475) Grad: 1.2467  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3416(0.3416) 


Epoch 10 - avg_train_loss: 0.2475  avg_val_loss: 0.3143  time: 7s
Epoch 10 - Score: 0.3152
Epoch 10 - Save Best Score: 0.3152 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2928(0.3143) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1840(0.1840) Grad: 2.2199  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3126(0.2581) Grad: 0.8521  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2252(0.2410) Grad: 0.6294  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3212(0.3212) 


Epoch 11 - avg_train_loss: 0.2410  avg_val_loss: 0.2962  time: 7s
Epoch 11 - Score: 0.2970
Epoch 11 - Save Best Score: 0.2970 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2854(0.2962) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1560(0.1560) Grad: 1.0410  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2311(0.2113) Grad: 1.7815  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1938(0.2328) Grad: 0.8001  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3077(0.3077) 


Epoch 12 - avg_train_loss: 0.2328  avg_val_loss: 0.2936  time: 7s
Epoch 12 - Score: 0.2938
Epoch 12 - Save Best Score: 0.2938 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2802(0.2936) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2584(0.2584) Grad: 1.8313  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2695(0.2460) Grad: 4.6044  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1212(0.2326) Grad: 2.6025  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3162(0.3162) 


Epoch 13 - avg_train_loss: 0.2326  avg_val_loss: 0.2961  time: 7s
Epoch 13 - Score: 0.2967


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2632(0.2961) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1942(0.1942) Grad: 3.1480  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2171(0.2258) Grad: 0.6847  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2642(0.2170) Grad: 0.8763  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2711(0.2711) 


Epoch 14 - avg_train_loss: 0.2170  avg_val_loss: 0.2691  time: 7s
Epoch 14 - Score: 0.2691
Epoch 14 - Save Best Score: 0.2691 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2709(0.2691) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2507(0.2507) Grad: 1.2455  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2448(0.1751) Grad: 2.9023  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2035(0.1659) Grad: 1.6143  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2733(0.2733) 


Epoch 15 - avg_train_loss: 0.1659  avg_val_loss: 0.2496  time: 7s
Epoch 15 - Score: 0.2507
Epoch 15 - Save Best Score: 0.2507 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2108(0.2496) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1096(0.1096) Grad: 2.1077  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2163(0.1673) Grad: 0.7999  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1459(0.1506) Grad: 0.8889  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2645(0.2645) 


Epoch 16 - avg_train_loss: 0.1506  avg_val_loss: 0.2481  time: 7s
Epoch 16 - Score: 0.2486
Epoch 16 - Save Best Score: 0.2486 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2210(0.2481) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1986(0.1986) Grad: 1.9110  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1857(0.1528) Grad: 0.6638  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1508(0.1501) Grad: 0.8298  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2634(0.2634) 


Epoch 17 - avg_train_loss: 0.1501  avg_val_loss: 0.2407  time: 7s
Epoch 17 - Score: 0.2415
Epoch 17 - Save Best Score: 0.2415 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2227(0.2407) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1549(0.1549) Grad: 1.7987  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1221(0.1446) Grad: 1.3560  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1600(0.1454) Grad: 0.6779  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2817(0.2817) 


Epoch 18 - avg_train_loss: 0.1454  avg_val_loss: 0.2559  time: 7s
Epoch 18 - Score: 0.2573


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2716(0.2559) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1896(0.1896) Grad: 1.8422  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1455(0.1535) Grad: 1.7725  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1703(0.1521) Grad: 2.7586  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2805(0.2805) 


Epoch 19 - avg_train_loss: 0.1521  avg_val_loss: 0.2630  time: 7s
Epoch 19 - Score: 0.2635


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2660(0.2630) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1141(0.1141) Grad: 2.3246  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1119(0.1463) Grad: 0.9894  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1424(0.1450) Grad: 0.8021  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2571(0.2571) 


Epoch 20 - avg_train_loss: 0.1450  avg_val_loss: 0.2447  time: 7s
Epoch 20 - Score: 0.2455


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2691(0.2447) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1051(0.1051) Grad: 2.4414  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1969(0.1327) Grad: 1.6234  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1060(0.1275) Grad: 0.9875  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2444(0.2444) 


Epoch 21 - avg_train_loss: 0.1275  avg_val_loss: 0.2332  time: 7s
Epoch 21 - Score: 0.2339
Epoch 21 - Save Best Score: 0.2339 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2559(0.2332) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1060(0.1060) Grad: 1.4746  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0995(0.1168) Grad: 1.1454  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0677(0.1103) Grad: 1.7884  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2348(0.2348) 


Epoch 22 - avg_train_loss: 0.1103  avg_val_loss: 0.2275  time: 7s
Epoch 22 - Score: 0.2279
Epoch 22 - Save Best Score: 0.2279 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2466(0.2275) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1243(0.1243) Grad: 0.7447  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0693(0.1281) Grad: 1.2619  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1118(0.1230) Grad: 1.8993  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2522(0.2522) 


Epoch 23 - avg_train_loss: 0.1230  avg_val_loss: 0.2373  time: 7s
Epoch 23 - Score: 0.2376


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2282(0.2373) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1138(0.1138) Grad: 1.4021  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1070(0.1292) Grad: 0.8133  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0834(0.1255) Grad: 1.8025  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2549(0.2549) 


Epoch 24 - avg_train_loss: 0.1255  avg_val_loss: 0.2449  time: 7s
Epoch 24 - Score: 0.2452


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2581(0.2449) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1135(0.1135) Grad: 3.4527  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1691(0.1337) Grad: 4.2904  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0838(0.1381) Grad: 1.9410  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2537(0.2537) 


Epoch 25 - avg_train_loss: 0.1381  avg_val_loss: 0.2306  time: 7s
Epoch 25 - Score: 0.2318


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1877(0.2306) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1079(0.1079) Grad: 2.6876  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0800(0.1106) Grad: 1.9948  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1241(0.1114) Grad: 2.2726  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2528(0.2528) 


Epoch 26 - avg_train_loss: 0.1114  avg_val_loss: 0.2332  time: 7s
Epoch 26 - Score: 0.2338


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2170(0.2332) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0975(0.0975) Grad: 2.0453  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0905(0.0966) Grad: 0.5037  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0559(0.0931) Grad: 0.9892  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2536(0.2536) 


Epoch 27 - avg_train_loss: 0.0931  avg_val_loss: 0.2337  time: 7s
Epoch 27 - Score: 0.2344


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2318(0.2337) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0750(0.0750) Grad: 0.9317  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0698(0.0793) Grad: 0.9081  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0803(0.0802) Grad: 1.4881  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2401(0.2401) 


Epoch 28 - avg_train_loss: 0.0802  avg_val_loss: 0.2212  time: 7s
Epoch 28 - Score: 0.2218
Epoch 28 - Save Best Score: 0.2218 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2130(0.2212) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0915(0.0915) Grad: 0.9926  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0806(0.0809) Grad: 0.9590  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1244(0.0821) Grad: 1.0569  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2452(0.2452) 


Epoch 29 - avg_train_loss: 0.0821  avg_val_loss: 0.2268  time: 7s
Epoch 29 - Score: 0.2273


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2244(0.2268) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0846(0.0846) Grad: 1.8701  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1154(0.0877) Grad: 3.8849  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0727(0.0900) Grad: 1.9240  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2675(0.2675) 


Epoch 30 - avg_train_loss: 0.0900  avg_val_loss: 0.2407  time: 7s
Epoch 30 - Score: 0.2419


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2380(0.2407) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0850(0.0850) Grad: 1.9583  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0975(0.0937) Grad: 3.5029  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1176(0.1007) Grad: 4.3277  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2683(0.2683) 


Epoch 31 - avg_train_loss: 0.1007  avg_val_loss: 0.2468  time: 7s
Epoch 31 - Score: 0.2481


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2702(0.2468) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1175(0.1175) Grad: 3.8437  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0871(0.1021) Grad: 3.5996  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1108(0.0962) Grad: 2.8526  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2567(0.2567) 


Epoch 32 - avg_train_loss: 0.0962  avg_val_loss: 0.2438  time: 7s
Epoch 32 - Score: 0.2443


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2594(0.2438) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0871(0.0871) Grad: 2.3685  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0787(0.0914) Grad: 1.2625  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0647(0.0865) Grad: 1.4007  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2557(0.2557) 


Epoch 33 - avg_train_loss: 0.0865  avg_val_loss: 0.2390  time: 7s
Epoch 33 - Score: 0.2398


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2550(0.2390) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0749(0.0749) Grad: 1.5633  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0681(0.0782) Grad: 1.5091  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0553(0.0695) Grad: 0.6260  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2474(0.2474) 


Epoch 34 - avg_train_loss: 0.0695  avg_val_loss: 0.2310  time: 7s
Epoch 34 - Score: 0.2315


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2308(0.2310) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0819(0.0819) Grad: 1.3593  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0769(0.0851) Grad: 3.7152  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0710(0.0759) Grad: 3.2423  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2548(0.2548) 


Epoch 35 - avg_train_loss: 0.0759  avg_val_loss: 0.2370  time: 7s
Epoch 35 - Score: 0.2379


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2538(0.2370) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0663(0.0663) Grad: 4.8475  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0893(0.0749) Grad: 1.0192  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1624(0.0845) Grad: 1.8949  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2715(0.2715) 


Epoch 36 - avg_train_loss: 0.0845  avg_val_loss: 0.2415  time: 7s
Epoch 36 - Score: 0.2434


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2531(0.2415) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0863(0.0863) Grad: 3.2552  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0484(0.0795) Grad: 2.6592  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1028(0.0851) Grad: 2.0773  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2410(0.2410) 


Epoch 37 - avg_train_loss: 0.0851  avg_val_loss: 0.2199  time: 7s
Epoch 37 - Score: 0.2210
Epoch 37 - Save Best Score: 0.2210 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2336(0.2199) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0776(0.0776) Grad: 1.1916  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0628(0.0886) Grad: 0.9294  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0704(0.0828) Grad: 1.5902  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2536(0.2536) 


Epoch 38 - avg_train_loss: 0.0828  avg_val_loss: 0.2286  time: 7s
Epoch 38 - Score: 0.2300


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2381(0.2286) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0771(0.0771) Grad: 2.7609  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0624(0.0756) Grad: 2.2743  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0626(0.0709) Grad: 0.7714  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2463(0.2463) 


Epoch 39 - avg_train_loss: 0.0709  avg_val_loss: 0.2226  time: 7s
Epoch 39 - Score: 0.2235


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2158(0.2226) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0612(0.0612) Grad: 1.2241  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0522(0.0615) Grad: 0.6640  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0466(0.0559) Grad: 0.5126  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2421(0.2421) 


Epoch 40 - avg_train_loss: 0.0559  avg_val_loss: 0.2239  time: 7s
Epoch 40 - Score: 0.2246


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2263(0.2239) 


Score: 0.2210


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.2501(3.2501) Grad: 3.3297  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.8777(1.6655) Grad: 3.4276  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6621(1.2225) Grad: 0.5115  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6301(0.6301) 


Epoch 1 - avg_train_loss: 1.2225  avg_val_loss: 0.5905  time: 7s
Epoch 1 - Score: 0.5920
Epoch 1 - Save Best Score: 0.5920 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6152(0.5905) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5891(0.5891) Grad: 2.1246  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6065(0.5830) Grad: 1.9482  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5493(0.5758) Grad: 0.5475  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5341(0.5341) 


Epoch 2 - avg_train_loss: 0.5758  avg_val_loss: 0.4870  time: 7s
Epoch 2 - Score: 0.4886
Epoch 2 - Save Best Score: 0.4886 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4416(0.4870) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3485(0.3485) Grad: 1.6101  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4374(0.4704) Grad: 1.9641  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4659(0.4782) Grad: 4.9504  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5707(0.5707) 


Epoch 3 - avg_train_loss: 0.4782  avg_val_loss: 0.5082  time: 7s
Epoch 3 - Score: 0.5114


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5095(0.5082) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5196(0.5196) Grad: 4.9719  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4762(0.4730) Grad: 2.0284  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3893(0.4521) Grad: 1.1203  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4814(0.4814) 


Epoch 4 - avg_train_loss: 0.4521  avg_val_loss: 0.4251  time: 7s
Epoch 4 - Score: 0.4280
Epoch 4 - Save Best Score: 0.4280 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4138(0.4251) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3714(0.3714) Grad: 1.2690  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6301(0.5842) Grad: 1.5630  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3543(0.5444) Grad: 1.5044  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4946(0.4946) 


Epoch 5 - avg_train_loss: 0.5444  avg_val_loss: 0.4432  time: 7s
Epoch 5 - Score: 0.4454


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3968(0.4432) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4453(0.4453) Grad: 1.6917  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3614(0.4142) Grad: 1.0634  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3494(0.3956) Grad: 1.1424  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4518(0.4518) 


Epoch 6 - avg_train_loss: 0.3956  avg_val_loss: 0.4092  time: 7s
Epoch 6 - Score: 0.4122
Epoch 6 - Save Best Score: 0.4122 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3081(0.4092) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4277(0.4277) Grad: 1.0498  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4424(0.3740) Grad: 2.5381  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4214(0.3569) Grad: 3.4728  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4244(0.4244) 


Epoch 7 - avg_train_loss: 0.3569  avg_val_loss: 0.3634  time: 7s
Epoch 7 - Score: 0.3687
Epoch 7 - Save Best Score: 0.3687 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2476(0.3634) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3410(0.3410) Grad: 2.3506  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2685(0.3144) Grad: 2.9338  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2485(0.3104) Grad: 2.5348  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3950(0.3950) 


Epoch 8 - avg_train_loss: 0.3104  avg_val_loss: 0.3568  time: 7s
Epoch 8 - Score: 0.3612
Epoch 8 - Save Best Score: 0.3612 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2349(0.3568) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2610(0.2610) Grad: 2.1547  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2770(0.2690) Grad: 1.1128  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3099(0.2657) Grad: 2.9397  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3581(0.3581) 


Epoch 9 - avg_train_loss: 0.2657  avg_val_loss: 0.3033  time: 7s
Epoch 9 - Score: 0.3085
Epoch 9 - Save Best Score: 0.3085 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1992(0.3033) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3293(0.3293) Grad: 2.7001  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2222(0.2589) Grad: 0.6477  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1829(0.2470) Grad: 0.8888  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3673(0.3673) 


Epoch 10 - avg_train_loss: 0.2470  avg_val_loss: 0.3124  time: 7s
Epoch 10 - Score: 0.3183


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1921(0.3124) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3071(0.3071) Grad: 0.5925  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2776(0.2369) Grad: 1.3581  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2078(0.2360) Grad: 2.5744  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3311(0.3311) 


Epoch 11 - avg_train_loss: 0.2360  avg_val_loss: 0.2954  time: 7s
Epoch 11 - Score: 0.3017
Epoch 11 - Save Best Score: 0.3017 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1599(0.2954) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1623(0.1623) Grad: 1.0275  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2265(0.2202) Grad: 0.8177  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2689(0.2320) Grad: 1.1467  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3151(0.3151) 


Epoch 12 - avg_train_loss: 0.2320  avg_val_loss: 0.2979  time: 7s
Epoch 12 - Score: 0.2994
Epoch 12 - Save Best Score: 0.2994 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2320(0.2979) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2690(0.2690) Grad: 0.7114  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2256(0.2234) Grad: 1.5105  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1658(0.2302) Grad: 1.7127  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3398(0.3398) 


Epoch 13 - avg_train_loss: 0.2302  avg_val_loss: 0.3197  time: 7s
Epoch 13 - Score: 0.3202


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3165(0.3197) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1644(0.1644) Grad: 0.7249  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2163(0.2524) Grad: 1.6266  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1664(0.2231) Grad: 3.5570  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2631(0.2631) 


Epoch 14 - avg_train_loss: 0.2231  avg_val_loss: 0.2554  time: 7s
Epoch 14 - Score: 0.2582
Epoch 14 - Save Best Score: 0.2582 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1736(0.2554) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2221(0.2221) Grad: 1.1285  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2379(0.1775) Grad: 0.9677  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1093(0.1546) Grad: 1.3518  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2632(0.2632) 


Epoch 15 - avg_train_loss: 0.1546  avg_val_loss: 0.2548  time: 7s
Epoch 15 - Score: 0.2586


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1596(0.2548) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1585(0.1585) Grad: 0.6750  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1476(0.1398) Grad: 1.5060  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1314(0.1442) Grad: 0.7414  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2679(0.2679) 


Epoch 16 - avg_train_loss: 0.1442  avg_val_loss: 0.2491  time: 7s
Epoch 16 - Score: 0.2521
Epoch 16 - Save Best Score: 0.2521 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1621(0.2491) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1251(0.1251) Grad: 0.9598  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1121(0.1276) Grad: 1.3460  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1117(0.1386) Grad: 1.4497  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2440(0.2440) 


Epoch 17 - avg_train_loss: 0.1386  avg_val_loss: 0.2388  time: 7s
Epoch 17 - Score: 0.2406
Epoch 17 - Save Best Score: 0.2406 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1758(0.2388) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1081(0.1081) Grad: 2.1726  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1234(0.1285) Grad: 0.9015  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2190(0.1438) Grad: 2.8283  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2461(0.2461) 


Epoch 18 - avg_train_loss: 0.1438  avg_val_loss: 0.2453  time: 7s
Epoch 18 - Score: 0.2478


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1734(0.2453) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1155(0.1155) Grad: 2.7252  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2337(0.1678) Grad: 4.3308  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1300(0.1698) Grad: 3.2888  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2945(0.2945) 


Epoch 19 - avg_train_loss: 0.1698  avg_val_loss: 0.2870  time: 7s
Epoch 19 - Score: 0.2928


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1637(0.2870) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1719(0.1719) Grad: 4.1477  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1397(0.1640) Grad: 3.3084  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1153(0.1511) Grad: 3.5910  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2607(0.2607) 


Epoch 20 - avg_train_loss: 0.1511  avg_val_loss: 0.2415  time: 7s
Epoch 20 - Score: 0.2436


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1713(0.2415) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1149(0.1149) Grad: 1.8971  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1051(0.1179) Grad: 1.9664  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1084(0.1193) Grad: 1.1797  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2401(0.2401) 


Epoch 21 - avg_train_loss: 0.1193  avg_val_loss: 0.2380  time: 7s
Epoch 21 - Score: 0.2417


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1507(0.2380) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0601(0.0601) Grad: 0.5077  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1396(0.1015) Grad: 0.8737  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0614(0.1059) Grad: 1.0802  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2475(0.2475) 


Epoch 22 - avg_train_loss: 0.1059  avg_val_loss: 0.2400  time: 7s
Epoch 22 - Score: 0.2453


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1314(0.2400) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0908(0.0908) Grad: 0.8750  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0769(0.0981) Grad: 0.8357  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0883(0.1033) Grad: 2.5754  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2247(0.2247) 


Epoch 23 - avg_train_loss: 0.1033  avg_val_loss: 0.2279  time: 7s
Epoch 23 - Score: 0.2314
Epoch 23 - Save Best Score: 0.2314 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1480(0.2279) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1481(0.1481) Grad: 2.7652  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0657(0.1001) Grad: 1.0033  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0777(0.0981) Grad: 1.0722  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2284(0.2284) 


Epoch 24 - avg_train_loss: 0.0981  avg_val_loss: 0.2354  time: 7s
Epoch 24 - Score: 0.2392


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1539(0.2354) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1404(0.1404) Grad: 1.9202  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1018(0.0957) Grad: 0.7575  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0875(0.0911) Grad: 1.1685  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2342(0.2342) 


Epoch 25 - avg_train_loss: 0.0911  avg_val_loss: 0.2413  time: 7s
Epoch 25 - Score: 0.2479


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1301(0.2413) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0953(0.0953) Grad: 0.6948  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0675(0.0886) Grad: 0.7126  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1115(0.1041) Grad: 1.0254  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2446(0.2446) 


Epoch 26 - avg_train_loss: 0.1041  avg_val_loss: 0.2423  time: 7s
Epoch 26 - Score: 0.2446


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1717(0.2423) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1069(0.1069) Grad: 1.2169  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0801(0.1051) Grad: 0.5670  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0642(0.0942) Grad: 1.7982  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2246(0.2246) 


Epoch 27 - avg_train_loss: 0.0942  avg_val_loss: 0.2386  time: 7s
Epoch 27 - Score: 0.2410


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1834(0.2386) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0760(0.0760) Grad: 0.9382  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0775(0.0703) Grad: 0.7075  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0580(0.0763) Grad: 0.5234  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2288(0.2288) 


Epoch 28 - avg_train_loss: 0.0763  avg_val_loss: 0.2384  time: 7s
Epoch 28 - Score: 0.2423


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1581(0.2384) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0343(0.0343) Grad: 0.6371  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0533(0.0714) Grad: 1.4447  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0722(0.0742) Grad: 0.8793  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2335(0.2335) 


Epoch 29 - avg_train_loss: 0.0742  avg_val_loss: 0.2396  time: 7s
Epoch 29 - Score: 0.2441


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1485(0.2396) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0624(0.0624) Grad: 0.6837  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1026(0.0846) Grad: 1.2118  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0830(0.0836) Grad: 1.3377  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2370(0.2370) 


Epoch 30 - avg_train_loss: 0.0836  avg_val_loss: 0.2541  time: 7s
Epoch 30 - Score: 0.2561


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2083(0.2541) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0795(0.0795) Grad: 4.2229  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1148(0.0917) Grad: 4.1385  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1142(0.1034) Grad: 2.8192  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2349(0.2349) 


Epoch 31 - avg_train_loss: 0.1034  avg_val_loss: 0.2297  time: 7s
Epoch 31 - Score: 0.2317


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1640(0.2297) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1094(0.1094) Grad: 2.2007  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0900(0.1013) Grad: 2.0141  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1066(0.0991) Grad: 3.7700  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2530(0.2530) 


Epoch 32 - avg_train_loss: 0.0991  avg_val_loss: 0.2499  time: 7s
Epoch 32 - Score: 0.2532


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1632(0.2499) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1035(0.1035) Grad: 4.0217  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1293(0.0876) Grad: 3.3861  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0822(0.0871) Grad: 1.2615  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2295(0.2295) 


Epoch 33 - avg_train_loss: 0.0871  avg_val_loss: 0.2420  time: 7s
Epoch 33 - Score: 0.2464


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1574(0.2420) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0644(0.0644) Grad: 1.4767  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0651(0.0608) Grad: 1.5145  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1153(0.0652) Grad: 0.5300  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2249(0.2249) 


Epoch 34 - avg_train_loss: 0.0652  avg_val_loss: 0.2341  time: 7s
Epoch 34 - Score: 0.2374


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1620(0.2341) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0607(0.0607) Grad: 0.7248  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0671(0.0671) Grad: 0.6339  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0596(0.0640) Grad: 0.7640  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2263(0.2263) 


Epoch 35 - avg_train_loss: 0.0640  avg_val_loss: 0.2337  time: 7s
Epoch 35 - Score: 0.2374


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1552(0.2337) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0627(0.0627) Grad: 0.6466  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0664(0.0630) Grad: 1.1432  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0847(0.0761) Grad: 3.2333  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2365(0.2365) 


Epoch 36 - avg_train_loss: 0.0761  avg_val_loss: 0.2423  time: 7s
Epoch 36 - Score: 0.2462


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1572(0.2423) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0665(0.0665) Grad: 3.4398  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1024(0.0918) Grad: 3.2178  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1165(0.0948) Grad: 1.9497  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2417(0.2417) 


Epoch 37 - avg_train_loss: 0.0948  avg_val_loss: 0.2458  time: 7s
Epoch 37 - Score: 0.2485


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1743(0.2458) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1004(0.1004) Grad: 4.3464  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0952(0.0961) Grad: 2.0865  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0816(0.0871) Grad: 2.2153  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2344(0.2344) 


Epoch 38 - avg_train_loss: 0.0871  avg_val_loss: 0.2353  time: 7s
Epoch 38 - Score: 0.2374


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1710(0.2353) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0705(0.0705) Grad: 0.6311  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0659(0.0622) Grad: 1.2167  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0550(0.0654) Grad: 1.1546  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2476(0.2476) 


Epoch 39 - avg_train_loss: 0.0654  avg_val_loss: 0.2410  time: 7s
Epoch 39 - Score: 0.2443


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1547(0.2410) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1081(0.1081) Grad: 0.4822  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0581(0.0590) Grad: 1.5225  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0574(0.0553) Grad: 0.5873  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2413(0.2413) 


Epoch 40 - avg_train_loss: 0.0553  avg_val_loss: 0.2411  time: 7s
Epoch 40 - Score: 0.2454


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1465(0.2411) 


Score: 0.2314
Score: 0.2081


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.2037(3.2037) Grad: 3.1835  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7692(1.6313) Grad: 3.6328  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6223(1.2255) Grad: 1.1107  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6769(0.6769) 


Epoch 1 - avg_train_loss: 1.2255  avg_val_loss: 0.6152  time: 7s
Epoch 1 - Score: 0.6175
Epoch 1 - Save Best Score: 0.6175 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6024(0.6152) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.6479(0.6479) Grad: 0.9300  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6601(0.6236) Grad: 0.4729  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5152(0.5996) Grad: 0.5232  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5878(0.5878) 


Epoch 2 - avg_train_loss: 0.5996  avg_val_loss: 0.5135  time: 7s
Epoch 2 - Score: 0.5179
Epoch 2 - Save Best Score: 0.5179 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4086(0.5135) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4519(0.4519) Grad: 3.5967  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4481(0.5139) Grad: 1.9921  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6121(0.5068) Grad: 1.9521  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5028(0.5028) 


Epoch 3 - avg_train_loss: 0.5068  avg_val_loss: 0.4189  time: 7s
Epoch 3 - Score: 0.4260
Epoch 3 - Save Best Score: 0.4260 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2941(0.4189) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5108(0.5108) Grad: 2.1913  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4462(0.4597) Grad: 1.7215  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4619(0.4451) Grad: 3.4639  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4909(0.4909) 


Epoch 4 - avg_train_loss: 0.4451  avg_val_loss: 0.4101  time: 7s
Epoch 4 - Score: 0.4167
Epoch 4 - Save Best Score: 0.4167 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2950(0.4101) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3894(0.3894) Grad: 1.3175  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5010(0.4305) Grad: 1.2580  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3634(0.4298) Grad: 2.5549  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4299(0.4299) 


Epoch 5 - avg_train_loss: 0.4298  avg_val_loss: 0.3601  time: 7s
Epoch 5 - Score: 0.3661
Epoch 5 - Save Best Score: 0.3661 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2481(0.3601) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3611(0.3611) Grad: 2.2566  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3457(0.4495) Grad: 0.7584  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3515(0.4393) Grad: 0.6924  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3846(0.3846) 


Epoch 6 - avg_train_loss: 0.4393  avg_val_loss: 0.3401  time: 7s
Epoch 6 - Score: 0.3425
Epoch 6 - Save Best Score: 0.3425 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2741(0.3401) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3139(0.3139) Grad: 1.1757  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2886(0.3449) Grad: 1.0564  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4087(0.3562) Grad: 2.1138  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3963(0.3963) 


Epoch 7 - avg_train_loss: 0.3562  avg_val_loss: 0.3872  time: 7s
Epoch 7 - Score: 0.3873


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3870(0.3872) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4619(0.4619) Grad: 2.6692  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2307(0.3315) Grad: 2.0645  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2950(0.3339) Grad: 1.7491  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3514(0.3514) 


Epoch 8 - avg_train_loss: 0.3339  avg_val_loss: 0.3036  time: 7s
Epoch 8 - Score: 0.3069
Epoch 8 - Save Best Score: 0.3069 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2269(0.3036) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3060(0.3060) Grad: 1.0929  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2230(0.2496) Grad: 2.1155  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2772(0.2713) Grad: 0.9798  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3195(0.3195) 


Epoch 9 - avg_train_loss: 0.2713  avg_val_loss: 0.2880  time: 7s
Epoch 9 - Score: 0.2893
Epoch 9 - Save Best Score: 0.2893 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2517(0.2880) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2432(0.2432) Grad: 1.0198  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1912(0.2469) Grad: 0.7534  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2270(0.2571) Grad: 1.5137  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3161(0.3161) 


Epoch 10 - avg_train_loss: 0.2571  avg_val_loss: 0.2843  time: 7s
Epoch 10 - Score: 0.2857
Epoch 10 - Save Best Score: 0.2857 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2441(0.2843) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2545(0.2545) Grad: 0.5217  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2474(0.2543) Grad: 0.5683  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1555(0.2491) Grad: 0.8833  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3105(0.3105) 


Epoch 11 - avg_train_loss: 0.2491  avg_val_loss: 0.2781  time: 7s
Epoch 11 - Score: 0.2797
Epoch 11 - Save Best Score: 0.2797 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2285(0.2781) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2968(0.2968) Grad: 1.8454  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3158(0.2541) Grad: 0.8718  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2966(0.2570) Grad: 2.8092  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3478(0.3478) 


Epoch 12 - avg_train_loss: 0.2570  avg_val_loss: 0.2983  time: 7s
Epoch 12 - Score: 0.3015


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2375(0.2983) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2738(0.2738) Grad: 3.0625  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2536(0.2555) Grad: 0.9325  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2418(0.2442) Grad: 0.8089  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3114(0.3114) 


Epoch 13 - avg_train_loss: 0.2442  avg_val_loss: 0.2743  time: 7s
Epoch 13 - Score: 0.2769
Epoch 13 - Save Best Score: 0.2769 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2058(0.2743) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1776(0.1776) Grad: 1.0378  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2062(0.2190) Grad: 3.4519  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2180(0.2148) Grad: 0.8576  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2695(0.2695) 


Epoch 14 - avg_train_loss: 0.2148  avg_val_loss: 0.2448  time: 7s
Epoch 14 - Score: 0.2457
Epoch 14 - Save Best Score: 0.2457 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2282(0.2448) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1869(0.1869) Grad: 0.4229  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2230(0.1667) Grad: 1.7010  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2183(0.1789) Grad: 2.5606  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2317(0.2317) 


Epoch 15 - avg_train_loss: 0.1789  avg_val_loss: 0.2189  time: 7s
Epoch 15 - Score: 0.2196
Epoch 15 - Save Best Score: 0.2196 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1808(0.2189) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1204(0.1204) Grad: 0.5383  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2050(0.1551) Grad: 2.1436  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1304(0.1532) Grad: 0.7639  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2426(0.2426) 


Epoch 16 - avg_train_loss: 0.1532  avg_val_loss: 0.2229  time: 7s
Epoch 16 - Score: 0.2242


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1754(0.2229) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1625(0.1625) Grad: 1.0016  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2335(0.1724) Grad: 1.1833  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1788(0.1559) Grad: 1.9514  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2290(0.2290) 


Epoch 17 - avg_train_loss: 0.1559  avg_val_loss: 0.2145  time: 7s
Epoch 17 - Score: 0.2153
Epoch 17 - Save Best Score: 0.2153 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1744(0.2145) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1963(0.1963) Grad: 1.5900  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1695(0.1649) Grad: 1.3875  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1790(0.1644) Grad: 1.1749  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2306(0.2306) 


Epoch 18 - avg_train_loss: 0.1644  avg_val_loss: 0.2441  time: 7s
Epoch 18 - Score: 0.2444


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2594(0.2441) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1685(0.1685) Grad: 2.8051  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2162(0.1608) Grad: 3.0391  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1785(0.1739) Grad: 1.7342  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2494(0.2494) 


Epoch 19 - avg_train_loss: 0.1739  avg_val_loss: 0.2554  time: 7s
Epoch 19 - Score: 0.2566


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2089(0.2554) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1856(0.1856) Grad: 3.7446  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2695(0.1927) Grad: 3.6146  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2347(0.1853) Grad: 0.9604  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2484(0.2484) 


Epoch 20 - avg_train_loss: 0.1853  avg_val_loss: 0.2306  time: 7s
Epoch 20 - Score: 0.2312


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2056(0.2306) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1337(0.1337) Grad: 1.2066  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1410(0.1331) Grad: 1.4226  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1803(0.1292) Grad: 0.8979  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2086(0.2086) 


Epoch 21 - avg_train_loss: 0.1292  avg_val_loss: 0.1990  time: 7s
Epoch 21 - Score: 0.1997
Epoch 21 - Save Best Score: 0.1997 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1632(0.1990) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0861(0.0861) Grad: 0.9508  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1149(0.1204) Grad: 0.6224  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1466(0.1174) Grad: 1.1379  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2076(0.2076) 


Epoch 22 - avg_train_loss: 0.1174  avg_val_loss: 0.2093  time: 7s
Epoch 22 - Score: 0.2093


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2176(0.2093) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1021(0.1021) Grad: 0.8998  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1060(0.1215) Grad: 2.1666  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1227(0.1134) Grad: 1.3742  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1940(0.1940) 


Epoch 23 - avg_train_loss: 0.1134  avg_val_loss: 0.1915  time: 7s
Epoch 23 - Score: 0.1915
Epoch 23 - Save Best Score: 0.1915 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1925(0.1915) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0811(0.0811) Grad: 0.7169  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0917(0.1126) Grad: 0.5984  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1240(0.1139) Grad: 2.2585  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2035(0.2035) 


Epoch 24 - avg_train_loss: 0.1139  avg_val_loss: 0.2043  time: 7s
Epoch 24 - Score: 0.2052


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1654(0.2043) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1255(0.1255) Grad: 0.6689  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1401(0.1199) Grad: 1.7886  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2347(0.1320) Grad: 3.9229  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2364(0.2364) 


Epoch 25 - avg_train_loss: 0.1320  avg_val_loss: 0.2312  time: 7s
Epoch 25 - Score: 0.2320


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1881(0.2312) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1243(0.1243) Grad: 2.5999  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1489(0.1245) Grad: 1.7262  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1662(0.1196) Grad: 1.0638  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2388(0.2388) 


Epoch 26 - avg_train_loss: 0.1196  avg_val_loss: 0.2171  time: 7s
Epoch 26 - Score: 0.2191


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1548(0.2171) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0969(0.0969) Grad: 0.7452  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0802(0.0979) Grad: 0.9819  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1056(0.0943) Grad: 1.8659  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2129(0.2129) 


Epoch 27 - avg_train_loss: 0.0943  avg_val_loss: 0.1907  time: 7s
Epoch 27 - Score: 0.1919


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1544(0.1907) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0883(0.0883) Grad: 3.2591  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0933(0.0906) Grad: 2.9143  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0590(0.0876) Grad: 0.8345  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1985(0.1985) 


Epoch 28 - avg_train_loss: 0.0876  avg_val_loss: 0.1938  time: 7s
Epoch 28 - Score: 0.1939


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1788(0.1938) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0622(0.0622) Grad: 2.7599  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0843(0.0805) Grad: 3.5293  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1014(0.0896) Grad: 2.1956  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1995(0.1995) 


Epoch 29 - avg_train_loss: 0.0896  avg_val_loss: 0.1781  time: 7s
Epoch 29 - Score: 0.1805
Epoch 29 - Save Best Score: 0.1805 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1150(0.1781) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0536(0.0536) Grad: 0.9217  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0986(0.0923) Grad: 0.4414  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1136(0.1001) Grad: 3.9494  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2107(0.2107) 


Epoch 30 - avg_train_loss: 0.1001  avg_val_loss: 0.2069  time: 7s
Epoch 30 - Score: 0.2075


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1736(0.2069) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1316(0.1316) Grad: 3.4646  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1197(0.1131) Grad: 3.1777  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1286(0.1212) Grad: 2.8033  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2339(0.2339) 


Epoch 31 - avg_train_loss: 0.1212  avg_val_loss: 0.2137  time: 7s
Epoch 31 - Score: 0.2149


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1691(0.2137) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1067(0.1067) Grad: 1.5532  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0993(0.1013) Grad: 0.6485  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1136(0.1026) Grad: 2.2208  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2563(0.2563) 


Epoch 32 - avg_train_loss: 0.1026  avg_val_loss: 0.2120  time: 7s
Epoch 32 - Score: 0.2157


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1520(0.2120) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0879(0.0879) Grad: 1.1723  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0812(0.0746) Grad: 1.4441  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0735(0.0798) Grad: 1.0574  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2235(0.2235) 


Epoch 33 - avg_train_loss: 0.0798  avg_val_loss: 0.2070  time: 7s
Epoch 33 - Score: 0.2076


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1807(0.2070) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0722(0.0722) Grad: 0.7333  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1055(0.0713) Grad: 1.2413  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0872(0.0720) Grad: 0.5622  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2075(0.2075) 


Epoch 34 - avg_train_loss: 0.0720  avg_val_loss: 0.1996  time: 7s
Epoch 34 - Score: 0.2006


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1540(0.1996) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1030(0.1030) Grad: 1.4421  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0662(0.0751) Grad: 1.3957  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0682(0.0737) Grad: 1.8361  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2080(0.2080) 


Epoch 35 - avg_train_loss: 0.0737  avg_val_loss: 0.1905  time: 7s
Epoch 35 - Score: 0.1915


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1517(0.1905) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0525(0.0525) Grad: 0.8598  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0654(0.0686) Grad: 0.8485  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0721(0.0779) Grad: 1.9324  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2235(0.2235) 


Epoch 36 - avg_train_loss: 0.0779  avg_val_loss: 0.1906  time: 7s
Epoch 36 - Score: 0.1926


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1563(0.1906) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0861(0.0861) Grad: 2.7702  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0675(0.0836) Grad: 2.1406  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0963(0.0873) Grad: 1.7581  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2282(0.2282) 


Epoch 37 - avg_train_loss: 0.0873  avg_val_loss: 0.2096  time: 7s
Epoch 37 - Score: 0.2104


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1753(0.2096) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0721(0.0721) Grad: 2.3057  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0982(0.0846) Grad: 0.6191  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0731(0.0852) Grad: 2.8701  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2278(0.2278) 


Epoch 38 - avg_train_loss: 0.0852  avg_val_loss: 0.1951  time: 7s
Epoch 38 - Score: 0.1971


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1563(0.1951) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0753(0.0753) Grad: 2.7344  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0945(0.0755) Grad: 1.5016  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0460(0.0700) Grad: 1.7290  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2353(0.2353) 


Epoch 39 - avg_train_loss: 0.0700  avg_val_loss: 0.1874  time: 7s
Epoch 39 - Score: 0.1923


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1236(0.1874) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0404(0.0404) Grad: 0.6424  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0535(0.0541) Grad: 0.8009  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0958(0.0585) Grad: 1.4905  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2282(0.2282) 


Epoch 40 - avg_train_loss: 0.0585  avg_val_loss: 0.1915  time: 7s
Epoch 40 - Score: 0.1951


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1220(0.1915) 


Score: 0.1805


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.1983(3.1983) Grad: 3.2619  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7499(1.5560) Grad: 3.1747  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.7263(1.1633) Grad: 1.5949  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6664(0.6664) 


Epoch 1 - avg_train_loss: 1.1633  avg_val_loss: 0.6590  time: 7s
Epoch 1 - Score: 0.6593
Epoch 1 - Save Best Score: 0.6593 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6132(0.6590) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5438(0.5438) Grad: 1.9365  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5848(0.6023) Grad: 1.8911  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5724(0.5912) Grad: 0.8401  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5985(0.5985) 


Epoch 2 - avg_train_loss: 0.5912  avg_val_loss: 0.5759  time: 7s
Epoch 2 - Score: 0.5762
Epoch 2 - Save Best Score: 0.5762 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5533(0.5759) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5004(0.5004) Grad: 2.7174  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4096(0.5232) Grad: 1.5587  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5046(0.4893) Grad: 2.2701  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4790(0.4790) 


Epoch 3 - avg_train_loss: 0.4893  avg_val_loss: 0.4631  time: 7s
Epoch 3 - Score: 0.4633
Epoch 3 - Save Best Score: 0.4633 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4365(0.4631) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4918(0.4918) Grad: 1.7224  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4614(0.4263) Grad: 3.2626  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3856(0.4346) Grad: 2.0963  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4784(0.4784) 


Epoch 4 - avg_train_loss: 0.4346  avg_val_loss: 0.4585  time: 7s
Epoch 4 - Score: 0.4588
Epoch 4 - Save Best Score: 0.4588 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4332(0.4585) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3872(0.3872) Grad: 1.2474  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5339(0.4220) Grad: 3.9631  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4334(0.4264) Grad: 4.1152  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4194(0.4194) 


Epoch 5 - avg_train_loss: 0.4264  avg_val_loss: 0.3859  time: 7s
Epoch 5 - Score: 0.3873
Epoch 5 - Save Best Score: 0.3873 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3317(0.3859) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4912(0.4912) Grad: 2.6551  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5194(0.5260) Grad: 1.9648  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3723(0.4683) Grad: 2.0915  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4143(0.4143) 


Epoch 6 - avg_train_loss: 0.4683  avg_val_loss: 0.3886  time: 7s
Epoch 6 - Score: 0.3895


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3389(0.3886) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3732(0.3732) Grad: 0.8637  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4124(0.3617) Grad: 0.9847  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3251(0.3522) Grad: 0.9133  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4341(0.4341) 


Epoch 7 - avg_train_loss: 0.3522  avg_val_loss: 0.4076  time: 7s
Epoch 7 - Score: 0.4089


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3406(0.4076) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3320(0.3320) Grad: 2.4034  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2796(0.3078) Grad: 1.3063  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3166(0.3159) Grad: 0.8237  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3824(0.3824) 


Epoch 8 - avg_train_loss: 0.3159  avg_val_loss: 0.3368  time: 7s
Epoch 8 - Score: 0.3391
Epoch 8 - Save Best Score: 0.3391 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2894(0.3368) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2630(0.2630) Grad: 2.3057  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3514(0.2709) Grad: 0.6226  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2727(0.2735) Grad: 2.3883  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3183(0.3183) 


Epoch 9 - avg_train_loss: 0.2735  avg_val_loss: 0.2958  time: 7s
Epoch 9 - Score: 0.2965
Epoch 9 - Save Best Score: 0.2965 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2701(0.2958) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3491(0.3491) Grad: 2.9623  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2548(0.2621) Grad: 0.2766  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2681(0.2661) Grad: 2.1414  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3233(0.3233) 


Epoch 10 - avg_train_loss: 0.2661  avg_val_loss: 0.2937  time: 7s
Epoch 10 - Score: 0.2947
Epoch 10 - Save Best Score: 0.2947 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2696(0.2937) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2814(0.2814) Grad: 3.1709  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2991(0.2728) Grad: 2.9011  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2419(0.2634) Grad: 1.2047  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3160(0.3160) 


Epoch 11 - avg_train_loss: 0.2634  avg_val_loss: 0.2929  time: 7s
Epoch 11 - Score: 0.2935
Epoch 11 - Save Best Score: 0.2935 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2674(0.2929) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2179(0.2179) Grad: 0.6157  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1961(0.2503) Grad: 1.2925  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1676(0.2353) Grad: 1.7147  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2901(0.2901) 


Epoch 12 - avg_train_loss: 0.2353  avg_val_loss: 0.2588  time: 7s
Epoch 12 - Score: 0.2601
Epoch 12 - Save Best Score: 0.2601 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2301(0.2588) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2327(0.2327) Grad: 0.5602  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2788(0.2629) Grad: 4.3550  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2345(0.2562) Grad: 3.1038  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2919(0.2919) 


Epoch 13 - avg_train_loss: 0.2562  avg_val_loss: 0.2673  time: 7s
Epoch 13 - Score: 0.2681


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2558(0.2673) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2027(0.2027) Grad: 1.6502  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2443(0.1902) Grad: 2.0532  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2210(0.1941) Grad: 0.6717  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3364(0.3364) 


Epoch 14 - avg_train_loss: 0.1941  avg_val_loss: 0.2823  time: 7s
Epoch 14 - Score: 0.2861


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2232(0.2823) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1890(0.1890) Grad: 1.6950  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1285(0.1804) Grad: 2.0081  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1720(0.1783) Grad: 1.1391  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3334(0.3334) 


Epoch 15 - avg_train_loss: 0.1783  avg_val_loss: 0.2573  time: 7s
Epoch 15 - Score: 0.2653


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1873(0.2573) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1385(0.1385) Grad: 2.3947  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1223(0.1556) Grad: 1.3974  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2277(0.1519) Grad: 1.8862  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3124(0.3124) 


Epoch 16 - avg_train_loss: 0.1519  avg_val_loss: 0.2503  time: 7s
Epoch 16 - Score: 0.2557
Epoch 16 - Save Best Score: 0.2557 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2065(0.2503) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1421(0.1421) Grad: 0.9825  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1437(0.1432) Grad: 0.8199  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0896(0.1436) Grad: 1.1183  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2804(0.2804) 


Epoch 17 - avg_train_loss: 0.1436  avg_val_loss: 0.2266  time: 7s
Epoch 17 - Score: 0.2311
Epoch 17 - Save Best Score: 0.2311 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1804(0.2266) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0953(0.0953) Grad: 1.4181  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1391(0.1323) Grad: 2.4775  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1475(0.1379) Grad: 1.3459  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2976(0.2976) 


Epoch 18 - avg_train_loss: 0.1379  avg_val_loss: 0.2395  time: 7s
Epoch 18 - Score: 0.2445


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1910(0.2395) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1334(0.1334) Grad: 0.9636  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1261(0.1342) Grad: 1.2815  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1528(0.1434) Grad: 1.3560  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3059(0.3059) 


Epoch 19 - avg_train_loss: 0.1434  avg_val_loss: 0.2429  time: 7s
Epoch 19 - Score: 0.2486


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1941(0.2429) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1832(0.1832) Grad: 0.9234  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1207(0.1379) Grad: 1.8088  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1425(0.1405) Grad: 1.2934  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3495(0.3495) 


Epoch 20 - avg_train_loss: 0.1405  avg_val_loss: 0.2687  time: 7s
Epoch 20 - Score: 0.2777


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1785(0.2687) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1283(0.1283) Grad: 2.8575  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1245(0.1190) Grad: 1.3420  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0974(0.1152) Grad: 1.6581  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3292(0.3292) 


Epoch 21 - avg_train_loss: 0.1152  avg_val_loss: 0.2408  time: 7s
Epoch 21 - Score: 0.2521


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1737(0.2408) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0634(0.0634) Grad: 1.0584  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0762(0.0941) Grad: 0.9490  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1190(0.0927) Grad: 2.1975  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3163(0.3163) 


Epoch 22 - avg_train_loss: 0.0927  avg_val_loss: 0.2408  time: 7s
Epoch 22 - Score: 0.2494


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1613(0.2408) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1276(0.1276) Grad: 1.2853  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0854(0.0943) Grad: 1.3902  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1406(0.0938) Grad: 0.9368  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3047(0.3047) 


Epoch 23 - avg_train_loss: 0.0938  avg_val_loss: 0.2340  time: 7s
Epoch 23 - Score: 0.2416


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1694(0.2340) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0848(0.0848) Grad: 0.7812  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1109(0.1095) Grad: 0.9254  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0882(0.1068) Grad: 2.0040  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3083(0.3083) 


Epoch 24 - avg_train_loss: 0.1068  avg_val_loss: 0.2539  time: 7s
Epoch 24 - Score: 0.2581


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2186(0.2539) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0911(0.0911) Grad: 1.3200  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1243(0.1194) Grad: 3.5691  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1074(0.1301) Grad: 2.3342  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2811(0.2811) 


Epoch 25 - avg_train_loss: 0.1301  avg_val_loss: 0.2397  time: 7s
Epoch 25 - Score: 0.2429


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1727(0.2397) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1233(0.1233) Grad: 2.2353  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1357(0.1160) Grad: 2.1657  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1318(0.1217) Grad: 1.4479  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2548(0.2548) 


Epoch 26 - avg_train_loss: 0.1217  avg_val_loss: 0.2242  time: 7s
Epoch 26 - Score: 0.2257
Epoch 26 - Save Best Score: 0.2257 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1895(0.2242) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0829(0.0829) Grad: 1.4462  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1185(0.0956) Grad: 0.8190  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0911(0.0890) Grad: 1.9869  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2766(0.2766) 


Epoch 27 - avg_train_loss: 0.0890  avg_val_loss: 0.2248  time: 7s
Epoch 27 - Score: 0.2291


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1703(0.2248) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0834(0.0834) Grad: 0.6940  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0501(0.0670) Grad: 1.3556  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0656(0.0695) Grad: 0.5228  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2757(0.2757) 


Epoch 28 - avg_train_loss: 0.0695  avg_val_loss: 0.2237  time: 7s
Epoch 28 - Score: 0.2285


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1537(0.2237) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0484(0.0484) Grad: 0.9943  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0678(0.0772) Grad: 2.5870  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0503(0.0726) Grad: 2.6831  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2865(0.2865) 


Epoch 29 - avg_train_loss: 0.0726  avg_val_loss: 0.2302  time: 7s
Epoch 29 - Score: 0.2353


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1679(0.2302) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0481(0.0481) Grad: 1.8779  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0977(0.0765) Grad: 2.7395  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0713(0.0825) Grad: 1.8531  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2782(0.2782) 


Epoch 30 - avg_train_loss: 0.0825  avg_val_loss: 0.2223  time: 7s
Epoch 30 - Score: 0.2273


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1720(0.2223) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1170(0.1170) Grad: 4.3229  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0981(0.0876) Grad: 2.3099  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0728(0.0862) Grad: 3.7456  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2796(0.2796) 


Epoch 31 - avg_train_loss: 0.0862  avg_val_loss: 0.2259  time: 7s
Epoch 31 - Score: 0.2305


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1740(0.2259) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0857(0.0857) Grad: 0.5351  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1058(0.0915) Grad: 1.1491  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0782(0.0868) Grad: 0.8100  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2806(0.2806) 


Epoch 32 - avg_train_loss: 0.0868  avg_val_loss: 0.2260  time: 7s
Epoch 32 - Score: 0.2307


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1745(0.2260) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0698(0.0698) Grad: 0.8262  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0610(0.0656) Grad: 2.1375  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0651(0.0643) Grad: 1.3161  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2630(0.2630) 


Epoch 33 - avg_train_loss: 0.0643  avg_val_loss: 0.2079  time: 7s
Epoch 33 - Score: 0.2130
Epoch 33 - Save Best Score: 0.2130 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1733(0.2079) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0541(0.0541) Grad: 0.6294  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0478(0.0580) Grad: 1.2020  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0566(0.0575) Grad: 0.6613  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2797(0.2797) 


Epoch 34 - avg_train_loss: 0.0575  avg_val_loss: 0.2136  time: 7s
Epoch 34 - Score: 0.2208


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1701(0.2136) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0549(0.0549) Grad: 0.7708  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0564(0.0565) Grad: 0.8137  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0742(0.0557) Grad: 2.1298  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2689(0.2689) 


Epoch 35 - avg_train_loss: 0.0557  avg_val_loss: 0.2135  time: 7s
Epoch 35 - Score: 0.2185


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1660(0.2135) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0573(0.0573) Grad: 2.7072  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0563(0.0601) Grad: 0.6140  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0741(0.0644) Grad: 2.1505  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2633(0.2633) 


Epoch 36 - avg_train_loss: 0.0644  avg_val_loss: 0.2158  time: 7s
Epoch 36 - Score: 0.2198


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1570(0.2158) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0622(0.0622) Grad: 0.5711  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0747(0.0790) Grad: 0.8501  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0785(0.0825) Grad: 1.0762  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2557(0.2557) 


Epoch 37 - avg_train_loss: 0.0825  avg_val_loss: 0.2137  time: 7s
Epoch 37 - Score: 0.2169


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1627(0.2137) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0752(0.0752) Grad: 3.8719  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0623(0.0813) Grad: 0.5363  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0734(0.0771) Grad: 2.3029  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2705(0.2705) 


Epoch 38 - avg_train_loss: 0.0771  avg_val_loss: 0.2179  time: 7s
Epoch 38 - Score: 0.2223


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1814(0.2179) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0784(0.0784) Grad: 2.2927  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0536(0.0620) Grad: 0.8812  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0434(0.0610) Grad: 0.5236  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2581(0.2581) 


Epoch 39 - avg_train_loss: 0.0610  avg_val_loss: 0.2095  time: 7s
Epoch 39 - Score: 0.2138


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1528(0.2095) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0477(0.0477) Grad: 1.8950  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0425(0.0530) Grad: 0.8270  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0382(0.0509) Grad: 1.2302  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2652(0.2652) 


Epoch 40 - avg_train_loss: 0.0509  avg_val_loss: 0.2130  time: 7s
Epoch 40 - Score: 0.2175


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1678(0.2130) 


Score: 0.2130


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 8s) Loss: 3.3412(3.3412) Grad: 3.3284  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7245(1.4965) Grad: 2.6430  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6710(1.1295) Grad: 1.8382  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6642(0.6642) 


Epoch 1 - avg_train_loss: 1.1295  avg_val_loss: 0.6664  time: 7s
Epoch 1 - Score: 0.6669
Epoch 1 - Save Best Score: 0.6669 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6152(0.6664) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.8019(0.8019) Grad: 0.3031  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5255(0.5909) Grad: 4.4055  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5050(0.5867) Grad: 0.4937  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6705(0.6705) 


Epoch 2 - avg_train_loss: 0.5867  avg_val_loss: 0.6679  time: 7s
Epoch 2 - Score: 0.6684


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6135(0.6679) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5018(0.5018) Grad: 0.2620  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5510(0.5658) Grad: 1.3346  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6526(0.5620) Grad: 0.7328  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5725(0.5725) 


Epoch 3 - avg_train_loss: 0.5620  avg_val_loss: 0.5712  time: 7s
Epoch 3 - Score: 0.5733
Epoch 3 - Save Best Score: 0.5733 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4702(0.5712) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5331(0.5331) Grad: 1.4744  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3890(0.4928) Grad: 1.7018  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4733(0.4960) Grad: 1.4667  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5604(0.5604) 


Epoch 4 - avg_train_loss: 0.4960  avg_val_loss: 0.5659  time: 7s
Epoch 4 - Score: 0.5679
Epoch 4 - Save Best Score: 0.5679 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4733(0.5659) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4103(0.4103) Grad: 0.7954  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4940(0.4467) Grad: 1.5844  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3756(0.4385) Grad: 1.1132  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4946(0.4946) 


Epoch 5 - avg_train_loss: 0.4385  avg_val_loss: 0.4932  time: 7s
Epoch 5 - Score: 0.4950
Epoch 5 - Save Best Score: 0.4950 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4037(0.4932) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3057(0.3057) Grad: 2.1131  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4381(0.4033) Grad: 2.4830  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3845(0.3985) Grad: 0.8967  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4690(0.4690) 


Epoch 6 - avg_train_loss: 0.3985  avg_val_loss: 0.4244  time: 7s
Epoch 6 - Score: 0.4277
Epoch 6 - Save Best Score: 0.4277 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3160(0.4244) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2624(0.2624) Grad: 1.4019  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3601(0.3526) Grad: 1.2465  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2982(0.3278) Grad: 0.7389  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4379(0.4379) 


Epoch 7 - avg_train_loss: 0.3278  avg_val_loss: 0.3971  time: 7s
Epoch 7 - Score: 0.3990
Epoch 7 - Save Best Score: 0.3990 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3300(0.3971) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2719(0.2719) Grad: 1.5085  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2614(0.3019) Grad: 3.4117  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2584(0.3082) Grad: 1.3104  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5045(0.5045) 


Epoch 8 - avg_train_loss: 0.3082  avg_val_loss: 0.4354  time: 7s
Epoch 8 - Score: 0.4393


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3842(0.4354) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3296(0.3296) Grad: 1.6465  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3427(0.2923) Grad: 1.5256  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2084(0.2673) Grad: 1.2020  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3897(0.3897) 


Epoch 9 - avg_train_loss: 0.2673  avg_val_loss: 0.3554  time: 7s
Epoch 9 - Score: 0.3584
Epoch 9 - Save Best Score: 0.3584 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2558(0.3554) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1667(0.1667) Grad: 1.1190  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2023(0.2355) Grad: 0.6377  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3182(0.2341) Grad: 1.5570  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3760(0.3760) 


Epoch 10 - avg_train_loss: 0.2341  avg_val_loss: 0.3463  time: 7s
Epoch 10 - Score: 0.3487
Epoch 10 - Save Best Score: 0.3487 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2588(0.3463) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2764(0.2764) Grad: 1.3340  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2250(0.2607) Grad: 1.1311  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1904(0.2357) Grad: 0.8214  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3536(0.3536) 


Epoch 11 - avg_train_loss: 0.2357  avg_val_loss: 0.3352  time: 7s
Epoch 11 - Score: 0.3372
Epoch 11 - Save Best Score: 0.3372 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2554(0.3352) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2097(0.2097) Grad: 1.6150  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2251(0.2117) Grad: 3.6611  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2334(0.2198) Grad: 1.8921  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3681(0.3681) 


Epoch 12 - avg_train_loss: 0.2198  avg_val_loss: 0.3484  time: 7s
Epoch 12 - Score: 0.3494


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2918(0.3484) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1988(0.1988) Grad: 3.2311  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1961(0.1934) Grad: 4.8077  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1634(0.2160) Grad: 0.9458  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3611(0.3611) 


Epoch 13 - avg_train_loss: 0.2160  avg_val_loss: 0.3511  time: 7s
Epoch 13 - Score: 0.3520


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2958(0.3511) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3294(0.3294) Grad: 3.4461  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2307(0.1861) Grad: 1.3397  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1277(0.1964) Grad: 3.0203  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3097(0.3097) 


Epoch 14 - avg_train_loss: 0.1964  avg_val_loss: 0.2895  time: 7s
Epoch 14 - Score: 0.2917
Epoch 14 - Save Best Score: 0.2917 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2118(0.2895) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2205(0.2205) Grad: 1.6120  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1132(0.1685) Grad: 1.4449  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1776(0.1577) Grad: 3.1591  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3184(0.3184) 


Epoch 15 - avg_train_loss: 0.1577  avg_val_loss: 0.2870  time: 7s
Epoch 15 - Score: 0.2891
Epoch 15 - Save Best Score: 0.2891 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2185(0.2870) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1573(0.1573) Grad: 0.6237  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0972(0.1415) Grad: 1.3359  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2163(0.1342) Grad: 1.5981  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3207(0.3207) 


Epoch 16 - avg_train_loss: 0.1342  avg_val_loss: 0.2890  time: 7s
Epoch 16 - Score: 0.2912


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2177(0.2890) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1581(0.1581) Grad: 1.3970  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1145(0.1186) Grad: 2.1490  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2537(0.1286) Grad: 0.7550  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2902(0.2902) 


Epoch 17 - avg_train_loss: 0.1286  avg_val_loss: 0.2772  time: 7s
Epoch 17 - Score: 0.2787
Epoch 17 - Save Best Score: 0.2787 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2119(0.2772) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1654(0.1654) Grad: 1.3061  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1816(0.1358) Grad: 3.5815  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1330(0.1412) Grad: 0.8480  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3026(0.3026) 


Epoch 18 - avg_train_loss: 0.1412  avg_val_loss: 0.2832  time: 7s
Epoch 18 - Score: 0.2855


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2022(0.2832) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1182(0.1182) Grad: 2.1168  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1616(0.1403) Grad: 1.4458  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1442(0.1464) Grad: 0.6166  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3210(0.3210) 


Epoch 19 - avg_train_loss: 0.1464  avg_val_loss: 0.2812  time: 7s
Epoch 19 - Score: 0.2845


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1970(0.2812) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1072(0.1072) Grad: 2.1682  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1544(0.1536) Grad: 1.4510  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1475(0.1490) Grad: 3.3747  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3043(0.3043) 


Epoch 20 - avg_train_loss: 0.1490  avg_val_loss: 0.2740  time: 7s
Epoch 20 - Score: 0.2773
Epoch 20 - Save Best Score: 0.2773 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1817(0.2740) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1176(0.1176) Grad: 1.1277  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0860(0.1168) Grad: 0.6590  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1194(0.1119) Grad: 1.3704  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2889(0.2889) 


Epoch 21 - avg_train_loss: 0.1119  avg_val_loss: 0.2622  time: 7s
Epoch 21 - Score: 0.2649
Epoch 21 - Save Best Score: 0.2649 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1798(0.2622) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0784(0.0784) Grad: 0.5915  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0695(0.0950) Grad: 0.5078  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0758(0.0985) Grad: 1.2170  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2884(0.2884) 


Epoch 22 - avg_train_loss: 0.0985  avg_val_loss: 0.2580  time: 7s
Epoch 22 - Score: 0.2610
Epoch 22 - Save Best Score: 0.2610 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1762(0.2580) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0564(0.0564) Grad: 0.5540  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0869(0.0899) Grad: 3.0739  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0797(0.1005) Grad: 2.9458  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2668(0.2668) 


Epoch 23 - avg_train_loss: 0.1005  avg_val_loss: 0.2543  time: 7s
Epoch 23 - Score: 0.2553
Epoch 23 - Save Best Score: 0.2553 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2042(0.2543) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0660(0.0660) Grad: 1.6339  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0910(0.1103) Grad: 0.7385  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0763(0.1059) Grad: 1.3052  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2881(0.2881) 


Epoch 24 - avg_train_loss: 0.1059  avg_val_loss: 0.2707  time: 7s
Epoch 24 - Score: 0.2724


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2039(0.2707) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0819(0.0819) Grad: 2.9365  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0810(0.1031) Grad: 0.4834  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1018(0.1054) Grad: 1.1885  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3048(0.3048) 


Epoch 25 - avg_train_loss: 0.1054  avg_val_loss: 0.2880  time: 7s
Epoch 25 - Score: 0.2895


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2225(0.2880) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0807(0.0807) Grad: 1.1860  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1328(0.1001) Grad: 2.9812  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0894(0.1046) Grad: 1.4165  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2844(0.2844) 


Epoch 26 - avg_train_loss: 0.1046  avg_val_loss: 0.2641  time: 7s
Epoch 26 - Score: 0.2651


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2186(0.2641) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0786(0.0786) Grad: 0.8057  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0761(0.0867) Grad: 2.6054  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0766(0.0846) Grad: 0.9077  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2785(0.2785) 


Epoch 27 - avg_train_loss: 0.0846  avg_val_loss: 0.2597  time: 7s
Epoch 27 - Score: 0.2614


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1949(0.2597) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0839(0.0839) Grad: 1.8991  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0786(0.0748) Grad: 1.0770  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0630(0.0759) Grad: 0.8293  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2952(0.2952) 


Epoch 28 - avg_train_loss: 0.0759  avg_val_loss: 0.2656  time: 7s
Epoch 28 - Score: 0.2671


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2142(0.2656) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0795(0.0795) Grad: 0.7426  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1034(0.0850) Grad: 2.7972  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0726(0.0826) Grad: 0.5979  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2883(0.2883) 


Epoch 29 - avg_train_loss: 0.0826  avg_val_loss: 0.2572  time: 7s
Epoch 29 - Score: 0.2596


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1851(0.2572) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0667(0.0667) Grad: 1.3291  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0886(0.0897) Grad: 1.9378  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0948(0.0913) Grad: 0.9026  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2729(0.2729) 


Epoch 30 - avg_train_loss: 0.0913  avg_val_loss: 0.2549  time: 7s
Epoch 30 - Score: 0.2583


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1632(0.2549) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0767(0.0767) Grad: 1.8098  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1060(0.0920) Grad: 1.8300  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.1103(0.0912) Grad: 1.0170  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2735(0.2735) 


Epoch 31 - avg_train_loss: 0.0912  avg_val_loss: 0.2598  time: 7s
Epoch 31 - Score: 0.2625


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1766(0.2598) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0682(0.0682) Grad: 1.4462  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1052(0.1055) Grad: 1.2042  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1571(0.1063) Grad: 4.4762  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2765(0.2765) 


Epoch 32 - avg_train_loss: 0.1063  avg_val_loss: 0.2551  time: 7s
Epoch 32 - Score: 0.2561


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2119(0.2551) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0678(0.0678) Grad: 4.0837  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0606(0.0773) Grad: 2.1710  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0600(0.0770) Grad: 0.8725  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2652(0.2652) 


Epoch 33 - avg_train_loss: 0.0770  avg_val_loss: 0.2490  time: 7s
Epoch 33 - Score: 0.2504
Epoch 33 - Save Best Score: 0.2504 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1915(0.2490) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0701(0.0701) Grad: 3.5669  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0350(0.0623) Grad: 0.6158  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0682(0.0594) Grad: 0.8551  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2746(0.2746) 


Epoch 34 - avg_train_loss: 0.0594  avg_val_loss: 0.2602  time: 7s
Epoch 34 - Score: 0.2617


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1987(0.2602) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0831(0.0831) Grad: 1.7092  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0817(0.0714) Grad: 0.7930  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0412(0.0688) Grad: 1.1456  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2581(0.2581) 


Epoch 35 - avg_train_loss: 0.0688  avg_val_loss: 0.2334  time: 7s
Epoch 35 - Score: 0.2356
Epoch 35 - Save Best Score: 0.2356 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1656(0.2334) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0622(0.0622) Grad: 1.1452  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0942(0.0771) Grad: 4.4143  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0680(0.0772) Grad: 0.5640  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2576(0.2576) 


Epoch 36 - avg_train_loss: 0.0772  avg_val_loss: 0.2500  time: 7s
Epoch 36 - Score: 0.2510


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1997(0.2500) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0914(0.0914) Grad: 4.1182  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1087(0.0989) Grad: 2.5965  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1230(0.1045) Grad: 2.9362  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2862(0.2862) 


Epoch 37 - avg_train_loss: 0.1045  avg_val_loss: 0.2593  time: 7s
Epoch 37 - Score: 0.2636


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1538(0.2593) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1688(0.1688) Grad: 3.4703  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0963(0.1116) Grad: 2.3060  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1099(0.1004) Grad: 1.0761  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2892(0.2892) 


Epoch 38 - avg_train_loss: 0.1004  avg_val_loss: 0.2733  time: 7s
Epoch 38 - Score: 0.2741


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2281(0.2733) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1007(0.1007) Grad: 0.7152  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0628(0.0703) Grad: 2.9334  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0470(0.0676) Grad: 0.6659  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2466(0.2466) 


Epoch 39 - avg_train_loss: 0.0676  avg_val_loss: 0.2339  time: 7s
Epoch 39 - Score: 0.2350
Epoch 39 - Save Best Score: 0.2350 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1849(0.2339) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0557(0.0557) Grad: 0.7172  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0478(0.0609) Grad: 1.3142  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0301(0.0576) Grad: 2.7024  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2506(0.2506) 


Epoch 40 - avg_train_loss: 0.0576  avg_val_loss: 0.2451  time: 7s
Epoch 40 - Score: 0.2458


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2042(0.2451) 


Score: 0.2350


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.4662(3.4662) Grad: 3.1335  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7490(1.7097) Grad: 1.9741  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6152(1.2586) Grad: 1.0423  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5462(0.5462) 


Epoch 1 - avg_train_loss: 1.2586  avg_val_loss: 0.5443  time: 7s
Epoch 1 - Score: 0.5450
Epoch 1 - Save Best Score: 0.5450 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6000(0.5443) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.7071(0.7071) Grad: 2.0579  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5018(0.6462) Grad: 0.8566  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5742(0.6013) Grad: 1.7424  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4150(0.4150) 


Epoch 2 - avg_train_loss: 0.6013  avg_val_loss: 0.4269  time: 7s
Epoch 2 - Score: 0.4294
Epoch 2 - Save Best Score: 0.4294 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5280(0.4269) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5734(0.5734) Grad: 3.1505  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4471(0.5010) Grad: 1.5323  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4630(0.4858) Grad: 2.4492  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3957(0.3957) 


Epoch 3 - avg_train_loss: 0.4858  avg_val_loss: 0.3941  time: 7s
Epoch 3 - Score: 0.3974
Epoch 3 - Save Best Score: 0.3974 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4978(0.3941) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3806(0.3806) Grad: 0.9166  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5185(0.4411) Grad: 1.0976  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4160(0.4484) Grad: 3.3719  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3766(0.3766) 


Epoch 4 - avg_train_loss: 0.4484  avg_val_loss: 0.3837  time: 7s
Epoch 4 - Score: 0.3868
Epoch 4 - Save Best Score: 0.3868 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4882(0.3837) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4659(0.4659) Grad: 1.7066  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3323(0.4419) Grad: 1.6687  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3790(0.4258) Grad: 1.8184  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3613(0.3613) 


Epoch 5 - avg_train_loss: 0.4258  avg_val_loss: 0.3620  time: 7s
Epoch 5 - Score: 0.3643
Epoch 5 - Save Best Score: 0.3643 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4460(0.3620) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4715(0.4715) Grad: 1.3574  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3893(0.4206) Grad: 2.7950  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4058(0.4258) Grad: 3.5089  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3393(0.3393) 


Epoch 6 - avg_train_loss: 0.4258  avg_val_loss: 0.3245  time: 7s
Epoch 6 - Score: 0.3258
Epoch 6 - Save Best Score: 0.3258 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3700(0.3245) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3744(0.3744) Grad: 1.9304  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4013(0.3829) Grad: 0.7902  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3824(0.3501) Grad: 2.5612  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3318(0.3318) 


Epoch 7 - avg_train_loss: 0.3501  avg_val_loss: 0.3040  time: 7s
Epoch 7 - Score: 0.3051
Epoch 7 - Save Best Score: 0.3051 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3058(0.3040) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2679(0.2679) Grad: 1.5588  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2810(0.2976) Grad: 1.0605  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2815(0.3031) Grad: 1.4545  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2884(0.2884) 


Epoch 8 - avg_train_loss: 0.3031  avg_val_loss: 0.2678  time: 7s
Epoch 8 - Score: 0.2697
Epoch 8 - Save Best Score: 0.2697 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3096(0.2678) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2251(0.2251) Grad: 0.5671  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2336(0.2529) Grad: 1.1709  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1626(0.2533) Grad: 0.9709  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2628(0.2628) 


Epoch 9 - avg_train_loss: 0.2533  avg_val_loss: 0.2381  time: 7s
Epoch 9 - Score: 0.2399
Epoch 9 - Save Best Score: 0.2399 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2617(0.2381) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2883(0.2883) Grad: 0.4618  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2869(0.2488) Grad: 1.7949  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1943(0.2386) Grad: 0.6944  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2530(0.2530) 


Epoch 10 - avg_train_loss: 0.2386  avg_val_loss: 0.2363  time: 7s
Epoch 10 - Score: 0.2373
Epoch 10 - Save Best Score: 0.2373 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2584(0.2363) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2559(0.2559) Grad: 1.1403  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2529(0.2522) Grad: 1.9293  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2189(0.2319) Grad: 0.8187  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2451(0.2451) 


Epoch 11 - avg_train_loss: 0.2319  avg_val_loss: 0.2397  time: 7s
Epoch 11 - Score: 0.2398


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2457(0.2397) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2358(0.2358) Grad: 3.0976  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3347(0.2417) Grad: 3.6132  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2734(0.2351) Grad: 1.3468  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2654(0.2654) 


Epoch 12 - avg_train_loss: 0.2351  avg_val_loss: 0.2662  time: 7s
Epoch 12 - Score: 0.2669


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3060(0.2662) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2290(0.2290) Grad: 1.1796  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2599(0.2425) Grad: 0.7709  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2283(0.2326) Grad: 2.6533  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2300(0.2300) 


Epoch 13 - avg_train_loss: 0.2326  avg_val_loss: 0.2282  time: 7s
Epoch 13 - Score: 0.2286
Epoch 13 - Save Best Score: 0.2286 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1996(0.2282) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1651(0.1651) Grad: 1.5125  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2021(0.2001) Grad: 1.3882  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2522(0.2039) Grad: 2.7585  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2282(0.2282) 


Epoch 14 - avg_train_loss: 0.2039  avg_val_loss: 0.2099  time: 7s
Epoch 14 - Score: 0.2105
Epoch 14 - Save Best Score: 0.2105 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1952(0.2099) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2076(0.2076) Grad: 0.8854  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1912(0.1603) Grad: 1.4208  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1129(0.1565) Grad: 1.4174  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2326(0.2326) 


Epoch 15 - avg_train_loss: 0.1565  avg_val_loss: 0.1935  time: 7s
Epoch 15 - Score: 0.1963
Epoch 15 - Save Best Score: 0.1963 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1688(0.1935) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1255(0.1255) Grad: 2.0338  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1453(0.1467) Grad: 1.2663  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1007(0.1403) Grad: 0.9492  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2364(0.2364) 


Epoch 16 - avg_train_loss: 0.1403  avg_val_loss: 0.1994  time: 7s
Epoch 16 - Score: 0.2018


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1726(0.1994) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1317(0.1317) Grad: 1.1322  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0960(0.1373) Grad: 1.7997  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1478(0.1377) Grad: 1.6981  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2209(0.2209) 


Epoch 17 - avg_train_loss: 0.1377  avg_val_loss: 0.1902  time: 7s
Epoch 17 - Score: 0.1926
Epoch 17 - Save Best Score: 0.1926 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1995(0.1902) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2058(0.2058) Grad: 0.9423  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2024(0.1978) Grad: 3.8903  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1528(0.1887) Grad: 3.1263  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2323(0.2323) 


Epoch 18 - avg_train_loss: 0.1887  avg_val_loss: 0.1927  time: 7s
Epoch 18 - Score: 0.1957


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1754(0.1927) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1969(0.1969) Grad: 1.0496  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1408(0.1627) Grad: 1.4976  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1999(0.1538) Grad: 3.0253  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2484(0.2484) 


Epoch 19 - avg_train_loss: 0.1538  avg_val_loss: 0.2163  time: 7s
Epoch 19 - Score: 0.2181


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1854(0.2163) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1624(0.1624) Grad: 2.8837  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1424(0.1536) Grad: 0.5837  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1427(0.1487) Grad: 1.7647  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2008(0.2008) 


Epoch 20 - avg_train_loss: 0.1487  avg_val_loss: 0.1794  time: 7s
Epoch 20 - Score: 0.1803
Epoch 20 - Save Best Score: 0.1803 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1651(0.1794) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1492(0.1492) Grad: 1.6344  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1354(0.1056) Grad: 1.7294  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0715(0.1095) Grad: 0.4581  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2193(0.2193) 


Epoch 21 - avg_train_loss: 0.1095  avg_val_loss: 0.1812  time: 7s
Epoch 21 - Score: 0.1840


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1532(0.1812) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1281(0.1281) Grad: 1.4494  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1590(0.1030) Grad: 1.7503  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0707(0.0936) Grad: 0.6768  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2063(0.2063) 


Epoch 22 - avg_train_loss: 0.0936  avg_val_loss: 0.1721  time: 7s
Epoch 22 - Score: 0.1745
Epoch 22 - Save Best Score: 0.1745 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1453(0.1721) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0738(0.0738) Grad: 1.4849  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0960(0.0951) Grad: 1.3769  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0980(0.0983) Grad: 1.2072  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2120(0.2120) 


Epoch 23 - avg_train_loss: 0.0983  avg_val_loss: 0.1744  time: 7s
Epoch 23 - Score: 0.1772


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1465(0.1744) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0551(0.0551) Grad: 1.2651  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0877(0.0989) Grad: 0.5891  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0838(0.0993) Grad: 2.8105  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2060(0.2060) 


Epoch 24 - avg_train_loss: 0.0993  avg_val_loss: 0.1815  time: 7s
Epoch 24 - Score: 0.1828


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1786(0.1815) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0747(0.0747) Grad: 0.9596  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1391(0.1247) Grad: 3.4327  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0877(0.1306) Grad: 0.8134  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2339(0.2339) 


Epoch 25 - avg_train_loss: 0.1306  avg_val_loss: 0.1900  time: 7s
Epoch 25 - Score: 0.1936


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1502(0.1900) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1753(0.1753) Grad: 2.6663  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1189(0.1283) Grad: 1.8578  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0816(0.1183) Grad: 1.6321  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2239(0.2239) 


Epoch 26 - avg_train_loss: 0.1183  avg_val_loss: 0.1778  time: 7s
Epoch 26 - Score: 0.1820


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1521(0.1778) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0924(0.0924) Grad: 2.7337  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0925(0.0986) Grad: 1.0765  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1075(0.0939) Grad: 0.6372  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2145(0.2145) 


Epoch 27 - avg_train_loss: 0.0939  avg_val_loss: 0.1684  time: 7s
Epoch 27 - Score: 0.1728
Epoch 27 - Save Best Score: 0.1728 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1321(0.1684) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0739(0.0739) Grad: 2.4413  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1016(0.0796) Grad: 1.3713  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0721(0.0790) Grad: 0.6719  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2171(0.2171) 


Epoch 28 - avg_train_loss: 0.0790  avg_val_loss: 0.1745  time: 7s
Epoch 28 - Score: 0.1781


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1464(0.1745) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0847(0.0847) Grad: 0.5666  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0792(0.0861) Grad: 0.7685  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0571(0.0796) Grad: 1.8004  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2074(0.2074) 


Epoch 29 - avg_train_loss: 0.0796  avg_val_loss: 0.1731  time: 7s
Epoch 29 - Score: 0.1755


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1461(0.1731) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0703(0.0703) Grad: 1.2484  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0753(0.0767) Grad: 1.3335  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1089(0.0891) Grad: 3.3771  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2076(0.2076) 


Epoch 30 - avg_train_loss: 0.0891  avg_val_loss: 0.1738  time: 7s
Epoch 30 - Score: 0.1761


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1491(0.1738) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0894(0.0894) Grad: 1.0754  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1071(0.0992) Grad: 0.9518  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0867(0.0978) Grad: 1.7280  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2290(0.2290) 


Epoch 31 - avg_train_loss: 0.0978  avg_val_loss: 0.1852  time: 7s
Epoch 31 - Score: 0.1891


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1344(0.1852) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1259(0.1259) Grad: 0.8064  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1259(0.0978) Grad: 3.0046  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0707(0.0945) Grad: 1.1942  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2189(0.2189) 


Epoch 32 - avg_train_loss: 0.0945  avg_val_loss: 0.1821  time: 7s
Epoch 32 - Score: 0.1847


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1527(0.1821) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0737(0.0737) Grad: 0.5057  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0782(0.0810) Grad: 0.9065  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0750(0.0780) Grad: 1.0772  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2018(0.2018) 


Epoch 33 - avg_train_loss: 0.0780  avg_val_loss: 0.1655  time: 7s
Epoch 33 - Score: 0.1685
Epoch 33 - Save Best Score: 0.1685 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1253(0.1655) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0863(0.0863) Grad: 1.2538  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0475(0.0689) Grad: 2.6009  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0533(0.0638) Grad: 2.4880  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2026(0.2026) 


Epoch 34 - avg_train_loss: 0.0638  avg_val_loss: 0.1644  time: 7s
Epoch 34 - Score: 0.1675
Epoch 34 - Save Best Score: 0.1675 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1371(0.1644) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0553(0.0553) Grad: 0.6279  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0555(0.0662) Grad: 2.0900  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0693(0.0680) Grad: 1.3657  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2143(0.2143) 


Epoch 35 - avg_train_loss: 0.0680  avg_val_loss: 0.1747  time: 7s
Epoch 35 - Score: 0.1779


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1549(0.1747) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0517(0.0517) Grad: 1.5340  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0724(0.0609) Grad: 1.0893  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0778(0.0627) Grad: 4.2007  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2187(0.2187) 


Epoch 36 - avg_train_loss: 0.0627  avg_val_loss: 0.1878  time: 7s
Epoch 36 - Score: 0.1896


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1615(0.1878) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0669(0.0669) Grad: 2.9827  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0680(0.0741) Grad: 1.9376  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1760(0.0845) Grad: 3.0421  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2291(0.2291) 


Epoch 37 - avg_train_loss: 0.0845  avg_val_loss: 0.1799  time: 7s
Epoch 37 - Score: 0.1846


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1460(0.1799) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0754(0.0754) Grad: 1.0707  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0772(0.0908) Grad: 1.1125  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0798(0.0941) Grad: 2.3269  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2105(0.2105) 


Epoch 38 - avg_train_loss: 0.0941  avg_val_loss: 0.1676  time: 7s
Epoch 38 - Score: 0.1715


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1283(0.1676) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0677(0.0677) Grad: 1.1366  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0745(0.0731) Grad: 2.4496  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0625(0.0716) Grad: 2.3334  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2076(0.2076) 


Epoch 39 - avg_train_loss: 0.0716  avg_val_loss: 0.1734  time: 7s
Epoch 39 - Score: 0.1758


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1411(0.1734) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0759(0.0759) Grad: 2.4171  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0641(0.0572) Grad: 0.8351  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0466(0.0572) Grad: 0.4006  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2119(0.2119) 


Epoch 40 - avg_train_loss: 0.0572  avg_val_loss: 0.1680  time: 7s
Epoch 40 - Score: 0.1720


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1311(0.1680) 


Score: 0.1675


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.3373(3.3373) Grad: 3.4204  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6276(1.6138) Grad: 2.6814  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.7051(1.2097) Grad: 0.4152  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6902(0.6902) 


Epoch 1 - avg_train_loss: 1.2097  avg_val_loss: 0.6093  time: 7s
Epoch 1 - Score: 0.6133
Epoch 1 - Save Best Score: 0.6133 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5200(0.6093) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5336(0.5336) Grad: 2.3352  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4872(0.5741) Grad: 0.8282  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3741(0.5672) Grad: 1.9414  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6155(0.6155) 


Epoch 2 - avg_train_loss: 0.5672  avg_val_loss: 0.5049  time: 7s
Epoch 2 - Score: 0.5140
Epoch 2 - Save Best Score: 0.5140 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3770(0.5049) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4357(0.4357) Grad: 1.6537  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5204(0.5094) Grad: 1.1865  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3737(0.4736) Grad: 2.8308  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5489(0.5489) 


Epoch 3 - avg_train_loss: 0.4736  avg_val_loss: 0.4769  time: 7s
Epoch 3 - Score: 0.4824
Epoch 3 - Save Best Score: 0.4824 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3422(0.4769) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5004(0.5004) Grad: 3.8372  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4679(0.4337) Grad: 1.4209  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4960(0.4326) Grad: 2.1373  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5454(0.5454) 


Epoch 4 - avg_train_loss: 0.4326  avg_val_loss: 0.4550  time: 7s
Epoch 4 - Score: 0.4639
Epoch 4 - Save Best Score: 0.4639 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2910(0.4550) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3870(0.3870) Grad: 0.8397  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3383(0.3970) Grad: 1.6610  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4194(0.4158) Grad: 4.2690  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5325(0.5325) 


Epoch 5 - avg_train_loss: 0.4158  avg_val_loss: 0.4581  time: 7s
Epoch 5 - Score: 0.4653


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2985(0.4581) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4236(0.4236) Grad: 3.5933  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4021(0.4664) Grad: 2.9130  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2874(0.4248) Grad: 0.8885  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4953(0.4953) 


Epoch 6 - avg_train_loss: 0.4248  avg_val_loss: 0.4011  time: 7s
Epoch 6 - Score: 0.4138
Epoch 6 - Save Best Score: 0.4138 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2040(0.4011) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3900(0.3900) Grad: 3.2235  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2321(0.3209) Grad: 2.5562  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2751(0.3228) Grad: 0.5740  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4844(0.4844) 


Epoch 7 - avg_train_loss: 0.3228  avg_val_loss: 0.3759  time: 7s
Epoch 7 - Score: 0.3884
Epoch 7 - Save Best Score: 0.3884 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2279(0.3759) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2887(0.2887) Grad: 1.0893  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3111(0.2733) Grad: 1.5385  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3479(0.2887) Grad: 2.9375  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4551(0.4551) 


Epoch 8 - avg_train_loss: 0.2887  avg_val_loss: 0.3759  time: 7s
Epoch 8 - Score: 0.3850
Epoch 8 - Save Best Score: 0.3850 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2176(0.3759) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2707(0.2707) Grad: 2.8543  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2351(0.2496) Grad: 1.4532  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1943(0.2432) Grad: 2.5166  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4180(0.4180) 


Epoch 9 - avg_train_loss: 0.2432  avg_val_loss: 0.3406  time: 7s
Epoch 9 - Score: 0.3502
Epoch 9 - Save Best Score: 0.3502 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1852(0.3406) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2438(0.2438) Grad: 0.9678  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2596(0.2321) Grad: 1.4987  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2465(0.2275) Grad: 0.8094  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4153(0.4153) 


Epoch 10 - avg_train_loss: 0.2275  avg_val_loss: 0.3405  time: 7s
Epoch 10 - Score: 0.3492
Epoch 10 - Save Best Score: 0.3492 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1949(0.3405) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1904(0.1904) Grad: 0.2944  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2721(0.2294) Grad: 1.6685  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2457(0.2300) Grad: 2.0461  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4055(0.4055) 


Epoch 11 - avg_train_loss: 0.2300  avg_val_loss: 0.3360  time: 7s
Epoch 11 - Score: 0.3436
Epoch 11 - Save Best Score: 0.3436 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2010(0.3360) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1727(0.1727) Grad: 1.2505  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1504(0.2148) Grad: 4.0175  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2955(0.2196) Grad: 3.9635  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4472(0.4472) 


Epoch 12 - avg_train_loss: 0.2196  avg_val_loss: 0.4051  time: 7s
Epoch 12 - Score: 0.4084


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2983(0.4051) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3046(0.3046) Grad: 2.7154  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3426(0.2505) Grad: 4.6395  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2901(0.2352) Grad: 3.4157  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4185(0.4185) 


Epoch 13 - avg_train_loss: 0.2352  avg_val_loss: 0.3659  time: 7s
Epoch 13 - Score: 0.3693


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2794(0.3659) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2570(0.2570) Grad: 1.6332  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2366(0.1946) Grad: 2.3221  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1526(0.1882) Grad: 0.8166  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3787(0.3787) 


Epoch 14 - avg_train_loss: 0.1882  avg_val_loss: 0.3443  time: 7s
Epoch 14 - Score: 0.3481


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2330(0.3443) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1933(0.1933) Grad: 1.8613  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1520(0.1468) Grad: 2.3032  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1087(0.1469) Grad: 2.9491  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3912(0.3912) 


Epoch 15 - avg_train_loss: 0.1469  avg_val_loss: 0.3207  time: 7s
Epoch 15 - Score: 0.3312
Epoch 15 - Save Best Score: 0.3312 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1534(0.3207) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0937(0.0937) Grad: 2.9989  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1813(0.1331) Grad: 0.6694  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1166(0.1357) Grad: 0.9400  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3627(0.3627) 


Epoch 16 - avg_train_loss: 0.1357  avg_val_loss: 0.3084  time: 7s
Epoch 16 - Score: 0.3152
Epoch 16 - Save Best Score: 0.3152 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1746(0.3084) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1160(0.1160) Grad: 1.0501  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2007(0.1518) Grad: 2.6051  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1920(0.1465) Grad: 1.5032  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3474(0.3474) 


Epoch 17 - avg_train_loss: 0.1465  avg_val_loss: 0.3074  time: 7s
Epoch 17 - Score: 0.3123
Epoch 17 - Save Best Score: 0.3123 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1906(0.3074) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0843(0.0843) Grad: 2.3288  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1253(0.1509) Grad: 2.3597  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1197(0.1404) Grad: 0.7623  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3653(0.3653) 


Epoch 18 - avg_train_loss: 0.1404  avg_val_loss: 0.3129  time: 7s
Epoch 18 - Score: 0.3196


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1774(0.3129) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1951(0.1951) Grad: 2.1332  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1884(0.1390) Grad: 1.2992  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1235(0.1480) Grad: 2.4111  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3652(0.3652) 


Epoch 19 - avg_train_loss: 0.1480  avg_val_loss: 0.3326  time: 7s
Epoch 19 - Score: 0.3357


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2338(0.3326) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1688(0.1688) Grad: 1.1711  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1587(0.1525) Grad: 1.2901  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1234(0.1405) Grad: 2.7345  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3189(0.3189) 


Epoch 20 - avg_train_loss: 0.1405  avg_val_loss: 0.2896  time: 7s
Epoch 20 - Score: 0.2952
Epoch 20 - Save Best Score: 0.2952 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1630(0.2896) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1139(0.1139) Grad: 1.5107  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1271(0.1204) Grad: 0.7984  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0854(0.1141) Grad: 1.1381  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3049(0.3049) 


Epoch 21 - avg_train_loss: 0.1141  avg_val_loss: 0.2706  time: 7s
Epoch 21 - Score: 0.2755
Epoch 21 - Save Best Score: 0.2755 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1573(0.2706) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0698(0.0698) Grad: 0.6157  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1152(0.1013) Grad: 0.5097  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0862(0.0963) Grad: 1.5112  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3258(0.3258) 


Epoch 22 - avg_train_loss: 0.0963  avg_val_loss: 0.2832  time: 7s
Epoch 22 - Score: 0.2894


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1555(0.2832) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0782(0.0782) Grad: 0.6439  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0811(0.0916) Grad: 0.7729  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1038(0.0931) Grad: 0.7450  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3165(0.3165) 


Epoch 23 - avg_train_loss: 0.0931  avg_val_loss: 0.2683  time: 7s
Epoch 23 - Score: 0.2749
Epoch 23 - Save Best Score: 0.2749 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1440(0.2683) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0592(0.0592) Grad: 0.5914  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0802(0.0949) Grad: 2.6436  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0863(0.0990) Grad: 1.6570  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3251(0.3251) 


Epoch 24 - avg_train_loss: 0.0990  avg_val_loss: 0.3070  time: 7s
Epoch 24 - Score: 0.3092


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2254(0.3070) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1409(0.1409) Grad: 1.2901  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1498(0.1300) Grad: 5.2394  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0738(0.1239) Grad: 0.8616  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3360(0.3360) 


Epoch 25 - avg_train_loss: 0.1239  avg_val_loss: 0.3119  time: 7s
Epoch 25 - Score: 0.3167


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1910(0.3119) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0944(0.0944) Grad: 3.9274  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1160(0.1182) Grad: 3.8153  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1145(0.1107) Grad: 2.3852  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3425(0.3425) 


Epoch 26 - avg_train_loss: 0.1107  avg_val_loss: 0.2978  time: 7s
Epoch 26 - Score: 0.3035


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1733(0.2978) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0881(0.0881) Grad: 0.7415  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0794(0.0914) Grad: 2.5245  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1192(0.0900) Grad: 1.0274  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3062(0.3062) 


Epoch 27 - avg_train_loss: 0.0900  avg_val_loss: 0.2664  time: 7s
Epoch 27 - Score: 0.2725
Epoch 27 - Save Best Score: 0.2725 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1420(0.2664) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1041(0.1041) Grad: 0.5626  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0864(0.0755) Grad: 0.9599  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0840(0.0731) Grad: 0.9295  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3307(0.3307) 


Epoch 28 - avg_train_loss: 0.0731  avg_val_loss: 0.2835  time: 7s
Epoch 28 - Score: 0.2906


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1477(0.2835) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0618(0.0618) Grad: 0.8322  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0803(0.0797) Grad: 1.1445  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0532(0.0777) Grad: 1.0248  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3231(0.3231) 


Epoch 29 - avg_train_loss: 0.0777  avg_val_loss: 0.2742  time: 7s
Epoch 29 - Score: 0.2820


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1340(0.2742) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0783(0.0783) Grad: 1.2453  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0872(0.0814) Grad: 3.2699  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0943(0.0853) Grad: 2.7462  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3195(0.3195) 


Epoch 30 - avg_train_loss: 0.0853  avg_val_loss: 0.2743  time: 7s
Epoch 30 - Score: 0.2818


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1353(0.2743) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1042(0.1042) Grad: 2.5772  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1081(0.0891) Grad: 1.6778  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0970(0.0878) Grad: 1.9767  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3198(0.3198) 


Epoch 31 - avg_train_loss: 0.0878  avg_val_loss: 0.2759  time: 7s
Epoch 31 - Score: 0.2822


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1499(0.2759) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0916(0.0916) Grad: 1.4794  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0927(0.0963) Grad: 2.1976  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0860(0.0949) Grad: 1.1622  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3172(0.3172) 


Epoch 32 - avg_train_loss: 0.0949  avg_val_loss: 0.2810  time: 7s
Epoch 32 - Score: 0.2856


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1710(0.2810) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1014(0.1014) Grad: 2.7475  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0782(0.0765) Grad: 1.7461  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0516(0.0785) Grad: 1.8887  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3167(0.3167) 


Epoch 33 - avg_train_loss: 0.0785  avg_val_loss: 0.2742  time: 7s
Epoch 33 - Score: 0.2797


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1571(0.2742) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0871(0.0871) Grad: 1.1965  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0529(0.0695) Grad: 0.9992  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0468(0.0671) Grad: 1.5944  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3091(0.3091) 


Epoch 34 - avg_train_loss: 0.0671  avg_val_loss: 0.2718  time: 7s
Epoch 34 - Score: 0.2760


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1712(0.2718) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0938(0.0938) Grad: 1.5676  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0579(0.0615) Grad: 1.0057  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0978(0.0659) Grad: 1.3610  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3200(0.3200) 


Epoch 35 - avg_train_loss: 0.0659  avg_val_loss: 0.2773  time: 7s
Epoch 35 - Score: 0.2835


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1506(0.2773) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0467(0.0467) Grad: 0.7152  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0921(0.0682) Grad: 4.1421  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0600(0.0717) Grad: 1.7906  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3157(0.3157) 


Epoch 36 - avg_train_loss: 0.0717  avg_val_loss: 0.2733  time: 7s
Epoch 36 - Score: 0.2782


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1656(0.2733) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0803(0.0803) Grad: 1.5286  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0765(0.0839) Grad: 0.8968  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0984(0.0890) Grad: 1.8608  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3185(0.3185) 


Epoch 37 - avg_train_loss: 0.0890  avg_val_loss: 0.2729  time: 7s
Epoch 37 - Score: 0.2778


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1693(0.2729) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0773(0.0773) Grad: 0.7430  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0835(0.0727) Grad: 1.2337  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0625(0.0754) Grad: 2.0019  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3170(0.3170) 


Epoch 38 - avg_train_loss: 0.0754  avg_val_loss: 0.2796  time: 7s
Epoch 38 - Score: 0.2837


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1791(0.2796) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1357(0.1357) Grad: 0.8652  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0610(0.0786) Grad: 1.5982  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0551(0.0666) Grad: 1.2281  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3015(0.3015) 


Epoch 39 - avg_train_loss: 0.0666  avg_val_loss: 0.2589  time: 7s
Epoch 39 - Score: 0.2660
Epoch 39 - Save Best Score: 0.2660 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1279(0.2589) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0570(0.0570) Grad: 1.1236  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0589(0.0534) Grad: 0.8588  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1042(0.0563) Grad: 1.2628  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2884(0.2884) 


Epoch 40 - avg_train_loss: 0.0563  avg_val_loss: 0.2611  time: 7s
Epoch 40 - Score: 0.2671


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1363(0.2611) 


Score: 0.2660
Score: 0.2154


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.4166(3.4166) Grad: 3.3932  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7971(1.6015) Grad: 4.7114  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6179(1.2097) Grad: 2.5716  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6128(0.6128) 


Epoch 1 - avg_train_loss: 1.2097  avg_val_loss: 0.6322  time: 7s
Epoch 1 - Score: 0.6368
Epoch 1 - Save Best Score: 0.6368 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8002(0.6322) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5162(0.5162) Grad: 2.0434  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5362(0.6196) Grad: 1.5373  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5486(0.5854) Grad: 2.3577  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5563(0.5563) 


Epoch 2 - avg_train_loss: 0.5854  avg_val_loss: 0.5390  time: 7s
Epoch 2 - Score: 0.5396
Epoch 2 - Save Best Score: 0.5396 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5706(0.5390) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4453(0.4453) Grad: 3.1389  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3066(0.4589) Grad: 2.6392  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4549(0.4584) Grad: 0.7956  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5365(0.5365) 


Epoch 3 - avg_train_loss: 0.4584  avg_val_loss: 0.5426  time: 7s
Epoch 3 - Score: 0.5426


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5348(0.5426) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4825(0.4825) Grad: 4.6534  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4117(0.4562) Grad: 1.0206  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5380(0.4424) Grad: 1.0958  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4573(0.4573) 


Epoch 4 - avg_train_loss: 0.4424  avg_val_loss: 0.4624  time: 7s
Epoch 4 - Score: 0.4626
Epoch 4 - Save Best Score: 0.4626 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4453(0.4624) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3790(0.3790) Grad: 1.2692  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4447(0.4938) Grad: 1.8338  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4635(0.4584) Grad: 1.8100  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4396(0.4396) 


Epoch 5 - avg_train_loss: 0.4584  avg_val_loss: 0.4337  time: 7s
Epoch 5 - Score: 0.4340
Epoch 5 - Save Best Score: 0.4340 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3954(0.4337) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3525(0.3525) Grad: 1.8308  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3626(0.3999) Grad: 1.6179  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3118(0.3853) Grad: 1.0088  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4491(0.4491) 


Epoch 6 - avg_train_loss: 0.3853  avg_val_loss: 0.4102  time: 7s
Epoch 6 - Score: 0.4116
Epoch 6 - Save Best Score: 0.4116 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3747(0.4102) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3504(0.3504) Grad: 0.7808  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2234(0.3377) Grad: 1.1239  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2781(0.3370) Grad: 1.1131  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4065(0.4065) 


Epoch 7 - avg_train_loss: 0.3370  avg_val_loss: 0.3866  time: 7s
Epoch 7 - Score: 0.3872
Epoch 7 - Save Best Score: 0.3872 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3420(0.3866) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2679(0.2679) Grad: 1.0942  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2347(0.2795) Grad: 0.7168  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3689(0.2893) Grad: 1.6153  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4182(0.4182) 


Epoch 8 - avg_train_loss: 0.2893  avg_val_loss: 0.3774  time: 7s
Epoch 8 - Score: 0.3794
Epoch 8 - Save Best Score: 0.3794 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3102(0.3774) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2905(0.2905) Grad: 1.6568  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2680(0.2598) Grad: 0.4245  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2048(0.2568) Grad: 0.6040  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3651(0.3651) 


Epoch 9 - avg_train_loss: 0.2568  avg_val_loss: 0.3407  time: 7s
Epoch 9 - Score: 0.3418
Epoch 9 - Save Best Score: 0.3418 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2869(0.3407) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2738(0.2738) Grad: 0.9326  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2718(0.2431) Grad: 0.7782  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1950(0.2373) Grad: 0.4747  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3711(0.3711) 


Epoch 10 - avg_train_loss: 0.2373  avg_val_loss: 0.3424  time: 7s
Epoch 10 - Score: 0.3437


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2866(0.3424) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1757(0.1757) Grad: 1.4956  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2906(0.2461) Grad: 0.6898  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2131(0.2322) Grad: 0.6607  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3581(0.3581) 


Epoch 11 - avg_train_loss: 0.2322  avg_val_loss: 0.3207  time: 7s
Epoch 11 - Score: 0.3222
Epoch 11 - Save Best Score: 0.3222 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2874(0.3207) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2385(0.2385) Grad: 3.1387  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2810(0.2310) Grad: 1.4507  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1927(0.2281) Grad: 1.6014  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3475(0.3475) 


Epoch 12 - avg_train_loss: 0.2281  avg_val_loss: 0.3289  time: 7s
Epoch 12 - Score: 0.3297


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2824(0.3289) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1950(0.1950) Grad: 1.6133  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2660(0.2118) Grad: 1.5285  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2571(0.2136) Grad: 2.0879  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3083(0.3083) 


Epoch 13 - avg_train_loss: 0.2136  avg_val_loss: 0.3007  time: 7s
Epoch 13 - Score: 0.3015
Epoch 13 - Save Best Score: 0.3015 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2497(0.3007) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2476(0.2476) Grad: 0.9348  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1934(0.1990) Grad: 2.1315  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1714(0.1975) Grad: 1.4334  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3085(0.3085) 


Epoch 14 - avg_train_loss: 0.1975  avg_val_loss: 0.2979  time: 7s
Epoch 14 - Score: 0.2984
Epoch 14 - Save Best Score: 0.2984 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2619(0.2979) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2564(0.2564) Grad: 3.2149  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1761(0.1782) Grad: 2.8921  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1641(0.1713) Grad: 1.7743  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2941(0.2941) 


Epoch 15 - avg_train_loss: 0.1713  avg_val_loss: 0.2695  time: 7s
Epoch 15 - Score: 0.2713
Epoch 15 - Save Best Score: 0.2713 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2042(0.2695) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1030(0.1030) Grad: 0.7495  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1153(0.1417) Grad: 0.7747  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1546(0.1511) Grad: 1.5730  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2896(0.2896) 


Epoch 16 - avg_train_loss: 0.1511  avg_val_loss: 0.2701  time: 7s
Epoch 16 - Score: 0.2728


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1868(0.2701) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1469(0.1469) Grad: 0.5989  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2066(0.1562) Grad: 0.9516  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1485(0.1514) Grad: 0.6855  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2833(0.2833) 


Epoch 17 - avg_train_loss: 0.1514  avg_val_loss: 0.2541  time: 7s
Epoch 17 - Score: 0.2575
Epoch 17 - Save Best Score: 0.2575 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1651(0.2541) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1688(0.1688) Grad: 0.6151  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1637(0.1573) Grad: 1.0536  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1275(0.1511) Grad: 1.2069  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3108(0.3108) 


Epoch 18 - avg_train_loss: 0.1511  avg_val_loss: 0.2738  time: 7s
Epoch 18 - Score: 0.2786


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1637(0.2738) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1393(0.1393) Grad: 1.3230  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1937(0.1653) Grad: 3.6966  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1765(0.1672) Grad: 1.8930  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2773(0.2773) 


Epoch 19 - avg_train_loss: 0.1672  avg_val_loss: 0.3010  time: 7s
Epoch 19 - Score: 0.3046


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2313(0.3010) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2229(0.2229) Grad: 2.1326  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2160(0.1527) Grad: 1.3062  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1753(0.1607) Grad: 3.9476  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2476(0.2476) 


Epoch 20 - avg_train_loss: 0.1607  avg_val_loss: 0.2531  time: 7s
Epoch 20 - Score: 0.2549
Epoch 20 - Save Best Score: 0.2549 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1954(0.2531) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0995(0.0995) Grad: 3.1679  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1592(0.1245) Grad: 1.0749  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1216(0.1201) Grad: 0.9443  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2598(0.2598) 


Epoch 21 - avg_train_loss: 0.1201  avg_val_loss: 0.2431  time: 7s
Epoch 21 - Score: 0.2457
Epoch 21 - Save Best Score: 0.2457 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1638(0.2431) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1242(0.1242) Grad: 1.4224  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1130(0.1101) Grad: 2.5940  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1310(0.1054) Grad: 0.9685  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2524(0.2524) 


Epoch 22 - avg_train_loss: 0.1054  avg_val_loss: 0.2443  time: 7s
Epoch 22 - Score: 0.2465


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1720(0.2443) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0754(0.0754) Grad: 0.6449  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0571(0.0911) Grad: 1.0020  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1416(0.0989) Grad: 3.1255  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2603(0.2603) 


Epoch 23 - avg_train_loss: 0.0989  avg_val_loss: 0.2396  time: 7s
Epoch 23 - Score: 0.2410
Epoch 23 - Save Best Score: 0.2410 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1840(0.2396) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0761(0.0761) Grad: 1.1704  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0796(0.1105) Grad: 1.7636  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1029(0.1146) Grad: 1.5092  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2652(0.2652) 


Epoch 24 - avg_train_loss: 0.1146  avg_val_loss: 0.2521  time: 7s
Epoch 24 - Score: 0.2542


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1789(0.2521) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0843(0.0843) Grad: 0.6051  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1224(0.1142) Grad: 1.8540  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1014(0.1158) Grad: 3.5522  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2403(0.2403) 


Epoch 25 - avg_train_loss: 0.1158  avg_val_loss: 0.2551  time: 7s
Epoch 25 - Score: 0.2571


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2053(0.2551) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1305(0.1305) Grad: 1.6608  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1018(0.1134) Grad: 2.1819  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0954(0.1107) Grad: 1.5859  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2666(0.2666) 


Epoch 26 - avg_train_loss: 0.1107  avg_val_loss: 0.2652  time: 7s
Epoch 26 - Score: 0.2668


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2039(0.2652) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1058(0.1058) Grad: 2.9710  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0648(0.0897) Grad: 0.9496  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1089(0.0947) Grad: 1.1670  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2472(0.2472) 


Epoch 27 - avg_train_loss: 0.0947  avg_val_loss: 0.2470  time: 7s
Epoch 27 - Score: 0.2495


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1745(0.2470) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0643(0.0643) Grad: 2.8216  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0638(0.0770) Grad: 1.9081  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1054(0.0822) Grad: 1.5003  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2292(0.2292) 


Epoch 28 - avg_train_loss: 0.0822  avg_val_loss: 0.2290  time: 7s
Epoch 28 - Score: 0.2305
Epoch 28 - Save Best Score: 0.2305 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1756(0.2290) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1296(0.1296) Grad: 1.8996  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0659(0.0846) Grad: 1.9398  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0599(0.0756) Grad: 0.9735  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2485(0.2485) 


Epoch 29 - avg_train_loss: 0.0756  avg_val_loss: 0.2454  time: 7s
Epoch 29 - Score: 0.2486


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1626(0.2454) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0798(0.0798) Grad: 1.8022  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1217(0.0896) Grad: 1.5637  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1152(0.1031) Grad: 1.2320  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2763(0.2763) 


Epoch 30 - avg_train_loss: 0.1031  avg_val_loss: 0.2626  time: 7s
Epoch 30 - Score: 0.2631


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2310(0.2626) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0908(0.0908) Grad: 0.9201  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0781(0.1166) Grad: 2.6412  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0757(0.1239) Grad: 0.7694  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2717(0.2717) 


Epoch 31 - avg_train_loss: 0.1239  avg_val_loss: 0.2858  time: 7s
Epoch 31 - Score: 0.2887


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2161(0.2858) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1342(0.1342) Grad: 2.9846  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1154(0.1200) Grad: 2.9080  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1242(0.1186) Grad: 2.9123  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2437(0.2437) 


Epoch 32 - avg_train_loss: 0.1186  avg_val_loss: 0.2486  time: 7s
Epoch 32 - Score: 0.2540


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1456(0.2486) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0760(0.0760) Grad: 0.7217  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0826(0.0794) Grad: 0.6192  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0591(0.0848) Grad: 0.7517  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2372(0.2372) 


Epoch 33 - avg_train_loss: 0.0848  avg_val_loss: 0.2467  time: 7s
Epoch 33 - Score: 0.2503


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1683(0.2467) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0823(0.0823) Grad: 0.6921  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0692(0.0713) Grad: 0.9758  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0794(0.0722) Grad: 2.5632  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2311(0.2311) 


Epoch 34 - avg_train_loss: 0.0722  avg_val_loss: 0.2397  time: 7s
Epoch 34 - Score: 0.2424


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1740(0.2397) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0878(0.0878) Grad: 0.5205  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0991(0.0685) Grad: 0.7469  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0499(0.0672) Grad: 0.9758  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2191(0.2191) 


Epoch 35 - avg_train_loss: 0.0672  avg_val_loss: 0.2312  time: 7s
Epoch 35 - Score: 0.2331


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1824(0.2312) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0524(0.0524) Grad: 1.0840  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0701(0.0714) Grad: 0.8065  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0845(0.0759) Grad: 0.7433  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2434(0.2434) 


Epoch 36 - avg_train_loss: 0.0759  avg_val_loss: 0.2299  time: 7s
Epoch 36 - Score: 0.2316


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1673(0.2299) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0776(0.0776) Grad: 0.6837  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0583(0.0786) Grad: 2.4949  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0650(0.0828) Grad: 1.1331  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2315(0.2315) 


Epoch 37 - avg_train_loss: 0.0828  avg_val_loss: 0.2355  time: 7s
Epoch 37 - Score: 0.2394


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1509(0.2355) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0637(0.0637) Grad: 0.6961  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1081(0.0809) Grad: 0.9571  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0892(0.0807) Grad: 1.6255  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2504(0.2504) 


Epoch 38 - avg_train_loss: 0.0807  avg_val_loss: 0.2511  time: 7s
Epoch 38 - Score: 0.2520


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2085(0.2511) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0704(0.0704) Grad: 2.0903  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0965(0.0756) Grad: 1.1707  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0551(0.0699) Grad: 0.9247  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2358(0.2358) 


Epoch 39 - avg_train_loss: 0.0699  avg_val_loss: 0.2205  time: 7s
Epoch 39 - Score: 0.2216
Epoch 39 - Save Best Score: 0.2216 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1734(0.2205) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0519(0.0519) Grad: 1.0693  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0960(0.0592) Grad: 1.2525  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0597(0.0583) Grad: 1.3804  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2264(0.2264) 


Epoch 40 - avg_train_loss: 0.0583  avg_val_loss: 0.2315  time: 7s
Epoch 40 - Score: 0.2328


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1843(0.2315) 


Score: 0.2216


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.1546(3.1546) Grad: 3.3588  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7767(1.5795) Grad: 3.8645  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5755(1.1967) Grad: 1.3068  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6056(0.6056) 


Epoch 1 - avg_train_loss: 1.1967  avg_val_loss: 0.5987  time: 7s
Epoch 1 - Score: 0.6060
Epoch 1 - Save Best Score: 0.6060 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4007(0.5987) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.7190(0.7190) Grad: 1.0731  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7490(0.6263) Grad: 1.8763  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4974(0.5750) Grad: 1.3312  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5210(0.5210) 


Epoch 2 - avg_train_loss: 0.5750  avg_val_loss: 0.5140  time: 7s
Epoch 2 - Score: 0.5216
Epoch 2 - Save Best Score: 0.5216 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3269(0.5140) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5336(0.5336) Grad: 1.6406  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4609(0.4813) Grad: 2.5769  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3911(0.4899) Grad: 0.9859  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4320(0.4320) 


Epoch 3 - avg_train_loss: 0.4899  avg_val_loss: 0.4308  time: 7s
Epoch 3 - Score: 0.4376
Epoch 3 - Save Best Score: 0.4376 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2729(0.4308) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5325(0.5325) Grad: 1.5045  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4039(0.4206) Grad: 1.8599  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3100(0.4274) Grad: 0.4654  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4308(0.4308) 


Epoch 4 - avg_train_loss: 0.4274  avg_val_loss: 0.4296  time: 7s
Epoch 4 - Score: 0.4364
Epoch 4 - Save Best Score: 0.4364 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2721(0.4296) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5338(0.5338) Grad: 2.4490  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4252(0.4227) Grad: 1.9041  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3703(0.4125) Grad: 2.0615  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3793(0.3793) 


Epoch 5 - avg_train_loss: 0.4125  avg_val_loss: 0.3847  time: 7s
Epoch 5 - Score: 0.3891
Epoch 5 - Save Best Score: 0.3891 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2691(0.3847) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3569(0.3569) Grad: 3.6129  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4156(0.4139) Grad: 2.5303  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3834(0.3799) Grad: 2.4708  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3543(0.3543) 


Epoch 6 - avg_train_loss: 0.3799  avg_val_loss: 0.3268  time: 7s
Epoch 6 - Score: 0.3350
Epoch 6 - Save Best Score: 0.3350 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1638(0.3268) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3406(0.3406) Grad: 1.7643  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4572(0.3966) Grad: 1.0704  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3106(0.3624) Grad: 3.8018  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3241(0.3241) 


Epoch 7 - avg_train_loss: 0.3624  avg_val_loss: 0.3083  time: 7s
Epoch 7 - Score: 0.3126
Epoch 7 - Save Best Score: 0.3126 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1946(0.3083) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3103(0.3103) Grad: 0.4843  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2867(0.3056) Grad: 0.9374  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2737(0.3028) Grad: 0.7376  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2962(0.2962) 


Epoch 8 - avg_train_loss: 0.3028  avg_val_loss: 0.2633  time: 7s
Epoch 8 - Score: 0.2679
Epoch 8 - Save Best Score: 0.2679 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1560(0.2633) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2780(0.2780) Grad: 1.1325  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2081(0.2587) Grad: 2.4009  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2209(0.2520) Grad: 0.5723  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2601(0.2601) 


Epoch 9 - avg_train_loss: 0.2520  avg_val_loss: 0.2406  time: 7s
Epoch 9 - Score: 0.2428
Epoch 9 - Save Best Score: 0.2428 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1684(0.2406) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1954(0.1954) Grad: 0.9623  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2219(0.2372) Grad: 1.3672  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2934(0.2279) Grad: 1.4531  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2684(0.2684) 


Epoch 10 - avg_train_loss: 0.2279  avg_val_loss: 0.2497  time: 7s
Epoch 10 - Score: 0.2537


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1502(0.2497) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2782(0.2782) Grad: 0.9098  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2235(0.2322) Grad: 2.2585  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2798(0.2321) Grad: 1.3782  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2571(0.2571) 


Epoch 11 - avg_train_loss: 0.2321  avg_val_loss: 0.2464  time: 7s
Epoch 11 - Score: 0.2483


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1782(0.2464) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1544(0.1544) Grad: 0.7014  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2378(0.2264) Grad: 1.4392  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2640(0.2276) Grad: 4.0509  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3088(0.3088) 


Epoch 12 - avg_train_loss: 0.2276  avg_val_loss: 0.2746  time: 7s
Epoch 12 - Score: 0.2773


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1961(0.2746) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2218(0.2218) Grad: 2.6575  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2620(0.2332) Grad: 3.4166  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1776(0.2261) Grad: 2.7054  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2457(0.2457) 


Epoch 13 - avg_train_loss: 0.2261  avg_val_loss: 0.2267  time: 7s
Epoch 13 - Score: 0.2299
Epoch 13 - Save Best Score: 0.2299 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1424(0.2267) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2764(0.2764) Grad: 1.2035  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1936(0.2036) Grad: 0.8496  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1888(0.1880) Grad: 2.0315  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2346(0.2346) 


Epoch 14 - avg_train_loss: 0.1880  avg_val_loss: 0.2147  time: 7s
Epoch 14 - Score: 0.2160
Epoch 14 - Save Best Score: 0.2160 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1677(0.2147) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2006(0.2006) Grad: 0.6888  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1316(0.1599) Grad: 1.2384  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2172(0.1612) Grad: 2.9700  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2649(0.2649) 


Epoch 15 - avg_train_loss: 0.1612  avg_val_loss: 0.2281  time: 7s
Epoch 15 - Score: 0.2334


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1230(0.2281) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1166(0.1166) Grad: 0.7723  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1845(0.1494) Grad: 1.4514  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0878(0.1435) Grad: 1.0232  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2359(0.2359) 


Epoch 16 - avg_train_loss: 0.1435  avg_val_loss: 0.2084  time: 7s
Epoch 16 - Score: 0.2113
Epoch 16 - Save Best Score: 0.2113 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1360(0.2084) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1641(0.1641) Grad: 1.3769  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1982(0.1404) Grad: 1.9963  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1664(0.1413) Grad: 1.7045  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2100(0.2100) 


Epoch 17 - avg_train_loss: 0.1413  avg_val_loss: 0.1981  time: 7s
Epoch 17 - Score: 0.1990
Epoch 17 - Save Best Score: 0.1990 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1580(0.1981) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1766(0.1766) Grad: 1.9096  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1183(0.1498) Grad: 1.4591  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1418(0.1477) Grad: 0.9287  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2337(0.2337) 


Epoch 18 - avg_train_loss: 0.1477  avg_val_loss: 0.2229  time: 7s
Epoch 18 - Score: 0.2246


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1618(0.2229) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1585(0.1585) Grad: 2.6792  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1184(0.1463) Grad: 2.3699  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1714(0.1461) Grad: 2.3525  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2359(0.2359) 


Epoch 19 - avg_train_loss: 0.1461  avg_val_loss: 0.2141  time: 7s
Epoch 19 - Score: 0.2170


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1374(0.2141) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1489(0.1489) Grad: 0.8909  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1663(0.1331) Grad: 1.9724  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1389(0.1388) Grad: 3.8596  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2703(0.2703) 


Epoch 20 - avg_train_loss: 0.1388  avg_val_loss: 0.2323  time: 7s
Epoch 20 - Score: 0.2356


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1572(0.2323) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1448(0.1448) Grad: 3.7833  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1258(0.1326) Grad: 0.8144  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1237(0.1216) Grad: 1.1215  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2398(0.2398) 


Epoch 21 - avg_train_loss: 0.1216  avg_val_loss: 0.2008  time: 7s
Epoch 21 - Score: 0.2045


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1323(0.2008) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1188(0.1188) Grad: 1.8514  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1079(0.0959) Grad: 0.6515  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0754(0.0980) Grad: 0.8946  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2167(0.2167) 


Epoch 22 - avg_train_loss: 0.0980  avg_val_loss: 0.1869  time: 7s
Epoch 22 - Score: 0.1889
Epoch 22 - Save Best Score: 0.1889 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1409(0.1869) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1230(0.1230) Grad: 1.8447  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1161(0.1098) Grad: 2.7151  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0515(0.1002) Grad: 0.9600  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2246(0.2246) 


Epoch 23 - avg_train_loss: 0.1002  avg_val_loss: 0.1895  time: 7s
Epoch 23 - Score: 0.1937


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1087(0.1895) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0784(0.0784) Grad: 1.3174  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0997(0.1026) Grad: 2.4403  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1202(0.1059) Grad: 3.4449  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2311(0.2311) 


Epoch 24 - avg_train_loss: 0.1059  avg_val_loss: 0.2034  time: 7s
Epoch 24 - Score: 0.2051


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1585(0.2034) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1305(0.1305) Grad: 1.1322  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2002(0.1358) Grad: 2.8314  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1314(0.1379) Grad: 3.0428  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2236(0.2236) 


Epoch 25 - avg_train_loss: 0.1379  avg_val_loss: 0.1982  time: 7s
Epoch 25 - Score: 0.2008


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1306(0.1982) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1117(0.1117) Grad: 0.8508  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0800(0.1137) Grad: 1.9251  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1143(0.1109) Grad: 1.8011  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2720(0.2720) 


Epoch 26 - avg_train_loss: 0.1109  avg_val_loss: 0.2304  time: 7s
Epoch 26 - Score: 0.2337


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1641(0.2304) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1253(0.1253) Grad: 3.7110  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0878(0.1151) Grad: 3.5547  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0920(0.1014) Grad: 0.6201  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2315(0.2315) 


Epoch 27 - avg_train_loss: 0.1014  avg_val_loss: 0.2007  time: 7s
Epoch 27 - Score: 0.2048


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1141(0.2007) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0898(0.0898) Grad: 0.8298  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0811(0.0743) Grad: 1.3655  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0675(0.0773) Grad: 0.6963  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1994(0.1994) 


Epoch 28 - avg_train_loss: 0.0773  avg_val_loss: 0.1759  time: 7s
Epoch 28 - Score: 0.1782
Epoch 28 - Save Best Score: 0.1782 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1171(0.1759) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0952(0.0952) Grad: 1.5892  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0561(0.0744) Grad: 0.7567  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0580(0.0751) Grad: 0.8959  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2064(0.2064) 


Epoch 29 - avg_train_loss: 0.0751  avg_val_loss: 0.1863  time: 7s
Epoch 29 - Score: 0.1890


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1171(0.1863) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0534(0.0534) Grad: 2.1937  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0706(0.0764) Grad: 1.8182  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0886(0.0765) Grad: 4.4830  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2181(0.2181) 


Epoch 30 - avg_train_loss: 0.0765  avg_val_loss: 0.1867  time: 7s
Epoch 30 - Score: 0.1897


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1221(0.1867) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0779(0.0779) Grad: 2.9486  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1088(0.0939) Grad: 2.5801  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0837(0.0986) Grad: 2.3440  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2279(0.2279) 


Epoch 31 - avg_train_loss: 0.0986  avg_val_loss: 0.2067  time: 7s
Epoch 31 - Score: 0.2098


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1278(0.2067) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0895(0.0895) Grad: 0.9268  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1419(0.1130) Grad: 4.5478  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0870(0.1032) Grad: 1.6004  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2551(0.2551) 


Epoch 32 - avg_train_loss: 0.1032  avg_val_loss: 0.2138  time: 7s
Epoch 32 - Score: 0.2178


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1388(0.2138) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0828(0.0828) Grad: 3.9163  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1271(0.0948) Grad: 2.4303  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0701(0.0889) Grad: 0.4947  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2137(0.2137) 


Epoch 33 - avg_train_loss: 0.0889  avg_val_loss: 0.1923  time: 7s
Epoch 33 - Score: 0.1953


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1184(0.1923) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0525(0.0525) Grad: 2.5338  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0717(0.0643) Grad: 0.8900  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0639(0.0668) Grad: 2.5027  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2204(0.2204) 


Epoch 34 - avg_train_loss: 0.0668  avg_val_loss: 0.2004  time: 7s
Epoch 34 - Score: 0.2036


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1208(0.2004) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0548(0.0548) Grad: 0.7400  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0490(0.0773) Grad: 3.0374  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0529(0.0754) Grad: 0.6959  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2243(0.2243) 


Epoch 35 - avg_train_loss: 0.0754  avg_val_loss: 0.1923  time: 7s
Epoch 35 - Score: 0.1972


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.0987(0.1923) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0431(0.0431) Grad: 0.7747  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1608(0.0898) Grad: 2.7500  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0924(0.0913) Grad: 3.2015  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2193(0.2193) 


Epoch 36 - avg_train_loss: 0.0913  avg_val_loss: 0.1998  time: 7s
Epoch 36 - Score: 0.2031


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1191(0.1998) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0704(0.0704) Grad: 2.2352  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1845(0.0957) Grad: 3.1957  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0949(0.0994) Grad: 1.8368  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2671(0.2671) 


Epoch 37 - avg_train_loss: 0.0994  avg_val_loss: 0.2333  time: 7s
Epoch 37 - Score: 0.2362


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1623(0.2333) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1330(0.1330) Grad: 4.6499  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0850(0.1027) Grad: 2.0625  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0826(0.0988) Grad: 2.6869  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2136(0.2136) 


Epoch 38 - avg_train_loss: 0.0988  avg_val_loss: 0.1929  time: 7s
Epoch 38 - Score: 0.1952


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1277(0.1929) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0613(0.0613) Grad: 1.8241  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0550(0.0689) Grad: 2.0834  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0622(0.0656) Grad: 1.5514  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2094(0.2094) 


Epoch 39 - avg_train_loss: 0.0656  avg_val_loss: 0.1856  time: 7s
Epoch 39 - Score: 0.1889


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1100(0.1856) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0527(0.0527) Grad: 1.2085  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0690(0.0610) Grad: 1.5531  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0466(0.0563) Grad: 0.7807  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2117(0.2117) 


Epoch 40 - avg_train_loss: 0.0563  avg_val_loss: 0.1807  time: 7s
Epoch 40 - Score: 0.1847


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1011(0.1807) 


Score: 0.1782


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.4326(3.4326) Grad: 3.1977  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6086(1.5139) Grad: 3.0135  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5889(1.1410) Grad: 1.2305  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6090(0.6090) 


Epoch 1 - avg_train_loss: 1.1410  avg_val_loss: 0.6193  time: 7s
Epoch 1 - Score: 0.6198
Epoch 1 - Save Best Score: 0.6198 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6714(0.6193) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5929(0.5929) Grad: 1.0102  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5954(0.6319) Grad: 0.6930  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5347(0.5770) Grad: 3.1761  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4963(0.4963) 


Epoch 2 - avg_train_loss: 0.5770  avg_val_loss: 0.4695  time: 7s
Epoch 2 - Score: 0.4705
Epoch 2 - Save Best Score: 0.4705 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4125(0.4695) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4347(0.4347) Grad: 2.7428  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5166(0.4446) Grad: 1.6024  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4984(0.4457) Grad: 1.6712  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4299(0.4299) 


Epoch 3 - avg_train_loss: 0.4457  avg_val_loss: 0.4113  time: 7s
Epoch 3 - Score: 0.4118
Epoch 3 - Save Best Score: 0.4118 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3715(0.4113) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2978(0.2978) Grad: 1.4081  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3580(0.3942) Grad: 0.9326  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3588(0.3932) Grad: 2.8123  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4167(0.4167) 


Epoch 4 - avg_train_loss: 0.3932  avg_val_loss: 0.4078  time: 7s
Epoch 4 - Score: 0.4083
Epoch 4 - Save Best Score: 0.4083 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3646(0.4078) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3470(0.3470) Grad: 3.0711  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3263(0.3520) Grad: 3.3208  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4450(0.3795) Grad: 0.9893  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3807(0.3807) 


Epoch 5 - avg_train_loss: 0.3795  avg_val_loss: 0.3572  time: 7s
Epoch 5 - Score: 0.3578
Epoch 5 - Save Best Score: 0.3578 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3249(0.3572) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2914(0.2914) Grad: 0.8149  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2720(0.3651) Grad: 0.7903  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2498(0.3584) Grad: 3.8405  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3366(0.3366) 


Epoch 6 - avg_train_loss: 0.3584  avg_val_loss: 0.3443  time: 7s
Epoch 6 - Score: 0.3443
Epoch 6 - Save Best Score: 0.3443 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3472(0.3443) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3554(0.3554) Grad: 0.9211  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2674(0.3300) Grad: 4.3573  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4074(0.3450) Grad: 2.0990  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3202(0.3202) 


Epoch 7 - avg_train_loss: 0.3450  avg_val_loss: 0.3195  time: 7s
Epoch 7 - Score: 0.3196
Epoch 7 - Save Best Score: 0.3196 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3388(0.3195) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2955(0.2955) Grad: 1.7381  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3164(0.3026) Grad: 0.5046  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2652(0.2942) Grad: 1.0559  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2747(0.2747) 


Epoch 8 - avg_train_loss: 0.2942  avg_val_loss: 0.2835  time: 7s
Epoch 8 - Score: 0.2838
Epoch 8 - Save Best Score: 0.2838 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2721(0.2835) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2249(0.2249) Grad: 0.5064  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2298(0.2495) Grad: 2.5118  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1923(0.2525) Grad: 1.1156  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2626(0.2626) 


Epoch 9 - avg_train_loss: 0.2525  avg_val_loss: 0.2750  time: 7s
Epoch 9 - Score: 0.2755
Epoch 9 - Save Best Score: 0.2755 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2591(0.2750) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1703(0.1703) Grad: 2.0756  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2004(0.2176) Grad: 0.7666  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3054(0.2349) Grad: 1.1570  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2440(0.2440) 


Epoch 10 - avg_train_loss: 0.2349  avg_val_loss: 0.2598  time: 7s
Epoch 10 - Score: 0.2601
Epoch 10 - Save Best Score: 0.2601 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2693(0.2598) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2820(0.2820) Grad: 2.3063  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2399(0.2494) Grad: 1.9631  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2440(0.2404) Grad: 1.3855  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2498(0.2498) 


Epoch 11 - avg_train_loss: 0.2404  avg_val_loss: 0.2637  time: 7s
Epoch 11 - Score: 0.2639


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2673(0.2637) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2940(0.2940) Grad: 0.8205  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1354(0.2373) Grad: 0.9907  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2904(0.2278) Grad: 2.1548  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2526(0.2526) 


Epoch 12 - avg_train_loss: 0.2278  avg_val_loss: 0.2604  time: 7s
Epoch 12 - Score: 0.2609


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2362(0.2604) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1967(0.1967) Grad: 1.8182  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2024(0.2062) Grad: 2.0783  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2243(0.2187) Grad: 2.9862  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2424(0.2424) 


Epoch 13 - avg_train_loss: 0.2187  avg_val_loss: 0.2567  time: 7s
Epoch 13 - Score: 0.2570
Epoch 13 - Save Best Score: 0.2570 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2617(0.2567) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2777(0.2777) Grad: 1.3863  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1746(0.2004) Grad: 0.7250  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1715(0.1867) Grad: 1.7235  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1931(0.1931) 


Epoch 14 - avg_train_loss: 0.1867  avg_val_loss: 0.2117  time: 7s
Epoch 14 - Score: 0.2123
Epoch 14 - Save Best Score: 0.2123 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2374(0.2117) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1901(0.1901) Grad: 1.7515  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1568(0.1547) Grad: 0.7315  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1732(0.1557) Grad: 1.1156  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1762(0.1762) 


Epoch 15 - avg_train_loss: 0.1557  avg_val_loss: 0.2071  time: 7s
Epoch 15 - Score: 0.2088
Epoch 15 - Save Best Score: 0.2088 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2395(0.2071) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1619(0.1619) Grad: 1.3715  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1752(0.1471) Grad: 0.9261  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1503(0.1438) Grad: 0.9319  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1875(0.1875) 


Epoch 16 - avg_train_loss: 0.1438  avg_val_loss: 0.2060  time: 7s
Epoch 16 - Score: 0.2070
Epoch 16 - Save Best Score: 0.2070 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2430(0.2060) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1303(0.1303) Grad: 0.9679  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1686(0.1571) Grad: 0.9550  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1492(0.1452) Grad: 1.5340  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1522(0.1522) 


Epoch 17 - avg_train_loss: 0.1452  avg_val_loss: 0.1917  time: 7s
Epoch 17 - Score: 0.1956
Epoch 17 - Save Best Score: 0.1956 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2604(0.1917) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1211(0.1211) Grad: 1.1861  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1154(0.1241) Grad: 2.9756  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1548(0.1373) Grad: 1.5203  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1708(0.1708) 


Epoch 18 - avg_train_loss: 0.1373  avg_val_loss: 0.2041  time: 7s
Epoch 18 - Score: 0.2061


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2302(0.2041) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1499(0.1499) Grad: 1.7965  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1836(0.1344) Grad: 1.0222  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1324(0.1436) Grad: 0.9150  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1948(0.1948) 


Epoch 19 - avg_train_loss: 0.1436  avg_val_loss: 0.2012  time: 7s
Epoch 19 - Score: 0.2015


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1873(0.2012) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1234(0.1234) Grad: 2.6333  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1348(0.1295) Grad: 1.7267  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1139(0.1314) Grad: 2.1048  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1623(0.1623) 


Epoch 20 - avg_train_loss: 0.1314  avg_val_loss: 0.2004  time: 7s
Epoch 20 - Score: 0.2029


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2245(0.2004) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1010(0.1010) Grad: 0.7666  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1554(0.1132) Grad: 2.2247  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0839(0.1129) Grad: 0.6626  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1508(0.1508) 


Epoch 21 - avg_train_loss: 0.1129  avg_val_loss: 0.1775  time: 7s
Epoch 21 - Score: 0.1790
Epoch 21 - Save Best Score: 0.1790 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1957(0.1775) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0984(0.0984) Grad: 2.5699  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0601(0.0936) Grad: 0.7893  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0929(0.0940) Grad: 0.9961  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1788(0.1788) 


Epoch 22 - avg_train_loss: 0.0940  avg_val_loss: 0.1942  time: 7s
Epoch 22 - Score: 0.1951


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2339(0.1942) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0782(0.0782) Grad: 1.1659  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0715(0.0902) Grad: 1.7663  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0996(0.0982) Grad: 0.9949  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1582(0.1582) 


Epoch 23 - avg_train_loss: 0.0982  avg_val_loss: 0.1821  time: 7s
Epoch 23 - Score: 0.1835


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2200(0.1821) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0712(0.0712) Grad: 1.0827  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1447(0.1122) Grad: 3.3147  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1104(0.1168) Grad: 0.8758  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2037(0.2037) 


Epoch 24 - avg_train_loss: 0.1168  avg_val_loss: 0.2069  time: 7s
Epoch 24 - Score: 0.2070


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1966(0.2069) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1429(0.1429) Grad: 3.6691  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1066(0.1383) Grad: 1.8468  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1351(0.1385) Grad: 1.1595  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1637(0.1637) 


Epoch 25 - avg_train_loss: 0.1385  avg_val_loss: 0.1794  time: 7s
Epoch 25 - Score: 0.1805


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2209(0.1794) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1195(0.1195) Grad: 1.2285  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1708(0.1263) Grad: 3.3538  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1538(0.1308) Grad: 2.9833  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1903(0.1903) 


Epoch 26 - avg_train_loss: 0.1308  avg_val_loss: 0.2088  time: 7s
Epoch 26 - Score: 0.2096


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2421(0.2088) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1231(0.1231) Grad: 1.2023  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1130(0.0982) Grad: 1.8387  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0643(0.0992) Grad: 1.0701  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1671(0.1671) 


Epoch 27 - avg_train_loss: 0.0992  avg_val_loss: 0.1865  time: 7s
Epoch 27 - Score: 0.1878


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2324(0.1865) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0943(0.0943) Grad: 1.5025  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0911(0.0803) Grad: 0.9853  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0572(0.0799) Grad: 0.8434  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1466(0.1466) 


Epoch 28 - avg_train_loss: 0.0799  avg_val_loss: 0.1684  time: 7s
Epoch 28 - Score: 0.1702
Epoch 28 - Save Best Score: 0.1702 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2188(0.1684) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0575(0.0575) Grad: 0.8291  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0714(0.0773) Grad: 0.7976  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0706(0.0771) Grad: 0.6624  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1385(0.1385) 


Epoch 29 - avg_train_loss: 0.0771  avg_val_loss: 0.1620  time: 7s
Epoch 29 - Score: 0.1632
Epoch 29 - Save Best Score: 0.1632 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1806(0.1620) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0793(0.0793) Grad: 1.1454  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0664(0.0802) Grad: 2.7935  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1393(0.0811) Grad: 0.8033  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1531(0.1531) 


Epoch 30 - avg_train_loss: 0.0811  avg_val_loss: 0.1776  time: 7s
Epoch 30 - Score: 0.1789


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1982(0.1776) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1202(0.1202) Grad: 2.7162  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1198(0.1021) Grad: 4.5581  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1182(0.1074) Grad: 3.3729  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1559(0.1559) 


Epoch 31 - avg_train_loss: 0.1074  avg_val_loss: 0.1877  time: 7s
Epoch 31 - Score: 0.1899


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2332(0.1877) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1093(0.1093) Grad: 2.0716  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1223(0.1105) Grad: 2.9362  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1248(0.1102) Grad: 2.6837  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1579(0.1579) 


Epoch 32 - avg_train_loss: 0.1102  avg_val_loss: 0.1838  time: 7s
Epoch 32 - Score: 0.1854


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2247(0.1838) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1322(0.1322) Grad: 1.8987  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0744(0.0939) Grad: 1.0176  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0714(0.0868) Grad: 2.4310  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1202(0.1202) 


Epoch 33 - avg_train_loss: 0.0868  avg_val_loss: 0.1576  time: 7s
Epoch 33 - Score: 0.1609
Epoch 33 - Save Best Score: 0.1609 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2001(0.1576) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0581(0.0581) Grad: 0.5606  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0709(0.0707) Grad: 1.7599  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 5s (remain 0m 0s) Loss: 0.0823(0.0712) Grad: 1.0464  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1385(0.1385) 


Epoch 34 - avg_train_loss: 0.0712  avg_val_loss: 0.1662  time: 7s
Epoch 34 - Score: 0.1687


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2200(0.1662) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0689(0.0689) Grad: 0.8086  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0735(0.0672) Grad: 1.6154  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0637(0.0672) Grad: 0.7010  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1189(0.1189) 


Epoch 35 - avg_train_loss: 0.0672  avg_val_loss: 0.1552  time: 7s
Epoch 35 - Score: 0.1585
Epoch 35 - Save Best Score: 0.1585 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2001(0.1552) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0562(0.0562) Grad: 1.6202  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0824(0.0675) Grad: 1.5106  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0700(0.0740) Grad: 2.4613  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1478(0.1478) 


Epoch 36 - avg_train_loss: 0.0740  avg_val_loss: 0.1677  time: 7s
Epoch 36 - Score: 0.1693


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2153(0.1677) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0717(0.0717) Grad: 0.8078  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0863(0.0977) Grad: 1.0102  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0935(0.0971) Grad: 1.7849  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1545(0.1545) 


Epoch 37 - avg_train_loss: 0.0971  avg_val_loss: 0.1837  time: 7s
Epoch 37 - Score: 0.1872


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2586(0.1837) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0770(0.0770) Grad: 1.1452  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0554(0.0806) Grad: 0.9109  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0805(0.0807) Grad: 2.1707  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1613(0.1613) 


Epoch 38 - avg_train_loss: 0.0807  avg_val_loss: 0.1770  time: 7s
Epoch 38 - Score: 0.1785


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2256(0.1770) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1400(0.1400) Grad: 2.9960  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0575(0.0752) Grad: 0.7054  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0672(0.0756) Grad: 4.1125  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1329(0.1329) 


Epoch 39 - avg_train_loss: 0.0756  avg_val_loss: 0.1593  time: 7s
Epoch 39 - Score: 0.1609


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1705(0.1593) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0731(0.0731) Grad: 4.2129  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0788(0.0750) Grad: 2.6096  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0637(0.0699) Grad: 1.6063  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1287(0.1287) 


Epoch 40 - avg_train_loss: 0.0699  avg_val_loss: 0.1534  time: 7s
Epoch 40 - Score: 0.1551
Epoch 40 - Save Best Score: 0.1551 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1891(0.1534) 


Score: 0.1551


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.2513(3.2513) Grad: 3.2324  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.7845(1.6560) Grad: 4.2877  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6434(1.2403) Grad: 0.6896  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6557(0.6557) 


Epoch 1 - avg_train_loss: 1.2403  avg_val_loss: 0.6153  time: 7s
Epoch 1 - Score: 0.6166
Epoch 1 - Save Best Score: 0.6166 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5412(0.6153) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5798(0.5798) Grad: 0.3910  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6212(0.6207) Grad: 0.5956  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4692(0.5950) Grad: 2.1233  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6289(0.6289) 


Epoch 2 - avg_train_loss: 0.5950  avg_val_loss: 0.5542  time: 7s
Epoch 2 - Score: 0.5582
Epoch 2 - Save Best Score: 0.5582 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4600(0.5542) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4122(0.4122) Grad: 1.4001  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5865(0.5099) Grad: 0.7443  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.5787(0.5250) Grad: 0.9583  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5464(0.5464) 


Epoch 3 - avg_train_loss: 0.5250  avg_val_loss: 0.4626  time: 7s
Epoch 3 - Score: 0.4689
Epoch 3 - Save Best Score: 0.4689 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3430(0.4626) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5029(0.5029) Grad: 2.0158  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5133(0.4858) Grad: 1.1582  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3666(0.4757) Grad: 1.8028  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5430(0.5430) 


Epoch 4 - avg_train_loss: 0.4757  avg_val_loss: 0.4573  time: 7s
Epoch 4 - Score: 0.4641
Epoch 4 - Save Best Score: 0.4641 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3313(0.4573) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4451(0.4451) Grad: 1.6997  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4618(0.4142) Grad: 1.2032  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4189(0.4455) Grad: 1.5217  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5956(0.5956) 


Epoch 5 - avg_train_loss: 0.4455  avg_val_loss: 0.5020  time: 7s
Epoch 5 - Score: 0.5085


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3986(0.5020) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5161(0.5161) Grad: 1.9018  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4197(0.4178) Grad: 0.7025  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3957(0.4060) Grad: 1.7793  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3671(0.3671) 


Epoch 6 - avg_train_loss: 0.4060  avg_val_loss: 0.3412  time: 7s
Epoch 6 - Score: 0.3424
Epoch 6 - Save Best Score: 0.3424 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2863(0.3412) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2249(0.2249) Grad: 1.1155  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3763(0.3657) Grad: 2.9934  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3710(0.3628) Grad: 1.9767  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4456(0.4456) 


Epoch 7 - avg_train_loss: 0.3628  avg_val_loss: 0.4112  time: 7s
Epoch 7 - Score: 0.4123


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3661(0.4112) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 7s) Loss: 0.3979(0.3979) Grad: 1.6037  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3514(0.3155) Grad: 2.6431  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3622(0.3063) Grad: 0.9227  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3650(0.3650) 


Epoch 8 - avg_train_loss: 0.3063  avg_val_loss: 0.3287  time: 7s
Epoch 8 - Score: 0.3302
Epoch 8 - Save Best Score: 0.3302 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3133(0.3287) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2963(0.2963) Grad: 0.4512  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2349(0.2857) Grad: 0.6030  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2243(0.2686) Grad: 1.3635  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3267(0.3267) 


Epoch 9 - avg_train_loss: 0.2686  avg_val_loss: 0.3091  time: 7s
Epoch 9 - Score: 0.3103
Epoch 9 - Save Best Score: 0.3103 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3446(0.3091) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2580(0.2580) Grad: 1.2372  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2012(0.2336) Grad: 1.5616  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2340(0.2345) Grad: 0.4673  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3216(0.3216) 


Epoch 10 - avg_train_loss: 0.2345  avg_val_loss: 0.3061  time: 7s
Epoch 10 - Score: 0.3068
Epoch 10 - Save Best Score: 0.3068 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3279(0.3061) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2361(0.2361) Grad: 1.7034  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2814(0.2501) Grad: 2.9992  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2071(0.2327) Grad: 1.6132  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3036(0.3036) 


Epoch 11 - avg_train_loss: 0.2327  avg_val_loss: 0.2983  time: 7s
Epoch 11 - Score: 0.2990
Epoch 11 - Save Best Score: 0.2990 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3344(0.2983) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2689(0.2689) Grad: 2.9528  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2124(0.2339) Grad: 2.9009  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1882(0.2279) Grad: 4.5179  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3387(0.3387) 


Epoch 12 - avg_train_loss: 0.2279  avg_val_loss: 0.3449  time: 7s
Epoch 12 - Score: 0.3450


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3546(0.3449) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2845(0.2845) Grad: 4.8199  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2140(0.2506) Grad: 2.9692  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1982(0.2335) Grad: 1.1669  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3058(0.3058) 


Epoch 13 - avg_train_loss: 0.2335  avg_val_loss: 0.2861  time: 7s
Epoch 13 - Score: 0.2867
Epoch 13 - Save Best Score: 0.2867 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2518(0.2861) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2172(0.2172) Grad: 1.2723  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1761(0.1830) Grad: 4.2335  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2332(0.1862) Grad: 1.3017  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3465(0.3465) 


Epoch 14 - avg_train_loss: 0.1862  avg_val_loss: 0.3195  time: 7s
Epoch 14 - Score: 0.3207


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3326(0.3195) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2337(0.2337) Grad: 1.3684  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2095(0.1641) Grad: 1.3444  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1716(0.1572) Grad: 0.8130  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3395(0.3395) 


Epoch 15 - avg_train_loss: 0.1572  avg_val_loss: 0.2967  time: 7s
Epoch 15 - Score: 0.2993


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2976(0.2967) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1823(0.1823) Grad: 0.8134  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1569(0.1537) Grad: 0.8018  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1259(0.1459) Grad: 0.6873  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3007(0.3007) 


Epoch 16 - avg_train_loss: 0.1459  avg_val_loss: 0.2724  time: 7s
Epoch 16 - Score: 0.2745
Epoch 16 - Save Best Score: 0.2745 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3004(0.2724) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1135(0.1135) Grad: 0.7288  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1884(0.1716) Grad: 1.5071  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0850(0.1538) Grad: 2.6191  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3138(0.3138) 


Epoch 17 - avg_train_loss: 0.1538  avg_val_loss: 0.2766  time: 7s
Epoch 17 - Score: 0.2787


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2789(0.2766) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1215(0.1215) Grad: 3.2550  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1483(0.1508) Grad: 0.6034  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1719(0.1603) Grad: 1.1270  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2722(0.2722) 


Epoch 18 - avg_train_loss: 0.1603  avg_val_loss: 0.2667  time: 7s
Epoch 18 - Score: 0.2669
Epoch 18 - Save Best Score: 0.2669 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2815(0.2667) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1959(0.1959) Grad: 2.3249  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1852(0.2075) Grad: 2.0920  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2611(0.2013) Grad: 4.6080  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3186(0.3186) 


Epoch 19 - avg_train_loss: 0.2013  avg_val_loss: 0.3088  time: 7s
Epoch 19 - Score: 0.3091


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3267(0.3088) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1710(0.1710) Grad: 4.5266  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1854(0.1981) Grad: 0.8099  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2296(0.1814) Grad: 2.6646  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2673(0.2673) 


Epoch 20 - avg_train_loss: 0.1814  avg_val_loss: 0.2425  time: 7s
Epoch 20 - Score: 0.2446
Epoch 20 - Save Best Score: 0.2446 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2756(0.2425) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0853(0.0853) Grad: 0.8330  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1735(0.1297) Grad: 0.7934  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0762(0.1275) Grad: 0.7734  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2704(0.2704) 


Epoch 21 - avg_train_loss: 0.1275  avg_val_loss: 0.2454  time: 7s
Epoch 21 - Score: 0.2466


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2515(0.2454) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1050(0.1050) Grad: 1.3376  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0851(0.1091) Grad: 0.8065  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1379(0.1068) Grad: 0.7872  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2583(0.2583) 


Epoch 22 - avg_train_loss: 0.1068  avg_val_loss: 0.2389  time: 7s
Epoch 22 - Score: 0.2401
Epoch 22 - Save Best Score: 0.2401 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2614(0.2389) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0964(0.0964) Grad: 1.2628  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0893(0.1038) Grad: 0.7203  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1072(0.1140) Grad: 4.2805  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2610(0.2610) 


Epoch 23 - avg_train_loss: 0.1140  avg_val_loss: 0.2441  time: 7s
Epoch 23 - Score: 0.2468


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3009(0.2441) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0929(0.0929) Grad: 1.4153  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1001(0.1236) Grad: 2.9581  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1184(0.1254) Grad: 2.3264  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3061(0.3061) 


Epoch 24 - avg_train_loss: 0.1254  avg_val_loss: 0.2559  time: 7s
Epoch 24 - Score: 0.2606


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2719(0.2559) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0997(0.0997) Grad: 2.9249  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0989(0.1272) Grad: 3.4240  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1550(0.1275) Grad: 4.1308  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3190(0.3190) 


Epoch 25 - avg_train_loss: 0.1275  avg_val_loss: 0.2940  time: 7s
Epoch 25 - Score: 0.2959


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3287(0.2940) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1556(0.1556) Grad: 2.8508  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1304(0.1323) Grad: 1.8859  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1505(0.1245) Grad: 1.2246  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3138(0.3138) 


Epoch 26 - avg_train_loss: 0.1245  avg_val_loss: 0.2619  time: 7s
Epoch 26 - Score: 0.2667


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2768(0.2619) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1059(0.1059) Grad: 3.8171  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1121(0.1013) Grad: 0.7029  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1067(0.0962) Grad: 2.2288  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2652(0.2652) 


Epoch 27 - avg_train_loss: 0.0962  avg_val_loss: 0.2429  time: 7s
Epoch 27 - Score: 0.2439


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2512(0.2429) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0786(0.0786) Grad: 1.3734  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0783(0.0836) Grad: 0.7797  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1109(0.0842) Grad: 1.6559  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2665(0.2665) 


Epoch 28 - avg_train_loss: 0.0842  avg_val_loss: 0.2440  time: 7s
Epoch 28 - Score: 0.2448


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2373(0.2440) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0847(0.0847) Grad: 0.4439  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0981(0.0760) Grad: 1.8517  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0580(0.0758) Grad: 0.5599  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2649(0.2649) 


Epoch 29 - avg_train_loss: 0.0758  avg_val_loss: 0.2274  time: 7s
Epoch 29 - Score: 0.2309
Epoch 29 - Save Best Score: 0.2309 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2520(0.2274) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0846(0.0846) Grad: 0.7971  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0811(0.0854) Grad: 2.1857  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0797(0.0822) Grad: 0.9654  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2918(0.2918) 


Epoch 30 - avg_train_loss: 0.0822  avg_val_loss: 0.2508  time: 7s
Epoch 30 - Score: 0.2553


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2886(0.2508) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0638(0.0638) Grad: 1.0600  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1112(0.0962) Grad: 1.3405  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0735(0.0939) Grad: 1.5758  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2860(0.2860) 


Epoch 31 - avg_train_loss: 0.0939  avg_val_loss: 0.2503  time: 7s
Epoch 31 - Score: 0.2523


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2473(0.2503) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1120(0.1120) Grad: 3.5987  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0717(0.0878) Grad: 1.1395  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0794(0.0885) Grad: 1.0889  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2838(0.2838) 


Epoch 32 - avg_train_loss: 0.0885  avg_val_loss: 0.2488  time: 7s
Epoch 32 - Score: 0.2532


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2985(0.2488) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0980(0.0980) Grad: 2.9301  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0568(0.0789) Grad: 1.2236  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0708(0.0752) Grad: 1.1055  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2777(0.2777) 


Epoch 33 - avg_train_loss: 0.0752  avg_val_loss: 0.2349  time: 7s
Epoch 33 - Score: 0.2384


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2433(0.2349) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0703(0.0703) Grad: 0.7932  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0656(0.0620) Grad: 0.9929  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0438(0.0625) Grad: 0.4288  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2804(0.2804) 


Epoch 34 - avg_train_loss: 0.0625  avg_val_loss: 0.2417  time: 7s
Epoch 34 - Score: 0.2450


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2614(0.2417) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0664(0.0664) Grad: 1.5806  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0534(0.0652) Grad: 1.0749  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0517(0.0641) Grad: 1.9271  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2813(0.2813) 


Epoch 35 - avg_train_loss: 0.0641  avg_val_loss: 0.2412  time: 7s
Epoch 35 - Score: 0.2439


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2390(0.2412) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0630(0.0630) Grad: 1.2012  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1022(0.0666) Grad: 2.5440  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0573(0.0677) Grad: 0.7748  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2851(0.2851) 


Epoch 36 - avg_train_loss: 0.0677  avg_val_loss: 0.2497  time: 7s
Epoch 36 - Score: 0.2520


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2564(0.2497) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0855(0.0855) Grad: 1.5552  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0973(0.0826) Grad: 1.3164  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1765(0.0995) Grad: 3.6365  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3057(0.3057) 


Epoch 37 - avg_train_loss: 0.0995  avg_val_loss: 0.2604  time: 7s
Epoch 37 - Score: 0.2647


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2863(0.2604) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1069(0.1069) Grad: 3.0251  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0961(0.0969) Grad: 1.6364  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0895(0.0956) Grad: 3.6357  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2865(0.2865) 


Epoch 38 - avg_train_loss: 0.0956  avg_val_loss: 0.2430  time: 7s
Epoch 38 - Score: 0.2474


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2720(0.2430) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0985(0.0985) Grad: 2.1592  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0949(0.0767) Grad: 3.2013  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0537(0.0752) Grad: 0.9967  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2770(0.2770) 


Epoch 39 - avg_train_loss: 0.0752  avg_val_loss: 0.2446  time: 7s
Epoch 39 - Score: 0.2472


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2683(0.2446) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0610(0.0610) Grad: 0.8313  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0516(0.0612) Grad: 1.3010  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0664(0.0583) Grad: 0.5129  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2595(0.2595) 


Epoch 40 - avg_train_loss: 0.0583  avg_val_loss: 0.2374  time: 7s
Epoch 40 - Score: 0.2395


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2741(0.2374) 


Score: 0.2309


Epoch: [1][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 3.2829(3.2829) Grad: 3.3601  LR: 0.000100  
Epoch: [1][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6478(1.5814) Grad: 3.0956  LR: 0.000100  
Epoch: [1][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6450(1.1894) Grad: 2.8348  LR: 0.000100  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.6819(0.6819) 


Epoch 1 - avg_train_loss: 1.1894  avg_val_loss: 0.6382  time: 7s
Epoch 1 - Score: 0.6392
Epoch 1 - Save Best Score: 0.6392 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5969(0.6382) 
Epoch: [2][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5730(0.5730) Grad: 1.6952  LR: 0.000057  
Epoch: [2][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.6081(0.5682) Grad: 1.5614  LR: 0.000057  
Epoch: [2][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4926(0.5724) Grad: 2.5385  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5658(0.5658) 


Epoch 2 - avg_train_loss: 0.5724  avg_val_loss: 0.5257  time: 7s
Epoch 2 - Score: 0.5273
Epoch 2 - Save Best Score: 0.5273 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4476(0.5257) 
Epoch: [3][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5089(0.5089) Grad: 1.3924  LR: 0.000009  
Epoch: [3][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5133(0.4987) Grad: 0.5759  LR: 0.000009  
Epoch: [3][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3459(0.4921) Grad: 0.7699  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5139(0.5139) 


Epoch 3 - avg_train_loss: 0.4921  avg_val_loss: 0.4739  time: 7s
Epoch 3 - Score: 0.4765
Epoch 3 - Save Best Score: 0.4765 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3704(0.4739) 
Epoch: [4][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5633(0.5633) Grad: 1.5503  LR: 0.000001  
Epoch: [4][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.5070(0.4823) Grad: 0.7753  LR: 0.000001  
Epoch: [4][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4727(0.4538) Grad: 1.0401  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.5084(0.5084) 


Epoch 4 - avg_train_loss: 0.4538  avg_val_loss: 0.4673  time: 7s
Epoch 4 - Score: 0.4697
Epoch 4 - Save Best Score: 0.4697 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3698(0.4673) 
Epoch: [5][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5053(0.5053) Grad: 1.9520  LR: 0.000050  
Epoch: [5][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.4727(0.4436) Grad: 1.3977  LR: 0.000050  
Epoch: [5][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4420(0.4220) Grad: 1.1022  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4724(0.4724) 


Epoch 5 - avg_train_loss: 0.4220  avg_val_loss: 0.4251  time: 7s
Epoch 5 - Score: 0.4278
Epoch 5 - Save Best Score: 0.4278 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3353(0.4251) 
Epoch: [6][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3477(0.3477) Grad: 2.2232  LR: 0.000224  
Epoch: [6][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2268(0.3761) Grad: 1.0934  LR: 0.000224  
Epoch: [6][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.3766(0.3760) Grad: 2.4776  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.4450(0.4450) 


Epoch 6 - avg_train_loss: 0.3760  avg_val_loss: 0.3964  time: 7s
Epoch 6 - Score: 0.3990
Epoch 6 - Save Best Score: 0.3990 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3219(0.3964) 
Epoch: [7][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.3501(0.3501) Grad: 1.9510  LR: 0.000133  
Epoch: [7][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.3045(0.3250) Grad: 2.8659  LR: 0.000133  
Epoch: [7][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.4065(0.3225) Grad: 3.6743  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3614(0.3614) 


Epoch 7 - avg_train_loss: 0.3225  avg_val_loss: 0.3259  time: 7s
Epoch 7 - Score: 0.3296
Epoch 7 - Save Best Score: 0.3296 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2209(0.3259) 
Epoch: [8][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2572(0.2572) Grad: 0.6466  LR: 0.000057  
Epoch: [8][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2103(0.2676) Grad: 2.1293  LR: 0.000057  
Epoch: [8][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1759(0.2699) Grad: 1.6577  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3436(0.3436) 


Epoch 8 - avg_train_loss: 0.2699  avg_val_loss: 0.2965  time: 7s
Epoch 8 - Score: 0.3004
Epoch 8 - Save Best Score: 0.3004 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2057(0.2965) 
Epoch: [9][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1966(0.1966) Grad: 1.0787  LR: 0.000009  
Epoch: [9][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2243(0.2229) Grad: 1.3566  LR: 0.000009  
Epoch: [9][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2901(0.2336) Grad: 1.8726  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3134(0.3134) 


Epoch 9 - avg_train_loss: 0.2336  avg_val_loss: 0.2793  time: 7s
Epoch 9 - Score: 0.2827
Epoch 9 - Save Best Score: 0.2827 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1863(0.2793) 
Epoch: [10][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2321(0.2321) Grad: 1.5069  LR: 0.000001  
Epoch: [10][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.2429(0.2100) Grad: 0.7996  LR: 0.000001  
Epoch: [10][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2512(0.2080) Grad: 1.6305  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3206(0.3206) 


Epoch 10 - avg_train_loss: 0.2080  avg_val_loss: 0.2849  time: 7s
Epoch 10 - Score: 0.2871


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2221(0.2849) 
Epoch: [11][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1903(0.1903) Grad: 1.5992  LR: 0.000050  
Epoch: [11][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1511(0.2153) Grad: 1.6120  LR: 0.000050  
Epoch: [11][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2224(0.2261) Grad: 3.7849  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3177(0.3177) 


Epoch 11 - avg_train_loss: 0.2261  avg_val_loss: 0.2844  time: 7s
Epoch 11 - Score: 0.2886


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1783(0.2844) 
Epoch: [12][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.2356(0.2356) Grad: 1.6627  LR: 0.000224  
Epoch: [12][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1878(0.1856) Grad: 0.9421  LR: 0.000224  
Epoch: [12][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2393(0.2025) Grad: 2.4023  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2717(0.2717) 


Epoch 12 - avg_train_loss: 0.2025  avg_val_loss: 0.2619  time: 7s
Epoch 12 - Score: 0.2635
Epoch 12 - Save Best Score: 0.2635 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1976(0.2619) 
Epoch: [13][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2020(0.2020) Grad: 0.8879  LR: 0.000133  
Epoch: [13][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1506(0.1878) Grad: 2.5725  LR: 0.000133  
Epoch: [13][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2123(0.1900) Grad: 3.1337  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2897(0.2897) 


Epoch 13 - avg_train_loss: 0.1900  avg_val_loss: 0.2662  time: 7s
Epoch 13 - Score: 0.2680


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2012(0.2662) 
Epoch: [14][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1668(0.1668) Grad: 1.2186  LR: 0.000057  
Epoch: [14][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1371(0.1616) Grad: 0.6689  LR: 0.000057  
Epoch: [14][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1848(0.1639) Grad: 0.9304  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.3030(0.3030) 


Epoch 14 - avg_train_loss: 0.1639  avg_val_loss: 0.2664  time: 7s
Epoch 14 - Score: 0.2683


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2258(0.2664) 
Epoch: [15][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0984(0.0984) Grad: 2.7180  LR: 0.000009  
Epoch: [15][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1525(0.1426) Grad: 0.8355  LR: 0.000009  
Epoch: [15][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.2031(0.1387) Grad: 2.0819  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2520(0.2520) 


Epoch 15 - avg_train_loss: 0.1387  avg_val_loss: 0.2325  time: 7s
Epoch 15 - Score: 0.2345
Epoch 15 - Save Best Score: 0.2345 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1661(0.2325) 
Epoch: [16][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1516(0.1516) Grad: 0.7567  LR: 0.000001  
Epoch: [16][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1342(0.1378) Grad: 1.3075  LR: 0.000001  
Epoch: [16][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0977(0.1252) Grad: 1.5294  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2576(0.2576) 


Epoch 16 - avg_train_loss: 0.1252  avg_val_loss: 0.2310  time: 7s
Epoch 16 - Score: 0.2327
Epoch 16 - Save Best Score: 0.2327 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1773(0.2310) 
Epoch: [17][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1271(0.1271) Grad: 1.9664  LR: 0.000050  
Epoch: [17][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0952(0.1205) Grad: 2.6043  LR: 0.000050  
Epoch: [17][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1982(0.1318) Grad: 1.1370  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2364(0.2364) 


Epoch 17 - avg_train_loss: 0.1318  avg_val_loss: 0.2209  time: 7s
Epoch 17 - Score: 0.2223
Epoch 17 - Save Best Score: 0.2223 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1668(0.2209) 
Epoch: [18][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0977(0.0977) Grad: 2.2336  LR: 0.000224  
Epoch: [18][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0957(0.1323) Grad: 2.8150  LR: 0.000224  
Epoch: [18][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1755(0.1341) Grad: 1.3796  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2722(0.2722) 


Epoch 18 - avg_train_loss: 0.1341  avg_val_loss: 0.2464  time: 7s
Epoch 18 - Score: 0.2487


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1750(0.2464) 
Epoch: [19][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1886(0.1886) Grad: 1.5079  LR: 0.000133  
Epoch: [19][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1177(0.1578) Grad: 0.6636  LR: 0.000133  
Epoch: [19][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1751(0.1458) Grad: 2.3716  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2707(0.2707) 


Epoch 19 - avg_train_loss: 0.1458  avg_val_loss: 0.2433  time: 7s
Epoch 19 - Score: 0.2447


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1963(0.2433) 
Epoch: [20][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1439(0.1439) Grad: 4.2157  LR: 0.000057  
Epoch: [20][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1924(0.1517) Grad: 5.1376  LR: 0.000057  
Epoch: [20][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1269(0.1503) Grad: 0.5117  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2534(0.2534) 


Epoch 20 - avg_train_loss: 0.1503  avg_val_loss: 0.2194  time: 7s
Epoch 20 - Score: 0.2223


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1505(0.2194) 
Epoch: [21][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1501(0.1501) Grad: 3.5636  LR: 0.000009  
Epoch: [21][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1514(0.1259) Grad: 1.7489  LR: 0.000009  
Epoch: [21][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1783(0.1233) Grad: 0.9217  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2291(0.2291) 


Epoch 21 - avg_train_loss: 0.1233  avg_val_loss: 0.2113  time: 7s
Epoch 21 - Score: 0.2129
Epoch 21 - Save Best Score: 0.2129 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1550(0.2113) 
Epoch: [22][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1134(0.1134) Grad: 1.1180  LR: 0.000001  
Epoch: [22][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1685(0.1013) Grad: 0.7021  LR: 0.000001  
Epoch: [22][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0830(0.0962) Grad: 0.7798  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2385(0.2385) 


Epoch 22 - avg_train_loss: 0.0962  avg_val_loss: 0.2128  time: 7s
Epoch 22 - Score: 0.2148


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1533(0.2128) 
Epoch: [23][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0911(0.0911) Grad: 1.5959  LR: 0.000050  
Epoch: [23][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0875(0.0952) Grad: 1.3151  LR: 0.000050  
Epoch: [23][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1002(0.1020) Grad: 0.5421  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2504(0.2504) 


Epoch 23 - avg_train_loss: 0.1020  avg_val_loss: 0.2030  time: 7s
Epoch 23 - Score: 0.2075
Epoch 23 - Save Best Score: 0.2075 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1374(0.2030) 
Epoch: [24][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1068(0.1068) Grad: 2.0452  LR: 0.000224  
Epoch: [24][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1350(0.1193) Grad: 3.2550  LR: 0.000224  
Epoch: [24][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1399(0.1160) Grad: 1.6159  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2740(0.2740) 


Epoch 24 - avg_train_loss: 0.1160  avg_val_loss: 0.2293  time: 7s
Epoch 24 - Score: 0.2327


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1746(0.2293) 
Epoch: [25][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1522(0.1522) Grad: 2.5967  LR: 0.000133  
Epoch: [25][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0949(0.1392) Grad: 2.7783  LR: 0.000133  
Epoch: [25][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1457(0.1367) Grad: 1.7151  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2439(0.2439) 


Epoch 25 - avg_train_loss: 0.1367  avg_val_loss: 0.2174  time: 7s
Epoch 25 - Score: 0.2200


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1471(0.2174) 
Epoch: [26][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0986(0.0986) Grad: 1.5716  LR: 0.000057  
Epoch: [26][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1592(0.1336) Grad: 4.4955  LR: 0.000057  
Epoch: [26][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1369(0.1367) Grad: 1.5306  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2838(0.2838) 


Epoch 26 - avg_train_loss: 0.1367  avg_val_loss: 0.2352  time: 7s
Epoch 26 - Score: 0.2392


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1729(0.2352) 
Epoch: [27][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0919(0.0919) Grad: 3.0056  LR: 0.000009  
Epoch: [27][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0674(0.0971) Grad: 0.7100  LR: 0.000009  
Epoch: [27][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0687(0.0935) Grad: 1.1123  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2409(0.2409) 


Epoch 27 - avg_train_loss: 0.0935  avg_val_loss: 0.2074  time: 7s
Epoch 27 - Score: 0.2098


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1567(0.2074) 
Epoch: [28][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0616(0.0616) Grad: 1.4225  LR: 0.000001  
Epoch: [28][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0713(0.0773) Grad: 1.5911  LR: 0.000001  
Epoch: [28][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0854(0.0775) Grad: 1.1540  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2500(0.2500) 


Epoch 28 - avg_train_loss: 0.0775  avg_val_loss: 0.2034  time: 7s
Epoch 28 - Score: 0.2075
Epoch 28 - Save Best Score: 0.2075 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1485(0.2034) 
Epoch: [29][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0463(0.0463) Grad: 1.5861  LR: 0.000050  
Epoch: [29][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0610(0.0733) Grad: 0.5214  LR: 0.000050  
Epoch: [29][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0962(0.0743) Grad: 0.7777  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2375(0.2375) 


Epoch 29 - avg_train_loss: 0.0743  avg_val_loss: 0.2013  time: 7s
Epoch 29 - Score: 0.2044
Epoch 29 - Save Best Score: 0.2044 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1372(0.2013) 
Epoch: [30][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0611(0.0611) Grad: 1.6671  LR: 0.000224  
Epoch: [30][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1086(0.0784) Grad: 1.9200  LR: 0.000224  
Epoch: [30][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0855(0.0832) Grad: 2.4966  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2302(0.2302) 


Epoch 30 - avg_train_loss: 0.0832  avg_val_loss: 0.1948  time: 7s
Epoch 30 - Score: 0.1975
Epoch 30 - Save Best Score: 0.1975 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1421(0.1948) 
Epoch: [31][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0932(0.0932) Grad: 1.3564  LR: 0.000133  
Epoch: [31][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0957(0.0873) Grad: 0.9745  LR: 0.000133  
Epoch: [31][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0728(0.0963) Grad: 0.5214  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2506(0.2506) 


Epoch 31 - avg_train_loss: 0.0963  avg_val_loss: 0.2188  time: 7s
Epoch 31 - Score: 0.2213


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1560(0.2188) 
Epoch: [32][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1080(0.1080) Grad: 1.2791  LR: 0.000057  
Epoch: [32][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0750(0.0940) Grad: 0.7802  LR: 0.000057  
Epoch: [32][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0807(0.0955) Grad: 3.0376  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2358(0.2358) 


Epoch 32 - avg_train_loss: 0.0955  avg_val_loss: 0.2210  time: 7s
Epoch 32 - Score: 0.2232


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1510(0.2210) 
Epoch: [33][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1168(0.1168) Grad: 3.0258  LR: 0.000009  
Epoch: [33][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0532(0.0768) Grad: 0.4803  LR: 0.000009  
Epoch: [33][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0952(0.0787) Grad: 1.7936  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2297(0.2297) 


Epoch 33 - avg_train_loss: 0.0787  avg_val_loss: 0.1985  time: 7s
Epoch 33 - Score: 0.2012


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1364(0.1985) 
Epoch: [34][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0431(0.0431) Grad: 0.8726  LR: 0.000001  
Epoch: [34][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0625(0.0614) Grad: 0.5874  LR: 0.000001  
Epoch: [34][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0603(0.0644) Grad: 2.3775  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2218(0.2218) 


Epoch 34 - avg_train_loss: 0.0644  avg_val_loss: 0.1934  time: 7s
Epoch 34 - Score: 0.1954
Epoch 34 - Save Best Score: 0.1954 Model


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1422(0.1934) 
Epoch: [35][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0397(0.0397) Grad: 1.6940  LR: 0.000050  
Epoch: [35][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0618(0.0701) Grad: 0.8143  LR: 0.000050  
Epoch: [35][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0992(0.0691) Grad: 0.9961  LR: 0.000050  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2446(0.2446) 


Epoch 35 - avg_train_loss: 0.0691  avg_val_loss: 0.2049  time: 7s
Epoch 35 - Score: 0.2087


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1339(0.2049) 
Epoch: [36][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0658(0.0658) Grad: 0.5185  LR: 0.000224  
Epoch: [36][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0682(0.0664) Grad: 1.1537  LR: 0.000224  
Epoch: [36][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.1179(0.0697) Grad: 1.7453  LR: 0.000224  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2203(0.2203) 


Epoch 36 - avg_train_loss: 0.0697  avg_val_loss: 0.2134  time: 7s
Epoch 36 - Score: 0.2143


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1697(0.2134) 
Epoch: [37][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0953(0.0953) Grad: 3.6621  LR: 0.000133  
Epoch: [37][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1135(0.0943) Grad: 4.6346  LR: 0.000133  
Epoch: [37][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0903(0.0945) Grad: 3.6026  LR: 0.000133  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2489(0.2489) 


Epoch 37 - avg_train_loss: 0.0945  avg_val_loss: 0.2053  time: 7s
Epoch 37 - Score: 0.2098


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1267(0.2053) 
Epoch: [38][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0858(0.0858) Grad: 4.3381  LR: 0.000057  
Epoch: [38][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.1121(0.0931) Grad: 4.0848  LR: 0.000057  
Epoch: [38][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0749(0.0907) Grad: 3.4491  LR: 0.000057  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2136(0.2136) 


Epoch 38 - avg_train_loss: 0.0907  avg_val_loss: 0.1980  time: 7s
Epoch 38 - Score: 0.1993


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1496(0.1980) 
Epoch: [39][0/19] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0615(0.0615) Grad: 1.5461  LR: 0.000009  
Epoch: [39][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0631(0.0689) Grad: 1.5760  LR: 0.000009  
Epoch: [39][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0812(0.0718) Grad: 0.9409  LR: 0.000009  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2319(0.2319) 


Epoch 39 - avg_train_loss: 0.0718  avg_val_loss: 0.1991  time: 7s
Epoch 39 - Score: 0.2017


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1427(0.1991) 
Epoch: [40][0/19] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0573(0.0573) Grad: 3.1569  LR: 0.000001  
Epoch: [40][10/19] Elapsed 0m 3s (remain 0m 2s) Loss: 0.0602(0.0634) Grad: 3.1569  LR: 0.000001  
Epoch: [40][18/19] Elapsed 0m 6s (remain 0m 0s) Loss: 0.0520(0.0613) Grad: 0.8372  LR: 0.000001  
EVAL: [0/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.2376(0.2376) 


Epoch 40 - avg_train_loss: 0.0613  avg_val_loss: 0.2002  time: 7s
Epoch 40 - Score: 0.2042


EVAL: [2/3] Elapsed 0m 0s (remain 0m 0s) Loss: 0.1233(0.2002) 


Score: 0.1954
Score: 0.1982
