In [1]:
import json
import multiprocessing
import cv2
import albumentations as A
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast, GradScaler
import warnings
import pandas as pd
import time
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from tqdm.auto import tqdm

import argparse
import importlib
import torch
import torch.nn as nn
from torch.optim import Adam, SGD, AdamW

import datetime
import math
import random

from timm.utils.model_ema import ModelEmaV2

# Config

In [2]:
import os
import albumentations as A
from albumentations.pytorch import ToTensorV2


class CFG:
    # ============== comp exp name =============
    comp_name = 'atmacup_18'  # comp名

    comp_dataset_path = '../raw/atmacup_18_dataset/'

    exp_name = 'atmacup_18_cnn_swin_small_3'

    is_debug = False
    use_gray_scale = False

    model_in_chans = 9  # モデルの入力チャンネル数

    # ============== file path =============
    train_fold_dir = "../proc/baseline/folds"

    # ============== model cfg =============
    model_name = "swin_small_patch4_window7_224"

    num_frames = 3  # model_in_chansの倍数
    norm_in_chans = 1 if use_gray_scale else 3

    use_torch_compile = False
    use_ema = True
    ema_decay = 0.995
    # ============== training cfg =============
    size = 224  # 224

    batch_size = 64  # 32

    use_amp = True

    scheduler = 'GradualWarmupSchedulerV2'
    # scheduler = 'CosineAnnealingLR'
    epochs = 80
    if is_debug:
        epochs = 2

    # adamW warmupあり
    warmup_factor = 10
    lr = 1e-4
    if scheduler == 'GradualWarmupSchedulerV2':
        lr /= warmup_factor

    # ============== fold =============
    n_fold = 5
    use_holdout = False
    use_alldata = False
    train_folds = [0, 1, 2, 3, 4]

    skf_col = 'class'
    group_col = 'scene'
    fold_type = 'gkf'

    objective_cv = 'regression'  # 'binary', 'multiclass', 'regression'
    metric_direction = 'minimize'  # 'maximize', 'minimize'
    metrics = 'calc_mae_atmacup'

    # ============== pred target =============
    target_size = 18
    target_col = ['x_0', 'y_0', 'z_0', 'x_1', 'y_1', 'z_1', 'x_2', 'y_2',
                  'z_2', 'x_3', 'y_3', 'z_3', 'x_4', 'y_4', 'z_4', 'x_5', 'y_5', 'z_5']


    # ============== ほぼ固定 =============
    pretrained = True
    inf_weight = 'last'  # 'best'

    min_lr = 5e-8
    weight_decay = 1e-5
    max_grad_norm = 1000

    print_freq = 500
    num_workers = 4

    seed = 42

    # ============== set dataset path =============
    if exp_name is not None:
        print('set dataset path')

        outputs_path = f'../proc/baseline/outputs/{exp_name}/'

        submission_dir = outputs_path + 'submissions/'
        submission_path = submission_dir + f'submission_{exp_name}.csv'

        model_dir = outputs_path + \
            f'{comp_name}-models/'

        figures_dir = outputs_path + 'figures/'

        log_dir = outputs_path + 'logs/'
        log_path = log_dir + f'{exp_name}.txt'

    # ============== augmentation =============
    train_aug_list = [
        # A.RandomResizedCrop(
        #     size, size, scale=(0.85, 1.0)),
        A.Resize(size, size),
        A.Downscale(p=0.25),
        # A.HorizontalFlip(p=0.5),
        # A.VerticalFlip(p=0.5),
        # A.RandomRotate90(p=0.5),
        # A.RandomBrightnessContrast(p=0.5),
        # A.ShiftScaleRotate(p=0.5),
        A.OneOf([
                A.GaussNoise(var_limit=[10, 50]),
                A.GaussianBlur(),
                A.MotionBlur(),
                ], p=0.4),
        # A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.5),
        # A.CoarseDropout(max_holes=1, max_height=int(
        #     size * 0.3), max_width=int(size * 0.3), p=0.5),
        A.HueSaturationValue(
                hue_shift_limit=0.2, 
                sat_shift_limit=0.2, 
                val_shift_limit=0.2, 
                p=0.5
            ),
        A.RandomBrightnessContrast(
                brightness_limit=(-0.1,0.1), 
                contrast_limit=(-0.1, 0.1), 
                p=0.5
            ),

        A.Normalize(
                mean=[0.485, 0.456, 0.406], 
                std=[0.229, 0.224, 0.225], 
                max_pixel_value=255.0, 
                p=1.0
            ),
        ToTensorV2(),
    ]

    valid_aug_list = [
        A.Resize(size, size),
        A.Normalize(
                mean=[0.485, 0.456, 0.406], 
                std=[0.229, 0.224, 0.225], 
                max_pixel_value=255.0, 
                p=1.0
            ),
        ToTensorV2(),
    ]

set dataset path


In [3]:
print("CUDA が利用可能か:", torch.cuda.is_available())
print("利用可能な CUDA デバイス数:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("現在の CUDA デバイス:", torch.cuda.get_device_name(0))

CUDA が利用可能か: True
利用可能な CUDA デバイス数: 1
現在の CUDA デバイス: NVIDIA GeForce RTX 3090


# make fold

In [4]:
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold, StratifiedGroupKFold
# from iterstrat.ml_stratifiers import MultilabelStratifiedKFold


def get_fold(train, cfg):
    if cfg.fold_type == 'kf':
        Fold = KFold(n_splits=cfg.n_fold,
                     shuffle=True, random_state=cfg.seed)
        kf = Fold.split(train, train[cfg.target_col])
    elif cfg.fold_type == 'skf':
        Fold = StratifiedKFold(n_splits=cfg.n_fold,
                               shuffle=True, random_state=cfg.seed)
        kf = Fold.split(train, train[cfg.skf_col])
    elif cfg.fold_type == 'gkf':
        Fold = GroupKFold(n_splits=cfg.n_fold)
        groups = train[cfg.group_col].values
        kf = Fold.split(train, train[cfg.group_col], groups)
    elif cfg.fold_type == 'sgkf':
        Fold = StratifiedGroupKFold(n_splits=cfg.n_fold,
                                    shuffle=True, random_state=cfg.seed)
        groups = train[cfg.group_col].values
        kf = Fold.split(train, train[cfg.skf_col], groups)
    # elif fold_type == 'mskf':
    #     Fold = MultilabelStratifiedKFold(
    #         n_splits=cfg.n_fold, shuffle=True, random_state=cfg.seed)
    #     kf = Fold.split(train, train[cfg.skf_col])

    for n, (train_index, val_index) in enumerate(kf):
        train.loc[val_index, 'fold'] = int(n)
    train['fold'] = train['fold'].astype(int)

    print(train.groupby('fold').size())

    return train

In [5]:
def make_train_folds():
    train_df = pd.read_csv(CFG.comp_dataset_path + 'train_features.csv')

    train_df['scene'] = train_df['ID'].str.split('_').str[0]

    print('group', CFG.group_col)
    print(f'train len: {len(train_df)}')

    train_df = get_fold(train_df, CFG)

    # print(train_df.groupby(['fold', CFG.target_col]).size())
    print(train_df['fold'].value_counts())

    os.makedirs(CFG.train_fold_dir, exist_ok=True)

    train_df.to_csv(CFG.train_fold_dir +
                    'train_folds.csv', index=False)

In [6]:
make_train_folds()

group scene
train len: 43371
fold
0    8675
1    8674
2    8674
3    8674
4    8674
dtype: int64
fold
0    8675
1    8674
2    8674
3    8674
4    8674
Name: count, dtype: int64


# setting

In [7]:
# 乱数固定
def set_seed(seed=None, cudnn_deterministic=True):
    if seed is None:
        seed = 42

    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = cudnn_deterministic
    torch.backends.cudnn.benchmark = False

def make_dirs(cfg):
    for dir in [cfg.model_dir, cfg.figures_dir, cfg.submission_dir, cfg.log_dir]:
        os.makedirs(dir, exist_ok=True)

def cfg_init(cfg, mode='train'):
    set_seed(cfg.seed)

    if mode == 'train':
        make_dirs(cfg)

In [8]:
cfg_init(CFG)

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# logger

In [10]:
# from common_utils.logger import init_logger, wandb_init, AverageMeter, timeSince
# from common_utils.settings import cfg_init

def init_logger(log_file):
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))

In [11]:
Logger = init_logger(log_file=CFG.log_path)

Logger.info('\n\n-------- exp_info -----------------')
Logger.info(datetime.datetime.now().strftime('%Y年%m月%d日 %H:%M:%S'))



-------- exp_info -----------------
2024年11月22日 14:37:14


# eval function

In [12]:
def get_score(y_true, y_pred):
    # return roc_auc_score(y_true, y_pred)
    eval_func = eval(CFG.metrics)
    return eval_func(y_true, y_pred)


def calc_mae_atmacup(y_true, y_pred):
    abs_diff = np.abs(y_true - y_pred)  # 各予測の差分の絶対値を計算して
    mae = np.mean(abs_diff.reshape(-1, ))  # 予測の差分の絶対値の平均を計算

    return mae

def get_result(result_df):

    # preds = result_df['preds'].values

    pred_cols = [f'pred_{i}' for i in range(CFG.target_size)]
    preds = result_df[pred_cols].values
    labels = result_df[CFG.target_col].values
    score = get_score(labels, preds)
    Logger.info(f'score: {score:<.4f}')

    return score

# image utils

In [13]:
def draw_traffic_light(image, id):
    path = f'./datasets/atmacup_18/traffic_lights/{id}.json'
    traffic_lights = json.load(open(path))

    traffic_class = ['green',
                     'straight', 'left', 'right', 'empty', 'other', 'yellow', 'red']
    class_to_idx = {
        cls: idx for idx, cls in enumerate(traffic_class)
    }

    for traffic_light in traffic_lights:
        bbox = traffic_light['bbox']
        x1, y1, x2, y2 = bbox
        # int
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

        point1 = (x1, y1)
        point2 = (x2, y2)

        idx = class_to_idx[traffic_light['class']]
        color = 255 - int(255*(idx/len(traffic_class)))

        cv2.rectangle(image, point1, point2, color=color, thickness=1)

    return image


def read_image_for_cache(path):
    if CFG.use_gray_scale:
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    else:
        image = cv2.imread(path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # image = cv2.resize(image, (CFG.size, CFG.size))

    # 効かない
    # image = draw_traffic_light(image, path.split('/')[-2])
    return (path, image)


def make_video_cache(paths):
    debug = []
    for idx in range(9):
        color = 255 - int(255*(idx/9))
        debug.append(color)
    print(debug)

    processes = multiprocessing.cpu_count()
    with multiprocessing.Pool(processes=processes) as pool:
        res = pool.imap_unordered(read_image_for_cache, paths)
        res = tqdm(res)
        res = list(res)

    return dict(res)

# dataset

In [14]:
from albumentations import ReplayCompose
from torch.utils.data import DataLoader, Dataset
import albumentations as A

def get_transforms(data, cfg):
    if data == 'train':
        # aug = A.Compose(cfg.train_aug_list)
        aug = A.ReplayCompose(cfg.train_aug_list)
    elif data == 'valid':
        # aug = A.Compose(cfg.valid_aug_list)
        aug = A.ReplayCompose(cfg.valid_aug_list)

    # print(aug)
    return aug


class CustomDataset(Dataset):
    def __init__(self, df, cfg, labels=None, transform=None):
        self.df = df
        self.cfg = cfg
        self.base_paths = df['base_path'].values
        # self.labels = df[self.cfg.target_col].values
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def read_image_multiframe(self, idx):
        base_path = self.base_paths[idx]

        images = []
        suffixs = ['image_t-1.0.png', 'image_t-0.5.png', 'image_t.png']
        for suffix in suffixs:
            path = base_path + suffix

            image = self.cfg.video_cache[path]

            images.append(image)
        return images

    def __getitem__(self, idx):
        image = self.read_image_multiframe(idx)

        if self.transform:
            # image = self.transform(image=image)['image']
            replay = None
            images = []
            for img in image:
                if replay is None:
                    sample = self.transform(image=img)
                    replay = sample['replay']
                else:
                    sample = ReplayCompose.replay(replay, image=img)
                images.append(sample['image'])

            image = torch.concat(images, dim=0)

        if self.labels is None:
            return image

        if self.cfg.objective_cv == 'multiclass':
            label = torch.tensor(self.labels[idx]).long()
        else:
            label = torch.tensor(self.labels[idx]).float()

        return image, label

In [15]:
def plot_aug_video(train, cfg, plot_count=1):
    transform = CFG.train_aug_list
    transform = A.ReplayCompose(transform)

    dataset = CustomDataset(
        train, CFG, transform=transform)

    for i in range(plot_count):
        image = dataset.read_image_multiframe(i)

        if cfg.use_gray_scale:
            image = np.stack(image, axis=2)
        else:
            image = np.concatenate(image, axis=2)

        aug_image = dataset[i]
        # torch to numpy
        aug_image = aug_image.permute(1, 2, 0).numpy()*255

        for frame in range(image.shape[-1]):
            if frame % 3 != 0:
                continue

            fig, axes = plt.subplots(1, 2, figsize=(15, 6))

            if cfg.use_gray_scale:
                axes[0].imshow(image[..., frame], cmap="gray")
                axes[1].imshow(aug_image[..., frame], cmap="gray")
            else:
                axes[0].imshow(image[..., frame:frame+3].astype(int))
                axes[1].imshow(aug_image[..., frame:frame+3].astype(int))
            plt.savefig(cfg.figures_dir +
                        f'aug_{i}_frame{frame}.png')

# model

In [16]:
import timm

class CustomModel(nn.Module):
    def __init__(self, cfg, pretrained=False, target_size=None, model_name=None):
        super().__init__()
        # self.cfg = cfg

        if model_name is None:
            model_name = cfg.model_name

        print(f'pretrained: {pretrained}')

        self.model = timm.create_model(
            model_name, pretrained=pretrained, num_classes=0,
            in_chans=cfg.model_in_chans)

        # モデルの出力サイズを取得
        if hasattr(self.model, 'num_features'):
            self.n_features = self.model.num_features  # num_featuresで取得するモデルが多い
        elif hasattr(self.model, 'classifier') and hasattr(self.model.classifier, 'in_features'):
            self.n_features = self.model.classifier.in_features  # classifierが存在する場合
        elif hasattr(self.model, 'fc') and hasattr(self.model.fc, 'in_features'):
            self.n_features = self.model.fc.in_features  # fcが存在する場合
        else:
            raise AttributeError("Could not find the output feature size.")

        self.target_size = cfg.target_size if target_size is None else target_size

        # nn.Dropout(0.5),
        self.final_fc = nn.Sequential(
            nn.Linear(self.n_features, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, self.target_size),
        )

    def feature(self, image):

        feature = self.model(image)
        return feature

    def forward(self, image):
        feature = self.feature(image)
        output = self.final_fc(feature)
        return output

# scheduler

In [17]:
import torch.nn as nn
import torch
import math
import time
import numpy as np
import torch

from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from warmup_scheduler import GradualWarmupScheduler


class GradualWarmupSchedulerV2(GradualWarmupScheduler):
    """
    https://www.kaggle.com/code/underwearfitting/single-fold-training-of-resnet200d-lb0-965
    """
    def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None):
        super(GradualWarmupSchedulerV2, self).__init__(
            optimizer, multiplier, total_epoch, after_scheduler)

    def get_lr(self):
        if self.last_epoch > self.total_epoch:
            if self.after_scheduler:
                if not self.finished:
                    self.after_scheduler.base_lrs = [
                        base_lr * self.multiplier for base_lr in self.base_lrs]
                    self.finished = True
                return self.after_scheduler.get_lr()
            return [base_lr * self.multiplier for base_lr in self.base_lrs]
        if self.multiplier == 1.0:
            return [base_lr * (float(self.last_epoch) / self.total_epoch) for base_lr in self.base_lrs]
        else:
            return [base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs]

def get_scheduler(cfg, optimizer):
    if cfg.scheduler == 'ReduceLROnPlateau':
        scheduler = ReduceLROnPlateau(
            optimizer, mode='min', factor=cfg.factor, patience=cfg.patience, verbose=True, eps=cfg.eps)
    elif cfg.scheduler == 'CosineAnnealingLR':
        scheduler = CosineAnnealingLR(
            optimizer, T_max=cfg.epochs, eta_min=cfg.min_lr, last_epoch=-1)
    elif cfg.scheduler == 'CosineAnnealingWarmRestarts':
        scheduler = CosineAnnealingWarmRestarts(
            optimizer, T_0=cfg.T_0, T_mult=1, eta_min=cfg.min_lr, last_epoch=-1)
    elif cfg.scheduler == 'GradualWarmupSchedulerV2':
        scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, cfg.epochs, eta_min=1e-7)
        scheduler = GradualWarmupSchedulerV2(
            optimizer, multiplier=10, total_epoch=1, after_scheduler=scheduler_cosine)

    return scheduler

def scheduler_step(scheduler, avg_val_loss, epoch):
    if isinstance(scheduler, ReduceLROnPlateau):
        scheduler.step(avg_val_loss)
    elif isinstance(scheduler, CosineAnnealingLR):
        scheduler.step()
    elif isinstance(scheduler, CosineAnnealingWarmRestarts):
        scheduler.step()
    elif isinstance(scheduler, GradualWarmupSchedulerV2):
        scheduler.step(epoch)

# train

In [18]:
def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device,
             model_ema=None):
    """ 1epoch毎のtrain """

    model.train()
    scaler = GradScaler(enabled=CFG.use_amp)

    losses = AverageMeter()
    preds = []
    preds_labels = []
    start = time.time()
    global_step = 0

    for step, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)

        with autocast(CFG.use_amp):
            y_preds = model(images)

            if y_preds.size(1) == 1:
                y_preds = y_preds.view(-1)

            loss = criterion(y_preds, labels)

        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()

        grad_norm = torch.nn.utils.clip_grad_norm_(
            model.parameters(), CFG.max_grad_norm)

        scaler.step(optimizer)
        scaler.update()

        if model_ema is not None:
            model_ema.update(model)

        optimizer.zero_grad()
        global_step += 1

        if CFG.objective_cv == 'binary':
            preds.append(torch.sigmoid(y_preds).detach().to('cpu').numpy())
        elif CFG.objective_cv == 'multiclass':
            preds.append(y_preds.softmax(1).detach().to('cpu').numpy())
        elif CFG.objective_cv == 'regression':
            preds.append(y_preds.detach().to('cpu').numpy())

        preds_labels.append(labels.detach().to('cpu').numpy())

        if step % CFG.print_freq == 0 or step == (len(train_loader) - 1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.6f}  '
                  .format(epoch + 1, step, len(train_loader),
                          remain=timeSince(start, float(
                              step + 1) / len(train_loader)),
                          loss=losses,
                          grad_norm=grad_norm,
                          lr=scheduler.get_lr()[0]))
    predictions = np.concatenate(preds)
    labels = np.concatenate(preds_labels)
    return losses.avg, predictions, labels


def valid_fn(valid_loader, model, criterion, device):

    model.eval()
    losses = AverageMeter()
    preds = []
    start = time.time()

    for step, (images, labels) in enumerate(valid_loader):
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)

        with torch.no_grad():
            y_preds = model(images)

        if y_preds.size(1) == 1:
            y_preds = y_preds.view(-1)

        loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)

        # binary
        if CFG.objective_cv == 'binary':
            preds.append(torch.sigmoid(y_preds).to('cpu').numpy())
        elif CFG.objective_cv == 'multiclass':
            preds.append(y_preds.softmax(1).to('cpu').numpy())
        elif CFG.objective_cv == 'regression':
            preds.append(y_preds.to('cpu').numpy())

        if step % CFG.print_freq == 0 or step == (len(valid_loader) - 1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step + 1) / len(valid_loader))))
    predictions = np.concatenate(preds)
    return losses.avg, predictions

In [19]:
def train_fold(folds, fold):

    Logger.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    if CFG.use_alldata:
        train_folds = folds.copy().reset_index(drop=True)
    else:
        train_folds = folds.loc[trn_idx].reset_index(drop=True)

    valid_folds = folds.loc[val_idx].reset_index(drop=True)

    # train_folds = train_downsampling(train_folds)

    train_labels = train_folds[CFG.target_col].values
    valid_labels = valid_folds[CFG.target_col].values

    train_dataset = CustomDataset(
        train_folds, CFG, labels=train_labels, transform=get_transforms(data='train', cfg=CFG))
    valid_dataset = CustomDataset(
        valid_folds, CFG, labels=valid_labels, transform=get_transforms(data='valid', cfg=CFG))

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True,
                              )
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size * 2,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================

    model = CustomModel(CFG, pretrained=CFG.pretrained)
    model.to(device)

    if CFG.use_ema:
        model_ema = ModelEmaV2(model, decay=CFG.ema_decay)
    else:
        model_ema = None

    optimizer = AdamW(model.parameters(), lr=CFG.lr)
    scheduler = get_scheduler(CFG, optimizer)

    # ====================================================
    # loop
    # ====================================================
    if CFG.objective_cv == 'binary':
        criterion = nn.BCEWithLogitsLoss()
    elif CFG.objective_cv == 'multiclass':
        criterion = nn.CrossEntropyLoss()
    elif CFG.objective_cv == 'regression':
        criterion = nn.L1Loss()

    if CFG.metric_direction == 'minimize':
        best_score = np.inf
    elif CFG.metric_direction == 'maximize':
        best_score = -1

    best_loss = np.inf

    df_score = pd.DataFrame(columns=["train_loss", 'train_score', 'val_loss', 'val_score'])

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss, train_preds, train_labels_epoch = train_fn(fold, train_loader, model,
                                                             criterion, optimizer, epoch, scheduler, device, model_ema)
        train_score = get_score(train_labels_epoch, train_preds)

        # eval
        if model_ema is not None:
            avg_val_loss, valid_preds = valid_fn(
                valid_loader, model_ema.module, criterion, device)
        else:
            avg_val_loss, valid_preds = valid_fn(
                valid_loader, model, criterion, device)

        scheduler_step(scheduler, avg_val_loss, epoch)

        # scoring
        score = get_score(valid_labels, valid_preds)

        elapsed = time.time() - start_time

        Logger.info(
            f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        # Logger.info(f'Epoch {epoch+1} - avgScore: {avg_score:.4f}')
        Logger.info(
            f'Epoch {epoch+1} - avg_train_Score: {train_score:.4f} avgScore: {score:.4f}')
        
        df_score.loc[epoch] = [avg_loss, train_score, avg_val_loss, score]

        if CFG.metric_direction == 'minimize':
            update_best = score < best_score
        elif CFG.metric_direction == 'maximize':
            update_best = score > best_score

        if update_best:
            best_loss = avg_val_loss
            best_score = score

            Logger.info(
                f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            Logger.info(
                f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')

            if model_ema is not None:
                torch.save({'model': model_ema.module.state_dict(),
                            'preds': valid_preds},
                           CFG.model_dir + f'{CFG.model_name}_fold{fold}_best.pth')
            else:
                torch.save({'model': model.state_dict(),
                            'preds': valid_preds},
                           CFG.model_dir + f'{CFG.model_name}_fold{fold}_best.pth')

    """
    torch.save({'model': model.state_dict(),
                'preds': valid_preds},
               CFG.model_dir + f'{CFG.model_name}_fold{fold}_last.pth')
    """
    if model_ema is not None:
        torch.save({'model': model_ema.module.state_dict(),
                    'preds': valid_preds},
                   CFG.model_dir + f'{CFG.model_name}_fold{fold}_last.pth')
    else:
        torch.save({'model': model.state_dict(),
                    'preds': valid_preds},
                   CFG.model_dir + f'{CFG.model_name}_fold{fold}_last.pth')

    check_point = torch.load(
        CFG.model_dir + f'{CFG.model_name}_fold{fold}_{CFG.inf_weight}.pth', map_location=torch.device('cpu'))
    pred_cols = [f'pred_{i}' for i in range(CFG.target_size)]

    check_point_pred = check_point['preds']

    # Columns must be same length as key 対策
    if check_point_pred.ndim == 1:
        check_point_pred = check_point_pred.reshape(-1, CFG.target_size)

    print('check_point_pred shape', check_point_pred.shape)
    valid_folds[pred_cols] = check_point_pred
    return valid_folds, df_score

# train main

In [20]:
def main():
    train = pd.read_csv(CFG.train_fold_dir + 'train_folds.csv')
    train['ori_idx'] = train.index

    train['scene'] = train['ID'].str.split('_').str[0]

    """
    if CFG.is_debug:
        use_ids = train['scene'].unique()[:100]
        train = train[train['scene'].isin(use_ids)].reset_index(drop=True)
    """

    train['base_path'] = CFG.comp_dataset_path + 'images/' + train['ID'] + '/'

    paths = []
    for base_path in train['base_path'].values:
        suffixs = ['image_t-1.0.png', 'image_t-0.5.png', 'image_t.png']
        for suffix in suffixs:
            path = base_path + suffix
            paths.append(path)

    print(paths[:5])

    CFG.video_cache = make_video_cache(paths)

    # plot_aug_video(train, CFG, plot_count=10)

    # train
    oof_df = pd.DataFrame()
    list_df_score = []
    for fold in range(CFG.n_fold):
        if fold not in CFG.train_folds:
            print(f'fold {fold} is skipped')
            continue

        _oof_df, _df_score = train_fold(train, fold)
        oof_df = pd.concat([oof_df, _oof_df])
        list_df_score.append(_df_score)
        Logger.info(f"========== fold: {fold} result ==========")
        get_result(_oof_df)

        if CFG.use_holdout or CFG.use_alldata:
            break

    oof_df = oof_df.sort_values('ori_idx').reset_index(drop=True)

    # CV result
    Logger.info("========== CV ==========")
    score = get_result(oof_df)

    # 学習曲線を可視化する
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
    ax1.grid(alpha=0.1)
    ax2.grid(alpha=0.1)
    for i, df_score in enumerate(list_df_score):
        ax1.plot(df_score['train_score'], label=f'fold {i}')
        ax2.plot(df_score['val_score'], label=f'fold {i}')
    ax1.set_title('Train Score')
    ax2.set_title('Val Score') 
    ax1.set_xlabel('Epoch')
    ax2.set_xlabel('Epoch')
    ax1.set_ylabel('Train Score')
    ax2.set_ylabel('Val Score')
    ax1.set_ylim([0, 1.5])
    ax2.set_ylim([0, 1.5])
    plt.tight_layout()
    plt.savefig(CFG.figures_dir + f'learning_curve_{CFG.exp_name}.png')
    plt.show()

    # save result
    oof_df.to_csv(CFG.submission_dir + 'oof_cv.csv', index=False)

In [21]:
main()

['../raw/atmacup_18_dataset/images/00066be8e20318869c38c66be466631a_320/image_t-1.0.png', '../raw/atmacup_18_dataset/images/00066be8e20318869c38c66be466631a_320/image_t-0.5.png', '../raw/atmacup_18_dataset/images/00066be8e20318869c38c66be466631a_320/image_t.png', '../raw/atmacup_18_dataset/images/00066be8e20318869c38c66be466631a_420/image_t-1.0.png', '../raw/atmacup_18_dataset/images/00066be8e20318869c38c66be466631a_420/image_t-0.5.png']
[255, 227, 199, 170, 142, 114, 85, 57, 29]


0it [00:00, ?it/s]



pretrained: True


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):


Epoch: [1][0/542] Elapsed 0m 2s (remain 19m 37s) Loss: 6.0366(6.0366) Grad: 375788.4375  LR: 0.000010  
Epoch: [1][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 4.5523(5.2568) Grad: 148893.3906  LR: 0.000010  
Epoch: [1][541/542] Elapsed 1m 51s (remain 0m 0s) Loss: 4.3808(5.2285) Grad: 77569.9844  LR: 0.000010  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 4.6142(4.6142) 


Epoch 1 - avg_train_loss: 5.2285  avg_val_loss: 5.1450  time: 130s
Epoch 1 - avg_train_Score: 5.2285 avgScore: 5.1450
Epoch 1 - Save Best Score: 5.1450 Model
Epoch 1 - Save Best Loss: 5.1450 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 4.6824(5.1450) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):


Epoch: [2][0/542] Elapsed 0m 1s (remain 10m 51s) Loss: 5.0616(5.0616) Grad: 162640.1406  LR: 0.000010  
Epoch: [2][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 3.8306(4.7935) Grad: 137758.3906  LR: 0.000010  
Epoch: [2][541/542] Elapsed 1m 49s (remain 0m 0s) Loss: 4.2578(4.7736) Grad: 51668.2773  LR: 0.000010  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 3.9502(3.9502) 


Epoch 2 - avg_train_loss: 4.7736  avg_val_loss: 4.5315  time: 129s
Epoch 2 - avg_train_Score: 4.7736 avgScore: 4.5315
Epoch 2 - Save Best Score: 4.5315 Model
Epoch 2 - Save Best Loss: 4.5315 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 4.1734(4.5315) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):


Epoch: [3][0/542] Elapsed 0m 1s (remain 11m 49s) Loss: 5.3810(5.3810) Grad: 318578.2188  LR: 0.000100  
Epoch: [3][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.8816(2.7357) Grad: 66808.2031  LR: 0.000100  
Epoch: [3][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.8723(2.6521) Grad: 29557.6230  LR: 0.000100  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 1.6479(1.6479) 


  _warn_get_lr_called_within_step(self)
Epoch 3 - avg_train_loss: 2.6521  avg_val_loss: 2.0375  time: 130s
Epoch 3 - avg_train_Score: 2.6521 avgScore: 2.0375


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 1.8795(2.0375) 


Epoch 3 - Save Best Score: 2.0375 Model
Epoch 3 - Save Best Loss: 2.0375 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [4][0/542] Elapsed 0m 1s (remain 11m 37s) Loss: 1.4006(1.4006) Grad: 367963.2812  LR: 0.000100  
Epoch: [4][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 1.2833(1.4899) Grad: 43884.0234  LR: 0.000100  
Epoch: [4][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.5203(1.4827) Grad: 22490.6777  LR: 0.000100  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 29s) Loss: 1.0399(1.0399) 


Epoch 4 - avg_train_loss: 1.4827  avg_val_loss: 1.1593  time: 129s
Epoch 4 - avg_train_Score: 1.4827 avgScore: 1.1593
Epoch 4 - Save Best Score: 1.1593 Model
Epoch 4 - Save Best Loss: 1.1593 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 1.0878(1.1593) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [5][0/542] Elapsed 0m 1s (remain 11m 24s) Loss: 1.4868(1.4868) Grad: 967027.4375  LR: 0.000100  
Epoch: [5][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.3781(1.4181) Grad: 19634.1641  LR: 0.000100  
Epoch: [5][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.1871(1.4119) Grad: 25464.0332  LR: 0.000100  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.9801(0.9801) 


Epoch 5 - avg_train_loss: 1.4119  avg_val_loss: 1.0365  time: 130s
Epoch 5 - avg_train_Score: 1.4119 avgScore: 1.0365
Epoch 5 - Save Best Score: 1.0365 Model
Epoch 5 - Save Best Loss: 1.0365 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 1.0030(1.0365) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [6][0/542] Elapsed 0m 1s (remain 11m 21s) Loss: 1.4612(1.4612) Grad: 325628.0000  LR: 0.000099  
Epoch: [6][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0246(1.2006) Grad: 165794.0156  LR: 0.000099  
Epoch: [6][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.7186(1.1999) Grad: 162002.5469  LR: 0.000099  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.9236(0.9236) 


Epoch 6 - avg_train_loss: 1.1999  avg_val_loss: 1.0038  time: 130s
Epoch 6 - avg_train_Score: 1.1999 avgScore: 1.0038
Epoch 6 - Save Best Score: 1.0038 Model
Epoch 6 - Save Best Loss: 1.0038 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.9692(1.0038) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [7][0/542] Elapsed 0m 1s (remain 11m 19s) Loss: 0.9564(0.9564) Grad: 334526.7812  LR: 0.000099  
Epoch: [7][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.9636(1.1251) Grad: 142393.2969  LR: 0.000099  
Epoch: [7][541/542] Elapsed 1m 49s (remain 0m 0s) Loss: 0.9422(1.1251) Grad: 166381.3281  LR: 0.000099  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.8960(0.8960) 


Epoch 7 - avg_train_loss: 1.1251  avg_val_loss: 0.9744  time: 129s
Epoch 7 - avg_train_Score: 1.1251 avgScore: 0.9744
Epoch 7 - Save Best Score: 0.9744 Model
Epoch 7 - Save Best Loss: 0.9744 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9642(0.9744) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [8][0/542] Elapsed 0m 1s (remain 11m 33s) Loss: 1.2162(1.2162) Grad: 352622.0312  LR: 0.000099  
Epoch: [8][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0538(1.1117) Grad: 56289.7656  LR: 0.000099  
Epoch: [8][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.4525(1.1162) Grad: 67959.5312  LR: 0.000099  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.8638(0.8638) 


Epoch 8 - avg_train_loss: 1.1162  avg_val_loss: 0.9496  time: 130s
Epoch 8 - avg_train_Score: 1.1162 avgScore: 0.9496
Epoch 8 - Save Best Score: 0.9496 Model
Epoch 8 - Save Best Loss: 0.9496 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9388(0.9496) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [9][0/542] Elapsed 0m 1s (remain 11m 23s) Loss: 0.9189(0.9189) Grad: 275935.4688  LR: 0.000098  
Epoch: [9][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.9852(1.0789) Grad: 136089.6406  LR: 0.000098  
Epoch: [9][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8364(1.0801) Grad: 119107.9141  LR: 0.000098  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.8508(0.8508) 


Epoch 9 - avg_train_loss: 1.0801  avg_val_loss: 0.9301  time: 130s
Epoch 9 - avg_train_Score: 1.0801 avgScore: 0.9301
Epoch 9 - Save Best Score: 0.9301 Model
Epoch 9 - Save Best Loss: 0.9301 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.9545(0.9301) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [10][0/542] Elapsed 0m 1s (remain 11m 40s) Loss: 0.9776(0.9776) Grad: 328559.5000  LR: 0.000098  
Epoch: [10][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.3135(1.0719) Grad: 64268.0000  LR: 0.000098  
Epoch: [10][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9791(1.0732) Grad: 88225.6797  LR: 0.000098  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 42s) Loss: 0.8340(0.8340) 


Epoch 10 - avg_train_loss: 1.0732  avg_val_loss: 0.9153  time: 130s
Epoch 10 - avg_train_Score: 1.0732 avgScore: 0.9153
Epoch 10 - Save Best Score: 0.9153 Model
Epoch 10 - Save Best Loss: 0.9153 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9380(0.9153) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [11][0/542] Elapsed 0m 1s (remain 11m 48s) Loss: 0.9321(0.9321) Grad: 475925.3438  LR: 0.000097  
Epoch: [11][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.8990(1.0299) Grad: 72117.0703  LR: 0.000097  
Epoch: [11][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9409(1.0324) Grad: 75785.5703  LR: 0.000097  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 27s) Loss: 0.8133(0.8133) 


Epoch 11 - avg_train_loss: 1.0324  avg_val_loss: 0.9058  time: 129s
Epoch 11 - avg_train_Score: 1.0324 avgScore: 0.9058
Epoch 11 - Save Best Score: 0.9058 Model
Epoch 11 - Save Best Loss: 0.9058 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8996(0.9058) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [12][0/542] Elapsed 0m 1s (remain 11m 15s) Loss: 1.0759(1.0759) Grad: 279799.1562  LR: 0.000096  
Epoch: [12][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.3111(0.9779) Grad: 261995.1719  LR: 0.000096  
Epoch: [12][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8533(0.9731) Grad: 312083.3438  LR: 0.000096  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.8257(0.8257) 


Epoch 12 - avg_train_loss: 0.9731  avg_val_loss: 0.8978  time: 130s
Epoch 12 - avg_train_Score: 0.9731 avgScore: 0.8978
Epoch 12 - Save Best Score: 0.8978 Model
Epoch 12 - Save Best Loss: 0.8978 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8855(0.8978) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [13][0/542] Elapsed 0m 1s (remain 11m 14s) Loss: 0.9731(0.9731) Grad: 439401.7812  LR: 0.000095  
Epoch: [13][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7765(0.9848) Grad: 81655.6719  LR: 0.000095  
Epoch: [13][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9518(0.9911) Grad: 71336.1797  LR: 0.000095  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.8135(0.8135) 


Epoch 13 - avg_train_loss: 0.9911  avg_val_loss: 0.8855  time: 130s
Epoch 13 - avg_train_Score: 0.9911 avgScore: 0.8855
Epoch 13 - Save Best Score: 0.8855 Model
Epoch 13 - Save Best Loss: 0.8855 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8597(0.8855) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [14][0/542] Elapsed 0m 1s (remain 11m 30s) Loss: 0.9013(0.9013) Grad: 234858.4375  LR: 0.000095  
Epoch: [14][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.9453(0.9845) Grad: 179203.6094  LR: 0.000095  
Epoch: [14][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7716(0.9832) Grad: 160242.5156  LR: 0.000095  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.8206(0.8206) 


Epoch 14 - avg_train_loss: 0.9832  avg_val_loss: 0.8788  time: 129s
Epoch 14 - avg_train_Score: 0.9832 avgScore: 0.8788
Epoch 14 - Save Best Score: 0.8788 Model
Epoch 14 - Save Best Loss: 0.8788 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8739(0.8788) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [15][0/542] Elapsed 0m 1s (remain 11m 23s) Loss: 1.1740(1.1740) Grad: 247274.5625  LR: 0.000094  
Epoch: [15][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8464(0.9456) Grad: 162092.4219  LR: 0.000094  
Epoch: [15][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.1053(0.9490) Grad: 212093.4688  LR: 0.000094  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.8143(0.8143) 


Epoch 15 - avg_train_loss: 0.9490  avg_val_loss: 0.8664  time: 130s
Epoch 15 - avg_train_Score: 0.9490 avgScore: 0.8664
Epoch 15 - Save Best Score: 0.8664 Model
Epoch 15 - Save Best Loss: 0.8664 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8787(0.8664) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [16][0/542] Elapsed 0m 1s (remain 11m 21s) Loss: 0.8665(0.8665) Grad: 300826.2812  LR: 0.000093  
Epoch: [16][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 1.2161(0.8983) Grad: 138316.1719  LR: 0.000093  
Epoch: [16][541/542] Elapsed 1m 49s (remain 0m 0s) Loss: 0.8813(0.8990) Grad: 202933.3594  LR: 0.000093  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7898(0.7898) 


Epoch 16 - avg_train_loss: 0.8990  avg_val_loss: 0.8572  time: 129s
Epoch 16 - avg_train_Score: 0.8990 avgScore: 0.8572
Epoch 16 - Save Best Score: 0.8572 Model
Epoch 16 - Save Best Loss: 0.8572 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8629(0.8572) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [17][0/542] Elapsed 0m 1s (remain 11m 18s) Loss: 0.8537(0.8537) Grad: 267384.0312  LR: 0.000092  
Epoch: [17][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9499(0.8894) Grad: 110985.1016  LR: 0.000092  
Epoch: [17][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7535(0.8881) Grad: 153388.4375  LR: 0.000092  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 30s) Loss: 0.8049(0.8049) 


Epoch 17 - avg_train_loss: 0.8881  avg_val_loss: 0.8556  time: 130s
Epoch 17 - avg_train_Score: 0.8881 avgScore: 0.8556
Epoch 17 - Save Best Score: 0.8556 Model
Epoch 17 - Save Best Loss: 0.8556 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8592(0.8556) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [18][0/542] Elapsed 0m 1s (remain 12m 6s) Loss: 0.9471(0.9471) Grad: 229224.8594  LR: 0.000091  
Epoch: [18][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6879(0.8578) Grad: 232529.3125  LR: 0.000091  
Epoch: [18][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8048(0.8581) Grad: 253109.9062  LR: 0.000091  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 44s) Loss: 0.7998(0.7998) 


Epoch 18 - avg_train_loss: 0.8581  avg_val_loss: 0.8504  time: 130s
Epoch 18 - avg_train_Score: 0.8581 avgScore: 0.8504
Epoch 18 - Save Best Score: 0.8504 Model
Epoch 18 - Save Best Loss: 0.8504 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8193(0.8504) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [19][0/542] Elapsed 0m 1s (remain 15m 30s) Loss: 0.7910(0.7910) Grad: 239088.5469  LR: 0.000089  
Epoch: [19][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9267(0.8535) Grad: 165772.0156  LR: 0.000089  
Epoch: [19][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9535(0.8557) Grad: 248860.1719  LR: 0.000089  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7899(0.7899) 


Epoch 19 - avg_train_loss: 0.8557  avg_val_loss: 0.8479  time: 130s
Epoch 19 - avg_train_Score: 0.8557 avgScore: 0.8479
Epoch 19 - Save Best Score: 0.8479 Model
Epoch 19 - Save Best Loss: 0.8479 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8252(0.8479) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [20][0/542] Elapsed 0m 1s (remain 11m 43s) Loss: 1.0630(1.0630) Grad: 184583.0156  LR: 0.000088  
Epoch: [20][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7512(0.8390) Grad: 167195.7031  LR: 0.000088  
Epoch: [20][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8617(0.8416) Grad: 120479.5703  LR: 0.000088  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7671(0.7671) 


Epoch 20 - avg_train_loss: 0.8416  avg_val_loss: 0.8389  time: 130s
Epoch 20 - avg_train_Score: 0.8416 avgScore: 0.8389
Epoch 20 - Save Best Score: 0.8389 Model
Epoch 20 - Save Best Loss: 0.8389 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8164(0.8389) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [21][0/542] Elapsed 0m 1s (remain 11m 23s) Loss: 0.7376(0.7376) Grad: 300792.5938  LR: 0.000087  
Epoch: [21][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.9905(0.8557) Grad: 171288.6719  LR: 0.000087  
Epoch: [21][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7262(0.8526) Grad: 121901.8672  LR: 0.000087  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.7474(0.7474) 


Epoch 21 - avg_train_loss: 0.8526  avg_val_loss: 0.8365  time: 129s
Epoch 21 - avg_train_Score: 0.8526 avgScore: 0.8365
Epoch 21 - Save Best Score: 0.8365 Model
Epoch 21 - Save Best Loss: 0.8365 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8157(0.8365) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [22][0/542] Elapsed 0m 1s (remain 11m 34s) Loss: 0.7860(0.7860) Grad: 210798.7031  LR: 0.000085  
Epoch: [22][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8893(0.8097) Grad: 269940.1562  LR: 0.000085  
Epoch: [22][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8311(0.8115) Grad: 254362.6562  LR: 0.000085  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7560(0.7560) 


Epoch 22 - avg_train_loss: 0.8115  avg_val_loss: 0.8300  time: 130s
Epoch 22 - avg_train_Score: 0.8115 avgScore: 0.8300
Epoch 22 - Save Best Score: 0.8300 Model
Epoch 22 - Save Best Loss: 0.8300 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8130(0.8300) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [23][0/542] Elapsed 0m 1s (remain 11m 51s) Loss: 1.2387(1.2387) Grad: 241442.0625  LR: 0.000084  
Epoch: [23][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6870(0.8063) Grad: 310206.7812  LR: 0.000084  
Epoch: [23][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6743(0.8059) Grad: 177059.9844  LR: 0.000084  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 42s) Loss: 0.7823(0.7823) 


Epoch 23 - avg_train_loss: 0.8059  avg_val_loss: 0.8285  time: 130s
Epoch 23 - avg_train_Score: 0.8059 avgScore: 0.8285
Epoch 23 - Save Best Score: 0.8285 Model
Epoch 23 - Save Best Loss: 0.8285 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7990(0.8285) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [24][0/542] Elapsed 0m 1s (remain 11m 42s) Loss: 0.5966(0.5966) Grad: 178905.4531  LR: 0.000083  
Epoch: [24][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.8633(0.8307) Grad: 103472.4375  LR: 0.000083  
Epoch: [24][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6947(0.8326) Grad: 151235.2031  LR: 0.000083  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7777(0.7777) 


Epoch 24 - avg_train_loss: 0.8326  avg_val_loss: 0.8338  time: 130s
Epoch 24 - avg_train_Score: 0.8326 avgScore: 0.8338


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8257(0.8338) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [25][0/542] Elapsed 0m 1s (remain 11m 16s) Loss: 0.8125(0.8125) Grad: 212558.0312  LR: 0.000081  
Epoch: [25][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.2958(0.8208) Grad: 65838.9922  LR: 0.000081  
Epoch: [25][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9754(0.8250) Grad: 91060.6406  LR: 0.000081  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.7925(0.7925) 


Epoch 25 - avg_train_loss: 0.8250  avg_val_loss: 0.8328  time: 130s
Epoch 25 - avg_train_Score: 0.8250 avgScore: 0.8328


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7881(0.8328) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [26][0/542] Elapsed 0m 1s (remain 11m 53s) Loss: 1.0452(1.0452) Grad: 198383.6250  LR: 0.000079  
Epoch: [26][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6757(0.8021) Grad: 133905.8750  LR: 0.000079  
Epoch: [26][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8236(0.7976) Grad: 122012.3750  LR: 0.000079  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7964(0.7964) 


Epoch 26 - avg_train_loss: 0.7976  avg_val_loss: 0.8272  time: 130s
Epoch 26 - avg_train_Score: 0.7976 avgScore: 0.8272
Epoch 26 - Save Best Score: 0.8272 Model
Epoch 26 - Save Best Loss: 0.8272 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7658(0.8272) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [27][0/542] Elapsed 0m 1s (remain 11m 18s) Loss: 0.7336(0.7336) Grad: 226200.9375  LR: 0.000078  
Epoch: [27][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 1.0176(0.7735) Grad: 103175.4766  LR: 0.000078  
Epoch: [27][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6878(0.7749) Grad: 152129.7656  LR: 0.000078  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7822(0.7822) 


Epoch 27 - avg_train_loss: 0.7749  avg_val_loss: 0.8221  time: 129s
Epoch 27 - avg_train_Score: 0.7749 avgScore: 0.8221
Epoch 27 - Save Best Score: 0.8221 Model
Epoch 27 - Save Best Loss: 0.8221 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7729(0.8221) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [28][0/542] Elapsed 0m 1s (remain 11m 25s) Loss: 0.7293(0.7293) Grad: 227154.4531  LR: 0.000076  
Epoch: [28][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.7257(0.7738) Grad: 138069.1875  LR: 0.000076  
Epoch: [28][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8006(0.7698) Grad: 124932.2344  LR: 0.000076  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7519(0.7519) 


Epoch 28 - avg_train_loss: 0.7698  avg_val_loss: 0.8194  time: 130s
Epoch 28 - avg_train_Score: 0.7698 avgScore: 0.8194
Epoch 28 - Save Best Score: 0.8194 Model
Epoch 28 - Save Best Loss: 0.8194 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8071(0.8194) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [29][0/542] Elapsed 0m 1s (remain 11m 23s) Loss: 1.1015(1.1015) Grad: 153076.2969  LR: 0.000075  
Epoch: [29][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.8182(0.7493) Grad: 149016.0469  LR: 0.000075  
Epoch: [29][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.3153(0.7551) Grad: 77864.7109  LR: 0.000075  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7618(0.7618) 


Epoch 29 - avg_train_loss: 0.7551  avg_val_loss: 0.8183  time: 130s
Epoch 29 - avg_train_Score: 0.7551 avgScore: 0.8183
Epoch 29 - Save Best Score: 0.8183 Model
Epoch 29 - Save Best Loss: 0.8183 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8197(0.8183) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [30][0/542] Elapsed 0m 1s (remain 11m 45s) Loss: 0.6722(0.6722) Grad: 172514.1562  LR: 0.000073  
Epoch: [30][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.7895(0.7653) Grad: 93418.4688  LR: 0.000073  
Epoch: [30][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5552(0.7657) Grad: 85639.4062  LR: 0.000073  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7426(0.7426) 


Epoch 30 - avg_train_loss: 0.7657  avg_val_loss: 0.8172  time: 130s
Epoch 30 - avg_train_Score: 0.7657 avgScore: 0.8172
Epoch 30 - Save Best Score: 0.8172 Model
Epoch 30 - Save Best Loss: 0.8172 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8022(0.8172) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [31][0/542] Elapsed 0m 1s (remain 11m 37s) Loss: 0.6132(0.6132) Grad: 186060.9062  LR: 0.000071  
Epoch: [31][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.7242(0.7520) Grad: 95335.3125  LR: 0.000071  
Epoch: [31][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9918(0.7518) Grad: 116356.2656  LR: 0.000071  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7665(0.7665) 


Epoch 31 - avg_train_loss: 0.7518  avg_val_loss: 0.8135  time: 129s
Epoch 31 - avg_train_Score: 0.7518 avgScore: 0.8135
Epoch 31 - Save Best Score: 0.8135 Model
Epoch 31 - Save Best Loss: 0.8135 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7931(0.8135) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [32][0/542] Elapsed 0m 1s (remain 11m 37s) Loss: 0.6379(0.6379) Grad: 198753.5625  LR: 0.000069  
Epoch: [32][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5985(0.7380) Grad: 283675.8750  LR: 0.000069  
Epoch: [32][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6463(0.7364) Grad: 187722.5000  LR: 0.000069  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7352(0.7352) 


Epoch 32 - avg_train_loss: 0.7364  avg_val_loss: 0.8116  time: 130s
Epoch 32 - avg_train_Score: 0.7364 avgScore: 0.8116
Epoch 32 - Save Best Score: 0.8116 Model
Epoch 32 - Save Best Loss: 0.8116 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7723(0.8116) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [33][0/542] Elapsed 0m 1s (remain 11m 34s) Loss: 0.7447(0.7447) Grad: 204457.3750  LR: 0.000067  
Epoch: [33][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6122(0.7313) Grad: 97601.0312  LR: 0.000067  
Epoch: [33][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8628(0.7333) Grad: 110209.6250  LR: 0.000067  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.7444(0.7444) 


Epoch 33 - avg_train_loss: 0.7333  avg_val_loss: 0.8066  time: 130s
Epoch 33 - avg_train_Score: 0.7333 avgScore: 0.8066
Epoch 33 - Save Best Score: 0.8066 Model
Epoch 33 - Save Best Loss: 0.8066 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8047(0.8066) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [34][0/542] Elapsed 0m 1s (remain 12m 44s) Loss: 0.6595(0.6595) Grad: 195461.4375  LR: 0.000066  
Epoch: [34][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6650(0.7389) Grad: 79349.5625  LR: 0.000066  
Epoch: [34][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7312(0.7401) Grad: 94904.8203  LR: 0.000066  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.7246(0.7246) 


Epoch 34 - avg_train_loss: 0.7401  avg_val_loss: 0.8108  time: 130s
Epoch 34 - avg_train_Score: 0.7401 avgScore: 0.8108


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8053(0.8108) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [35][0/542] Elapsed 0m 1s (remain 11m 42s) Loss: 0.8217(0.8217) Grad: 152476.5312  LR: 0.000064  
Epoch: [35][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8655(0.7394) Grad: 100984.6484  LR: 0.000064  
Epoch: [35][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5892(0.7409) Grad: 110686.4062  LR: 0.000064  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 29s) Loss: 0.7379(0.7379) 


Epoch 35 - avg_train_loss: 0.7409  avg_val_loss: 0.8080  time: 130s
Epoch 35 - avg_train_Score: 0.7409 avgScore: 0.8080


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7962(0.8080) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [36][0/542] Elapsed 0m 1s (remain 11m 37s) Loss: 0.8240(0.8240) Grad: 405862.5625  LR: 0.000062  
Epoch: [36][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.7620(0.7452) Grad: 116888.1484  LR: 0.000062  
Epoch: [36][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.3042(0.7474) Grad: 71564.6172  LR: 0.000062  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 30s) Loss: 0.7398(0.7398) 


Epoch 36 - avg_train_loss: 0.7474  avg_val_loss: 0.8101  time: 129s
Epoch 36 - avg_train_Score: 0.7474 avgScore: 0.8101


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7976(0.8101) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [37][0/542] Elapsed 0m 1s (remain 11m 36s) Loss: 1.0354(1.0354) Grad: 237398.0469  LR: 0.000060  
Epoch: [37][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.9065(0.7609) Grad: 42382.8086  LR: 0.000060  
Epoch: [37][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6027(0.7589) Grad: 46630.6172  LR: 0.000060  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.7202(0.7202) 


Epoch 37 - avg_train_loss: 0.7589  avg_val_loss: 0.8116  time: 130s
Epoch 37 - avg_train_Score: 0.7589 avgScore: 0.8116


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8085(0.8116) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [38][0/542] Elapsed 0m 1s (remain 12m 25s) Loss: 0.7474(0.7474) Grad: 175847.4688  LR: 0.000058  
Epoch: [38][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6952(0.7318) Grad: 99012.7812  LR: 0.000058  
Epoch: [38][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6031(0.7316) Grad: 100257.9062  LR: 0.000058  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 45s) Loss: 0.7077(0.7077) 


Epoch 38 - avg_train_loss: 0.7316  avg_val_loss: 0.8118  time: 130s
Epoch 38 - avg_train_Score: 0.7316 avgScore: 0.8118


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8100(0.8118) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [39][0/542] Elapsed 0m 1s (remain 11m 56s) Loss: 0.6790(0.6790) Grad: 161835.9375  LR: 0.000056  
Epoch: [39][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6117(0.7268) Grad: 86830.3281  LR: 0.000056  
Epoch: [39][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7550(0.7287) Grad: 82082.6016  LR: 0.000056  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7349(0.7349) 


Epoch 39 - avg_train_loss: 0.7287  avg_val_loss: 0.8088  time: 130s
Epoch 39 - avg_train_Score: 0.7287 avgScore: 0.8088


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7885(0.8088) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [40][0/542] Elapsed 0m 1s (remain 12m 9s) Loss: 0.7062(0.7062) Grad: 219920.2188  LR: 0.000054  
Epoch: [40][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.6760(0.6972) Grad: 200837.4531  LR: 0.000054  
Epoch: [40][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5414(0.7005) Grad: 164311.9375  LR: 0.000054  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7368(0.7368) 


Epoch 40 - avg_train_loss: 0.7005  avg_val_loss: 0.8060  time: 130s
Epoch 40 - avg_train_Score: 0.7005 avgScore: 0.8060
Epoch 40 - Save Best Score: 0.8060 Model
Epoch 40 - Save Best Loss: 0.8060 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7762(0.8060) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [41][0/542] Elapsed 0m 1s (remain 11m 48s) Loss: 0.5352(0.5352) Grad: 199001.2031  LR: 0.000052  
Epoch: [41][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7489(0.7018) Grad: 270137.6562  LR: 0.000052  
Epoch: [41][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6831(0.7014) Grad: 204990.3438  LR: 0.000052  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7280(0.7280) 


Epoch 41 - avg_train_loss: 0.7014  avg_val_loss: 0.8055  time: 130s
Epoch 41 - avg_train_Score: 0.7014 avgScore: 0.8055
Epoch 41 - Save Best Score: 0.8055 Model
Epoch 41 - Save Best Loss: 0.8055 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7881(0.8055) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [42][0/542] Elapsed 0m 1s (remain 12m 53s) Loss: 0.6287(0.6287) Grad: 161505.1719  LR: 0.000050  
Epoch: [42][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.6186(0.7186) Grad: 87146.5156  LR: 0.000050  
Epoch: [42][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7004(0.7199) Grad: 85324.2734  LR: 0.000050  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.7391(0.7391) 


Epoch 42 - avg_train_loss: 0.7199  avg_val_loss: 0.8089  time: 130s
Epoch 42 - avg_train_Score: 0.7199 avgScore: 0.8089


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7879(0.8089) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [43][0/542] Elapsed 0m 1s (remain 11m 29s) Loss: 0.6086(0.6086) Grad: 232924.2188  LR: 0.000048  
Epoch: [43][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6151(0.7085) Grad: 90476.6797  LR: 0.000048  
Epoch: [43][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8568(0.7096) Grad: 109447.2578  LR: 0.000048  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.7170(0.7170) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7729(0.8042) 


Epoch 43 - avg_train_loss: 0.7096  avg_val_loss: 0.8042  time: 130s
Epoch 43 - avg_train_Score: 0.7096 avgScore: 0.8042
Epoch 43 - Save Best Score: 0.8042 Model
Epoch 43 - Save Best Loss: 0.8042 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [44][0/542] Elapsed 0m 1s (remain 11m 26s) Loss: 0.6729(0.6729) Grad: 180117.2969  LR: 0.000046  
Epoch: [44][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.6590(0.7086) Grad: 86113.9922  LR: 0.000046  
Epoch: [44][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8499(0.7099) Grad: 103895.0078  LR: 0.000046  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7205(0.7205) 


Epoch 44 - avg_train_loss: 0.7099  avg_val_loss: 0.8026  time: 130s
Epoch 44 - avg_train_Score: 0.7099 avgScore: 0.8026
Epoch 44 - Save Best Score: 0.8026 Model
Epoch 44 - Save Best Loss: 0.8026 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7813(0.8026) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [45][0/542] Elapsed 0m 1s (remain 11m 28s) Loss: 0.7029(0.7029) Grad: 224482.8281  LR: 0.000044  
Epoch: [45][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5980(0.7052) Grad: 109929.7500  LR: 0.000044  
Epoch: [45][541/542] Elapsed 1m 49s (remain 0m 0s) Loss: 0.7059(0.7041) Grad: 100504.0312  LR: 0.000044  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7145(0.7145) 


Epoch 45 - avg_train_loss: 0.7041  avg_val_loss: 0.8026  time: 129s
Epoch 45 - avg_train_Score: 0.7041 avgScore: 0.8026


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7746(0.8026) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [46][0/542] Elapsed 0m 1s (remain 11m 37s) Loss: 0.5369(0.5369) Grad: 162618.2500  LR: 0.000042  
Epoch: [46][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5986(0.6941) Grad: 92705.8281  LR: 0.000042  
Epoch: [46][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6467(0.6951) Grad: 93356.7344  LR: 0.000042  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 29s) Loss: 0.7029(0.7029) 


Epoch 46 - avg_train_loss: 0.6951  avg_val_loss: 0.7976  time: 129s
Epoch 46 - avg_train_Score: 0.6951 avgScore: 0.7976
Epoch 46 - Save Best Score: 0.7976 Model
Epoch 46 - Save Best Loss: 0.7976 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7767(0.7976) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [47][0/542] Elapsed 0m 1s (remain 11m 41s) Loss: 0.6554(0.6554) Grad: 187601.5000  LR: 0.000040  
Epoch: [47][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6127(0.6947) Grad: 164992.2188  LR: 0.000040  
Epoch: [47][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6651(0.6928) Grad: 153174.7344  LR: 0.000040  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7219(0.7219) 


Epoch 47 - avg_train_loss: 0.6928  avg_val_loss: 0.7964  time: 130s
Epoch 47 - avg_train_Score: 0.6928 avgScore: 0.7964
Epoch 47 - Save Best Score: 0.7964 Model
Epoch 47 - Save Best Loss: 0.7964 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7687(0.7964) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [48][0/542] Elapsed 0m 1s (remain 11m 29s) Loss: 0.5807(0.5807) Grad: 152242.3281  LR: 0.000038  
Epoch: [48][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.9085(0.6787) Grad: 152449.9688  LR: 0.000038  
Epoch: [48][541/542] Elapsed 1m 49s (remain 0m 0s) Loss: 0.6201(0.6807) Grad: 182140.8438  LR: 0.000038  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 30s) Loss: 0.6947(0.6947) 


Epoch 48 - avg_train_loss: 0.6807  avg_val_loss: 0.7947  time: 129s
Epoch 48 - avg_train_Score: 0.6807 avgScore: 0.7947
Epoch 48 - Save Best Score: 0.7947 Model
Epoch 48 - Save Best Loss: 0.7947 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7764(0.7947) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [49][0/542] Elapsed 0m 1s (remain 11m 29s) Loss: 0.5851(0.5851) Grad: 250440.5000  LR: 0.000037  
Epoch: [49][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5466(0.6846) Grad: 228215.2656  LR: 0.000037  
Epoch: [49][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6129(0.6810) Grad: 153574.7969  LR: 0.000037  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7206(0.7206) 


Epoch 49 - avg_train_loss: 0.6810  avg_val_loss: 0.7952  time: 130s
Epoch 49 - avg_train_Score: 0.6810 avgScore: 0.7952


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7859(0.7952) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [50][0/542] Elapsed 0m 1s (remain 11m 28s) Loss: 0.9230(0.9230) Grad: 207607.8281  LR: 0.000035  
Epoch: [50][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8633(0.6804) Grad: 266318.3750  LR: 0.000035  
Epoch: [50][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6406(0.6809) Grad: 208128.7188  LR: 0.000035  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 30s) Loss: 0.6894(0.6894) 


Epoch 50 - avg_train_loss: 0.6809  avg_val_loss: 0.7941  time: 130s
Epoch 50 - avg_train_Score: 0.6809 avgScore: 0.7941
Epoch 50 - Save Best Score: 0.7941 Model
Epoch 50 - Save Best Loss: 0.7941 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7664(0.7941) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [51][0/542] Elapsed 0m 1s (remain 11m 17s) Loss: 0.6906(0.6906) Grad: 202964.2344  LR: 0.000033  
Epoch: [51][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5294(0.6681) Grad: 168830.4375  LR: 0.000033  
Epoch: [51][541/542] Elapsed 1m 49s (remain 0m 0s) Loss: 0.6555(0.6712) Grad: 188479.2656  LR: 0.000033  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 29s) Loss: 0.7156(0.7156) 


Epoch 51 - avg_train_loss: 0.6712  avg_val_loss: 0.7954  time: 129s
Epoch 51 - avg_train_Score: 0.6712 avgScore: 0.7954


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7825(0.7954) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [52][0/542] Elapsed 0m 1s (remain 11m 30s) Loss: 0.9219(0.9219) Grad: 183994.6250  LR: 0.000031  
Epoch: [52][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.7274(0.6817) Grad: 186679.5625  LR: 0.000031  
Epoch: [52][541/542] Elapsed 1m 49s (remain 0m 0s) Loss: 0.5824(0.6786) Grad: 201438.6094  LR: 0.000031  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 44s) Loss: 0.7342(0.7342) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8025(0.7971) 


Epoch 52 - avg_train_loss: 0.6786  avg_val_loss: 0.7971  time: 130s
Epoch 52 - avg_train_Score: 0.6786 avgScore: 0.7971
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [53][0/542] Elapsed 0m 1s (remain 13m 23s) Loss: 0.5189(0.5189) Grad: 173619.3281  LR: 0.000029  
Epoch: [53][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5748(0.6799) Grad: 169040.1094  LR: 0.000029  
Epoch: [53][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7026(0.6799) Grad: 157569.0156  LR: 0.000029  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 38s) Loss: 0.7249(0.7249) 


Epoch 53 - avg_train_loss: 0.6799  avg_val_loss: 0.7964  time: 130s
Epoch 53 - avg_train_Score: 0.6799 avgScore: 0.7964


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8032(0.7964) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [54][0/542] Elapsed 0m 1s (remain 11m 34s) Loss: 1.0872(1.0872) Grad: 272073.2812  LR: 0.000027  
Epoch: [54][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.6354(0.6666) Grad: 81793.0078  LR: 0.000027  
Epoch: [54][541/542] Elapsed 1m 49s (remain 0m 0s) Loss: 0.5544(0.6660) Grad: 102675.6016  LR: 0.000027  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.7525(0.7525) 


Epoch 54 - avg_train_loss: 0.6660  avg_val_loss: 0.7960  time: 129s
Epoch 54 - avg_train_Score: 0.6660 avgScore: 0.7960


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7856(0.7960) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [55][0/542] Elapsed 0m 1s (remain 11m 51s) Loss: 0.4707(0.4707) Grad: 154560.6406  LR: 0.000026  
Epoch: [55][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5327(0.6818) Grad: 86058.4219  LR: 0.000026  
Epoch: [55][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8793(0.6817) Grad: 88973.2031  LR: 0.000026  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.6971(0.6971) 


Epoch 55 - avg_train_loss: 0.6817  avg_val_loss: 0.7952  time: 130s
Epoch 55 - avg_train_Score: 0.6817 avgScore: 0.7952


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7797(0.7952) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [56][0/542] Elapsed 0m 1s (remain 11m 53s) Loss: 0.5219(0.5219) Grad: 168740.2031  LR: 0.000024  
Epoch: [56][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6779(0.6621) Grad: 155593.8438  LR: 0.000024  
Epoch: [56][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5527(0.6627) Grad: 168323.4062  LR: 0.000024  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.6795(0.6795) 


Epoch 56 - avg_train_loss: 0.6627  avg_val_loss: 0.7946  time: 130s
Epoch 56 - avg_train_Score: 0.6627 avgScore: 0.7946


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7612(0.7946) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [57][0/542] Elapsed 0m 1s (remain 11m 36s) Loss: 0.5358(0.5358) Grad: 161005.9844  LR: 0.000022  
Epoch: [57][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.6138(0.6700) Grad: 69439.7344  LR: 0.000022  
Epoch: [57][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6802(0.6656) Grad: 78006.2422  LR: 0.000022  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.6885(0.6885) 


Epoch 57 - avg_train_loss: 0.6656  avg_val_loss: 0.7939  time: 130s
Epoch 57 - avg_train_Score: 0.6656 avgScore: 0.7939


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7736(0.7939) 


Epoch 57 - Save Best Score: 0.7939 Model
Epoch 57 - Save Best Loss: 0.7939 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [58][0/542] Elapsed 0m 1s (remain 12m 6s) Loss: 0.8318(0.8318) Grad: 149631.2500  LR: 0.000021  
Epoch: [58][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.7158(0.6669) Grad: 151269.5000  LR: 0.000021  
Epoch: [58][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5367(0.6652) Grad: 161762.8125  LR: 0.000021  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 0.6867(0.6867) 


Epoch 58 - avg_train_loss: 0.6652  avg_val_loss: 0.7941  time: 130s
Epoch 58 - avg_train_Score: 0.6652 avgScore: 0.7941


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7786(0.7941) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [59][0/542] Elapsed 0m 1s (remain 11m 34s) Loss: 0.6558(0.6558) Grad: 150792.9844  LR: 0.000019  
Epoch: [59][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.2067(0.6585) Grad: 299270.0625  LR: 0.000019  
Epoch: [59][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5724(0.6599) Grad: 173937.4531  LR: 0.000019  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7052(0.7052) 


Epoch 59 - avg_train_loss: 0.6599  avg_val_loss: 0.7933  time: 130s
Epoch 59 - avg_train_Score: 0.6599 avgScore: 0.7933
Epoch 59 - Save Best Score: 0.7933 Model
Epoch 59 - Save Best Loss: 0.7933 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7811(0.7933) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [60][0/542] Elapsed 0m 1s (remain 11m 23s) Loss: 0.5942(0.5942) Grad: 152903.7188  LR: 0.000018  
Epoch: [60][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.7593(0.6550) Grad: 153067.7188  LR: 0.000018  
Epoch: [60][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5695(0.6544) Grad: 91470.9688  LR: 0.000018  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7044(0.7044) 


Epoch 60 - avg_train_loss: 0.6544  avg_val_loss: 0.7901  time: 129s
Epoch 60 - avg_train_Score: 0.6544 avgScore: 0.7901
Epoch 60 - Save Best Score: 0.7901 Model
Epoch 60 - Save Best Loss: 0.7901 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7712(0.7901) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [61][0/542] Elapsed 0m 1s (remain 11m 36s) Loss: 0.4817(0.4817) Grad: 183708.6562  LR: 0.000016  
Epoch: [61][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5806(0.6661) Grad: 76931.7578  LR: 0.000016  
Epoch: [61][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6284(0.6634) Grad: 78199.4375  LR: 0.000016  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7093(0.7093) 
EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7627(0.7929) 


Epoch 61 - avg_train_loss: 0.6634  avg_val_loss: 0.7929  time: 130s
Epoch 61 - avg_train_Score: 0.6634 avgScore: 0.7929
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [62][0/542] Elapsed 0m 1s (remain 11m 47s) Loss: 0.5279(0.5279) Grad: 146096.3438  LR: 0.000015  
Epoch: [62][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.9944(0.6489) Grad: 140544.7969  LR: 0.000015  
Epoch: [62][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6852(0.6507) Grad: 287254.1875  LR: 0.000015  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.7111(0.7111) 


Epoch 62 - avg_train_loss: 0.6507  avg_val_loss: 0.7940  time: 130s
Epoch 62 - avg_train_Score: 0.6507 avgScore: 0.7940


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7693(0.7940) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [63][0/542] Elapsed 0m 1s (remain 11m 22s) Loss: 0.7798(0.7798) Grad: 202612.5312  LR: 0.000013  
Epoch: [63][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5365(0.6585) Grad: 67945.3438  LR: 0.000013  
Epoch: [63][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5590(0.6570) Grad: 82826.4297  LR: 0.000013  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.7053(0.7053) 


Epoch 63 - avg_train_loss: 0.6570  avg_val_loss: 0.7932  time: 130s
Epoch 63 - avg_train_Score: 0.6570 avgScore: 0.7932


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7817(0.7932) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [64][0/542] Elapsed 0m 1s (remain 11m 32s) Loss: 0.4452(0.4452) Grad: 153002.4375  LR: 0.000012  
Epoch: [64][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.8364(0.6454) Grad: 138820.0469  LR: 0.000012  
Epoch: [64][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5543(0.6458) Grad: 173600.1406  LR: 0.000012  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.7102(0.7102) 


Epoch 64 - avg_train_loss: 0.6458  avg_val_loss: 0.7915  time: 130s
Epoch 64 - avg_train_Score: 0.6458 avgScore: 0.7915


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7691(0.7915) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [65][0/542] Elapsed 0m 1s (remain 11m 37s) Loss: 0.6410(0.6410) Grad: 175343.0312  LR: 0.000011  
Epoch: [65][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5726(0.6497) Grad: 172418.7812  LR: 0.000011  
Epoch: [65][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7954(0.6485) Grad: 138992.5156  LR: 0.000011  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 30s) Loss: 0.7191(0.7191) 


Epoch 65 - avg_train_loss: 0.6485  avg_val_loss: 0.7908  time: 130s
Epoch 65 - avg_train_Score: 0.6485 avgScore: 0.7908


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7793(0.7908) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [66][0/542] Elapsed 0m 1s (remain 11m 35s) Loss: 0.4488(0.4488) Grad: 151114.3594  LR: 0.000010  
Epoch: [66][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6570(0.6587) Grad: 94996.2891  LR: 0.000010  
Epoch: [66][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6041(0.6568) Grad: 71153.4141  LR: 0.000010  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7184(0.7184) 


Epoch 66 - avg_train_loss: 0.6568  avg_val_loss: 0.7898  time: 130s
Epoch 66 - avg_train_Score: 0.6568 avgScore: 0.7898
Epoch 66 - Save Best Score: 0.7898 Model
Epoch 66 - Save Best Loss: 0.7898 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7659(0.7898) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [67][0/542] Elapsed 0m 1s (remain 12m 40s) Loss: 0.7458(0.7458) Grad: 213178.2812  LR: 0.000009  
Epoch: [67][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.7965(0.6549) Grad: 95235.9766  LR: 0.000009  
Epoch: [67][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.1171(0.6537) Grad: 73431.3984  LR: 0.000009  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7326(0.7326) 


Epoch 67 - avg_train_loss: 0.6537  avg_val_loss: 0.7908  time: 130s
Epoch 67 - avg_train_Score: 0.6537 avgScore: 0.7908


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7685(0.7908) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [68][0/542] Elapsed 0m 1s (remain 11m 53s) Loss: 0.7163(0.7163) Grad: 132684.7656  LR: 0.000008  
Epoch: [68][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5096(0.6454) Grad: 141485.6875  LR: 0.000008  
Epoch: [68][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5430(0.6486) Grad: 123382.0156  LR: 0.000008  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.7156(0.7156) 


Epoch 68 - avg_train_loss: 0.6486  avg_val_loss: 0.7911  time: 130s
Epoch 68 - avg_train_Score: 0.6486 avgScore: 0.7911


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7600(0.7911) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [69][0/542] Elapsed 0m 1s (remain 11m 42s) Loss: 0.9632(0.9632) Grad: 205501.0469  LR: 0.000007  
Epoch: [69][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8097(0.6292) Grad: 75280.8516  LR: 0.000007  
Epoch: [69][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5529(0.6301) Grad: 77324.4062  LR: 0.000007  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.7128(0.7128) 


Epoch 69 - avg_train_loss: 0.6301  avg_val_loss: 0.7894  time: 130s
Epoch 69 - avg_train_Score: 0.6301 avgScore: 0.7894
Epoch 69 - Save Best Score: 0.7894 Model
Epoch 69 - Save Best Loss: 0.7894 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7636(0.7894) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [70][0/542] Elapsed 0m 1s (remain 11m 26s) Loss: 0.6599(0.6599) Grad: 148785.1562  LR: 0.000006  
Epoch: [70][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5435(0.6353) Grad: 160918.3906  LR: 0.000006  
Epoch: [70][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9118(0.6339) Grad: 175626.7500  LR: 0.000006  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 30s) Loss: 0.7158(0.7158) 


Epoch 70 - avg_train_loss: 0.6339  avg_val_loss: 0.7884  time: 130s
Epoch 70 - avg_train_Score: 0.6339 avgScore: 0.7884
Epoch 70 - Save Best Score: 0.7884 Model
Epoch 70 - Save Best Loss: 0.7884 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7645(0.7884) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [71][0/542] Elapsed 0m 1s (remain 11m 58s) Loss: 0.6265(0.6265) Grad: 153658.1250  LR: 0.000005  
Epoch: [71][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 1.0650(0.6401) Grad: 88112.6641  LR: 0.000005  
Epoch: [71][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8463(0.6399) Grad: 71888.3125  LR: 0.000005  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7141(0.7141) 


Epoch 71 - avg_train_loss: 0.6399  avg_val_loss: 0.7901  time: 130s
Epoch 71 - avg_train_Score: 0.6399 avgScore: 0.7901


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7734(0.7901) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [72][0/542] Elapsed 0m 1s (remain 11m 49s) Loss: 0.3945(0.3945) Grad: 140144.5156  LR: 0.000004  
Epoch: [72][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7063(0.6477) Grad: 153462.2656  LR: 0.000004  
Epoch: [72][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5503(0.6481) Grad: 154295.1406  LR: 0.000004  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7176(0.7176) 


Epoch 72 - avg_train_loss: 0.6481  avg_val_loss: 0.7897  time: 130s
Epoch 72 - avg_train_Score: 0.6481 avgScore: 0.7897


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7729(0.7897) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [73][0/542] Elapsed 0m 1s (remain 12m 2s) Loss: 0.5304(0.5304) Grad: 180613.4688  LR: 0.000003  
Epoch: [73][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5767(0.6415) Grad: 68916.5156  LR: 0.000003  
Epoch: [73][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7427(0.6395) Grad: 86322.1484  LR: 0.000003  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.7222(0.7222) 


Epoch 73 - avg_train_loss: 0.6395  avg_val_loss: 0.7894  time: 130s
Epoch 73 - avg_train_Score: 0.6395 avgScore: 0.7894


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7695(0.7894) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [74][0/542] Elapsed 0m 1s (remain 11m 57s) Loss: 0.5588(0.5588) Grad: 135165.6250  LR: 0.000003  
Epoch: [74][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7269(0.6443) Grad: 82281.9688  LR: 0.000003  
Epoch: [74][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5675(0.6412) Grad: 74007.9062  LR: 0.000003  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 0.7196(0.7196) 


Epoch 74 - avg_train_loss: 0.6412  avg_val_loss: 0.7889  time: 130s
Epoch 74 - avg_train_Score: 0.6412 avgScore: 0.7889


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7605(0.7889) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [75][0/542] Elapsed 0m 1s (remain 12m 38s) Loss: 0.6439(0.6439) Grad: 141671.3438  LR: 0.000002  
Epoch: [75][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5401(0.6262) Grad: 146873.0469  LR: 0.000002  
Epoch: [75][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7116(0.6250) Grad: 202665.1562  LR: 0.000002  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.7144(0.7144) 


Epoch 75 - avg_train_loss: 0.6250  avg_val_loss: 0.7891  time: 130s
Epoch 75 - avg_train_Score: 0.6250 avgScore: 0.7891


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7590(0.7891) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [76][0/542] Elapsed 0m 1s (remain 11m 36s) Loss: 0.6757(0.6757) Grad: 201961.2812  LR: 0.000002  
Epoch: [76][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.7755(0.6339) Grad: 88649.4141  LR: 0.000002  
Epoch: [76][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5213(0.6324) Grad: 73060.1953  LR: 0.000002  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.7145(0.7145) 


Epoch 76 - avg_train_loss: 0.6324  avg_val_loss: 0.7884  time: 130s
Epoch 76 - avg_train_Score: 0.6324 avgScore: 0.7884


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7600(0.7884) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [77][0/542] Elapsed 0m 1s (remain 11m 31s) Loss: 0.5984(0.5984) Grad: 301640.0625  LR: 0.000001  
Epoch: [77][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5330(0.6281) Grad: 161396.0781  LR: 0.000001  
Epoch: [77][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7980(0.6312) Grad: 259933.7969  LR: 0.000001  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.7139(0.7139) 


Epoch 77 - avg_train_loss: 0.6312  avg_val_loss: 0.7885  time: 130s
Epoch 77 - avg_train_Score: 0.6312 avgScore: 0.7885


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7605(0.7885) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [78][0/542] Elapsed 0m 1s (remain 12m 1s) Loss: 0.7259(0.7259) Grad: 164129.5312  LR: 0.000001  
Epoch: [78][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5762(0.6435) Grad: 68556.9141  LR: 0.000001  
Epoch: [78][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.4913(0.6433) Grad: 81496.0234  LR: 0.000001  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 0.7108(0.7108) 


Epoch 78 - avg_train_loss: 0.6433  avg_val_loss: 0.7893  time: 130s


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7590(0.7893) 


Epoch 78 - avg_train_Score: 0.6433 avgScore: 0.7893
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [79][0/542] Elapsed 0m 1s (remain 11m 44s) Loss: 0.7314(0.7314) Grad: 171523.1250  LR: 0.000000  
Epoch: [79][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7401(0.6356) Grad: 205157.2656  LR: 0.000000  
Epoch: [79][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5943(0.6361) Grad: 188670.2188  LR: 0.000000  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.7171(0.7171) 


Epoch 79 - avg_train_loss: 0.6361  avg_val_loss: 0.7883  time: 130s
Epoch 79 - avg_train_Score: 0.6361 avgScore: 0.7883
Epoch 79 - Save Best Score: 0.7883 Model
Epoch 79 - Save Best Loss: 0.7883 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7620(0.7883) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [80][0/542] Elapsed 0m 1s (remain 12m 6s) Loss: 0.4686(0.4686) Grad: 119953.8438  LR: 0.000000  
Epoch: [80][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.4963(0.6412) Grad: 41844.6914  LR: 0.000000  
Epoch: [80][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.4602(0.6408) Grad: 41416.5547  LR: 0.000000  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.7136(0.7136) 


Epoch 80 - avg_train_loss: 0.6408  avg_val_loss: 0.7884  time: 130s
Epoch 80 - avg_train_Score: 0.6408 avgScore: 0.7884


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7601(0.7884) 


  check_point = torch.load(
score: 0.7884


check_point_pred shape (8675, 18)
pretrained: True


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):


Epoch: [1][0/542] Elapsed 0m 1s (remain 12m 9s) Loss: 6.7383(6.7383) Grad: 513245.1250  LR: 0.000010  
Epoch: [1][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 5.4968(5.3430) Grad: 51434.4727  LR: 0.000010  
Epoch: [1][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 4.8214(5.3188) Grad: 70050.4297  LR: 0.000010  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 5.2718(5.2718) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 3.4173(4.9286) 


Epoch 1 - avg_train_loss: 5.3188  avg_val_loss: 4.9286  time: 130s
Epoch 1 - avg_train_Score: 5.3188 avgScore: 4.9286
Epoch 1 - Save Best Score: 4.9286 Model
Epoch 1 - Save Best Loss: 4.9286 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):


Epoch: [2][0/542] Elapsed 0m 1s (remain 13m 2s) Loss: 4.9064(4.9064) Grad: 187386.6875  LR: 0.000010  
Epoch: [2][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 5.0357(4.8503) Grad: 42296.8750  LR: 0.000010  
Epoch: [2][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 4.6655(4.8335) Grad: 283393.8438  LR: 0.000010  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 4.6368(4.6368) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 3.0066(4.3271) 


Epoch 2 - avg_train_loss: 4.8335  avg_val_loss: 4.3271  time: 130s
Epoch 2 - avg_train_Score: 4.8335 avgScore: 4.3271
Epoch 2 - Save Best Score: 4.3271 Model
Epoch 2 - Save Best Loss: 4.3271 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):


Epoch: [3][0/542] Elapsed 0m 1s (remain 12m 29s) Loss: 4.0958(4.0958) Grad: 385750.7500  LR: 0.000100  
Epoch: [3][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 3.1264(3.5379) Grad: 24626.8809  LR: 0.000100  
Epoch: [3][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.9303(3.4324) Grad: 13175.7441  LR: 0.000100  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 4.8621(4.8621) 


  _warn_get_lr_called_within_step(self)
Epoch 3 - avg_train_loss: 3.4324  avg_val_loss: 4.5089  time: 130s
Epoch 3 - avg_train_Score: 3.4324 avgScore: 4.5089


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 3.1263(4.5089) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [4][0/542] Elapsed 0m 1s (remain 12m 3s) Loss: 1.7897(1.7897) Grad: 464575.0312  LR: 0.000100  
Epoch: [4][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.6357(1.6746) Grad: 140065.3125  LR: 0.000100  
Epoch: [4][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.2972(1.6590) Grad: 171428.4844  LR: 0.000100  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 1.6799(1.6799) 


Epoch 4 - avg_train_loss: 1.6590  avg_val_loss: 1.4372  time: 130s
Epoch 4 - avg_train_Score: 1.6590 avgScore: 1.4372


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.3334(1.4372) 


Epoch 4 - Save Best Score: 1.4372 Model
Epoch 4 - Save Best Loss: 1.4372 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [5][0/542] Elapsed 0m 1s (remain 12m 15s) Loss: 1.2627(1.2627) Grad: 263827.4688  LR: 0.000100  
Epoch: [5][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.3429(1.3999) Grad: 256893.1406  LR: 0.000100  
Epoch: [5][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.3237(1.3907) Grad: 243801.2969  LR: 0.000100  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 49s) Loss: 1.1988(1.1988) 


Epoch 5 - avg_train_loss: 1.3907  avg_val_loss: 1.0871  time: 130s
Epoch 5 - avg_train_Score: 1.3907 avgScore: 1.0871
Epoch 5 - Save Best Score: 1.0871 Model
Epoch 5 - Save Best Loss: 1.0871 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.1091(1.0871) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [6][0/542] Elapsed 0m 1s (remain 12m 39s) Loss: 1.4627(1.4627) Grad: 710163.0625  LR: 0.000099  
Epoch: [6][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.6102(1.2680) Grad: 273821.6562  LR: 0.000099  
Epoch: [6][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.5158(1.2675) Grad: 140048.3594  LR: 0.000099  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 44s) Loss: 1.0877(1.0877) 


Epoch 6 - avg_train_loss: 1.2675  avg_val_loss: 0.9932  time: 130s
Epoch 6 - avg_train_Score: 1.2675 avgScore: 0.9932
Epoch 6 - Save Best Score: 0.9932 Model
Epoch 6 - Save Best Loss: 0.9932 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.0263(0.9932) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [7][0/542] Elapsed 0m 1s (remain 12m 35s) Loss: 1.1105(1.1105) Grad: 418913.5000  LR: 0.000099  
Epoch: [7][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.3498(1.1997) Grad: 184457.1875  LR: 0.000099  
Epoch: [7][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.1234(1.1976) Grad: 233411.3281  LR: 0.000099  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 42s) Loss: 0.9914(0.9914) 


Epoch 7 - avg_train_loss: 1.1976  avg_val_loss: 0.9505  time: 130s
Epoch 7 - avg_train_Score: 1.1976 avgScore: 0.9505
Epoch 7 - Save Best Score: 0.9505 Model
Epoch 7 - Save Best Loss: 0.9505 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9669(0.9505) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [8][0/542] Elapsed 0m 1s (remain 12m 10s) Loss: 0.9918(0.9918) Grad: 310043.5625  LR: 0.000099  
Epoch: [8][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.1744(1.1386) Grad: 805488.8750  LR: 0.000099  
Epoch: [8][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9634(1.1350) Grad: 461532.5625  LR: 0.000099  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.9579(0.9579) 


Epoch 8 - avg_train_loss: 1.1350  avg_val_loss: 0.9128  time: 130s
Epoch 8 - avg_train_Score: 1.1350 avgScore: 0.9128
Epoch 8 - Save Best Score: 0.9128 Model
Epoch 8 - Save Best Loss: 0.9128 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9211(0.9128) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [9][0/542] Elapsed 0m 1s (remain 12m 36s) Loss: 1.1184(1.1184) Grad: 440378.9062  LR: 0.000098  
Epoch: [9][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8738(1.1304) Grad: 212573.9688  LR: 0.000098  
Epoch: [9][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9933(1.1285) Grad: 165501.3750  LR: 0.000098  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.9467(0.9467) 


Epoch 9 - avg_train_loss: 1.1285  avg_val_loss: 0.8969  time: 130s
Epoch 9 - avg_train_Score: 1.1285 avgScore: 0.8969
Epoch 9 - Save Best Score: 0.8969 Model
Epoch 9 - Save Best Loss: 0.8969 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9502(0.8969) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [10][0/542] Elapsed 0m 1s (remain 12m 19s) Loss: 0.9427(0.9427) Grad: 311477.5938  LR: 0.000098  
Epoch: [10][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.3179(1.0859) Grad: 177042.9688  LR: 0.000098  
Epoch: [10][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.2745(1.0886) Grad: 184210.6875  LR: 0.000098  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.8778(0.8778) 


Epoch 10 - avg_train_loss: 1.0886  avg_val_loss: 0.8736  time: 130s
Epoch 10 - avg_train_Score: 1.0886 avgScore: 0.8736
Epoch 10 - Save Best Score: 0.8736 Model
Epoch 10 - Save Best Loss: 0.8736 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8725(0.8736) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [11][0/542] Elapsed 0m 1s (remain 12m 39s) Loss: 1.1058(1.1058) Grad: 306951.6875  LR: 0.000097  
Epoch: [11][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7171(1.0201) Grad: 155165.7188  LR: 0.000097  
Epoch: [11][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7906(1.0209) Grad: 168719.2969  LR: 0.000097  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.8308(0.8308) 


Epoch 11 - avg_train_loss: 1.0209  avg_val_loss: 0.8505  time: 130s
Epoch 11 - avg_train_Score: 1.0209 avgScore: 0.8505
Epoch 11 - Save Best Score: 0.8505 Model
Epoch 11 - Save Best Loss: 0.8505 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8893(0.8505) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [12][0/542] Elapsed 0m 1s (remain 12m 57s) Loss: 0.9904(0.9904) Grad: 286936.5000  LR: 0.000096  
Epoch: [12][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 1.1063(1.0519) Grad: 168523.3281  LR: 0.000096  
Epoch: [12][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.1092(1.0592) Grad: 94679.1328  LR: 0.000096  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.8501(0.8501) 


Epoch 12 - avg_train_loss: 1.0592  avg_val_loss: 0.8520  time: 130s
Epoch 12 - avg_train_Score: 1.0592 avgScore: 0.8520


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8378(0.8520) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [13][0/542] Elapsed 0m 1s (remain 11m 55s) Loss: 1.0185(1.0185) Grad: 216165.5625  LR: 0.000095  
Epoch: [13][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8757(0.9853) Grad: 165402.5000  LR: 0.000095  
Epoch: [13][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9901(0.9873) Grad: 236916.0312  LR: 0.000095  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 51s) Loss: 0.8151(0.8151) 


Epoch 13 - avg_train_loss: 0.9873  avg_val_loss: 0.8249  time: 130s
Epoch 13 - avg_train_Score: 0.9873 avgScore: 0.8249
Epoch 13 - Save Best Score: 0.8249 Model
Epoch 13 - Save Best Loss: 0.8249 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8343(0.8249) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [14][0/542] Elapsed 0m 1s (remain 11m 44s) Loss: 1.2285(1.2285) Grad: 419706.3125  LR: 0.000095  
Epoch: [14][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8266(0.9510) Grad: 139708.8281  LR: 0.000095  
Epoch: [14][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6612(0.9508) Grad: 173037.5000  LR: 0.000095  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.8033(0.8033) 


Epoch 14 - avg_train_loss: 0.9508  avg_val_loss: 0.8133  time: 130s
Epoch 14 - avg_train_Score: 0.9508 avgScore: 0.8133
Epoch 14 - Save Best Score: 0.8133 Model
Epoch 14 - Save Best Loss: 0.8133 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7844(0.8133) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [15][0/542] Elapsed 0m 1s (remain 11m 43s) Loss: 0.7162(0.7162) Grad: 276612.8438  LR: 0.000094  
Epoch: [15][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8450(0.9349) Grad: 151314.7344  LR: 0.000094  
Epoch: [15][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8629(0.9355) Grad: 170431.2344  LR: 0.000094  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.8206(0.8206) 


Epoch 15 - avg_train_loss: 0.9355  avg_val_loss: 0.8053  time: 130s
Epoch 15 - avg_train_Score: 0.9355 avgScore: 0.8053
Epoch 15 - Save Best Score: 0.8053 Model
Epoch 15 - Save Best Loss: 0.8053 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7971(0.8053) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [16][0/542] Elapsed 0m 1s (remain 13m 17s) Loss: 0.9417(0.9417) Grad: 362966.6875  LR: 0.000093  
Epoch: [16][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9211(0.9052) Grad: 123880.0781  LR: 0.000093  
Epoch: [16][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8238(0.9072) Grad: 138989.9219  LR: 0.000093  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.8258(0.8258) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7882(0.8043) 


Epoch 16 - avg_train_loss: 0.9072  avg_val_loss: 0.8043  time: 130s
Epoch 16 - avg_train_Score: 0.9072 avgScore: 0.8043
Epoch 16 - Save Best Score: 0.8043 Model
Epoch 16 - Save Best Loss: 0.8043 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [17][0/542] Elapsed 0m 1s (remain 11m 39s) Loss: 0.8850(0.8850) Grad: 273429.4375  LR: 0.000092  
Epoch: [17][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8083(0.9189) Grad: 75444.7500  LR: 0.000092  
Epoch: [17][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.1285(0.9225) Grad: 89762.2266  LR: 0.000092  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 45s) Loss: 0.8249(0.8249) 


Epoch 17 - avg_train_loss: 0.9225  avg_val_loss: 0.8027  time: 130s
Epoch 17 - avg_train_Score: 0.9225 avgScore: 0.8027
Epoch 17 - Save Best Score: 0.8027 Model
Epoch 17 - Save Best Loss: 0.8027 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7739(0.8027) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [18][0/542] Elapsed 0m 1s (remain 11m 50s) Loss: 1.0694(1.0694) Grad: 218785.6406  LR: 0.000091  
Epoch: [18][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8248(0.9087) Grad: 73301.6875  LR: 0.000091  
Epoch: [18][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9498(0.9112) Grad: 107715.2969  LR: 0.000091  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7939(0.7939) 


Epoch 18 - avg_train_loss: 0.9112  avg_val_loss: 0.7985  time: 130s
Epoch 18 - avg_train_Score: 0.9112 avgScore: 0.7985
Epoch 18 - Save Best Score: 0.7985 Model
Epoch 18 - Save Best Loss: 0.7985 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7917(0.7985) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [19][0/542] Elapsed 0m 1s (remain 11m 38s) Loss: 1.3428(1.3428) Grad: 339069.3438  LR: 0.000089  
Epoch: [19][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.5500(0.8767) Grad: 94062.7109  LR: 0.000089  
Epoch: [19][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8481(0.8762) Grad: 116768.6797  LR: 0.000089  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 38s) Loss: 0.7754(0.7754) 


Epoch 19 - avg_train_loss: 0.8762  avg_val_loss: 0.7877  time: 130s
Epoch 19 - avg_train_Score: 0.8762 avgScore: 0.7877
Epoch 19 - Save Best Score: 0.7877 Model
Epoch 19 - Save Best Loss: 0.7877 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7523(0.7877) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [20][0/542] Elapsed 0m 1s (remain 12m 34s) Loss: 0.7123(0.7123) Grad: 218506.4844  LR: 0.000088  
Epoch: [20][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7656(0.8550) Grad: 70960.7031  LR: 0.000088  
Epoch: [20][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8654(0.8607) Grad: 105981.5078  LR: 0.000088  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7765(0.7765) 
EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7593(0.7820) 


Epoch 20 - avg_train_loss: 0.8607  avg_val_loss: 0.7820  time: 130s
Epoch 20 - avg_train_Score: 0.8607 avgScore: 0.7820
Epoch 20 - Save Best Score: 0.7820 Model
Epoch 20 - Save Best Loss: 0.7820 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [21][0/542] Elapsed 0m 1s (remain 12m 44s) Loss: 0.7602(0.7602) Grad: 238329.1719  LR: 0.000087  
Epoch: [21][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7712(0.8628) Grad: 76403.5703  LR: 0.000087  
Epoch: [21][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9380(0.8667) Grad: 56622.2070  LR: 0.000087  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.7492(0.7492) 


Epoch 21 - avg_train_loss: 0.8667  avg_val_loss: 0.7822  time: 130s
Epoch 21 - avg_train_Score: 0.8667 avgScore: 0.7822


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7822(0.7822) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [22][0/542] Elapsed 0m 1s (remain 12m 45s) Loss: 1.1300(1.1300) Grad: 222730.8438  LR: 0.000085  
Epoch: [22][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7704(0.8223) Grad: 116355.8438  LR: 0.000085  
Epoch: [22][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7131(0.8202) Grad: 89342.7422  LR: 0.000085  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.7540(0.7540) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7998(0.7789) 


Epoch 22 - avg_train_loss: 0.8202  avg_val_loss: 0.7789  time: 130s
Epoch 22 - avg_train_Score: 0.8202 avgScore: 0.7789
Epoch 22 - Save Best Score: 0.7789 Model
Epoch 22 - Save Best Loss: 0.7789 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [23][0/542] Elapsed 0m 1s (remain 12m 34s) Loss: 1.0276(1.0276) Grad: 212072.7656  LR: 0.000084  
Epoch: [23][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7440(0.8023) Grad: 683413.5000  LR: 0.000084  
Epoch: [23][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8538(0.8052) Grad: 106352.3516  LR: 0.000084  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.7822(0.7822) 


Epoch 23 - avg_train_loss: 0.8052  avg_val_loss: 0.7763  time: 130s
Epoch 23 - avg_train_Score: 0.8052 avgScore: 0.7763
Epoch 23 - Save Best Score: 0.7763 Model
Epoch 23 - Save Best Loss: 0.7763 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7865(0.7763) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [24][0/542] Elapsed 0m 1s (remain 12m 27s) Loss: 0.9300(0.9300) Grad: 253766.6875  LR: 0.000083  
Epoch: [24][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6230(0.7814) Grad: 190068.1719  LR: 0.000083  
Epoch: [24][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6710(0.7826) Grad: 257777.5625  LR: 0.000083  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.7676(0.7676) 


Epoch 24 - avg_train_loss: 0.7826  avg_val_loss: 0.7744  time: 130s
Epoch 24 - avg_train_Score: 0.7826 avgScore: 0.7744
Epoch 24 - Save Best Score: 0.7744 Model
Epoch 24 - Save Best Loss: 0.7744 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7593(0.7744) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [25][0/542] Elapsed 0m 1s (remain 12m 24s) Loss: 0.6625(0.6625) Grad: 156488.9844  LR: 0.000081  
Epoch: [25][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 1.0154(0.7678) Grad: 160413.6562  LR: 0.000081  
Epoch: [25][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6598(0.7670) Grad: 191580.5625  LR: 0.000081  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 51s) Loss: 0.7629(0.7629) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7453(0.7697) 


Epoch 25 - avg_train_loss: 0.7670  avg_val_loss: 0.7697  time: 130s
Epoch 25 - avg_train_Score: 0.7670 avgScore: 0.7697
Epoch 25 - Save Best Score: 0.7697 Model
Epoch 25 - Save Best Loss: 0.7697 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [26][0/542] Elapsed 0m 1s (remain 12m 26s) Loss: 0.6609(0.6609) Grad: 219308.0625  LR: 0.000079  
Epoch: [26][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7209(0.7846) Grad: 100996.2578  LR: 0.000079  
Epoch: [26][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6983(0.7843) Grad: 211804.8906  LR: 0.000079  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 0.7568(0.7568) 


Epoch 26 - avg_train_loss: 0.7843  avg_val_loss: 0.7706  time: 130s
Epoch 26 - avg_train_Score: 0.7843 avgScore: 0.7706


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7471(0.7706) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [27][0/542] Elapsed 0m 1s (remain 12m 3s) Loss: 0.7769(0.7769) Grad: 174706.5938  LR: 0.000078  
Epoch: [27][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8671(0.7874) Grad: 100305.5000  LR: 0.000078  
Epoch: [27][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.0039(0.7917) Grad: 154706.4688  LR: 0.000078  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 42s) Loss: 0.7623(0.7623) 


Epoch 27 - avg_train_loss: 0.7917  avg_val_loss: 0.7706  time: 130s
Epoch 27 - avg_train_Score: 0.7917 avgScore: 0.7706


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7755(0.7706) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [28][0/542] Elapsed 0m 1s (remain 12m 2s) Loss: 0.7630(0.7630) Grad: 178288.9219  LR: 0.000076  
Epoch: [28][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9172(0.7899) Grad: 124414.1797  LR: 0.000076  
Epoch: [28][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8762(0.7916) Grad: 98950.5078  LR: 0.000076  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 38s) Loss: 0.7541(0.7541) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7630(0.7661) 


Epoch 28 - avg_train_loss: 0.7916  avg_val_loss: 0.7661  time: 130s
Epoch 28 - avg_train_Score: 0.7916 avgScore: 0.7661
Epoch 28 - Save Best Score: 0.7661 Model
Epoch 28 - Save Best Loss: 0.7661 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [29][0/542] Elapsed 0m 1s (remain 12m 1s) Loss: 1.0165(1.0165) Grad: 347039.0625  LR: 0.000075  
Epoch: [29][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9146(0.7861) Grad: 59496.8047  LR: 0.000075  
Epoch: [29][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9466(0.7918) Grad: 62400.5586  LR: 0.000075  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.7534(0.7534) 


Epoch 29 - avg_train_loss: 0.7918  avg_val_loss: 0.7636  time: 130s


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7346(0.7636) 


Epoch 29 - avg_train_Score: 0.7918 avgScore: 0.7636
Epoch 29 - Save Best Score: 0.7636 Model
Epoch 29 - Save Best Loss: 0.7636 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [30][0/542] Elapsed 0m 1s (remain 11m 58s) Loss: 0.7654(0.7654) Grad: 233436.7656  LR: 0.000073  
Epoch: [30][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6443(0.7905) Grad: 102710.2344  LR: 0.000073  
Epoch: [30][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7598(0.7890) Grad: 132432.8438  LR: 0.000073  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7482(0.7482) 


Epoch 30 - avg_train_loss: 0.7890  avg_val_loss: 0.7606  time: 130s
Epoch 30 - avg_train_Score: 0.7890 avgScore: 0.7606
Epoch 30 - Save Best Score: 0.7606 Model
Epoch 30 - Save Best Loss: 0.7606 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7392(0.7606) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [31][0/542] Elapsed 0m 1s (remain 12m 0s) Loss: 0.9256(0.9256) Grad: 200832.0625  LR: 0.000071  
Epoch: [31][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6334(0.7662) Grad: 86128.2266  LR: 0.000071  
Epoch: [31][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6595(0.7640) Grad: 124419.1250  LR: 0.000071  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.7448(0.7448) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7835(0.7615) 


Epoch 31 - avg_train_loss: 0.7640  avg_val_loss: 0.7615  time: 130s
Epoch 31 - avg_train_Score: 0.7640 avgScore: 0.7615
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [32][0/542] Elapsed 0m 1s (remain 12m 17s) Loss: 1.3978(1.3978) Grad: 186856.1562  LR: 0.000069  
Epoch: [32][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6506(0.7628) Grad: 51245.3711  LR: 0.000069  
Epoch: [32][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7692(0.7696) Grad: 74451.9219  LR: 0.000069  
EVAL: [0/68] Elapsed 0m 1s (remain 2m 0s) Loss: 0.7498(0.7498) 


Epoch 32 - avg_train_loss: 0.7696  avg_val_loss: 0.7589  time: 130s
Epoch 32 - avg_train_Score: 0.7696 avgScore: 0.7589
Epoch 32 - Save Best Score: 0.7589 Model
Epoch 32 - Save Best Loss: 0.7589 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7772(0.7589) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [33][0/542] Elapsed 0m 1s (remain 12m 20s) Loss: 1.0841(1.0841) Grad: 190599.0000  LR: 0.000067  
Epoch: [33][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6370(0.7874) Grad: 55437.7070  LR: 0.000067  
Epoch: [33][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7805(0.7877) Grad: 50756.9375  LR: 0.000067  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7429(0.7429) 


Epoch 33 - avg_train_loss: 0.7877  avg_val_loss: 0.7615  time: 130s
Epoch 33 - avg_train_Score: 0.7877 avgScore: 0.7615


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7589(0.7615) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [34][0/542] Elapsed 0m 1s (remain 12m 34s) Loss: 0.8940(0.8940) Grad: 230351.2344  LR: 0.000066  
Epoch: [34][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7640(0.7557) Grad: 48187.9258  LR: 0.000066  
Epoch: [34][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6392(0.7569) Grad: 41343.4883  LR: 0.000066  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 45s) Loss: 0.7504(0.7504) 


Epoch 34 - avg_train_loss: 0.7569  avg_val_loss: 0.7605  time: 130s
Epoch 34 - avg_train_Score: 0.7569 avgScore: 0.7605


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7555(0.7605) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [35][0/542] Elapsed 0m 1s (remain 12m 45s) Loss: 0.6909(0.6909) Grad: 242621.9375  LR: 0.000064  
Epoch: [35][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5469(0.7529) Grad: 43616.9492  LR: 0.000064  
Epoch: [35][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7217(0.7516) Grad: 49406.6758  LR: 0.000064  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.7223(0.7223) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7503(0.7571) 


Epoch 35 - avg_train_loss: 0.7516  avg_val_loss: 0.7571  time: 130s
Epoch 35 - avg_train_Score: 0.7516 avgScore: 0.7571
Epoch 35 - Save Best Score: 0.7571 Model
Epoch 35 - Save Best Loss: 0.7571 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [36][0/542] Elapsed 0m 1s (remain 12m 19s) Loss: 0.7752(0.7752) Grad: 186172.5625  LR: 0.000062  
Epoch: [36][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6385(0.7345) Grad: 90448.8906  LR: 0.000062  
Epoch: [36][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6961(0.7326) Grad: 91141.7344  LR: 0.000062  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 47s) Loss: 0.7247(0.7247) 


Epoch 36 - avg_train_loss: 0.7326  avg_val_loss: 0.7531  time: 130s
Epoch 36 - avg_train_Score: 0.7326 avgScore: 0.7531
Epoch 36 - Save Best Score: 0.7531 Model
Epoch 36 - Save Best Loss: 0.7531 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7369(0.7531) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [37][0/542] Elapsed 0m 1s (remain 12m 2s) Loss: 0.6857(0.6857) Grad: 171834.0312  LR: 0.000060  
Epoch: [37][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5000(0.7255) Grad: 76999.3203  LR: 0.000060  
Epoch: [37][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8318(0.7263) Grad: 79775.1172  LR: 0.000060  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 46s) Loss: 0.7158(0.7158) 


Epoch 37 - avg_train_loss: 0.7263  avg_val_loss: 0.7499  time: 130s
Epoch 37 - avg_train_Score: 0.7263 avgScore: 0.7499
Epoch 37 - Save Best Score: 0.7499 Model
Epoch 37 - Save Best Loss: 0.7499 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7394(0.7499) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [38][0/542] Elapsed 0m 1s (remain 12m 10s) Loss: 0.6445(0.6445) Grad: 170396.7812  LR: 0.000058  
Epoch: [38][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7170(0.8055) Grad: 19751.8125  LR: 0.000058  
Epoch: [38][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8236(0.8034) Grad: 11883.9932  LR: 0.000058  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.7344(0.7344) 


Epoch 38 - avg_train_loss: 0.8034  avg_val_loss: 0.7633  time: 130s
Epoch 38 - avg_train_Score: 0.8034 avgScore: 0.7633


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7588(0.7633) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [39][0/542] Elapsed 0m 1s (remain 12m 4s) Loss: 0.8073(0.8073) Grad: 191934.3750  LR: 0.000056  
Epoch: [39][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5948(0.7601) Grad: 43267.7617  LR: 0.000056  
Epoch: [39][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.2238(0.7630) Grad: 38095.1562  LR: 0.000056  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.7547(0.7547) 


Epoch 39 - avg_train_loss: 0.7630  avg_val_loss: 0.7634  time: 130s
Epoch 39 - avg_train_Score: 0.7630 avgScore: 0.7634


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7558(0.7634) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [40][0/542] Elapsed 0m 1s (remain 12m 7s) Loss: 0.9031(0.9031) Grad: 197377.4219  LR: 0.000054  
Epoch: [40][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9725(0.7336) Grad: 161696.3281  LR: 0.000054  
Epoch: [40][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6773(0.7324) Grad: 247712.3125  LR: 0.000054  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7656(0.7656) 


Epoch 40 - avg_train_loss: 0.7324  avg_val_loss: 0.7553  time: 130s
Epoch 40 - avg_train_Score: 0.7324 avgScore: 0.7553


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7445(0.7553) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [41][0/542] Elapsed 0m 1s (remain 12m 2s) Loss: 0.6472(0.6472) Grad: 189942.8125  LR: 0.000052  
Epoch: [41][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5366(0.7222) Grad: 47979.8984  LR: 0.000052  
Epoch: [41][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8486(0.7241) Grad: 45453.9414  LR: 0.000052  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 38s) Loss: 0.7689(0.7689) 


Epoch 41 - avg_train_loss: 0.7241  avg_val_loss: 0.7526  time: 130s
Epoch 41 - avg_train_Score: 0.7241 avgScore: 0.7526


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7316(0.7526) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [42][0/542] Elapsed 0m 1s (remain 12m 1s) Loss: 0.5431(0.5431) Grad: 176361.3594  LR: 0.000050  
Epoch: [42][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6516(0.7321) Grad: 49642.9297  LR: 0.000050  
Epoch: [42][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6688(0.7311) Grad: 71693.4219  LR: 0.000050  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7509(0.7509) 


Epoch 42 - avg_train_loss: 0.7311  avg_val_loss: 0.7528  time: 130s
Epoch 42 - avg_train_Score: 0.7311 avgScore: 0.7528


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7310(0.7528) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [43][0/542] Elapsed 0m 1s (remain 12m 39s) Loss: 0.6071(0.6071) Grad: 138697.1094  LR: 0.000048  
Epoch: [43][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7997(0.7028) Grad: 42392.2383  LR: 0.000048  
Epoch: [43][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6749(0.7022) Grad: 47586.9375  LR: 0.000048  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 0.7332(0.7332) 


Epoch 43 - avg_train_loss: 0.7022  avg_val_loss: 0.7498  time: 130s
Epoch 43 - avg_train_Score: 0.7022 avgScore: 0.7498
Epoch 43 - Save Best Score: 0.7498 Model
Epoch 43 - Save Best Loss: 0.7498 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7446(0.7498) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [44][0/542] Elapsed 0m 1s (remain 12m 2s) Loss: 0.5033(0.5033) Grad: 175898.9844  LR: 0.000046  
Epoch: [44][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5986(0.7053) Grad: 47452.3516  LR: 0.000046  
Epoch: [44][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5322(0.7033) Grad: 43883.5391  LR: 0.000046  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 38s) Loss: 0.7354(0.7354) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7326(0.7485) 


Epoch 44 - avg_train_loss: 0.7033  avg_val_loss: 0.7485  time: 130s
Epoch 44 - avg_train_Score: 0.7033 avgScore: 0.7485
Epoch 44 - Save Best Score: 0.7485 Model
Epoch 44 - Save Best Loss: 0.7485 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [45][0/542] Elapsed 0m 1s (remain 12m 17s) Loss: 0.7434(0.7434) Grad: 171141.1406  LR: 0.000044  
Epoch: [45][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6691(0.6915) Grad: 114255.1797  LR: 0.000044  
Epoch: [45][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6190(0.6918) Grad: 73637.5938  LR: 0.000044  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.7221(0.7221) 


Epoch 45 - avg_train_loss: 0.6918  avg_val_loss: 0.7468  time: 130s
Epoch 45 - avg_train_Score: 0.6918 avgScore: 0.7468
Epoch 45 - Save Best Score: 0.7468 Model
Epoch 45 - Save Best Loss: 0.7468 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7371(0.7468) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [46][0/542] Elapsed 0m 1s (remain 12m 27s) Loss: 0.8290(0.8290) Grad: 138551.4219  LR: 0.000042  
Epoch: [46][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0957(0.7122) Grad: 42688.1250  LR: 0.000042  
Epoch: [46][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6567(0.7168) Grad: 54478.4805  LR: 0.000042  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.7382(0.7382) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7520(0.7445) 


Epoch 46 - avg_train_loss: 0.7168  avg_val_loss: 0.7445  time: 130s
Epoch 46 - avg_train_Score: 0.7168 avgScore: 0.7445
Epoch 46 - Save Best Score: 0.7445 Model
Epoch 46 - Save Best Loss: 0.7445 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [47][0/542] Elapsed 0m 1s (remain 12m 35s) Loss: 0.5824(0.5824) Grad: 157592.9062  LR: 0.000040  
Epoch: [47][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5365(0.6997) Grad: 101879.4688  LR: 0.000040  
Epoch: [47][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8552(0.6971) Grad: 84515.3047  LR: 0.000040  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.7281(0.7281) 


Epoch 47 - avg_train_loss: 0.6971  avg_val_loss: 0.7481  time: 130s
Epoch 47 - avg_train_Score: 0.6971 avgScore: 0.7481


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7457(0.7481) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [48][0/542] Elapsed 0m 1s (remain 11m 57s) Loss: 0.6265(0.6265) Grad: 155639.8906  LR: 0.000038  
Epoch: [48][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5707(0.6848) Grad: 78831.5547  LR: 0.000038  
Epoch: [48][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5609(0.6840) Grad: 77634.7812  LR: 0.000038  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7091(0.7091) 


Epoch 48 - avg_train_loss: 0.6840  avg_val_loss: 0.7452  time: 130s
Epoch 48 - avg_train_Score: 0.6840 avgScore: 0.7452


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7407(0.7452) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [49][0/542] Elapsed 0m 1s (remain 11m 48s) Loss: 0.6679(0.6679) Grad: 190433.1875  LR: 0.000037  
Epoch: [49][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5875(0.6936) Grad: 100563.9375  LR: 0.000037  
Epoch: [49][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6625(0.6907) Grad: 77704.6875  LR: 0.000037  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.7161(0.7161) 


Epoch 49 - avg_train_loss: 0.6907  avg_val_loss: 0.7414  time: 130s
Epoch 49 - avg_train_Score: 0.6907 avgScore: 0.7414
Epoch 49 - Save Best Score: 0.7414 Model
Epoch 49 - Save Best Loss: 0.7414 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7465(0.7414) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [50][0/542] Elapsed 0m 1s (remain 12m 24s) Loss: 0.6566(0.6566) Grad: 138999.8438  LR: 0.000035  
Epoch: [50][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7306(0.7128) Grad: 56093.3633  LR: 0.000035  
Epoch: [50][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7243(0.7126) Grad: 45261.4023  LR: 0.000035  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.7493(0.7493) 


Epoch 50 - avg_train_loss: 0.7126  avg_val_loss: 0.7500  time: 130s
Epoch 50 - avg_train_Score: 0.7126 avgScore: 0.7500


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7561(0.7500) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [51][0/542] Elapsed 0m 1s (remain 12m 45s) Loss: 0.6087(0.6087) Grad: 211978.0938  LR: 0.000033  
Epoch: [51][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6084(0.6868) Grad: 356818.0312  LR: 0.000033  
Epoch: [51][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6676(0.6871) Grad: 83691.7344  LR: 0.000033  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7623(0.7623) 


Epoch 51 - avg_train_loss: 0.6871  avg_val_loss: 0.7474  time: 130s
Epoch 51 - avg_train_Score: 0.6871 avgScore: 0.7474


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7545(0.7474) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [52][0/542] Elapsed 0m 1s (remain 11m 54s) Loss: 0.6112(0.6112) Grad: 178425.1719  LR: 0.000031  
Epoch: [52][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.4759(0.6861) Grad: 75870.9062  LR: 0.000031  
Epoch: [52][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5717(0.6848) Grad: 91826.2266  LR: 0.000031  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 45s) Loss: 0.7356(0.7356) 


Epoch 52 - avg_train_loss: 0.6848  avg_val_loss: 0.7460  time: 130s
Epoch 52 - avg_train_Score: 0.6848 avgScore: 0.7460


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7356(0.7460) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [53][0/542] Elapsed 0m 1s (remain 11m 45s) Loss: 0.4899(0.4899) Grad: 161878.6562  LR: 0.000029  
Epoch: [53][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5916(0.6889) Grad: 20623.7422  LR: 0.000029  
Epoch: [53][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7393(0.6905) Grad: 23247.2344  LR: 0.000029  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 38s) Loss: 0.7345(0.7345) 


Epoch 53 - avg_train_loss: 0.6905  avg_val_loss: 0.7460  time: 130s
Epoch 53 - avg_train_Score: 0.6905 avgScore: 0.7460


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7463(0.7460) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [54][0/542] Elapsed 0m 1s (remain 11m 54s) Loss: 0.6330(0.6330) Grad: 159610.1875  LR: 0.000027  
Epoch: [54][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5438(0.7008) Grad: 8086.1528  LR: 0.000027  
Epoch: [54][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6915(0.7053) Grad: 9639.7607  LR: 0.000027  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7429(0.7429) 


Epoch 54 - avg_train_loss: 0.7053  avg_val_loss: 0.7432  time: 130s
Epoch 54 - avg_train_Score: 0.7053 avgScore: 0.7432


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7334(0.7432) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [55][0/542] Elapsed 0m 1s (remain 11m 45s) Loss: 0.9627(0.9627) Grad: 197179.6094  LR: 0.000026  
Epoch: [55][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7180(0.6759) Grad: 31192.0078  LR: 0.000026  
Epoch: [55][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5521(0.6767) Grad: 17436.5312  LR: 0.000026  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.7440(0.7440) 


Epoch 55 - avg_train_loss: 0.6767  avg_val_loss: 0.7420  time: 130s


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7456(0.7420) 


Epoch 55 - avg_train_Score: 0.6767 avgScore: 0.7420
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [56][0/542] Elapsed 0m 1s (remain 12m 16s) Loss: 0.8439(0.8439) Grad: 171463.1250  LR: 0.000024  
Epoch: [56][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5536(0.6796) Grad: 8570.7559  LR: 0.000024  
Epoch: [56][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6261(0.6777) Grad: 9552.4600  LR: 0.000024  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 44s) Loss: 0.7378(0.7378) 


Epoch 56 - avg_train_loss: 0.6777  avg_val_loss: 0.7416  time: 130s
Epoch 56 - avg_train_Score: 0.6777 avgScore: 0.7416


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7411(0.7416) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [57][0/542] Elapsed 0m 1s (remain 11m 49s) Loss: 0.6320(0.6320) Grad: 480926.5625  LR: 0.000022  
Epoch: [57][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9335(0.6728) Grad: 49001.8320  LR: 0.000022  
Epoch: [57][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6804(0.6738) Grad: 46748.4883  LR: 0.000022  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 0.7417(0.7417) 


Epoch 57 - avg_train_loss: 0.6738  avg_val_loss: 0.7430  time: 130s
Epoch 57 - avg_train_Score: 0.6738 avgScore: 0.7430


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7334(0.7430) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [58][0/542] Elapsed 0m 1s (remain 11m 49s) Loss: 0.5951(0.5951) Grad: 156803.0781  LR: 0.000021  
Epoch: [58][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.6574(0.6760) Grad: 20713.0898  LR: 0.000021  
Epoch: [58][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9429(0.6777) Grad: 256958.3750  LR: 0.000021  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7329(0.7329) 


Epoch 58 - avg_train_loss: 0.6777  avg_val_loss: 0.7409  time: 130s
Epoch 58 - avg_train_Score: 0.6777 avgScore: 0.7409
Epoch 58 - Save Best Score: 0.7409 Model
Epoch 58 - Save Best Loss: 0.7409 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7366(0.7409) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [59][0/542] Elapsed 0m 1s (remain 11m 52s) Loss: 0.7450(0.7450) Grad: 242526.1094  LR: 0.000019  
Epoch: [59][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5338(0.6694) Grad: 20096.9570  LR: 0.000019  
Epoch: [59][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5080(0.6675) Grad: 18530.4746  LR: 0.000019  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.7307(0.7307) 


Epoch 59 - avg_train_loss: 0.6675  avg_val_loss: 0.7400  time: 130s
Epoch 59 - avg_train_Score: 0.6675 avgScore: 0.7400
Epoch 59 - Save Best Score: 0.7400 Model
Epoch 59 - Save Best Loss: 0.7400 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7434(0.7400) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [60][0/542] Elapsed 0m 1s (remain 12m 25s) Loss: 0.6234(0.6234) Grad: 144237.7188  LR: 0.000018  
Epoch: [60][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6805(0.6627) Grad: 53810.1484  LR: 0.000018  
Epoch: [60][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7490(0.6621) Grad: 272040.3750  LR: 0.000018  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 0.7295(0.7295) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7461(0.7409) 


Epoch 60 - avg_train_loss: 0.6621  avg_val_loss: 0.7409  time: 130s
Epoch 60 - avg_train_Score: 0.6621 avgScore: 0.7409
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [61][0/542] Elapsed 0m 1s (remain 12m 5s) Loss: 0.6015(0.6015) Grad: 155895.7656  LR: 0.000016  
Epoch: [61][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6317(0.6717) Grad: 37088.5703  LR: 0.000016  
Epoch: [61][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7386(0.6701) Grad: 21732.5938  LR: 0.000016  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.7160(0.7160) 


Epoch 61 - avg_train_loss: 0.6701  avg_val_loss: 0.7419  time: 130s
Epoch 61 - avg_train_Score: 0.6701 avgScore: 0.7419


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7604(0.7419) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [62][0/542] Elapsed 0m 1s (remain 13m 14s) Loss: 1.0467(1.0467) Grad: 166790.6875  LR: 0.000015  
Epoch: [62][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6238(0.6908) Grad: 12115.8447  LR: 0.000015  
Epoch: [62][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5219(0.6900) Grad: 8532.1680  LR: 0.000015  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.7198(0.7198) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7708(0.7398) 


Epoch 62 - avg_train_loss: 0.6900  avg_val_loss: 0.7398  time: 130s
Epoch 62 - avg_train_Score: 0.6900 avgScore: 0.7398
Epoch 62 - Save Best Score: 0.7398 Model
Epoch 62 - Save Best Loss: 0.7398 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [63][0/542] Elapsed 0m 1s (remain 11m 59s) Loss: 0.7788(0.7788) Grad: 141071.4219  LR: 0.000013  
Epoch: [63][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5777(0.6655) Grad: 29072.6797  LR: 0.000013  
Epoch: [63][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6601(0.6684) Grad: 20174.4004  LR: 0.000013  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7239(0.7239) 


Epoch 63 - avg_train_loss: 0.6684  avg_val_loss: 0.7374  time: 130s
Epoch 63 - avg_train_Score: 0.6684 avgScore: 0.7374
Epoch 63 - Save Best Score: 0.7374 Model
Epoch 63 - Save Best Loss: 0.7374 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7452(0.7374) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [64][0/542] Elapsed 0m 1s (remain 12m 11s) Loss: 0.7208(0.7208) Grad: 200597.5781  LR: 0.000012  
Epoch: [64][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5469(0.6502) Grad: 10064.4346  LR: 0.000012  
Epoch: [64][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6818(0.6543) Grad: 39951.2148  LR: 0.000012  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7120(0.7120) 


Epoch 64 - avg_train_loss: 0.6543  avg_val_loss: 0.7389  time: 130s
Epoch 64 - avg_train_Score: 0.6543 avgScore: 0.7389


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7465(0.7389) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [65][0/542] Elapsed 0m 1s (remain 11m 59s) Loss: 0.7035(0.7035) Grad: nan  LR: 0.000011  
Epoch: [65][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6475(0.6459) Grad: 18740.3379  LR: 0.000011  
Epoch: [65][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5694(0.6518) Grad: 427041.1562  LR: 0.000011  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7140(0.7140) 


Epoch 65 - avg_train_loss: 0.6518  avg_val_loss: 0.7396  time: 130s
Epoch 65 - avg_train_Score: 0.6518 avgScore: 0.7396


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7466(0.7396) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [66][0/542] Elapsed 0m 1s (remain 11m 57s) Loss: 0.5787(0.5787) Grad: 202704.8594  LR: 0.000010  
Epoch: [66][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5009(0.6512) Grad: 20135.9824  LR: 0.000010  
Epoch: [66][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6610(0.6519) Grad: 19589.6074  LR: 0.000010  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 38s) Loss: 0.7107(0.7107) 


Epoch 66 - avg_train_loss: 0.6519  avg_val_loss: 0.7401  time: 130s
Epoch 66 - avg_train_Score: 0.6519 avgScore: 0.7401


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7442(0.7401) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [67][0/542] Elapsed 0m 1s (remain 14m 6s) Loss: 0.6905(0.6905) Grad: 136745.4844  LR: 0.000009  
Epoch: [67][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5368(0.6555) Grad: 8166.2671  LR: 0.000009  
Epoch: [67][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8988(0.6573) Grad: 9820.8174  LR: 0.000009  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.7093(0.7093) 


Epoch 67 - avg_train_loss: 0.6573  avg_val_loss: 0.7390  time: 130s
Epoch 67 - avg_train_Score: 0.6573 avgScore: 0.7390


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7444(0.7390) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [68][0/542] Elapsed 0m 1s (remain 12m 2s) Loss: 0.5683(0.5683) Grad: 128761.1484  LR: 0.000008  
Epoch: [68][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6981(0.6583) Grad: 231852.9375  LR: 0.000008  
Epoch: [68][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5420(0.6593) Grad: 15828.4600  LR: 0.000008  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.7100(0.7100) 


Epoch 68 - avg_train_loss: 0.6593  avg_val_loss: 0.7389  time: 130s
Epoch 68 - avg_train_Score: 0.6593 avgScore: 0.7389


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7497(0.7389) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [69][0/542] Elapsed 0m 1s (remain 11m 58s) Loss: 0.5593(0.5593) Grad: 137462.4531  LR: 0.000007  
Epoch: [69][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.4792(0.6491) Grad: 16433.6582  LR: 0.000007  
Epoch: [69][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7291(0.6536) Grad: 8994.9766  LR: 0.000007  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.7111(0.7111) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7512(0.7380) 


Epoch 69 - avg_train_loss: 0.6536  avg_val_loss: 0.7380  time: 130s
Epoch 69 - avg_train_Score: 0.6536 avgScore: 0.7380
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [70][0/542] Elapsed 0m 1s (remain 12m 4s) Loss: 0.6715(0.6715) Grad: 175196.5938  LR: 0.000006  
Epoch: [70][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9253(0.6487) Grad: 17690.8809  LR: 0.000006  
Epoch: [70][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7034(0.6503) Grad: 19196.2988  LR: 0.000006  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7085(0.7085) 


Epoch 70 - avg_train_loss: 0.6503  avg_val_loss: 0.7379  time: 130s
Epoch 70 - avg_train_Score: 0.6503 avgScore: 0.7379


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7572(0.7379) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [71][0/542] Elapsed 0m 1s (remain 11m 59s) Loss: 0.7222(0.7222) Grad: 140942.3906  LR: 0.000005  
Epoch: [71][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.6561(0.6374) Grad: 20629.3047  LR: 0.000005  
Epoch: [71][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5228(0.6416) Grad: 19160.9141  LR: 0.000005  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.7087(0.7087) 


Epoch 71 - avg_train_loss: 0.6416  avg_val_loss: 0.7394  time: 130s
Epoch 71 - avg_train_Score: 0.6416 avgScore: 0.7394


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7550(0.7394) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [72][0/542] Elapsed 0m 1s (remain 12m 43s) Loss: 0.5676(0.5676) Grad: 143460.4688  LR: 0.000004  
Epoch: [72][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5454(0.6581) Grad: 9553.4062  LR: 0.000004  
Epoch: [72][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5360(0.6578) Grad: 8608.7676  LR: 0.000004  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.7055(0.7055) 


Epoch 72 - avg_train_loss: 0.6578  avg_val_loss: 0.7371  time: 130s
Epoch 72 - avg_train_Score: 0.6578 avgScore: 0.7371
Epoch 72 - Save Best Score: 0.7371 Model
Epoch 72 - Save Best Loss: 0.7371 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7553(0.7371) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [73][0/542] Elapsed 0m 1s (remain 12m 32s) Loss: 0.8836(0.8836) Grad: 166584.5625  LR: 0.000003  
Epoch: [73][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5156(0.6503) Grad: 35095.4336  LR: 0.000003  
Epoch: [73][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5468(0.6482) Grad: 29316.2383  LR: 0.000003  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.7137(0.7137) 


Epoch 73 - avg_train_loss: 0.6482  avg_val_loss: 0.7385  time: 130s
Epoch 73 - avg_train_Score: 0.6482 avgScore: 0.7385


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7597(0.7385) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [74][0/542] Elapsed 0m 1s (remain 12m 10s) Loss: 0.5903(0.5903) Grad: 130352.2344  LR: 0.000003  
Epoch: [74][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5598(0.6405) Grad: 17047.8730  LR: 0.000003  
Epoch: [74][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.4111(0.6395) Grad: 18655.4922  LR: 0.000003  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 45s) Loss: 0.7157(0.7157) 


Epoch 74 - avg_train_loss: 0.6395  avg_val_loss: 0.7384  time: 130s
Epoch 74 - avg_train_Score: 0.6395 avgScore: 0.7384


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7521(0.7384) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [75][0/542] Elapsed 0m 1s (remain 11m 48s) Loss: 0.5976(0.5976) Grad: 171037.6250  LR: 0.000002  
Epoch: [75][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5178(0.6512) Grad: 22887.2207  LR: 0.000002  
Epoch: [75][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7742(0.6511) Grad: 20017.6543  LR: 0.000002  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 42s) Loss: 0.7141(0.7141) 


Epoch 75 - avg_train_loss: 0.6511  avg_val_loss: 0.7386  time: 130s
Epoch 75 - avg_train_Score: 0.6511 avgScore: 0.7386


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7491(0.7386) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [76][0/542] Elapsed 0m 1s (remain 11m 47s) Loss: 0.6048(0.6048) Grad: 152376.6094  LR: 0.000002  
Epoch: [76][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6918(0.6571) Grad: 19074.4395  LR: 0.000002  
Epoch: [76][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.4792(0.6543) Grad: 16701.8418  LR: 0.000002  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7139(0.7139) 


Epoch 76 - avg_train_loss: 0.6543  avg_val_loss: 0.7375  time: 130s
Epoch 76 - avg_train_Score: 0.6543 avgScore: 0.7375


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7463(0.7375) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [77][0/542] Elapsed 0m 1s (remain 11m 56s) Loss: 0.6750(0.6750) Grad: 139984.8906  LR: 0.000001  
Epoch: [77][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7683(0.6396) Grad: 40728.9453  LR: 0.000001  
Epoch: [77][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7549(0.6406) Grad: 33816.2383  LR: 0.000001  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 0.7156(0.7156) 


Epoch 77 - avg_train_loss: 0.6406  avg_val_loss: 0.7370  time: 130s
Epoch 77 - avg_train_Score: 0.6406 avgScore: 0.7370
Epoch 77 - Save Best Score: 0.7370 Model
Epoch 77 - Save Best Loss: 0.7370 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7478(0.7370) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [78][0/542] Elapsed 0m 1s (remain 12m 15s) Loss: 0.5598(0.5598) Grad: 150103.4375  LR: 0.000001  
Epoch: [78][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5209(0.6408) Grad: 18735.8652  LR: 0.000001  
Epoch: [78][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.3005(0.6395) Grad: 21435.7148  LR: 0.000001  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7147(0.7147) 


Epoch 78 - avg_train_loss: 0.6395  avg_val_loss: 0.7368  time: 130s
Epoch 78 - avg_train_Score: 0.6395 avgScore: 0.7368
Epoch 78 - Save Best Score: 0.7368 Model
Epoch 78 - Save Best Loss: 0.7368 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7487(0.7368) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [79][0/542] Elapsed 0m 1s (remain 11m 42s) Loss: 0.4904(0.4904) Grad: 173083.1094  LR: 0.000000  
Epoch: [79][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.4867(0.6523) Grad: 14739.2578  LR: 0.000000  
Epoch: [79][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5101(0.6480) Grad: 18022.4473  LR: 0.000000  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 58s) Loss: 0.7156(0.7156) 


Epoch 79 - avg_train_loss: 0.6480  avg_val_loss: 0.7362  time: 130s
Epoch 79 - avg_train_Score: 0.6480 avgScore: 0.7362
Epoch 79 - Save Best Score: 0.7362 Model
Epoch 79 - Save Best Loss: 0.7362 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7462(0.7362) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [80][0/542] Elapsed 0m 1s (remain 11m 46s) Loss: 1.0763(1.0763) Grad: 195584.7812  LR: 0.000000  
Epoch: [80][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5850(0.6508) Grad: 19362.0195  LR: 0.000000  
Epoch: [80][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5608(0.6496) Grad: 7930.5615  LR: 0.000000  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7185(0.7185) 


Epoch 80 - avg_train_loss: 0.6496  avg_val_loss: 0.7382  time: 130s
Epoch 80 - avg_train_Score: 0.6496 avgScore: 0.7382


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7515(0.7382) 


  check_point = torch.load(
score: 0.7382


check_point_pred shape (8674, 18)
pretrained: True


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):


Epoch: [1][0/542] Elapsed 0m 1s (remain 11m 46s) Loss: 6.9939(6.9939) Grad: 432335.9375  LR: 0.000010  
Epoch: [1][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 5.3526(5.3491) Grad: 153491.5938  LR: 0.000010  
Epoch: [1][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 5.3163(5.3285) Grad: 165182.9531  LR: 0.000010  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 46s) Loss: 4.8893(4.8893) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 4.8899(5.0626) 


Epoch 1 - avg_train_loss: 5.3285  avg_val_loss: 5.0626  time: 130s
Epoch 1 - avg_train_Score: 5.3285 avgScore: 5.0626
Epoch 1 - Save Best Score: 5.0626 Model
Epoch 1 - Save Best Loss: 5.0626 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):


Epoch: [2][0/542] Elapsed 0m 1s (remain 12m 20s) Loss: 5.6021(5.6021) Grad: 202671.3125  LR: 0.000010  
Epoch: [2][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 4.5799(4.8374) Grad: 92284.7812  LR: 0.000010  
Epoch: [2][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 4.6025(4.8092) Grad: 100020.4453  LR: 0.000010  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 4.1636(4.1636) 


Epoch 2 - avg_train_loss: 4.8092  avg_val_loss: 4.3281  time: 130s
Epoch 2 - avg_train_Score: 4.8092 avgScore: 4.3281
Epoch 2 - Save Best Score: 4.3281 Model
Epoch 2 - Save Best Loss: 4.3281 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 4.1628(4.3281) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):


Epoch: [3][0/542] Elapsed 0m 1s (remain 11m 44s) Loss: 4.1151(4.1151) Grad: 191984.4688  LR: 0.000100  
Epoch: [3][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 1.6589(2.7105) Grad: 98428.4609  LR: 0.000100  
Epoch: [3][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.5386(2.6310) Grad: 36351.7852  LR: 0.000100  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 1.5698(1.5698) 


  _warn_get_lr_called_within_step(self)
Epoch 3 - avg_train_loss: 2.6310  avg_val_loss: 1.7367  time: 130s
Epoch 3 - avg_train_Score: 2.6310 avgScore: 1.7367
Epoch 3 - Save Best Score: 1.7367 Model
Epoch 3 - Save Best Loss: 1.7367 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.7424(1.7367) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [4][0/542] Elapsed 0m 1s (remain 12m 3s) Loss: 1.7079(1.7079) Grad: 691563.5625  LR: 0.000100  
Epoch: [4][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.4743(1.4369) Grad: 168882.1719  LR: 0.000100  
Epoch: [4][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.2042(1.4358) Grad: 93880.4609  LR: 0.000100  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.8916(0.8916) 


Epoch 4 - avg_train_loss: 1.4358  avg_val_loss: 1.0812  time: 130s
Epoch 4 - avg_train_Score: 1.4358 avgScore: 1.0812
Epoch 4 - Save Best Score: 1.0812 Model
Epoch 4 - Save Best Loss: 1.0812 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.0872(1.0812) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [5][0/542] Elapsed 0m 1s (remain 11m 55s) Loss: 1.4294(1.4294) Grad: 436680.0000  LR: 0.000100  
Epoch: [5][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0220(1.3020) Grad: 117026.8281  LR: 0.000100  
Epoch: [5][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.4539(1.2978) Grad: 84863.6641  LR: 0.000100  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 44s) Loss: 0.8266(0.8266) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9655(0.9999) 


Epoch 5 - avg_train_loss: 1.2978  avg_val_loss: 0.9999  time: 130s
Epoch 5 - avg_train_Score: 1.2978 avgScore: 0.9999
Epoch 5 - Save Best Score: 0.9999 Model
Epoch 5 - Save Best Loss: 0.9999 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [6][0/542] Elapsed 0m 1s (remain 11m 56s) Loss: 1.2325(1.2325) Grad: 187528.6719  LR: 0.000099  
Epoch: [6][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.1475(1.1540) Grad: 271808.2812  LR: 0.000099  
Epoch: [6][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8815(1.1523) Grad: 252911.7031  LR: 0.000099  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7831(0.7831) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9639(0.9460) 


Epoch 6 - avg_train_loss: 1.1523  avg_val_loss: 0.9460  time: 130s
Epoch 6 - avg_train_Score: 1.1523 avgScore: 0.9460
Epoch 6 - Save Best Score: 0.9460 Model
Epoch 6 - Save Best Loss: 0.9460 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [7][0/542] Elapsed 0m 1s (remain 11m 59s) Loss: 1.4280(1.4280) Grad: 219978.9844  LR: 0.000099  
Epoch: [7][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.3609(1.1265) Grad: 294684.1875  LR: 0.000099  
Epoch: [7][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.0101(1.1273) Grad: 88374.7422  LR: 0.000099  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 44s) Loss: 0.7837(0.7837) 


Epoch 7 - avg_train_loss: 1.1273  avg_val_loss: 0.9182  time: 130s
Epoch 7 - avg_train_Score: 1.1273 avgScore: 0.9182
Epoch 7 - Save Best Score: 0.9182 Model
Epoch 7 - Save Best Loss: 0.9182 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8923(0.9182) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [8][0/542] Elapsed 0m 1s (remain 12m 54s) Loss: 1.0089(1.0089) Grad: 364358.5938  LR: 0.000099  
Epoch: [8][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.1277(1.1095) Grad: 105686.3594  LR: 0.000099  
Epoch: [8][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.0249(1.1133) Grad: 72445.3672  LR: 0.000099  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 45s) Loss: 0.7976(0.7976) 


Epoch 8 - avg_train_loss: 1.1133  avg_val_loss: 0.9087  time: 130s
Epoch 8 - avg_train_Score: 1.1133 avgScore: 0.9087
Epoch 8 - Save Best Score: 0.9087 Model
Epoch 8 - Save Best Loss: 0.9087 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8945(0.9087) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [9][0/542] Elapsed 0m 1s (remain 11m 42s) Loss: 1.2649(1.2649) Grad: 228424.4844  LR: 0.000098  
Epoch: [9][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.3026(1.0202) Grad: 280969.7500  LR: 0.000098  
Epoch: [9][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8702(1.0157) Grad: 389298.1562  LR: 0.000098  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.8089(0.8089) 


Epoch 9 - avg_train_loss: 1.0157  avg_val_loss: 0.8761  time: 130s
Epoch 9 - avg_train_Score: 1.0157 avgScore: 0.8761
Epoch 9 - Save Best Score: 0.8761 Model
Epoch 9 - Save Best Loss: 0.8761 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9063(0.8761) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [10][0/542] Elapsed 0m 1s (remain 12m 21s) Loss: 0.8408(0.8408) Grad: 410109.5312  LR: 0.000098  
Epoch: [10][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9111(1.0002) Grad: 108946.9297  LR: 0.000098  
Epoch: [10][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.2505(1.0052) Grad: 152681.0781  LR: 0.000098  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.8016(0.8016) 


Epoch 10 - avg_train_loss: 1.0052  avg_val_loss: 0.8574  time: 130s
Epoch 10 - avg_train_Score: 1.0052 avgScore: 0.8574
Epoch 10 - Save Best Score: 0.8574 Model
Epoch 10 - Save Best Loss: 0.8574 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8464(0.8574) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [11][0/542] Elapsed 0m 1s (remain 12m 28s) Loss: 1.1039(1.1039) Grad: 305284.5000  LR: 0.000097  
Epoch: [11][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0829(0.9825) Grad: 187799.5625  LR: 0.000097  
Epoch: [11][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8958(0.9827) Grad: 176445.9375  LR: 0.000097  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 38s) Loss: 0.7897(0.7897) 


Epoch 11 - avg_train_loss: 0.9827  avg_val_loss: 0.8415  time: 130s
Epoch 11 - avg_train_Score: 0.9827 avgScore: 0.8415
Epoch 11 - Save Best Score: 0.8415 Model
Epoch 11 - Save Best Loss: 0.8415 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8523(0.8415) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [12][0/542] Elapsed 0m 1s (remain 12m 5s) Loss: 0.9125(0.9125) Grad: 298248.4062  LR: 0.000096  
Epoch: [12][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6923(0.9713) Grad: 112585.9062  LR: 0.000096  
Epoch: [12][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9806(0.9700) Grad: 137236.1719  LR: 0.000096  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.7411(0.7411) 


Epoch 12 - avg_train_loss: 0.9700  avg_val_loss: 0.8298  time: 130s
Epoch 12 - avg_train_Score: 0.9700 avgScore: 0.8298
Epoch 12 - Save Best Score: 0.8298 Model
Epoch 12 - Save Best Loss: 0.8298 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7983(0.8298) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [13][0/542] Elapsed 0m 1s (remain 12m 10s) Loss: 0.8802(0.8802) Grad: 511645.4062  LR: 0.000095  
Epoch: [13][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9157(0.9015) Grad: 246900.9531  LR: 0.000095  
Epoch: [13][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8557(0.9000) Grad: 395697.9062  LR: 0.000095  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7641(0.7641) 


Epoch 13 - avg_train_loss: 0.9000  avg_val_loss: 0.8174  time: 130s
Epoch 13 - avg_train_Score: 0.9000 avgScore: 0.8174
Epoch 13 - Save Best Score: 0.8174 Model
Epoch 13 - Save Best Loss: 0.8174 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8257(0.8174) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [14][0/542] Elapsed 0m 1s (remain 11m 57s) Loss: 0.8531(0.8531) Grad: 280132.1875  LR: 0.000095  
Epoch: [14][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8248(0.8876) Grad: 247006.7031  LR: 0.000095  
Epoch: [14][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8153(0.8868) Grad: 252197.2969  LR: 0.000095  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.7509(0.7509) 


Epoch 14 - avg_train_loss: 0.8868  avg_val_loss: 0.8102  time: 130s
Epoch 14 - avg_train_Score: 0.8868 avgScore: 0.8102
Epoch 14 - Save Best Score: 0.8102 Model
Epoch 14 - Save Best Loss: 0.8102 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7941(0.8102) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [15][0/542] Elapsed 0m 1s (remain 11m 43s) Loss: 0.6999(0.6999) Grad: 257214.5312  LR: 0.000094  
Epoch: [15][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9351(0.9005) Grad: 162877.3438  LR: 0.000094  
Epoch: [15][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7893(0.8986) Grad: 113775.5234  LR: 0.000094  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7695(0.7695) 


Epoch 15 - avg_train_loss: 0.8986  avg_val_loss: 0.8153  time: 130s
Epoch 15 - avg_train_Score: 0.8986 avgScore: 0.8153


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7824(0.8153) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [16][0/542] Elapsed 0m 1s (remain 11m 53s) Loss: 0.7398(0.7398) Grad: 235294.6094  LR: 0.000093  
Epoch: [16][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7968(0.9047) Grad: 108328.3359  LR: 0.000093  
Epoch: [16][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7942(0.9035) Grad: 164648.6875  LR: 0.000093  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.7462(0.7462) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7771(0.8096) 


Epoch 16 - avg_train_loss: 0.9035  avg_val_loss: 0.8096  time: 130s
Epoch 16 - avg_train_Score: 0.9035 avgScore: 0.8096
Epoch 16 - Save Best Score: 0.8096 Model
Epoch 16 - Save Best Loss: 0.8096 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [17][0/542] Elapsed 0m 1s (remain 11m 37s) Loss: 0.7732(0.7732) Grad: 215534.3594  LR: 0.000092  
Epoch: [17][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9164(0.8532) Grad: 159645.5156  LR: 0.000092  
Epoch: [17][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.0872(0.8564) Grad: 104177.7422  LR: 0.000092  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7449(0.7449) 


Epoch 17 - avg_train_loss: 0.8564  avg_val_loss: 0.7993  time: 130s
Epoch 17 - avg_train_Score: 0.8564 avgScore: 0.7993
Epoch 17 - Save Best Score: 0.7993 Model
Epoch 17 - Save Best Loss: 0.7993 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8037(0.7993) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [18][0/542] Elapsed 0m 1s (remain 12m 3s) Loss: 0.7340(0.7340) Grad: 221793.2031  LR: 0.000091  
Epoch: [18][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7042(0.8279) Grad: 231098.5781  LR: 0.000091  
Epoch: [18][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7376(0.8256) Grad: 254197.2969  LR: 0.000091  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 42s) Loss: 0.7393(0.7393) 


Epoch 18 - avg_train_loss: 0.8256  avg_val_loss: 0.7930  time: 130s
Epoch 18 - avg_train_Score: 0.8256 avgScore: 0.7930
Epoch 18 - Save Best Score: 0.7930 Model
Epoch 18 - Save Best Loss: 0.7930 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7961(0.7930) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [19][0/542] Elapsed 0m 1s (remain 12m 1s) Loss: 0.8011(0.8011) Grad: 214444.0000  LR: 0.000089  
Epoch: [19][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9326(0.8611) Grad: 110446.0234  LR: 0.000089  
Epoch: [19][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6814(0.8644) Grad: 90047.2422  LR: 0.000089  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 38s) Loss: 0.7796(0.7796) 


Epoch 19 - avg_train_loss: 0.8644  avg_val_loss: 0.7929  time: 130s
Epoch 19 - avg_train_Score: 0.8644 avgScore: 0.7929
Epoch 19 - Save Best Score: 0.7929 Model
Epoch 19 - Save Best Loss: 0.7929 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7919(0.7929) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [20][0/542] Elapsed 0m 1s (remain 11m 55s) Loss: 0.7760(0.7760) Grad: 327040.4062  LR: 0.000088  
Epoch: [20][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6230(0.8332) Grad: 100361.9766  LR: 0.000088  
Epoch: [20][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6603(0.8326) Grad: 110471.5781  LR: 0.000088  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 38s) Loss: 0.7495(0.7495) 


Epoch 20 - avg_train_loss: 0.8326  avg_val_loss: 0.7855  time: 130s
Epoch 20 - avg_train_Score: 0.8326 avgScore: 0.7855
Epoch 20 - Save Best Score: 0.7855 Model
Epoch 20 - Save Best Loss: 0.7855 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7697(0.7855) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [21][0/542] Elapsed 0m 1s (remain 11m 45s) Loss: 0.7849(0.7849) Grad: 238399.8125  LR: 0.000087  
Epoch: [21][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8244(0.8396) Grad: 59652.0273  LR: 0.000087  
Epoch: [21][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8943(0.8425) Grad: 63557.3438  LR: 0.000087  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 45s) Loss: 0.7391(0.7391) 


Epoch 21 - avg_train_loss: 0.8425  avg_val_loss: 0.7900  time: 130s
Epoch 21 - avg_train_Score: 0.8425 avgScore: 0.7900


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7814(0.7900) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [22][0/542] Elapsed 0m 1s (remain 11m 59s) Loss: 0.8814(0.8814) Grad: 239548.9375  LR: 0.000085  
Epoch: [22][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0684(0.8173) Grad: 256456.4688  LR: 0.000085  
Epoch: [22][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5867(0.8177) Grad: 135625.0000  LR: 0.000085  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7568(0.7568) 


Epoch 22 - avg_train_loss: 0.8177  avg_val_loss: 0.7891  time: 130s
Epoch 22 - avg_train_Score: 0.8177 avgScore: 0.7891


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7695(0.7891) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [23][0/542] Elapsed 0m 1s (remain 11m 46s) Loss: 0.6317(0.6317) Grad: 204375.0938  LR: 0.000084  
Epoch: [23][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6426(0.7753) Grad: 142376.4219  LR: 0.000084  
Epoch: [23][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6898(0.7768) Grad: 109296.9375  LR: 0.000084  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7688(0.7688) 


Epoch 23 - avg_train_loss: 0.7768  avg_val_loss: 0.7756  time: 130s
Epoch 23 - avg_train_Score: 0.7768 avgScore: 0.7756
Epoch 23 - Save Best Score: 0.7756 Model
Epoch 23 - Save Best Loss: 0.7756 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7637(0.7756) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [24][0/542] Elapsed 0m 1s (remain 12m 2s) Loss: 0.6918(0.6918) Grad: 305805.8125  LR: 0.000083  
Epoch: [24][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9038(0.8070) Grad: 59383.2500  LR: 0.000083  
Epoch: [24][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8255(0.8115) Grad: 58983.9961  LR: 0.000083  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7543(0.7543) 


Epoch 24 - avg_train_loss: 0.8115  avg_val_loss: 0.7743  time: 130s
Epoch 24 - avg_train_Score: 0.8115 avgScore: 0.7743
Epoch 24 - Save Best Score: 0.7743 Model
Epoch 24 - Save Best Loss: 0.7743 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7416(0.7743) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [25][0/542] Elapsed 0m 1s (remain 12m 23s) Loss: 0.8588(0.8588) Grad: 285420.9062  LR: 0.000081  
Epoch: [25][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0795(0.7869) Grad: 44102.9609  LR: 0.000081  
Epoch: [25][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6443(0.7863) Grad: 119309.4062  LR: 0.000081  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 45s) Loss: 0.7276(0.7276) 


Epoch 25 - avg_train_loss: 0.7863  avg_val_loss: 0.7748  time: 130s
Epoch 25 - avg_train_Score: 0.7863 avgScore: 0.7748


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7515(0.7748) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [26][0/542] Elapsed 0m 1s (remain 12m 3s) Loss: 0.7205(0.7205) Grad: 208751.4688  LR: 0.000079  
Epoch: [26][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8546(0.7853) Grad: 112998.1250  LR: 0.000079  
Epoch: [26][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7996(0.7880) Grad: 121827.2578  LR: 0.000079  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7063(0.7063) 


Epoch 26 - avg_train_loss: 0.7880  avg_val_loss: 0.7713  time: 130s
Epoch 26 - avg_train_Score: 0.7880 avgScore: 0.7713
Epoch 26 - Save Best Score: 0.7713 Model
Epoch 26 - Save Best Loss: 0.7713 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7703(0.7713) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [27][0/542] Elapsed 0m 1s (remain 12m 20s) Loss: 0.7048(0.7048) Grad: 227805.7812  LR: 0.000078  
Epoch: [27][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 1.1149(0.7690) Grad: 96662.8359  LR: 0.000078  
Epoch: [27][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.0815(0.7743) Grad: 88231.7266  LR: 0.000078  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.7297(0.7297) 


Epoch 27 - avg_train_loss: 0.7743  avg_val_loss: 0.7785  time: 130s
Epoch 27 - avg_train_Score: 0.7743 avgScore: 0.7785


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7739(0.7785) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [28][0/542] Elapsed 0m 1s (remain 13m 16s) Loss: 0.6637(0.6637) Grad: 180298.0312  LR: 0.000076  
Epoch: [28][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6422(0.7823) Grad: 91781.8984  LR: 0.000076  
Epoch: [28][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6205(0.7816) Grad: 85952.2266  LR: 0.000076  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 48s) Loss: 0.7397(0.7397) 


Epoch 28 - avg_train_loss: 0.7816  avg_val_loss: 0.7695  time: 130s
Epoch 28 - avg_train_Score: 0.7816 avgScore: 0.7695
Epoch 28 - Save Best Score: 0.7695 Model
Epoch 28 - Save Best Loss: 0.7695 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7610(0.7695) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [29][0/542] Elapsed 0m 1s (remain 12m 23s) Loss: 0.9311(0.9311) Grad: 275347.2188  LR: 0.000075  
Epoch: [29][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8166(0.7675) Grad: 252680.2500  LR: 0.000075  
Epoch: [29][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7293(0.7652) Grad: 449857.3438  LR: 0.000075  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.7312(0.7312) 


Epoch 29 - avg_train_loss: 0.7652  avg_val_loss: 0.7636  time: 130s
Epoch 29 - avg_train_Score: 0.7652 avgScore: 0.7636
Epoch 29 - Save Best Score: 0.7636 Model
Epoch 29 - Save Best Loss: 0.7636 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7689(0.7636) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [30][0/542] Elapsed 0m 1s (remain 12m 2s) Loss: 0.9765(0.9765) Grad: 202820.5938  LR: 0.000073  
Epoch: [30][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7942(0.7702) Grad: 139444.9219  LR: 0.000073  
Epoch: [30][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8976(0.7707) Grad: 95702.7188  LR: 0.000073  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.7130(0.7130) 


Epoch 30 - avg_train_loss: 0.7707  avg_val_loss: 0.7629  time: 130s
Epoch 30 - avg_train_Score: 0.7707 avgScore: 0.7629
Epoch 30 - Save Best Score: 0.7629 Model
Epoch 30 - Save Best Loss: 0.7629 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7359(0.7629) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [31][0/542] Elapsed 0m 1s (remain 12m 0s) Loss: 0.9994(0.9994) Grad: 176737.1406  LR: 0.000071  
Epoch: [31][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7557(0.7572) Grad: 81031.6875  LR: 0.000071  
Epoch: [31][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6387(0.7557) Grad: 79969.3047  LR: 0.000071  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7105(0.7105) 


Epoch 31 - avg_train_loss: 0.7557  avg_val_loss: 0.7622  time: 130s
Epoch 31 - avg_train_Score: 0.7557 avgScore: 0.7622
Epoch 31 - Save Best Score: 0.7622 Model
Epoch 31 - Save Best Loss: 0.7622 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7501(0.7622) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [32][0/542] Elapsed 0m 1s (remain 11m 40s) Loss: 0.6205(0.6205) Grad: 147195.1719  LR: 0.000069  
Epoch: [32][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6097(0.7487) Grad: 59802.9570  LR: 0.000069  
Epoch: [32][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7668(0.7561) Grad: 65800.7969  LR: 0.000069  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 38s) Loss: 0.7098(0.7098) 


Epoch 32 - avg_train_loss: 0.7561  avg_val_loss: 0.7602  time: 130s
Epoch 32 - avg_train_Score: 0.7561 avgScore: 0.7602
Epoch 32 - Save Best Score: 0.7602 Model
Epoch 32 - Save Best Loss: 0.7602 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7199(0.7602) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [33][0/542] Elapsed 0m 1s (remain 11m 36s) Loss: 0.7424(0.7424) Grad: 209252.7188  LR: 0.000067  
Epoch: [33][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6564(0.7512) Grad: 92748.5938  LR: 0.000067  
Epoch: [33][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5124(0.7528) Grad: 73318.0078  LR: 0.000067  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.7280(0.7280) 


Epoch 33 - avg_train_loss: 0.7528  avg_val_loss: 0.7614  time: 130s
Epoch 33 - avg_train_Score: 0.7528 avgScore: 0.7614


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7618(0.7614) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [34][0/542] Elapsed 0m 1s (remain 11m 41s) Loss: 0.7265(0.7265) Grad: 184266.3750  LR: 0.000066  
Epoch: [34][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6754(0.7401) Grad: 91910.0312  LR: 0.000066  
Epoch: [34][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6156(0.7370) Grad: 89973.4766  LR: 0.000066  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 47s) Loss: 0.7256(0.7256) 


Epoch 34 - avg_train_loss: 0.7370  avg_val_loss: 0.7562  time: 130s
Epoch 34 - avg_train_Score: 0.7370 avgScore: 0.7562
Epoch 34 - Save Best Score: 0.7562 Model
Epoch 34 - Save Best Loss: 0.7562 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7801(0.7562) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [35][0/542] Elapsed 0m 1s (remain 12m 36s) Loss: 0.8930(0.8930) Grad: 205757.1094  LR: 0.000064  
Epoch: [35][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.7131(0.7465) Grad: 79409.2344  LR: 0.000064  
Epoch: [35][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6186(0.7492) Grad: 45273.9414  LR: 0.000064  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.7359(0.7359) 


Epoch 35 - avg_train_loss: 0.7492  avg_val_loss: 0.7526  time: 130s
Epoch 35 - avg_train_Score: 0.7492 avgScore: 0.7526
Epoch 35 - Save Best Score: 0.7526 Model
Epoch 35 - Save Best Loss: 0.7526 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7832(0.7526) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [36][0/542] Elapsed 0m 1s (remain 12m 15s) Loss: 0.8630(0.8630) Grad: 278754.0000  LR: 0.000062  
Epoch: [36][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0323(0.7280) Grad: 40843.6719  LR: 0.000062  
Epoch: [36][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7689(0.7271) Grad: 40724.8359  LR: 0.000062  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7483(0.7483) 


Epoch 36 - avg_train_loss: 0.7271  avg_val_loss: 0.7567  time: 130s
Epoch 36 - avg_train_Score: 0.7271 avgScore: 0.7567


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7976(0.7567) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [37][0/542] Elapsed 0m 1s (remain 12m 2s) Loss: 0.5749(0.5749) Grad: 164569.1250  LR: 0.000060  
Epoch: [37][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5133(0.7204) Grad: 75296.2344  LR: 0.000060  
Epoch: [37][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6214(0.7191) Grad: 136203.4688  LR: 0.000060  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 0.7320(0.7320) 


Epoch 37 - avg_train_loss: 0.7191  avg_val_loss: 0.7569  time: 130s
Epoch 37 - avg_train_Score: 0.7191 avgScore: 0.7569


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7846(0.7569) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [38][0/542] Elapsed 0m 1s (remain 12m 6s) Loss: 0.8147(0.8147) Grad: 221251.3281  LR: 0.000058  
Epoch: [38][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8053(0.7081) Grad: 146702.6875  LR: 0.000058  
Epoch: [38][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6441(0.7083) Grad: 134402.9688  LR: 0.000058  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 0.7393(0.7393) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7430(0.7510) 


Epoch 38 - avg_train_loss: 0.7083  avg_val_loss: 0.7510  time: 130s
Epoch 38 - avg_train_Score: 0.7083 avgScore: 0.7510
Epoch 38 - Save Best Score: 0.7510 Model
Epoch 38 - Save Best Loss: 0.7510 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [39][0/542] Elapsed 0m 1s (remain 11m 58s) Loss: 0.6596(0.6596) Grad: 161665.5781  LR: 0.000056  
Epoch: [39][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6179(0.6948) Grad: 156290.0156  LR: 0.000056  
Epoch: [39][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8814(0.6963) Grad: 151100.0781  LR: 0.000056  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7311(0.7311) 


Epoch 39 - avg_train_loss: 0.6963  avg_val_loss: 0.7495  time: 130s


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7350(0.7495) 


Epoch 39 - avg_train_Score: 0.6963 avgScore: 0.7495
Epoch 39 - Save Best Score: 0.7495 Model
Epoch 39 - Save Best Loss: 0.7495 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [40][0/542] Elapsed 0m 1s (remain 12m 8s) Loss: 0.5270(0.5270) Grad: 165107.4688  LR: 0.000054  
Epoch: [40][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7090(0.7025) Grad: 87016.1562  LR: 0.000054  
Epoch: [40][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6830(0.7053) Grad: 92127.0234  LR: 0.000054  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 0.7283(0.7283) 


Epoch 40 - avg_train_loss: 0.7053  avg_val_loss: 0.7501  time: 130s
Epoch 40 - avg_train_Score: 0.7053 avgScore: 0.7501


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7494(0.7501) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [41][0/542] Elapsed 0m 1s (remain 11m 59s) Loss: 0.6659(0.6659) Grad: 156942.5156  LR: 0.000052  
Epoch: [41][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6568(0.6961) Grad: 128079.9453  LR: 0.000052  
Epoch: [41][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5400(0.6956) Grad: 83134.6875  LR: 0.000052  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.7289(0.7289) 


Epoch 41 - avg_train_loss: 0.6956  avg_val_loss: 0.7497  time: 130s
Epoch 41 - avg_train_Score: 0.6956 avgScore: 0.7497


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7625(0.7497) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [42][0/542] Elapsed 0m 1s (remain 12m 21s) Loss: 0.5856(0.5856) Grad: 157754.5156  LR: 0.000050  
Epoch: [42][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6370(0.7032) Grad: 124924.2344  LR: 0.000050  
Epoch: [42][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6181(0.7040) Grad: 177273.1094  LR: 0.000050  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7114(0.7114) 


Epoch 42 - avg_train_loss: 0.7040  avg_val_loss: 0.7488  time: 130s
Epoch 42 - avg_train_Score: 0.7040 avgScore: 0.7488
Epoch 42 - Save Best Score: 0.7488 Model
Epoch 42 - Save Best Loss: 0.7488 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7550(0.7488) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [43][0/542] Elapsed 0m 1s (remain 14m 41s) Loss: 0.8570(0.8570) Grad: 167671.0000  LR: 0.000048  
Epoch: [43][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7311(0.7201) Grad: 93304.7266  LR: 0.000048  
Epoch: [43][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9711(0.7189) Grad: 56516.7344  LR: 0.000048  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 46s) Loss: 0.7219(0.7219) 


Epoch 43 - avg_train_loss: 0.7189  avg_val_loss: 0.7501  time: 130s
Epoch 43 - avg_train_Score: 0.7189 avgScore: 0.7501


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7533(0.7501) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [44][0/542] Elapsed 0m 1s (remain 12m 38s) Loss: 1.2042(1.2042) Grad: 146116.8750  LR: 0.000046  
Epoch: [44][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7393(0.7148) Grad: 78360.9297  LR: 0.000046  
Epoch: [44][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9307(0.7142) Grad: 100952.5938  LR: 0.000046  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.7204(0.7204) 


Epoch 44 - avg_train_loss: 0.7142  avg_val_loss: 0.7535  time: 130s
Epoch 44 - avg_train_Score: 0.7142 avgScore: 0.7535


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7377(0.7535) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [45][0/542] Elapsed 0m 1s (remain 11m 42s) Loss: 0.7442(0.7442) Grad: 433916.0312  LR: 0.000044  
Epoch: [45][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6726(0.6947) Grad: 105582.1328  LR: 0.000044  
Epoch: [45][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5565(0.6991) Grad: 61919.3164  LR: 0.000044  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.7172(0.7172) 


Epoch 45 - avg_train_loss: 0.6991  avg_val_loss: 0.7477  time: 130s
Epoch 45 - avg_train_Score: 0.6991 avgScore: 0.7477
Epoch 45 - Save Best Score: 0.7477 Model
Epoch 45 - Save Best Loss: 0.7477 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7489(0.7477) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [46][0/542] Elapsed 0m 1s (remain 12m 6s) Loss: 0.6951(0.6951) Grad: 139026.8750  LR: 0.000042  
Epoch: [46][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6322(0.6852) Grad: 164515.5625  LR: 0.000042  
Epoch: [46][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.1103(0.6885) Grad: 174488.1406  LR: 0.000042  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.7023(0.7023) 


Epoch 46 - avg_train_loss: 0.6885  avg_val_loss: 0.7469  time: 130s
Epoch 46 - avg_train_Score: 0.6885 avgScore: 0.7469
Epoch 46 - Save Best Score: 0.7469 Model
Epoch 46 - Save Best Loss: 0.7469 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7774(0.7469) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [47][0/542] Elapsed 0m 1s (remain 11m 52s) Loss: 0.9619(0.9619) Grad: 142600.4844  LR: 0.000040  
Epoch: [47][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7548(0.6868) Grad: 189402.9375  LR: 0.000040  
Epoch: [47][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6593(0.6883) Grad: 98898.3672  LR: 0.000040  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.7110(0.7110) 


Epoch 47 - avg_train_loss: 0.6883  avg_val_loss: 0.7442  time: 130s
Epoch 47 - avg_train_Score: 0.6883 avgScore: 0.7442
Epoch 47 - Save Best Score: 0.7442 Model
Epoch 47 - Save Best Loss: 0.7442 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7518(0.7442) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [48][0/542] Elapsed 0m 1s (remain 14m 14s) Loss: 0.8974(0.8974) Grad: 136760.2656  LR: 0.000038  
Epoch: [48][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7172(0.6842) Grad: 89630.1172  LR: 0.000038  
Epoch: [48][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8504(0.6857) Grad: inf  LR: 0.000038  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.7386(0.7386) 


Epoch 48 - avg_train_loss: 0.6857  avg_val_loss: 0.7429  time: 130s
Epoch 48 - avg_train_Score: 0.6857 avgScore: 0.7429
Epoch 48 - Save Best Score: 0.7429 Model
Epoch 48 - Save Best Loss: 0.7429 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7344(0.7429) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [49][0/542] Elapsed 0m 1s (remain 11m 59s) Loss: 0.5037(0.5037) Grad: 589456.8125  LR: 0.000037  
Epoch: [49][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8784(0.6807) Grad: 87279.2422  LR: 0.000037  
Epoch: [49][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7700(0.6800) Grad: 66518.7500  LR: 0.000037  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.7180(0.7180) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7480(0.7456) 


Epoch 49 - avg_train_loss: 0.6800  avg_val_loss: 0.7456  time: 130s
Epoch 49 - avg_train_Score: 0.6800 avgScore: 0.7456
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [50][0/542] Elapsed 0m 1s (remain 12m 3s) Loss: 0.7305(0.7305) Grad: 183321.7812  LR: 0.000035  
Epoch: [50][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6676(0.6840) Grad: 38960.7422  LR: 0.000035  
Epoch: [50][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5736(0.6816) Grad: 37443.6406  LR: 0.000035  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7129(0.7129) 


Epoch 50 - avg_train_loss: 0.6816  avg_val_loss: 0.7445  time: 130s
Epoch 50 - avg_train_Score: 0.6816 avgScore: 0.7445


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7535(0.7445) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [51][0/542] Elapsed 0m 1s (remain 12m 15s) Loss: 0.6889(0.6889) Grad: 156051.7344  LR: 0.000033  
Epoch: [51][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6199(0.6764) Grad: 150378.0469  LR: 0.000033  
Epoch: [51][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5663(0.6763) Grad: 139440.9688  LR: 0.000033  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 44s) Loss: 0.7148(0.7148) 


Epoch 51 - avg_train_loss: 0.6763  avg_val_loss: 0.7416  time: 130s
Epoch 51 - avg_train_Score: 0.6763 avgScore: 0.7416
Epoch 51 - Save Best Score: 0.7416 Model
Epoch 51 - Save Best Loss: 0.7416 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7527(0.7416) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [52][0/542] Elapsed 0m 1s (remain 12m 27s) Loss: 0.5673(0.5673) Grad: 147544.0938  LR: 0.000031  
Epoch: [52][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5670(0.6803) Grad: 67720.7969  LR: 0.000031  
Epoch: [52][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7030(0.6791) Grad: 88413.9609  LR: 0.000031  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.7102(0.7102) 


Epoch 52 - avg_train_loss: 0.6791  avg_val_loss: 0.7424  time: 130s
Epoch 52 - avg_train_Score: 0.6791 avgScore: 0.7424


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7600(0.7424) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [53][0/542] Elapsed 0m 1s (remain 12m 30s) Loss: 0.5807(0.5807) Grad: 152669.7500  LR: 0.000029  
Epoch: [53][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5046(0.6654) Grad: 79584.4531  LR: 0.000029  
Epoch: [53][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7337(0.6692) Grad: 70070.6250  LR: 0.000029  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7177(0.7177) 


Epoch 53 - avg_train_loss: 0.6692  avg_val_loss: 0.7396  time: 129s
Epoch 53 - avg_train_Score: 0.6692 avgScore: 0.7396
Epoch 53 - Save Best Score: 0.7396 Model
Epoch 53 - Save Best Loss: 0.7396 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7440(0.7396) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [54][0/542] Elapsed 0m 1s (remain 11m 44s) Loss: 0.7372(0.7372) Grad: 195382.0156  LR: 0.000027  
Epoch: [54][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6632(0.6679) Grad: 80988.6250  LR: 0.000027  
Epoch: [54][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5913(0.6673) Grad: 67793.7344  LR: 0.000027  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 46s) Loss: 0.7146(0.7146) 


Epoch 54 - avg_train_loss: 0.6673  avg_val_loss: 0.7421  time: 130s
Epoch 54 - avg_train_Score: 0.6673 avgScore: 0.7421


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7442(0.7421) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [55][0/542] Elapsed 0m 1s (remain 13m 37s) Loss: 0.6867(0.6867) Grad: 161545.4844  LR: 0.000026  
Epoch: [55][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6229(0.6591) Grad: 77696.7812  LR: 0.000026  
Epoch: [55][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6180(0.6647) Grad: 66032.9062  LR: 0.000026  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 42s) Loss: 0.7214(0.7214) 


Epoch 55 - avg_train_loss: 0.6647  avg_val_loss: 0.7416  time: 130s
Epoch 55 - avg_train_Score: 0.6647 avgScore: 0.7416


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7447(0.7416) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [56][0/542] Elapsed 0m 1s (remain 12m 7s) Loss: 0.8889(0.8889) Grad: 163285.9062  LR: 0.000024  
Epoch: [56][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6748(0.6756) Grad: 92315.1562  LR: 0.000024  
Epoch: [56][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5858(0.6759) Grad: 45807.9727  LR: 0.000024  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7102(0.7102) 


Epoch 56 - avg_train_loss: 0.6759  avg_val_loss: 0.7409  time: 130s
Epoch 56 - avg_train_Score: 0.6759 avgScore: 0.7409


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7448(0.7409) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [57][0/542] Elapsed 0m 1s (remain 11m 59s) Loss: 0.6820(0.6820) Grad: 329882.4062  LR: 0.000022  
Epoch: [57][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5273(0.6631) Grad: 61439.5781  LR: 0.000022  
Epoch: [57][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5379(0.6630) Grad: 90333.3984  LR: 0.000022  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.6934(0.6934) 


Epoch 57 - avg_train_loss: 0.6630  avg_val_loss: 0.7411  time: 130s
Epoch 57 - avg_train_Score: 0.6630 avgScore: 0.7411


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7650(0.7411) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [58][0/542] Elapsed 0m 1s (remain 12m 5s) Loss: 0.5408(0.5408) Grad: 205065.0156  LR: 0.000021  
Epoch: [58][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.7097(0.6592) Grad: 20727.4219  LR: 0.000021  
Epoch: [58][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6196(0.6608) Grad: 16668.3457  LR: 0.000021  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.6992(0.6992) 


Epoch 58 - avg_train_loss: 0.6608  avg_val_loss: 0.7414  time: 130s
Epoch 58 - avg_train_Score: 0.6608 avgScore: 0.7414


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7652(0.7414) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [59][0/542] Elapsed 0m 1s (remain 11m 43s) Loss: 0.6566(0.6566) Grad: 129500.7812  LR: 0.000019  
Epoch: [59][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5534(0.6584) Grad: 42008.3828  LR: 0.000019  
Epoch: [59][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7899(0.6580) Grad: 56332.6914  LR: 0.000019  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 45s) Loss: 0.6985(0.6985) 


Epoch 59 - avg_train_loss: 0.6580  avg_val_loss: 0.7421  time: 130s
Epoch 59 - avg_train_Score: 0.6580 avgScore: 0.7421


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7545(0.7421) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [60][0/542] Elapsed 0m 1s (remain 12m 4s) Loss: 0.5334(0.5334) Grad: 125610.5703  LR: 0.000018  
Epoch: [60][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5344(0.6560) Grad: 33882.9922  LR: 0.000018  
Epoch: [60][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.4822(0.6562) Grad: 35552.3008  LR: 0.000018  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7043(0.7043) 


Epoch 60 - avg_train_loss: 0.6562  avg_val_loss: 0.7419  time: 130s
Epoch 60 - avg_train_Score: 0.6562 avgScore: 0.7419


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7711(0.7419) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [61][0/542] Elapsed 0m 1s (remain 11m 45s) Loss: 0.8936(0.8936) Grad: 171635.9531  LR: 0.000016  
Epoch: [61][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7870(0.6542) Grad: 72929.3047  LR: 0.000016  
Epoch: [61][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5222(0.6553) Grad: 274836.0312  LR: 0.000016  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7143(0.7143) 


Epoch 61 - avg_train_loss: 0.6553  avg_val_loss: 0.7405  time: 130s
Epoch 61 - avg_train_Score: 0.6553 avgScore: 0.7405


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7500(0.7405) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [62][0/542] Elapsed 0m 1s (remain 11m 49s) Loss: 0.6139(0.6139) Grad: 153044.5469  LR: 0.000015  
Epoch: [62][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5065(0.6529) Grad: 56053.2148  LR: 0.000015  
Epoch: [62][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6872(0.6492) Grad: 44917.6016  LR: 0.000015  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7016(0.7016) 


Epoch 62 - avg_train_loss: 0.6492  avg_val_loss: 0.7372  time: 130s
Epoch 62 - avg_train_Score: 0.6492 avgScore: 0.7372


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7454(0.7372) 


Epoch 62 - Save Best Score: 0.7372 Model
Epoch 62 - Save Best Loss: 0.7372 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [63][0/542] Elapsed 0m 1s (remain 11m 55s) Loss: 0.8915(0.8915) Grad: 127005.6719  LR: 0.000013  
Epoch: [63][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6753(0.6539) Grad: 81879.8047  LR: 0.000013  
Epoch: [63][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9580(0.6554) Grad: 86054.5000  LR: 0.000013  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.6949(0.6949) 


Epoch 63 - avg_train_loss: 0.6554  avg_val_loss: 0.7374  time: 130s
Epoch 63 - avg_train_Score: 0.6554 avgScore: 0.7374


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7499(0.7374) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [64][0/542] Elapsed 0m 1s (remain 12m 3s) Loss: 1.0339(1.0339) Grad: 580564.1875  LR: 0.000012  
Epoch: [64][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6649(0.6637) Grad: 35793.6562  LR: 0.000012  
Epoch: [64][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6051(0.6644) Grad: 37737.4141  LR: 0.000012  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 45s) Loss: 0.6884(0.6884) 


Epoch 64 - avg_train_loss: 0.6644  avg_val_loss: 0.7375  time: 130s
Epoch 64 - avg_train_Score: 0.6644 avgScore: 0.7375


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7418(0.7375) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [65][0/542] Elapsed 0m 1s (remain 11m 57s) Loss: 0.7788(0.7788) Grad: 192863.3438  LR: 0.000011  
Epoch: [65][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5300(0.6560) Grad: 81670.0156  LR: 0.000011  
Epoch: [65][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5036(0.6540) Grad: 72688.6719  LR: 0.000011  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.6881(0.6881) 


Epoch 65 - avg_train_loss: 0.6540  avg_val_loss: 0.7372  time: 130s
Epoch 65 - avg_train_Score: 0.6540 avgScore: 0.7372


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7279(0.7372) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [66][0/542] Elapsed 0m 1s (remain 14m 55s) Loss: 1.1427(1.1427) Grad: 134301.9219  LR: 0.000010  
Epoch: [66][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5622(0.6408) Grad: 40293.8867  LR: 0.000010  
Epoch: [66][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8206(0.6421) Grad: 85897.3594  LR: 0.000010  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.6962(0.6962) 


Epoch 66 - avg_train_loss: 0.6421  avg_val_loss: 0.7369  time: 130s
Epoch 66 - avg_train_Score: 0.6421 avgScore: 0.7369
Epoch 66 - Save Best Score: 0.7369 Model
Epoch 66 - Save Best Loss: 0.7369 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7305(0.7369) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [67][0/542] Elapsed 0m 1s (remain 11m 57s) Loss: 0.6277(0.6277) Grad: 150645.2031  LR: 0.000009  
Epoch: [67][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6780(0.6408) Grad: 72184.8750  LR: 0.000009  
Epoch: [67][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5988(0.6411) Grad: 190506.6406  LR: 0.000009  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.7019(0.7019) 


Epoch 67 - avg_train_loss: 0.6411  avg_val_loss: 0.7362  time: 130s
Epoch 67 - avg_train_Score: 0.6411 avgScore: 0.7362
Epoch 67 - Save Best Score: 0.7362 Model
Epoch 67 - Save Best Loss: 0.7362 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7360(0.7362) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [68][0/542] Elapsed 0m 1s (remain 11m 59s) Loss: 0.6710(0.6710) Grad: nan  LR: 0.000008  
Epoch: [68][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5945(0.6506) Grad: 70251.0547  LR: 0.000008  
Epoch: [68][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5941(0.6522) Grad: 71237.6641  LR: 0.000008  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.6988(0.6988) 


Epoch 68 - avg_train_loss: 0.6522  avg_val_loss: 0.7359  time: 130s
Epoch 68 - avg_train_Score: 0.6522 avgScore: 0.7359
Epoch 68 - Save Best Score: 0.7359 Model
Epoch 68 - Save Best Loss: 0.7359 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7358(0.7359) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [69][0/542] Elapsed 0m 1s (remain 12m 7s) Loss: 0.5626(0.5626) Grad: 131266.3281  LR: 0.000007  
Epoch: [69][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5783(0.6414) Grad: 48296.1523  LR: 0.000007  
Epoch: [69][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6117(0.6429) Grad: 44303.1484  LR: 0.000007  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.6987(0.6987) 


Epoch 69 - avg_train_loss: 0.6429  avg_val_loss: 0.7357  time: 130s
Epoch 69 - avg_train_Score: 0.6429 avgScore: 0.7357
Epoch 69 - Save Best Score: 0.7357 Model
Epoch 69 - Save Best Loss: 0.7357 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7337(0.7357) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [70][0/542] Elapsed 0m 1s (remain 12m 11s) Loss: 0.5867(0.5867) Grad: 211465.3438  LR: 0.000006  
Epoch: [70][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.4967(0.6518) Grad: 78559.9219  LR: 0.000006  
Epoch: [70][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5159(0.6507) Grad: 72844.6875  LR: 0.000006  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.6985(0.6985) 


Epoch 70 - avg_train_loss: 0.6507  avg_val_loss: 0.7363  time: 130s
Epoch 70 - avg_train_Score: 0.6507 avgScore: 0.7363


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7450(0.7363) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [71][0/542] Elapsed 0m 1s (remain 14m 18s) Loss: 0.7621(0.7621) Grad: 321707.0000  LR: 0.000005  
Epoch: [71][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5662(0.6415) Grad: 56045.6602  LR: 0.000005  
Epoch: [71][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8752(0.6400) Grad: 32744.5156  LR: 0.000005  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.6960(0.6960) 


Epoch 71 - avg_train_loss: 0.6400  avg_val_loss: 0.7344  time: 130s
Epoch 71 - avg_train_Score: 0.6400 avgScore: 0.7344
Epoch 71 - Save Best Score: 0.7344 Model
Epoch 71 - Save Best Loss: 0.7344 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7439(0.7344) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [72][0/542] Elapsed 0m 1s (remain 11m 41s) Loss: 0.6169(0.6169) Grad: 178890.5312  LR: 0.000004  
Epoch: [72][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.4903(0.6392) Grad: 45908.6680  LR: 0.000004  
Epoch: [72][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5126(0.6374) Grad: 29589.2656  LR: 0.000004  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 0.6952(0.6952) 


Epoch 72 - avg_train_loss: 0.6374  avg_val_loss: 0.7340  time: 130s
Epoch 72 - avg_train_Score: 0.6374 avgScore: 0.7340
Epoch 72 - Save Best Score: 0.7340 Model
Epoch 72 - Save Best Loss: 0.7340 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7385(0.7340) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [73][0/542] Elapsed 0m 1s (remain 12m 21s) Loss: 0.8354(0.8354) Grad: 169440.1250  LR: 0.000003  
Epoch: [73][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9433(0.6447) Grad: 70790.2344  LR: 0.000003  
Epoch: [73][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5765(0.6470) Grad: 75348.8594  LR: 0.000003  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.6998(0.6998) 


Epoch 73 - avg_train_loss: 0.6470  avg_val_loss: 0.7341  time: 130s
Epoch 73 - avg_train_Score: 0.6470 avgScore: 0.7341


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7391(0.7341) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [74][0/542] Elapsed 0m 1s (remain 12m 11s) Loss: 0.5570(0.5570) Grad: 225112.3438  LR: 0.000003  
Epoch: [74][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5375(0.6407) Grad: 76277.6250  LR: 0.000003  
Epoch: [74][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6398(0.6419) Grad: 73643.4844  LR: 0.000003  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.7028(0.7028) 


Epoch 74 - avg_train_loss: 0.6419  avg_val_loss: 0.7343  time: 130s
Epoch 74 - avg_train_Score: 0.6419 avgScore: 0.7343


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7337(0.7343) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [75][0/542] Elapsed 0m 1s (remain 11m 45s) Loss: 0.7648(0.7648) Grad: 199385.3438  LR: 0.000002  
Epoch: [75][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5250(0.6397) Grad: 37975.5938  LR: 0.000002  
Epoch: [75][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7049(0.6409) Grad: 52406.6055  LR: 0.000002  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.7042(0.7042) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7400(0.7339) 


Epoch 75 - avg_train_loss: 0.6409  avg_val_loss: 0.7339  time: 130s
Epoch 75 - avg_train_Score: 0.6409 avgScore: 0.7339
Epoch 75 - Save Best Score: 0.7339 Model
Epoch 75 - Save Best Loss: 0.7339 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [76][0/542] Elapsed 0m 1s (remain 11m 53s) Loss: 0.6045(0.6045) Grad: 153800.7031  LR: 0.000002  
Epoch: [76][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6336(0.6422) Grad: 22758.3359  LR: 0.000002  
Epoch: [76][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5207(0.6395) Grad: 14418.2471  LR: 0.000002  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7044(0.7044) 


Epoch 76 - avg_train_loss: 0.6395  avg_val_loss: 0.7339  time: 130s
Epoch 76 - avg_train_Score: 0.6395 avgScore: 0.7339
Epoch 76 - Save Best Score: 0.7339 Model
Epoch 76 - Save Best Loss: 0.7339 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7328(0.7339) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [77][0/542] Elapsed 0m 1s (remain 11m 38s) Loss: 0.6550(0.6550) Grad: 143353.9219  LR: 0.000001  
Epoch: [77][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5748(0.6425) Grad: 37684.7266  LR: 0.000001  
Epoch: [77][541/542] Elapsed 1m 49s (remain 0m 0s) Loss: 0.4986(0.6433) Grad: 26879.6270  LR: 0.000001  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.7025(0.7025) 


Epoch 77 - avg_train_loss: 0.6433  avg_val_loss: 0.7347  time: 129s
Epoch 77 - avg_train_Score: 0.6433 avgScore: 0.7347


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7364(0.7347) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [78][0/542] Elapsed 0m 1s (remain 11m 45s) Loss: 0.4147(0.4147) Grad: 111044.5391  LR: 0.000001  
Epoch: [78][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5633(0.6456) Grad: 87460.7969  LR: 0.000001  
Epoch: [78][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.4991(0.6445) Grad: 57756.6094  LR: 0.000001  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.7020(0.7020) 


Epoch 78 - avg_train_loss: 0.6445  avg_val_loss: 0.7344  time: 130s
Epoch 78 - avg_train_Score: 0.6445 avgScore: 0.7344


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7352(0.7344) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [79][0/542] Elapsed 0m 1s (remain 11m 40s) Loss: 0.8918(0.8918) Grad: 146506.1406  LR: 0.000000  
Epoch: [79][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.6066(0.6542) Grad: 73237.0156  LR: 0.000000  
Epoch: [79][541/542] Elapsed 1m 49s (remain 0m 0s) Loss: 0.5984(0.6523) Grad: 74951.4844  LR: 0.000000  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.7014(0.7014) 


Epoch 79 - avg_train_loss: 0.6523  avg_val_loss: 0.7338  time: 129s
Epoch 79 - avg_train_Score: 0.6523 avgScore: 0.7338
Epoch 79 - Save Best Score: 0.7338 Model
Epoch 79 - Save Best Loss: 0.7338 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.7348(0.7338) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [80][0/542] Elapsed 0m 1s (remain 12m 18s) Loss: 0.5394(0.5394) Grad: 160341.2812  LR: 0.000000  
Epoch: [80][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5798(0.6332) Grad: 73028.6641  LR: 0.000000  
Epoch: [80][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7975(0.6353) Grad: 336079.0000  LR: 0.000000  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.7025(0.7025) 


Epoch 80 - avg_train_loss: 0.6353  avg_val_loss: 0.7336  time: 130s
Epoch 80 - avg_train_Score: 0.6353 avgScore: 0.7336
Epoch 80 - Save Best Score: 0.7336 Model
Epoch 80 - Save Best Loss: 0.7336 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.7345(0.7336) 


  check_point = torch.load(
score: 0.7336


check_point_pred shape (8674, 18)
pretrained: True


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):


Epoch: [1][0/542] Elapsed 0m 1s (remain 11m 56s) Loss: 5.3372(5.3372) Grad: 270340.9375  LR: 0.000010  
Epoch: [1][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 5.2101(5.2298) Grad: 574537.0000  LR: 0.000010  
Epoch: [1][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 4.7605(5.2078) Grad: 87320.1484  LR: 0.000010  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 4.8697(4.8697) 


Epoch 1 - avg_train_loss: 5.2078  avg_val_loss: 4.9107  time: 130s
Epoch 1 - avg_train_Score: 5.2078 avgScore: 4.9107
Epoch 1 - Save Best Score: 4.9107 Model
Epoch 1 - Save Best Loss: 4.9107 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 4.3563(4.9107) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):


Epoch: [2][0/542] Elapsed 0m 1s (remain 11m 59s) Loss: 4.6571(4.6571) Grad: 440863.4375  LR: 0.000010  
Epoch: [2][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 4.0847(4.7544) Grad: 309410.8750  LR: 0.000010  
Epoch: [2][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 4.7157(4.7414) Grad: 33993.7812  LR: 0.000010  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 4.3355(4.3355) 


Epoch 2 - avg_train_loss: 4.7414  avg_val_loss: 4.3717  time: 130s
Epoch 2 - avg_train_Score: 4.7414 avgScore: 4.3717
Epoch 2 - Save Best Score: 4.3717 Model
Epoch 2 - Save Best Loss: 4.3717 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 3.8609(4.3717) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):


Epoch: [3][0/542] Elapsed 0m 1s (remain 11m 50s) Loss: 4.4641(4.4641) Grad: 259481.0000  LR: 0.000100  
Epoch: [3][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 2.1117(3.2740) Grad: 35189.3047  LR: 0.000100  
Epoch: [3][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.4896(3.1574) Grad: 29826.0059  LR: 0.000100  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 2.3266(2.3266) 


  _warn_get_lr_called_within_step(self)
Epoch 3 - avg_train_loss: 3.1574  avg_val_loss: 2.2423  time: 130s
Epoch 3 - avg_train_Score: 3.1574 avgScore: 2.2423
Epoch 3 - Save Best Score: 2.2423 Model
Epoch 3 - Save Best Loss: 2.2423 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 1.9797(2.2423) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [4][0/542] Elapsed 0m 1s (remain 11m 59s) Loss: 1.6354(1.6354) Grad: 406720.4688  LR: 0.000100  
Epoch: [4][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 1.5033(1.5051) Grad: 64718.8047  LR: 0.000100  
Epoch: [4][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.2155(1.4938) Grad: 106364.0547  LR: 0.000100  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 1.1489(1.1489) 


Epoch 4 - avg_train_loss: 1.4938  avg_val_loss: 1.1830  time: 130s
Epoch 4 - avg_train_Score: 1.4938 avgScore: 1.1830
Epoch 4 - Save Best Score: 1.1830 Model
Epoch 4 - Save Best Loss: 1.1830 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.1272(1.1830) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [5][0/542] Elapsed 0m 1s (remain 13m 54s) Loss: 1.1717(1.1717) Grad: 500891.8438  LR: 0.000100  
Epoch: [5][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0034(1.2794) Grad: 202989.2031  LR: 0.000100  
Epoch: [5][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.0308(1.2748) Grad: 497462.7812  LR: 0.000100  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 1.0093(1.0093) 


Epoch 5 - avg_train_loss: 1.2748  avg_val_loss: 1.0540  time: 130s
Epoch 5 - avg_train_Score: 1.2748 avgScore: 1.0540
Epoch 5 - Save Best Score: 1.0540 Model
Epoch 5 - Save Best Loss: 1.0540 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.1383(1.0540) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [6][0/542] Elapsed 0m 1s (remain 12m 17s) Loss: 1.1140(1.1140) Grad: 616863.4375  LR: 0.000099  
Epoch: [6][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.1927(1.2252) Grad: 203544.1250  LR: 0.000099  
Epoch: [6][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.0581(1.2238) Grad: 216120.8125  LR: 0.000099  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.9832(0.9832) 


Epoch 6 - avg_train_loss: 1.2238  avg_val_loss: 1.0153  time: 130s
Epoch 6 - avg_train_Score: 1.2238 avgScore: 1.0153
Epoch 6 - Save Best Score: 1.0153 Model
Epoch 6 - Save Best Loss: 1.0153 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.1193(1.0153) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [7][0/542] Elapsed 0m 1s (remain 12m 1s) Loss: 1.2351(1.2351) Grad: 351384.7812  LR: 0.000099  
Epoch: [7][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0717(1.1612) Grad: 178045.9531  LR: 0.000099  
Epoch: [7][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8689(1.1620) Grad: 236717.8594  LR: 0.000099  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.9876(0.9876) 


Epoch 7 - avg_train_loss: 1.1620  avg_val_loss: 0.9712  time: 130s
Epoch 7 - avg_train_Score: 1.1620 avgScore: 0.9712
Epoch 7 - Save Best Score: 0.9712 Model
Epoch 7 - Save Best Loss: 0.9712 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.1112(0.9712) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [8][0/542] Elapsed 0m 1s (remain 11m 52s) Loss: 1.0255(1.0255) Grad: 301079.8438  LR: 0.000099  
Epoch: [8][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.1962(1.1332) Grad: 88520.8984  LR: 0.000099  
Epoch: [8][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.7079(1.1316) Grad: 58965.9805  LR: 0.000099  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.9568(0.9568) 


Epoch 8 - avg_train_loss: 1.1316  avg_val_loss: 0.9541  time: 130s
Epoch 8 - avg_train_Score: 1.1316 avgScore: 0.9541
Epoch 8 - Save Best Score: 0.9541 Model
Epoch 8 - Save Best Loss: 0.9541 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.0896(0.9541) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [9][0/542] Elapsed 0m 1s (remain 11m 43s) Loss: 1.1444(1.1444) Grad: 269068.6562  LR: 0.000098  
Epoch: [9][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.1971(1.0428) Grad: 365784.6562  LR: 0.000098  
Epoch: [9][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9074(1.0391) Grad: 148557.1094  LR: 0.000098  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 40s) Loss: 0.9137(0.9137) 


Epoch 9 - avg_train_loss: 1.0391  avg_val_loss: 0.9172  time: 130s
Epoch 9 - avg_train_Score: 1.0391 avgScore: 0.9172
Epoch 9 - Save Best Score: 0.9172 Model
Epoch 9 - Save Best Loss: 0.9172 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.0208(0.9172) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [10][0/542] Elapsed 0m 1s (remain 13m 40s) Loss: 0.8811(0.8811) Grad: 356856.7500  LR: 0.000098  
Epoch: [10][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9703(1.0167) Grad: 178099.5312  LR: 0.000098  
Epoch: [10][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.1315(1.0155) Grad: 154683.0469  LR: 0.000098  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 42s) Loss: 0.8865(0.8865) 


Epoch 10 - avg_train_loss: 1.0155  avg_val_loss: 0.9051  time: 130s
Epoch 10 - avg_train_Score: 1.0155 avgScore: 0.9051
Epoch 10 - Save Best Score: 0.9051 Model
Epoch 10 - Save Best Loss: 0.9051 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.0134(0.9051) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [11][0/542] Elapsed 0m 1s (remain 11m 53s) Loss: 1.1388(1.1388) Grad: 344572.9688  LR: 0.000097  
Epoch: [11][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 1.3052(0.9697) Grad: 184444.6719  LR: 0.000097  
Epoch: [11][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.2452(0.9727) Grad: 182103.0000  LR: 0.000097  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.8770(0.8770) 


Epoch 11 - avg_train_loss: 0.9727  avg_val_loss: 0.8854  time: 130s
Epoch 11 - avg_train_Score: 0.9727 avgScore: 0.8854
Epoch 11 - Save Best Score: 0.8854 Model
Epoch 11 - Save Best Loss: 0.8854 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.0104(0.8854) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [12][0/542] Elapsed 0m 1s (remain 12m 6s) Loss: 0.8666(0.8666) Grad: 276320.4062  LR: 0.000096  
Epoch: [12][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8010(0.9553) Grad: 148363.4844  LR: 0.000096  
Epoch: [12][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8328(0.9545) Grad: 118490.6953  LR: 0.000096  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 46s) Loss: 0.8710(0.8710) 


Epoch 12 - avg_train_loss: 0.9545  avg_val_loss: 0.8776  time: 130s
Epoch 12 - avg_train_Score: 0.9545 avgScore: 0.8776
Epoch 12 - Save Best Score: 0.8776 Model
Epoch 12 - Save Best Loss: 0.8776 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9896(0.8776) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [13][0/542] Elapsed 0m 1s (remain 11m 44s) Loss: 0.8691(0.8691) Grad: 465187.3750  LR: 0.000095  
Epoch: [13][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9125(0.9631) Grad: 168990.2500  LR: 0.000095  
Epoch: [13][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9625(0.9679) Grad: 127682.8516  LR: 0.000095  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.8900(0.8900) 


Epoch 13 - avg_train_loss: 0.9679  avg_val_loss: 0.8721  time: 130s
Epoch 13 - avg_train_Score: 0.9679 avgScore: 0.8721
Epoch 13 - Save Best Score: 0.8721 Model
Epoch 13 - Save Best Loss: 0.8721 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 1.0347(0.8721) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [14][0/542] Elapsed 0m 1s (remain 11m 56s) Loss: 1.0236(1.0236) Grad: 236067.4688  LR: 0.000095  
Epoch: [14][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8409(0.9082) Grad: 210712.0938  LR: 0.000095  
Epoch: [14][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8631(0.9081) Grad: 243886.5625  LR: 0.000095  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 43s) Loss: 0.8581(0.8581) 


Epoch 14 - avg_train_loss: 0.9081  avg_val_loss: 0.8608  time: 130s
Epoch 14 - avg_train_Score: 0.9081 avgScore: 0.8608
Epoch 14 - Save Best Score: 0.8608 Model
Epoch 14 - Save Best Loss: 0.8608 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9890(0.8608) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [15][0/542] Elapsed 0m 1s (remain 11m 47s) Loss: 0.8079(0.8079) Grad: 275514.2188  LR: 0.000094  
Epoch: [15][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.7789(0.9246) Grad: 164337.8594  LR: 0.000094  
Epoch: [15][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9966(0.9243) Grad: 219785.4688  LR: 0.000094  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 39s) Loss: 0.8455(0.8455) 


Epoch 15 - avg_train_loss: 0.9243  avg_val_loss: 0.8511  time: 130s
Epoch 15 - avg_train_Score: 0.9243 avgScore: 0.8511
Epoch 15 - Save Best Score: 0.8511 Model
Epoch 15 - Save Best Loss: 0.8511 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9628(0.8511) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [16][0/542] Elapsed 0m 1s (remain 11m 54s) Loss: 0.8546(0.8546) Grad: 343621.3750  LR: 0.000093  
Epoch: [16][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.2213(0.8845) Grad: 97541.2500  LR: 0.000093  
Epoch: [16][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8137(0.8869) Grad: 171472.1875  LR: 0.000093  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.8569(0.8569) 


Epoch 16 - avg_train_loss: 0.8869  avg_val_loss: 0.8448  time: 130s
Epoch 16 - avg_train_Score: 0.8869 avgScore: 0.8448
Epoch 16 - Save Best Score: 0.8448 Model
Epoch 16 - Save Best Loss: 0.8448 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.9688(0.8448) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [17][0/542] Elapsed 0m 1s (remain 13m 2s) Loss: 0.9835(0.9835) Grad: 210513.0938  LR: 0.000092  
Epoch: [17][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8998(0.8613) Grad: 217200.5625  LR: 0.000092  
Epoch: [17][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6895(0.8609) Grad: 305503.9688  LR: 0.000092  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.8339(0.8339) 


Epoch 17 - avg_train_loss: 0.8609  avg_val_loss: 0.8385  time: 130s
Epoch 17 - avg_train_Score: 0.8609 avgScore: 0.8385
Epoch 17 - Save Best Score: 0.8385 Model
Epoch 17 - Save Best Loss: 0.8385 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9702(0.8385) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [18][0/542] Elapsed 0m 1s (remain 12m 2s) Loss: 0.7888(0.7888) Grad: 214019.5469  LR: 0.000091  
Epoch: [18][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7849(0.9621) Grad: 83774.8594  LR: 0.000091  
Epoch: [18][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.0082(0.9607) Grad: 110486.1797  LR: 0.000091  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.8953(0.8953) 


Epoch 18 - avg_train_loss: 0.9607  avg_val_loss: 0.8621  time: 130s
Epoch 18 - avg_train_Score: 0.9607 avgScore: 0.8621


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 1.0247(0.8621) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [19][0/542] Elapsed 0m 1s (remain 12m 10s) Loss: 0.9583(0.9583) Grad: 238421.4375  LR: 0.000089  
Epoch: [19][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7426(0.8941) Grad: 115023.2969  LR: 0.000089  
Epoch: [19][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9631(0.8923) Grad: 85088.5703  LR: 0.000089  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.8764(0.8764) 


Epoch 19 - avg_train_loss: 0.8923  avg_val_loss: 0.8488  time: 130s
Epoch 19 - avg_train_Score: 0.8923 avgScore: 0.8488


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9835(0.8488) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [20][0/542] Elapsed 0m 1s (remain 12m 1s) Loss: 0.8404(0.8404) Grad: 296959.1875  LR: 0.000088  
Epoch: [20][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.8793(0.8441) Grad: 151789.0000  LR: 0.000088  
Epoch: [20][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7018(0.8459) Grad: 143102.6094  LR: 0.000088  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 38s) Loss: 0.8492(0.8492) 


Epoch 20 - avg_train_loss: 0.8459  avg_val_loss: 0.8349  time: 130s
Epoch 20 - avg_train_Score: 0.8459 avgScore: 0.8349
Epoch 20 - Save Best Score: 0.8349 Model
Epoch 20 - Save Best Loss: 0.8349 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9412(0.8349) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [21][0/542] Elapsed 0m 1s (remain 12m 23s) Loss: 0.7926(0.7926) Grad: 235117.0000  LR: 0.000087  
Epoch: [21][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7552(0.8100) Grad: 267248.1875  LR: 0.000087  
Epoch: [21][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8132(0.8124) Grad: 221397.5312  LR: 0.000087  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 42s) Loss: 0.8356(0.8356) 


Epoch 21 - avg_train_loss: 0.8124  avg_val_loss: 0.8283  time: 130s
Epoch 21 - avg_train_Score: 0.8124 avgScore: 0.8283
Epoch 21 - Save Best Score: 0.8283 Model
Epoch 21 - Save Best Loss: 0.8283 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9487(0.8283) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [22][0/542] Elapsed 0m 1s (remain 12m 3s) Loss: 0.6639(0.6639) Grad: 197009.0625  LR: 0.000085  
Epoch: [22][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6647(0.8064) Grad: 231291.9844  LR: 0.000085  
Epoch: [22][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6992(0.8061) Grad: 265420.5000  LR: 0.000085  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.8441(0.8441) 


Epoch 22 - avg_train_loss: 0.8061  avg_val_loss: 0.8230  time: 130s
Epoch 22 - avg_train_Score: 0.8061 avgScore: 0.8230
Epoch 22 - Save Best Score: 0.8230 Model
Epoch 22 - Save Best Loss: 0.8230 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.9159(0.8230) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [23][0/542] Elapsed 0m 1s (remain 13m 58s) Loss: 0.8056(0.8056) Grad: 197272.0938  LR: 0.000084  
Epoch: [23][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8214(0.8144) Grad: 123562.9688  LR: 0.000084  
Epoch: [23][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.3497(0.8189) Grad: 108863.7266  LR: 0.000084  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 44s) Loss: 0.8568(0.8568) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9402(0.8216) 


Epoch 23 - avg_train_loss: 0.8189  avg_val_loss: 0.8216  time: 130s
Epoch 23 - avg_train_Score: 0.8189 avgScore: 0.8216
Epoch 23 - Save Best Score: 0.8216 Model
Epoch 23 - Save Best Loss: 0.8216 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [24][0/542] Elapsed 0m 1s (remain 11m 56s) Loss: 0.7389(0.7389) Grad: 280943.2812  LR: 0.000083  
Epoch: [24][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7121(0.7982) Grad: 218419.3125  LR: 0.000083  
Epoch: [24][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8572(0.7971) Grad: 287285.5625  LR: 0.000083  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.8610(0.8610) 


Epoch 24 - avg_train_loss: 0.7971  avg_val_loss: 0.8218  time: 130s
Epoch 24 - avg_train_Score: 0.7971 avgScore: 0.8218


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9518(0.8218) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [25][0/542] Elapsed 0m 1s (remain 12m 22s) Loss: 0.6865(0.6865) Grad: 363261.1562  LR: 0.000081  
Epoch: [25][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.9392(0.8165) Grad: 152045.7188  LR: 0.000081  
Epoch: [25][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7316(0.8195) Grad: 116373.8516  LR: 0.000081  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.8313(0.8313) 


Epoch 25 - avg_train_loss: 0.8195  avg_val_loss: 0.8261  time: 130s
Epoch 25 - avg_train_Score: 0.8195 avgScore: 0.8261


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.9398(0.8261) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [26][0/542] Elapsed 0m 1s (remain 12m 0s) Loss: 0.9662(0.9662) Grad: 203161.2969  LR: 0.000079  
Epoch: [26][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8505(0.8033) Grad: 127265.0000  LR: 0.000079  
Epoch: [26][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7129(0.8015) Grad: 109464.8594  LR: 0.000079  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 34s) Loss: 0.8306(0.8306) 


Epoch 26 - avg_train_loss: 0.8015  avg_val_loss: 0.8196  time: 130s
Epoch 26 - avg_train_Score: 0.8015 avgScore: 0.8196
Epoch 26 - Save Best Score: 0.8196 Model
Epoch 26 - Save Best Loss: 0.8196 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9345(0.8196) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [27][0/542] Elapsed 0m 1s (remain 12m 10s) Loss: 0.6674(0.6674) Grad: 214852.4844  LR: 0.000078  
Epoch: [27][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6937(0.7833) Grad: 233405.8125  LR: 0.000078  
Epoch: [27][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7917(0.7853) Grad: 190634.9062  LR: 0.000078  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.8149(0.8149) 


Epoch 27 - avg_train_loss: 0.7853  avg_val_loss: 0.8128  time: 130s
Epoch 27 - avg_train_Score: 0.7853 avgScore: 0.8128
Epoch 27 - Save Best Score: 0.8128 Model
Epoch 27 - Save Best Loss: 0.8128 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8740(0.8128) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [28][0/542] Elapsed 0m 1s (remain 12m 13s) Loss: 0.8243(0.8243) Grad: 220051.9375  LR: 0.000076  
Epoch: [28][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8346(0.7910) Grad: 155514.8594  LR: 0.000076  
Epoch: [28][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9157(0.7944) Grad: 157379.6562  LR: 0.000076  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.8177(0.8177) 


Epoch 28 - avg_train_loss: 0.7944  avg_val_loss: 0.8138  time: 130s
Epoch 28 - avg_train_Score: 0.7944 avgScore: 0.8138


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8984(0.8138) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [29][0/542] Elapsed 0m 1s (remain 12m 1s) Loss: 0.7160(0.7160) Grad: 223463.3281  LR: 0.000075  
Epoch: [29][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6913(0.7688) Grad: 227029.3125  LR: 0.000075  
Epoch: [29][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5991(0.7699) Grad: 171562.3281  LR: 0.000075  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.8322(0.8322) 


Epoch 29 - avg_train_loss: 0.7699  avg_val_loss: 0.8150  time: 130s
Epoch 29 - avg_train_Score: 0.7699 avgScore: 0.8150


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9026(0.8150) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [30][0/542] Elapsed 0m 1s (remain 12m 12s) Loss: 0.6517(0.6517) Grad: 220964.7188  LR: 0.000073  
Epoch: [30][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6916(0.7621) Grad: 107305.0625  LR: 0.000073  
Epoch: [30][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7484(0.7629) Grad: 92089.7422  LR: 0.000073  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.8101(0.8101) 


Epoch 30 - avg_train_loss: 0.7629  avg_val_loss: 0.8122  time: 130s
Epoch 30 - avg_train_Score: 0.7629 avgScore: 0.8122
Epoch 30 - Save Best Score: 0.8122 Model
Epoch 30 - Save Best Loss: 0.8122 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8951(0.8122) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [31][0/542] Elapsed 0m 1s (remain 11m 51s) Loss: 0.6941(0.6941) Grad: 234726.4219  LR: 0.000071  
Epoch: [31][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0882(0.7471) Grad: 192821.0781  LR: 0.000071  
Epoch: [31][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5927(0.7496) Grad: 233986.4844  LR: 0.000071  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 31s) Loss: 0.8127(0.8127) 


Epoch 31 - avg_train_loss: 0.7496  avg_val_loss: 0.8057  time: 130s
Epoch 31 - avg_train_Score: 0.7496 avgScore: 0.8057
Epoch 31 - Save Best Score: 0.8057 Model
Epoch 31 - Save Best Loss: 0.8057 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8979(0.8057) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [32][0/542] Elapsed 0m 1s (remain 12m 34s) Loss: 0.8367(0.8367) Grad: 484758.2812  LR: 0.000069  
Epoch: [32][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.9275(0.7480) Grad: 198083.4062  LR: 0.000069  
Epoch: [32][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.9239(0.7458) Grad: 299590.8750  LR: 0.000069  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 44s) Loss: 0.8649(0.8649) 


Epoch 32 - avg_train_loss: 0.7458  avg_val_loss: 0.8020  time: 130s
Epoch 32 - avg_train_Score: 0.7458 avgScore: 0.8020
Epoch 32 - Save Best Score: 0.8020 Model
Epoch 32 - Save Best Loss: 0.8020 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8560(0.8020) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [33][0/542] Elapsed 0m 1s (remain 12m 10s) Loss: 0.6431(0.6431) Grad: 187838.2500  LR: 0.000067  
Epoch: [33][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7064(0.7504) Grad: 257557.3281  LR: 0.000067  
Epoch: [33][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7070(0.7493) Grad: 219126.7031  LR: 0.000067  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 35s) Loss: 0.8482(0.8482) 
EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8605(0.8024) 


Epoch 33 - avg_train_loss: 0.7493  avg_val_loss: 0.8024  time: 130s
Epoch 33 - avg_train_Score: 0.7493 avgScore: 0.8024
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [34][0/542] Elapsed 0m 1s (remain 12m 23s) Loss: 0.7253(0.7253) Grad: 192788.3906  LR: 0.000066  
Epoch: [34][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8832(0.7361) Grad: 190322.0312  LR: 0.000066  
Epoch: [34][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.5078(0.7394) Grad: 243349.5000  LR: 0.000066  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.8214(0.8214) 


Epoch 34 - avg_train_loss: 0.7394  avg_val_loss: 0.7959  time: 130s
Epoch 34 - avg_train_Score: 0.7394 avgScore: 0.7959
Epoch 34 - Save Best Score: 0.7959 Model
Epoch 34 - Save Best Loss: 0.7959 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8580(0.7959) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [35][0/542] Elapsed 0m 1s (remain 12m 21s) Loss: 0.6248(0.6248) Grad: 205734.3438  LR: 0.000064  
Epoch: [35][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6322(0.7264) Grad: 208511.4219  LR: 0.000064  
Epoch: [35][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5835(0.7259) Grad: 179556.1562  LR: 0.000064  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.8151(0.8151) 


Epoch 35 - avg_train_loss: 0.7259  avg_val_loss: 0.8003  time: 130s
Epoch 35 - avg_train_Score: 0.7259 avgScore: 0.8003


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8579(0.8003) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [36][0/542] Elapsed 0m 1s (remain 12m 11s) Loss: 0.6274(0.6274) Grad: 197218.6094  LR: 0.000062  
Epoch: [36][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7066(0.7219) Grad: 163078.4688  LR: 0.000062  
Epoch: [36][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6480(0.7222) Grad: 199424.5938  LR: 0.000062  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.8009(0.8009) 


Epoch 36 - avg_train_loss: 0.7222  avg_val_loss: 0.7986  time: 130s
Epoch 36 - avg_train_Score: 0.7222 avgScore: 0.7986


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8818(0.7986) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [37][0/542] Elapsed 0m 1s (remain 12m 19s) Loss: 0.6972(0.6972) Grad: 205828.4531  LR: 0.000060  
Epoch: [37][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.6340(0.7287) Grad: 207517.1094  LR: 0.000060  
Epoch: [37][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5996(0.7261) Grad: 208299.6250  LR: 0.000060  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 33s) Loss: 0.8004(0.8004) 


Epoch 37 - avg_train_loss: 0.7261  avg_val_loss: 0.7961  time: 130s
Epoch 37 - avg_train_Score: 0.7261 avgScore: 0.7961


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8746(0.7961) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [38][0/542] Elapsed 0m 1s (remain 12m 4s) Loss: 0.5454(0.5454) Grad: 164237.6875  LR: 0.000058  
Epoch: [38][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6675(0.7214) Grad: 219648.0938  LR: 0.000058  
Epoch: [38][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6543(0.7211) Grad: 186860.8281  LR: 0.000058  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.8099(0.8099) 


Epoch 38 - avg_train_loss: 0.7211  avg_val_loss: 0.7985  time: 130s
Epoch 38 - avg_train_Score: 0.7211 avgScore: 0.7985


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8860(0.7985) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [39][0/542] Elapsed 0m 1s (remain 13m 14s) Loss: 0.5759(0.5759) Grad: 193585.5312  LR: 0.000056  
Epoch: [39][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0637(0.7211) Grad: 185712.0156  LR: 0.000056  
Epoch: [39][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 1.1023(0.7215) Grad: 110963.8203  LR: 0.000056  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 38s) Loss: 0.8116(0.8116) 


Epoch 39 - avg_train_loss: 0.7215  avg_val_loss: 0.8012  time: 130s
Epoch 39 - avg_train_Score: 0.7215 avgScore: 0.8012


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8998(0.8012) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [40][0/542] Elapsed 0m 1s (remain 12m 8s) Loss: 0.6360(0.6360) Grad: 178660.4531  LR: 0.000054  
Epoch: [40][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6328(0.7166) Grad: 192951.4062  LR: 0.000054  
Epoch: [40][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5238(0.7167) Grad: 232345.5625  LR: 0.000054  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.8076(0.8076) 


Epoch 40 - avg_train_loss: 0.7167  avg_val_loss: 0.7972  time: 130s
Epoch 40 - avg_train_Score: 0.7167 avgScore: 0.7972


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8853(0.7972) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [41][0/542] Elapsed 0m 1s (remain 12m 20s) Loss: 0.6492(0.6492) Grad: 161367.8438  LR: 0.000052  
Epoch: [41][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.8402(0.7187) Grad: 195882.2188  LR: 0.000052  
Epoch: [41][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7697(0.7197) Grad: 173656.5938  LR: 0.000052  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.8102(0.8102) 


Epoch 41 - avg_train_loss: 0.7197  avg_val_loss: 0.7934  time: 130s
Epoch 41 - avg_train_Score: 0.7197 avgScore: 0.7934
Epoch 41 - Save Best Score: 0.7934 Model
Epoch 41 - Save Best Loss: 0.7934 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8954(0.7934) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [42][0/542] Elapsed 0m 1s (remain 13m 5s) Loss: 0.6744(0.6744) Grad: 170772.3750  LR: 0.000050  
Epoch: [42][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0588(0.7089) Grad: 168135.5156  LR: 0.000050  
Epoch: [42][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5643(0.7107) Grad: 336252.5000  LR: 0.000050  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.8164(0.8164) 


Epoch 42 - avg_train_loss: 0.7107  avg_val_loss: 0.7966  time: 130s
Epoch 42 - avg_train_Score: 0.7107 avgScore: 0.7966


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9043(0.7966) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [43][0/542] Elapsed 0m 1s (remain 12m 5s) Loss: 0.7933(0.7933) Grad: 190424.6719  LR: 0.000048  
Epoch: [43][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7284(0.7215) Grad: 110908.5391  LR: 0.000048  
Epoch: [43][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.8669(0.7186) Grad: 116318.9531  LR: 0.000048  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.8256(0.8256) 


Epoch 43 - avg_train_loss: 0.7186  avg_val_loss: 0.7929  time: 130s
Epoch 43 - avg_train_Score: 0.7186 avgScore: 0.7929
Epoch 43 - Save Best Score: 0.7929 Model
Epoch 43 - Save Best Loss: 0.7929 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8669(0.7929) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [44][0/542] Elapsed 0m 1s (remain 12m 20s) Loss: 0.5325(0.5325) Grad: 147482.7031  LR: 0.000046  
Epoch: [44][500/542] Elapsed 1m 41s (remain 0m 8s) Loss: 0.5079(0.6975) Grad: 170178.6562  LR: 0.000046  
Epoch: [44][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5080(0.6981) Grad: 170345.6562  LR: 0.000046  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 32s) Loss: 0.8011(0.8011) 


Epoch 44 - avg_train_loss: 0.6981  avg_val_loss: 0.7899  time: 130s
Epoch 44 - avg_train_Score: 0.6981 avgScore: 0.7899
Epoch 44 - Save Best Score: 0.7899 Model
Epoch 44 - Save Best Loss: 0.7899 Model


EVAL: [67/68] Elapsed 0m 18s (remain 0m 0s) Loss: 0.8888(0.7899) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [45][0/542] Elapsed 0m 1s (remain 12m 14s) Loss: 1.2835(1.2835) Grad: 141175.6719  LR: 0.000044  
Epoch: [45][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.6858(0.6907) Grad: 177324.0469  LR: 0.000044  
Epoch: [45][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.5641(0.6920) Grad: 213248.7344  LR: 0.000044  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.8159(0.8159) 


Epoch 45 - avg_train_loss: 0.6920  avg_val_loss: 0.7901  time: 130s
Epoch 45 - avg_train_Score: 0.6920 avgScore: 0.7901


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8871(0.7901) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [46][0/542] Elapsed 0m 1s (remain 12m 7s) Loss: 0.9358(0.9358) Grad: 160981.0312  LR: 0.000042  
Epoch: [46][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 1.0477(0.7008) Grad: 86210.2578  LR: 0.000042  
Epoch: [46][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.7362(0.6996) Grad: 82459.8750  LR: 0.000042  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 41s) Loss: 0.8309(0.8309) 


Epoch 46 - avg_train_loss: 0.6996  avg_val_loss: 0.7901  time: 130s
Epoch 46 - avg_train_Score: 0.6996 avgScore: 0.7901


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.9119(0.7901) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [47][0/542] Elapsed 0m 1s (remain 12m 15s) Loss: 0.4708(0.4708) Grad: 214736.5469  LR: 0.000040  
Epoch: [47][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.5498(0.6940) Grad: 70399.8438  LR: 0.000040  
Epoch: [47][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.4930(0.6940) Grad: 90811.4141  LR: 0.000040  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 37s) Loss: 0.8203(0.8203) 


Epoch 47 - avg_train_loss: 0.6940  avg_val_loss: 0.7917  time: 130s
Epoch 47 - avg_train_Score: 0.6940 avgScore: 0.7917


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8821(0.7917) 


  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [48][0/542] Elapsed 0m 1s (remain 13m 9s) Loss: 0.6322(0.6322) Grad: 317044.0000  LR: 0.000038  
Epoch: [48][500/542] Elapsed 1m 42s (remain 0m 8s) Loss: 0.7708(0.6795) Grad: 192060.1094  LR: 0.000038  
Epoch: [48][541/542] Elapsed 1m 50s (remain 0m 0s) Loss: 0.6130(0.6794) Grad: 179846.8594  LR: 0.000038  
EVAL: [0/68] Elapsed 0m 1s (remain 1m 36s) Loss: 0.7947(0.7947) 


Epoch 48 - avg_train_loss: 0.6794  avg_val_loss: 0.7887  time: 130s
Epoch 48 - avg_train_Score: 0.6794 avgScore: 0.7887
Epoch 48 - Save Best Score: 0.7887 Model


EVAL: [67/68] Elapsed 0m 19s (remain 0m 0s) Loss: 0.8604(0.7887) 


Epoch 48 - Save Best Loss: 0.7887 Model
  scaler = GradScaler(enabled=CFG.use_amp)
  with autocast(CFG.use_amp):
  _warn_get_lr_called_within_step(self)


Epoch: [49][0/542] Elapsed 0m 1s (remain 11m 50s) Loss: 1.0522(1.0522) Grad: 171171.9062  LR: 0.000037  


KeyboardInterrupt: 

# inference

In [22]:
class EnsembleModel:
    def __init__(self):
        self.models = []

    def __call__(self, x):
        outputs = []
        for model in self.models:
            if CFG.objective_cv == 'binary':
                outputs.append(torch.sigmoid(model(x)).to('cpu').numpy())
            elif CFG.objective_cv == 'multiclass':
                outputs.append(torch.softmax(
                    model(x), axis=1).to('cpu').numpy())
            elif CFG.objective_cv == 'regression':
                outputs.append(model(x).to('cpu').numpy())

        avg_preds = np.mean(outputs, axis=0)
        return avg_preds

    def add_model(self, model):
        self.models.append(model)


def test_fn(valid_loader, model, device):
    preds = []

    for step, (images) in tqdm(enumerate(valid_loader), total=len(valid_loader)):
        images = images.to(device)

        with torch.no_grad():
            y_preds = model(images)

        preds.append(y_preds)

    predictions = np.concatenate(preds)
    return predictions


def inference():
    test = pd.read_csv(CFG.comp_dataset_path +
                       'test_features.csv')

    test['base_path'] = CFG.comp_dataset_path + 'images/' + test['ID'] + '/'

    paths = []
    for base_path in test['base_path'].values:
        suffixs = ['image_t-1.0.png', 'image_t-0.5.png', 'image_t.png']
        for suffix in suffixs:
            path = base_path + suffix
            paths.append(path)

    print(paths[:5])

    CFG.video_cache = make_video_cache(paths)

    print(test.head(5))

    valid_dataset = CustomDataset(
        test, CFG, transform=get_transforms(data='valid', cfg=CFG))
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size * 2,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    model = EnsembleModel()
    folds = [0] if CFG.use_holdout else list(range(CFG.n_fold))
    for fold in folds:
        _model = CustomModel(CFG, pretrained=False)
        _model.to(device)

        model_path = CFG.model_dir + \
            f'{CFG.model_name}_fold{fold}_{CFG.inf_weight}.pth'
        print('load', model_path)
        state = torch.load(model_path)['model']
        _model.load_state_dict(state)
        _model.eval()

        # _model = tta.ClassificationTTAWrapper(
        #     _model, tta.aliases.five_crop_transform(256, 256))

        model.add_model(_model)

    preds = test_fn(valid_loader, model, device)

    test[CFG.target_col] = preds
    test.to_csv(CFG.submission_dir +
                'submission_oof.csv', index=False)
    test[CFG.target_col].to_csv(
        CFG.submission_dir + f'submission_{CFG.exp_name}.csv', index=False)

In [23]:
inference()

['../raw/atmacup_18_dataset/images/012baccc145d400c896cb82065a93d42_120/image_t-1.0.png', '../raw/atmacup_18_dataset/images/012baccc145d400c896cb82065a93d42_120/image_t-0.5.png', '../raw/atmacup_18_dataset/images/012baccc145d400c896cb82065a93d42_120/image_t.png', '../raw/atmacup_18_dataset/images/012baccc145d400c896cb82065a93d42_220/image_t-1.0.png', '../raw/atmacup_18_dataset/images/012baccc145d400c896cb82065a93d42_220/image_t-0.5.png']
[255, 227, 199, 170, 142, 114, 85, 57, 29]


0it [00:00, ?it/s]

                                     ID      vEgo      aEgo  steeringAngleDeg  \
0  012baccc145d400c896cb82065a93d42_120  3.374273 -0.019360        -34.008415   
1  012baccc145d400c896cb82065a93d42_220  2.441048 -0.022754        307.860077   
2  012baccc145d400c896cb82065a93d42_320  3.604152 -0.286239         10.774388   
3  012baccc145d400c896cb82065a93d42_420  2.048902 -0.537628         61.045235   
4  01d738e799d260a10f6324f78023b38f_120  2.201528 -1.898600          5.740093   

   steeringTorque  brake  brakePressed  gas  gasPressed gearShifter  \
0            17.0    0.0         False  0.0       False       drive   
1           295.0    0.0          True  0.0       False       drive   
2          -110.0    0.0          True  0.0       False       drive   
3           189.0    0.0          True  0.0       False       drive   
4           -41.0    0.0          True  0.0       False       drive   

   leftBlinker  rightBlinker  \
0        False         False   
1        False        

  state = torch.load(model_path)['model']


load ../proc/baseline/outputs/atmacup_18_cnn_swin_small_2/atmacup_18-models/swin_small_patch4_window7_224_fold1_last.pth
pretrained: False
load ../proc/baseline/outputs/atmacup_18_cnn_swin_small_2/atmacup_18-models/swin_small_patch4_window7_224_fold2_last.pth
pretrained: False
load ../proc/baseline/outputs/atmacup_18_cnn_swin_small_2/atmacup_18-models/swin_small_patch4_window7_224_fold3_last.pth
pretrained: False
load ../proc/baseline/outputs/atmacup_18_cnn_swin_small_2/atmacup_18-models/swin_small_patch4_window7_224_fold4_last.pth


  0%|          | 0/14 [00:00<?, ?it/s]