# Library

In [1]:
! pip install wandb opencv-python-headless==4.1.2.30 albumentations torch-summary timm==0.5.4 einops joblib icecream  -qq -U

[K     |████████████████████████████████| 1.7 MB 13.2 MB/s 
[K     |████████████████████████████████| 21.8 MB 1.5 MB/s 
[K     |████████████████████████████████| 102 kB 49.7 MB/s 
[K     |████████████████████████████████| 431 kB 55.2 MB/s 
[K     |████████████████████████████████| 180 kB 48.2 MB/s 
[K     |████████████████████████████████| 97 kB 5.9 MB/s 
[K     |████████████████████████████████| 142 kB 48.1 MB/s 
[K     |████████████████████████████████| 63 kB 1.7 MB/s 
[?25h  Building wheel for subprocess32 (setup.py) ... [?25l[?25hdone
  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [2]:
from sklearn.metrics import f1_score
from glob import glob
import pathlib
from pathlib import Path
from torchsummary import summary
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from pprint import pprint
import urllib.request
import csv
import numpy as np
from einops import rearrange, reduce, repeat
from torch.cuda import amp
from tqdm import tqdm
import wandb
import time
import copy
from collections import defaultdict
from sklearn.metrics import mean_squared_error
import joblib
import gc
import os
from icecream import ic
from sklearn.model_selection import train_test_split
import gc
import cv2
import copy
import time
import random
from PIL import Image

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, KFold

import timm

import json

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
# Short aliases for ANSI codes: cyan foreground and "reset all styles".
# NOTE(review): neither alias is referenced in the cells visible here.
c_ = Fore.CYAN
sr_ = Style.RESET_ALL

import warnings
# Silence every Python warning for the rest of the session. This also hides
# deprecation warnings emitted by the libraries imported above.
warnings.filterwarnings("ignore")

# For descriptive error messages
# NOTE(review): presumably meant to make CUDA errors surface at the call that
# caused them; it has to be set before CUDA is first used -- confirm.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


from sklearn.metrics import f1_score

# ENV

In [3]:

# Execution environment marker. Other values used by the author: 'COLAB', 'SYSTEM'.
ENV = 'KAGGLE'

# Mixed-precision (AMP) switch read by train_one_epoch. Set to True to enable.
FP16 = False


# Every tunable of the experiment lives here; the dict is also sent to W&B as
# the run configuration.
CONFIG = dict(
    nickname = 'BEiT 224 in22k',          # display name given to the W&B run
    seed=42,                              # passed to set_seed()
    backbone='beit_base_patch16_224_in22k',  # timm model name
    embedder=None,                        # forwarded to Model, which does not use it
    train_batch_size=8,
    valid_batch_size=16,
    img_size=224,                         # images are resized to img_size x img_size
    num_epochs=50,                        # length of the fine-tuning phase
    early_stopping = False,
    early_stopping_step = 5,
    learning_rate=1e-4,                   # LR of the first (frozen-backbone) phase
    scheduler='CosineAnnealingLR',
    min_lr=1e-6,                          # eta_min of the cosine schedule
    T_max=100,                            # T_max of the cosine schedule
    num_classes = 25,
    weight_decay=1e-6,
    device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    competition='lg',
    _wandb_kernel='deb'
)


# Single source of truth for the compute device: the model is moved with
# CONFIG['device'] and the batches with `device`, so both must be the same
# object instead of two independently constructed ones.
device = CONFIG['device']


# SET SEED 

In [4]:
def set_seed(seed=42):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and CUDA), switches
    cuDNN to deterministic mode with benchmarking disabled, and stores the
    seed in the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Integer seed shared by all generators.
    """
    # Fixed value for the hash seed.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # The individual generators are independent, so the order is irrelevant.
    for seed_fn in (random.seed, np.random.seed,
                    torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    # cuDNN needs both flags set to give repeatable results.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


# Seed all generators once, using the value stored in CONFIG.
set_seed(CONFIG['seed'])


# Read the Data


In [5]:
import wandb

# Start (or resume) the tracked W&B run; CONFIG is stored as the run config.
run = wandb.init(
    project="lg",
    entity="jiwon7258",
    config=CONFIG,
    job_type='train',
    resume=True,
)

# Register the training-data artifact as an input of this run.
dataset = wandb.run.use_artifact('jiwon7258/lg/lg_train:v0', type='dataset')

# Show the run under the nickname from CONFIG.
run.name = CONFIG['nickname']

# Download the artifact's contents and keep their location as a Path.
dataset_dir = Path(dataset.download())


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


[34m[1mwandb[0m: Downloading large artifact lg_train:v0, 918.24MB. 17301 files... Done. 0:0:0


In [6]:
# Root directory of the downloaded training artifact; the sample files are
# globbed from its sub-directories in the Dataset section.
TRAIN_PATH = dataset_dir
# TEST_PATH = dataset_dir / 'test'

# Augmentations

In [7]:
# Target side length; both pipelines resize to a square of this size.
_side = CONFIG['img_size']

# Random augmentations that are applied to training images only.
_train_only_augs = [
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1,
                       rotate_limit=35, p=0.5),
    A.RGBShift(r_shift_limit=15, g_shift_limit=15,
               b_shift_limit=15, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
]

# Albumentations pipelines keyed by split. Both resize, normalise and convert
# to a tensor; only "train" adds the random augmentations in between.
# NOTE(review): A.Normalize() is used with its default statistics -- confirm
# they match what the pretrained backbone expects.
data_transforms = dict(
    train=A.Compose(
        [A.Resize(_side, _side), *_train_only_augs, A.Normalize(), ToTensorV2()],
        p=1.,
    ),
    valid=A.Compose(
        [A.Resize(_side, _side), A.Normalize(), ToTensorV2()],
        p=1.,
    ),
)


# Dataset

In [8]:
# Every sample sits in its own sub-directory of TRAIN_PATH. The three file
# lists are sorted so that they can be used as parallel lists.
# NOTE(review): the label at position i (from train_json) is later paired with
# the image at position i (from train_jpg); this assumes exactly one .json per
# .jpg with matching sort order -- verify against the dataset layout.
train_csv = sorted(glob(str(TRAIN_PATH / '*/*.csv')))
train_jpg = sorted(glob(str(TRAIN_PATH / '*/*.jpg')))
train_json = sorted(glob(str(TRAIN_PATH / '*/*.json')))


# Per-sample annotation fields and the combined "<crop>_<disease>_<risk>" label.
crops, diseases, risks, labels = [], [], [], []

for json_path in train_json:
    with open(json_path, 'r') as f:
        annotations = json.load(f)['annotations']
        crop, disease, risk = (
            annotations[field] for field in ('crop', 'disease', 'risk')
        )

    crops.append(crop)
    diseases.append(disease)
    risks.append(risk)
    labels.append(f"{crop}_{disease}_{risk}")

# Map every distinct label string to a class index, in sorted label order.
label_unique = {
    name: class_id for class_id, name in enumerate(sorted(np.unique(labels)))
}

# Integer class id of every sample; same length and order as `labels`.
train_labels = [label_unique[name] for name in labels]

In [9]:
# Convert both lists to NumPy arrays so that the index arrays produced by the
# fold splitter can select a fold's samples directly.
train_jpg = np.array(train_jpg)
train_labels = np.array(train_labels)

In [10]:
class CustomDataset(Dataset):
    """Image-classification dataset backed by a sequence of image file paths.

    Args:
        train_img: Indexable sequence of image paths.
        train_label: Indexable sequence of targets, parallel to ``train_img``.
        transforms: Optional callable invoked as ``transforms(image=img)`` that
            returns a mapping with an ``"image"`` entry (the albumentations
            calling convention).
    """

    def __init__(self, train_img, train_label, transforms=None):
        self.imgs = train_img
        self.labels = train_label
        self.transforms = transforms

    def __len__(self):
        """Number of samples, i.e. the number of image paths."""
        return len(self.imgs)

    def __getitem__(self, index):
        """Load, colour-convert and (optionally) transform one sample.

        Returns:
            Tuple ``(img, target)``.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        # Plain ``str`` so that numpy string scalars and Path objects both work.
        img_path = str(self.imgs[index])
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if img is None:
            raise FileNotFoundError(f"cv2.imread could not read image: {img_path}")
        # OpenCV loads images as BGR; the rest of the pipeline works on RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        target = self.labels[index]

        if self.transforms:
            img = self.transforms(image=img)["image"]

        return img, target
    
# trainDataset = CustomDataset(X_train, y_train, transforms = data_transforms['train'])
# trainDataloader = DataLoader(
#     trainDataset, batch_size=CONFIG['train_batch_size'], shuffle=True)

# validDataset = CustomDataset(X_val, y_val, transforms = data_transforms['valid'])
# validDataloader = DataLoader(validDataset, batch_size = CONFIG['valid_batch_size'], shuffle = True)

In [11]:
from sklearn.model_selection import StratifiedKFold

# Per-fold containers: position i holds the objects that belong to fold i.
train_datasets = []
valid_datasets = []
train_dataloaders = []
valid_dataloaders = []

# 5-fold split that keeps the class proportions of train_labels in every fold.
skf = StratifiedKFold(n_splits = 5)

for step, (train_index, val_index) in enumerate(skf.split(X = train_jpg, y= train_labels)):
    X_train, y_train = train_jpg[train_index], train_labels[train_index]
    X_val, y_val = train_jpg[val_index], train_labels[val_index]

    # Training data gets the random augmentations, validation data does not.
    train_datasets.append(CustomDataset(
        X_train, y_train, transforms=data_transforms['train']))
    valid_datasets.append(CustomDataset(
        X_val, y_val, transforms=data_transforms['valid']))

    train_dataloaders.append(DataLoader(
        train_datasets[step], batch_size=CONFIG['train_batch_size'], shuffle=True)
    )
    # Validation must not be shuffled: the accuracy/F1 reported by
    # valid_one_epoch are averaged per batch, so a random batch composition
    # would make them vary between evaluations of the same weights, and the
    # shuffle would needlessly consume random-generator state.
    valid_dataloaders.append(
        DataLoader(
            valid_datasets[step], batch_size=CONFIG['valid_batch_size'], shuffle=False)
    )


# Model

In [12]:
class Model(nn.Module):
    """timm backbone with its classifier removed, followed by a linear head.

    Args:
        backbone: Model name passed to ``timm.create_model``.
        embedder: Accepted for interface compatibility; not used.
        pretrained: Whether timm should load pretrained backbone weights.
        num_classes: Size of the output layer. Defaults to
            ``CONFIG['num_classes']`` when omitted.
    """

    def __init__(self, backbone, embedder, pretrained=True, num_classes=None):
        super().__init__()
        if num_classes is None:
            num_classes = CONFIG['num_classes']

        self.backbone = timm.create_model(backbone, pretrained=pretrained)
        # Drop the backbone's own classifier so it returns pooled features.
        self.backbone.reset_classifier(0)
        self.n_features = self.backbone.num_features
        self.fc = nn.Linear(self.n_features, num_classes)

    def forward(self, images):
        """Return the class logits for a batch of images."""
        # features: (batch, n_features)
        features = self.backbone(images)
        # output: (batch, num_classes)
        return self.fc(features)


# Build the classifier with pretrained backbone weights and move it to the
# configured device. Assigning the result of `.to()` (it returns the module
# itself) produces no cell output, so no stand-alone `;` line is needed; such a
# line is not valid Python outside IPython.
model = Model(CONFIG['backbone'], CONFIG['embedder'], pretrained = True)
model = model.to(CONFIG['device'])

Downloading: "https://unilm.blob.core.windows.net/beit/beit_base_patch16_224_pt22k_ft22k.pth" to /root/.cache/torch/hub/checkpoints/beit_base_patch16_224_pt22k_ft22k.pth


''

In [13]:
# Adam over all model parameters, with the learning rate and weight decay
# taken from CONFIG. Used for the first training phase.
optimizer = torch.optim.Adam(
    params=model.parameters(), lr=CONFIG['learning_rate'], weight_decay=CONFIG['weight_decay'])


In [14]:
def criterion(logits: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
    """Mean cross-entropy between class logits and integer class targets.

    Args:
        logits: Tensor whose last dimension enumerates the classes; any leading
            dimensions are flattened.
        targets: Tensor of class indices with one entry per logit row.

    Returns:
        Scalar tensor holding the mean loss.
    """
    # The class count is read from the logits themselves, so the function does
    # not depend on the global CONFIG and no loss module is rebuilt per call.
    return F.cross_entropy(logits.reshape(-1, logits.size(-1)), targets.reshape(-1))

   # Training Function

In [15]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    """Run one training pass over ``dataloader`` and update ``model`` in place.

    Mixed precision is used when the module-level ``FP16`` flag is true.

    Args:
        model: Network to train; it is switched to training mode.
        optimizer: Optimizer, stepped once per batch.
        scheduler: Optional LR scheduler. When not ``None`` it is stepped once
            per batch (not once per epoch).
        dataloader: Iterable of ``(img, target)`` batches that supports ``len()``.
        device: Device the batches are moved to.
        epoch: Epoch index, used only for the progress-bar display.

    Returns:
        Tuple ``(train_loss, accuracy, f1)``: the sample-weighted mean loss and
        the means of the per-batch accuracy and per-batch macro F1.
    """
    # Switch to training mode.
    model.train()

    # Loss scaler for mixed precision.
    # PyTorch example: https://pytorch.org/docs/stable/notes/amp_examples.html#amp-examples
    scaler = amp.GradScaler() if FP16 else None

    dataset_size = 0
    running_loss = 0
    accuracy = 0
    f1 = 0

    bar = tqdm(enumerate(dataloader), total=len(dataloader))

    for step, (img, target) in bar:
        img = img.to(device)
        target = target.to(device)

        batch_size = img.shape[0]

        if FP16:
            # Only the forward pass and the loss belong inside autocast.
            with amp.autocast(enabled=True):
                logits = model(img)
                loss = criterion(logits, target)

            # Backward on the scaled loss to obtain scaled gradients.
            scaler.scale(loss).backward()
            # Unscale first so clipping sees the true gradient norm, and clip
            # with the same threshold as the full-precision branch.
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            # scaler.step() skips optimizer.step() if the gradients contain
            # infs or NaNs.
            scaler.step(optimizer)
            # Update the scale for the next iteration.
            scaler.update()
        else:
            logits = model(img)
            loss = criterion(logits, target)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()

        # Zero the parameter gradients.
        optimizer.zero_grad()

        # Advance the learning-rate schedule (once per batch).
        if scheduler is not None:
            scheduler.step()

        # loss.item() is the batch-average loss as a Python float; multiply by
        # the batch size to accumulate the sum of per-sample losses.
        running_loss += loss.item() * batch_size
        dataset_size += batch_size

        # Predicted class = arg-max of the logits (softmax would not change it).
        preds = logits.detach().argmax(dim=-1).view(-1).cpu().numpy()
        truth = target.detach().view(-1).cpu().numpy()

        # Per-batch accuracy and macro F1 (ground truth first, then predictions).
        accuracy += np.mean(preds == truth)
        f1 += f1_score(truth, preds, average='macro')

        # Running epoch loss.
        train_loss = running_loss / dataset_size

        # Plain division: the `np.float` alias no longer exists in current NumPy.
        bar.set_postfix(
            Epoch=epoch,
            Train_Loss=train_loss,
            LR=optimizer.param_groups[0]["lr"],
            accuracy=accuracy / (step + 1),
            f1=f1 / (step + 1),
        )

    accuracy /= len(dataloader)
    f1 /= len(dataloader)
    # Garbage Collector
    gc.collect()

    return train_loss, accuracy, f1


   # Validation Function

In [16]:
@torch.no_grad()
def valid_one_epoch(model, dataloader, device, epoch, optimizer=None):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        dataloader: Iterable of ``(img, target)`` batches that supports ``len()``.
        device: Device the batches are moved to.
        epoch: Epoch index, used only for the progress-bar display.
        optimizer: Optional optimizer whose current learning rate is shown in
            the progress bar. When omitted, the module-level ``optimizer`` is
            used if one exists; otherwise the displayed LR is ``None``.

    Returns:
        Tuple ``(val_loss, accuracy, f1)``: the sample-weighted mean loss and
        the means of the per-batch accuracy and per-batch macro F1.
    """
    model.eval()

    # The LR is display-only; fall back to the notebook-level optimizer so that
    # existing calls without the argument keep showing it.
    if optimizer is None:
        optimizer = globals().get("optimizer")
    current_lr = optimizer.param_groups[0]["lr"] if optimizer is not None else None

    dataset_size = 0
    running_loss = 0
    accuracy = 0
    f1 = 0

    bar = tqdm(enumerate(dataloader), total=len(dataloader))

    for step, (img, target) in bar:
        img = img.to(device)
        target = target.to(device)

        batch_size = img.shape[0]

        logits = model(img)
        loss = criterion(logits, target)

        running_loss += loss.item() * batch_size
        dataset_size += batch_size

        # Running epoch loss, for the live progress display.
        val_loss = running_loss / dataset_size

        # Predicted class = arg-max of the logits (softmax would not change it).
        preds = logits.detach().argmax(dim=-1).view(-1).cpu().numpy()
        truth = target.detach().view(-1).cpu().numpy()

        # Per-batch accuracy and macro F1 (ground truth first, then predictions).
        accuracy += np.mean(preds == truth)
        f1 += f1_score(truth, preds, average='macro')

        # Plain division: the `np.float` alias no longer exists in current NumPy.
        bar.set_postfix(
            Epoch=epoch,
            Valid_Loss=val_loss,
            LR=current_lr,
            accuracy=accuracy / (step + 1),
            f1=f1 / (step + 1),
        )

    accuracy /= len(dataloader)
    f1 /= len(dataloader)

    gc.collect()

    return val_loss, accuracy, f1


In [17]:
# Index of the next epoch to run. run_training() advances it so that a later
# call continues the epoch numbering instead of starting from zero again.
START_EPOCH = 0


def run_training(
    model,
    optimizer,
    scheduler,
    device,
    num_epochs,
    metric_prefix="",
    file_prefix="",
    early_stopping=True,
    early_stopping_step=10,
):
    """Train and validate ``model`` for up to ``num_epochs`` epochs.

    Each epoch uses one cross-validation fold, chosen as
    ``epoch % number_of_folds`` from the module-level ``train_dataloaders`` /
    ``valid_dataloaders``. Metrics are logged to W&B, the latest weights are
    written to ``last.bin`` every epoch, and a checkpoint is written whenever
    the validation loss does not get worse. On return the model holds the
    weights with the best validation loss seen during this call.

    NOTE(review): because the fold changes every epoch, the images validated
    in one epoch belong to the training split of the other epochs, so the
    validation metrics are not a held-out estimate.

    Args:
        model: Network to train.
        optimizer: Optimizer passed to ``train_one_epoch``.
        scheduler: LR scheduler passed to ``train_one_epoch`` (may be ``None``).
        device: Device the batches are moved to.
        num_epochs: Maximum number of epochs to run in this call.
        metric_prefix: Prefix for the metric names in ``history`` and W&B.
        file_prefix: Prefix for the checkpoint file names.
        early_stopping: Whether to stop when the validation loss stalls.
        early_stopping_step: Training stops once the number of consecutive
            non-improving epochs exceeds this value.

    Returns:
        Tuple ``(model, history)``; ``history`` maps metric names to the list
        of per-epoch values.
    """
    global START_EPOCH

    # To automatically log gradients
    wandb.watch(model, log_freq=100)

    if torch.cuda.is_available():
        print("[INFO] Using GPU:{}\n".format(torch.cuda.get_device_name()))

    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = np.inf
    history = defaultdict(list)
    early_stop_counter = 0

    # One dataloader pair exists per fold; derive the count instead of
    # hard-coding it so it always matches the split that was built.
    fold_num = len(train_dataloaders)

    # Run training and validation for num_epochs epochs.
    for epoch in range(START_EPOCH, START_EPOCH + num_epochs):
        gc.collect()

        # Rotate through the folds, one fold per epoch.
        fold = epoch % fold_num
        trainDataloader = train_dataloaders[fold]
        validDataloader = valid_dataloaders[fold]

        train_train_loss, train_accuracy, train_f1 = train_one_epoch(
            model,
            optimizer,
            scheduler,
            dataloader=trainDataloader,
            device=device,
            epoch=epoch,
        )

        val_loss, val_accuracy, val_f1 = valid_one_epoch(
            model, validDataloader, device=device, epoch=epoch
        )

        epoch_metrics = {
            f"{metric_prefix}Train Loss": train_train_loss,
            f"{metric_prefix}Valid Loss": val_loss,
            f"{metric_prefix}Train Accuracy": train_accuracy,
            f"{metric_prefix}Valid Accuracy": val_accuracy,
            f"{metric_prefix}Train F1": train_f1,
            f"{metric_prefix}Valid F1": val_f1,
        }
        for metric_name, metric_value in epoch_metrics.items():
            history[metric_name].append(metric_value)

        # Log the metrics
        wandb.log(epoch_metrics)

        print(f"Valid Loss : {val_loss}")

        # Always keep the most recent weights, locally and in W&B.
        torch.save(model.state_dict(), 'last.bin')
        wandb.save('last.bin')

        # The epoch is complete at this point. Record where to resume *before*
        # the early-stopping check, so that stopping early does not leave
        # START_EPOCH pointing at an epoch that has already been trained.
        START_EPOCH = epoch + 1

        if val_loss <= best_loss:
            early_stop_counter = 0

            print(
                f"Validation Loss improved( {best_loss} ---> {val_loss}  )"
            )

            # Update Best Loss
            best_loss = val_loss

            # Remember the best weights so they can be restored at the end.
            best_model_wts = copy.deepcopy(model.state_dict())

            PATH = "{}epoch{:.0f}_Loss{:.4f}.bin".format(
                file_prefix, epoch, best_loss)
            torch.save(model.state_dict(), PATH)
            torch.save(model.state_dict(),
                       f"{file_prefix}best_{epoch}epoch.bin")
            # Save a model file from the current directory
            wandb.save(PATH)

            print("Model Saved")

        elif early_stopping:
            early_stop_counter += 1
            if early_stop_counter > early_stopping_step:
                break

    end = time.time()
    time_elapsed = end - start
    print(
        "Training complete in {:.0f}h {:.0f}m {:.0f}s".format(
            time_elapsed // 3600,
            (time_elapsed % 3600) // 60,
            (time_elapsed % 3600) % 60,
        )
    )
    print("Best Loss: {:.4f}".format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model, history


In [18]:
# wandb.restore('epoch3_Loss0.1026.bin', 'jiwon7258/lg/2mzc3731', root='./')
# model.load_state_dict(torch.load('epoch3_Loss0.1026.bin',
#                       map_location=CONFIG['device']))


# Fast Start: 10 epochs with the backbone frozen

In [19]:
# Freeze every backbone weight so that only the linear head (`model.fc`) is
# updated during the warm-up phase.
for weight in model.backbone.parameters():
    weight.requires_grad = False

In [20]:
# Warm-up phase: 10 epochs with the optimizer created above and a fresh cosine
# schedule. The trailing `;` suppresses the display of the returned
# (model, history) tuple.
# NOTE(review): train_one_epoch calls scheduler.step() once per batch, so
# T_max=CONFIG['T_max'] is counted in batches, not epochs; the LR printed at the
# end of each epoch in the output below therefore rises and falls. Confirm this
# cyclic schedule is intended.
run_training(
    model=model,
    optimizer=optimizer,
    scheduler=torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer=optimizer, T_max=CONFIG['T_max'], eta_min=CONFIG['min_lr']),
    device=device,
    num_epochs=10,
    metric_prefix="",
    file_prefix="",
    early_stopping=CONFIG['early_stopping'],
    early_stopping_step=CONFIG['early_stopping_step'],
);


[INFO] Using GPU:Tesla P100-PCIE-16GB



100%|██████████| 577/577 [01:26<00:00,  6.65it/s, Epoch=0, LR=8.76e-5, Train_Loss=2.39, accuracy=0.387, f1=0.245]
100%|██████████| 73/73 [00:17<00:00,  4.11it/s, Epoch=0, LR=8.76e-5, Valid_Loss=1.81, accuracy=0.532, f1=0.311]


Valid Loss : 1.8113709434066323
Validation Loss improved( inf ---> 1.8113709434066323  )
Model Saved


100%|██████████| 577/577 [01:31<00:00,  6.29it/s, Epoch=1, LR=5.67e-5, Train_Loss=1.75, accuracy=0.541, f1=0.378]
100%|██████████| 73/73 [00:18<00:00,  3.90it/s, Epoch=1, LR=5.67e-5, Valid_Loss=1.4, accuracy=0.616, f1=0.393]


Valid Loss : 1.4017175464944163
Validation Loss improved( 1.8113709434066323 ---> 1.4017175464944163  )
Model Saved


100%|██████████| 577/577 [01:28<00:00,  6.51it/s, Epoch=2, LR=2.27e-5, Train_Loss=1.5, accuracy=0.605, f1=0.439]
100%|██████████| 73/73 [00:17<00:00,  4.10it/s, Epoch=2, LR=2.27e-5, Valid_Loss=1.21, accuracy=0.665, f1=0.465]


Valid Loss : 1.2146024025737567
Validation Loss improved( 1.4017175464944163 ---> 1.2146024025737567  )
Model Saved


100%|██████████| 577/577 [01:28<00:00,  6.53it/s, Epoch=3, LR=2.56e-6, Train_Loss=1.35, accuracy=0.639, f1=0.481]
100%|██████████| 73/73 [00:17<00:00,  4.09it/s, Epoch=3, LR=2.56e-6, Valid_Loss=1.06, accuracy=0.735, f1=0.549]


Valid Loss : 1.0571924845549923
Validation Loss improved( 1.2146024025737567 ---> 1.0571924845549923  )
Model Saved


100%|██████████| 577/577 [01:29<00:00,  6.47it/s, Epoch=4, LR=6.4e-6, Train_Loss=1.25, accuracy=0.662, f1=0.506]
100%|██████████| 73/73 [00:17<00:00,  4.11it/s, Epoch=4, LR=6.4e-6, Valid_Loss=1, accuracy=0.722, f1=0.525]


Valid Loss : 0.9999679840862079
Validation Loss improved( 1.0571924845549923 ---> 0.9999679840862079  )
Model Saved


100%|██████████| 577/577 [01:28<00:00,  6.54it/s, Epoch=5, LR=3.23e-5, Train_Loss=1.17, accuracy=0.684, f1=0.532]
100%|██████████| 73/73 [00:17<00:00,  4.12it/s, Epoch=5, LR=3.23e-5, Valid_Loss=0.889, accuracy=0.759, f1=0.582]


Valid Loss : 0.8885941447583601
Validation Loss improved( 0.9999679840862079 ---> 0.8885941447583601  )
Model Saved


100%|██████████| 577/577 [01:28<00:00,  6.50it/s, Epoch=6, LR=6.73e-5, Train_Loss=1.11, accuracy=0.696, f1=0.546]
100%|██████████| 73/73 [00:17<00:00,  4.08it/s, Epoch=6, LR=6.73e-5, Valid_Loss=0.826, accuracy=0.775, f1=0.604]


Valid Loss : 0.8262378823535075
Validation Loss improved( 0.8885941447583601 ---> 0.8262378823535075  )
Model Saved


100%|██████████| 577/577 [01:28<00:00,  6.50it/s, Epoch=7, LR=9.39e-5, Train_Loss=1.05, accuracy=0.708, f1=0.56]
100%|██████████| 73/73 [00:17<00:00,  4.08it/s, Epoch=7, LR=9.39e-5, Valid_Loss=0.802, accuracy=0.767, f1=0.603]


Valid Loss : 0.8015876318823226
Validation Loss improved( 0.8262378823535075 ---> 0.8015876318823226  )
Model Saved


100%|██████████| 577/577 [01:28<00:00,  6.51it/s, Epoch=8, LR=9.88e-5, Train_Loss=1.03, accuracy=0.704, f1=0.556]
100%|██████████| 73/73 [00:17<00:00,  4.09it/s, Epoch=8, LR=9.88e-5, Valid_Loss=0.73, accuracy=0.804, f1=0.642]


Valid Loss : 0.7303761985905772
Validation Loss improved( 0.8015876318823226 ---> 0.7303761985905772  )
Model Saved


100%|██████████| 577/577 [01:28<00:00,  6.50it/s, Epoch=9, LR=7.96e-5, Train_Loss=0.986, accuracy=0.725, f1=0.581]
100%|██████████| 73/73 [00:17<00:00,  4.09it/s, Epoch=9, LR=7.96e-5, Valid_Loss=0.738, accuracy=0.802, f1=0.654]


Valid Loss : 0.7381006251545648
Training complete in 0h 18m 38s
Best Loss: 0.7304


# Reduced LR (1e-5 ~ 1e-6): 50 more epochs with the backbone unfrozen (epochs 10-59)

In [21]:
# Fresh Adam optimizer for the fine-tuning phase: same weight decay as before,
# but a fixed, lower starting learning rate of 1e-5 instead of
# CONFIG['learning_rate'].
optimizer = torch.optim.Adam(
    params=model.parameters(), lr=1e-5, weight_decay=CONFIG['weight_decay'])

In [22]:
# Make every backbone weight trainable again so the whole network is
# fine-tuned in the second phase.
for weight in model.backbone.parameters():
    weight.requires_grad = True


In [23]:
# START_EPOCH=3

In [24]:
# Fine-tuning phase: CONFIG['num_epochs'] further epochs with the new optimizer
# and a fresh cosine schedule. Epoch numbering continues from START_EPOCH, while
# run_training tracks the best loss from infinity again on every call (hence
# the "inf --->" in the first line of output below).
run_training(
    model=model,
    optimizer=optimizer,
    scheduler=torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer=optimizer, T_max=CONFIG['T_max'], eta_min=CONFIG['min_lr']),
    # scheduler=None,
    device=device,
    num_epochs=CONFIG['num_epochs'],
    metric_prefix="",
    file_prefix="",
    early_stopping=CONFIG['early_stopping'],
    early_stopping_step=CONFIG['early_stopping_step'],
);


[INFO] Using GPU:Tesla P100-PCIE-16GB



100%|██████████| 577/577 [03:31<00:00,  2.73it/s, Epoch=10, LR=8.88e-6, Train_Loss=0.512, accuracy=0.837, f1=0.724]
100%|██████████| 73/73 [00:17<00:00,  4.12it/s, Epoch=10, LR=8.88e-6, Valid_Loss=0.322, accuracy=0.892, f1=0.774]


Valid Loss : 0.3217257613306494
Validation Loss improved( inf ---> 0.3217257613306494  )
Model Saved


100%|██████████| 577/577 [03:32<00:00,  2.71it/s, Epoch=11, LR=6.06e-6, Train_Loss=0.294, accuracy=0.899, f1=0.812]
100%|██████████| 73/73 [00:17<00:00,  4.06it/s, Epoch=11, LR=6.06e-6, Valid_Loss=0.195, accuracy=0.934, f1=0.843]


Valid Loss : 0.19454379475948302
Validation Loss improved( 0.3217257613306494 ---> 0.19454379475948302  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.71it/s, Epoch=12, LR=2.97e-6, Train_Loss=0.208, accuracy=0.928, f1=0.863]
100%|██████████| 73/73 [00:17<00:00,  4.10it/s, Epoch=12, LR=2.97e-6, Valid_Loss=0.135, accuracy=0.951, f1=0.883]


Valid Loss : 0.13472708773937364
Validation Loss improved( 0.19454379475948302 ---> 0.13472708773937364  )
Model Saved


100%|██████████| 577/577 [03:32<00:00,  2.72it/s, Epoch=13, LR=1.14e-6, Train_Loss=0.152, accuracy=0.947, f1=0.897]
100%|██████████| 73/73 [00:17<00:00,  4.09it/s, Epoch=13, LR=1.14e-6, Valid_Loss=0.0913, accuracy=0.969, f1=0.916]


Valid Loss : 0.09126368712016736
Validation Loss improved( 0.13472708773937364 ---> 0.09126368712016736  )
Model Saved


100%|██████████| 577/577 [03:32<00:00,  2.71it/s, Epoch=14, LR=1.49e-6, Train_Loss=0.119, accuracy=0.957, f1=0.911]
100%|██████████| 73/73 [00:17<00:00,  4.06it/s, Epoch=14, LR=1.49e-6, Valid_Loss=0.0861, accuracy=0.966, f1=0.919]


Valid Loss : 0.08611465217476971
Validation Loss improved( 0.09126368712016736 ---> 0.08611465217476971  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.71it/s, Epoch=15, LR=3.84e-6, Train_Loss=0.0993, accuracy=0.962, f1=0.921]
100%|██████████| 73/73 [00:17<00:00,  4.12it/s, Epoch=15, LR=3.84e-6, Valid_Loss=0.0624, accuracy=0.98, f1=0.946]


Valid Loss : 0.0623513181569568
Validation Loss improved( 0.08611465217476971 ---> 0.0623513181569568  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=16, LR=7.02e-6, Train_Loss=0.087, accuracy=0.967, f1=0.931]
100%|██████████| 73/73 [00:17<00:00,  4.09it/s, Epoch=16, LR=7.02e-6, Valid_Loss=0.0654, accuracy=0.974, f1=0.929]


Valid Loss : 0.06539354639925204


100%|██████████| 577/577 [03:32<00:00,  2.71it/s, Epoch=17, LR=9.44e-6, Train_Loss=0.0733, accuracy=0.975, f1=0.947]
100%|██████████| 73/73 [00:17<00:00,  4.08it/s, Epoch=17, LR=9.44e-6, Valid_Loss=0.0549, accuracy=0.982, f1=0.953]


Valid Loss : 0.05487073176444364
Validation Loss improved( 0.0623513181569568 ---> 0.05487073176444364  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=18, LR=9.89e-6, Train_Loss=0.0603, accuracy=0.977, f1=0.952]
100%|██████████| 73/73 [00:17<00:00,  4.10it/s, Epoch=18, LR=9.89e-6, Valid_Loss=0.0894, accuracy=0.97, f1=0.924]


Valid Loss : 0.0893640659372092


100%|██████████| 577/577 [03:32<00:00,  2.71it/s, Epoch=19, LR=8.15e-6, Train_Loss=0.0628, accuracy=0.978, f1=0.955]
100%|██████████| 73/73 [00:18<00:00,  4.04it/s, Epoch=19, LR=8.15e-6, Valid_Loss=0.0346, accuracy=0.989, f1=0.971]


Valid Loss : 0.03462866305021978
Validation Loss improved( 0.05487073176444364 ---> 0.03462866305021978  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=20, LR=5.08e-6, Train_Loss=0.0506, accuracy=0.982, f1=0.963]
100%|██████████| 73/73 [00:17<00:00,  4.07it/s, Epoch=20, LR=5.08e-6, Valid_Loss=0.0191, accuracy=0.993, f1=0.98]


Valid Loss : 0.0191366850471417
Validation Loss improved( 0.03462866305021978 ---> 0.0191366850471417  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=21, LR=2.22e-6, Train_Loss=0.0472, accuracy=0.985, f1=0.968]
100%|██████████| 73/73 [00:17<00:00,  4.08it/s, Epoch=21, LR=2.22e-6, Valid_Loss=0.0331, accuracy=0.991, f1=0.977]


Valid Loss : 0.03312692375739865


100%|██████████| 577/577 [03:33<00:00,  2.71it/s, Epoch=22, LR=1e-6, Train_Loss=0.0386, accuracy=0.989, f1=0.975]
100%|██████████| 73/73 [00:17<00:00,  4.10it/s, Epoch=22, LR=1e-6, Valid_Loss=0.0244, accuracy=0.991, f1=0.978]


Valid Loss : 0.024408119596806572


100%|██████████| 577/577 [03:32<00:00,  2.71it/s, Epoch=23, LR=2.03e-6, Train_Loss=0.0352, accuracy=0.986, f1=0.971]
100%|██████████| 73/73 [00:17<00:00,  4.08it/s, Epoch=23, LR=2.03e-6, Valid_Loss=0.013, accuracy=0.995, f1=0.988]


Valid Loss : 0.013007138941172895
Validation Loss improved( 0.0191366850471417 ---> 0.013007138941172895  )
Model Saved


100%|██████████| 577/577 [03:34<00:00,  2.69it/s, Epoch=24, LR=4.8e-6, Train_Loss=0.0352, accuracy=0.989, f1=0.975]
100%|██████████| 73/73 [00:17<00:00,  4.07it/s, Epoch=24, LR=4.8e-6, Valid_Loss=0.0166, accuracy=0.992, f1=0.98]


Valid Loss : 0.016592377744862472


100%|██████████| 577/577 [03:32<00:00,  2.71it/s, Epoch=25, LR=7.91e-6, Train_Loss=0.0307, accuracy=0.989, f1=0.975]
100%|██████████| 73/73 [00:17<00:00,  4.10it/s, Epoch=25, LR=7.91e-6, Valid_Loss=0.0113, accuracy=0.998, f1=0.995]


Valid Loss : 0.011264854829092195
Validation Loss improved( 0.013007138941172895 ---> 0.011264854829092195  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=26, LR=9.82e-6, Train_Loss=0.0226, accuracy=0.992, f1=0.982]
100%|██████████| 73/73 [00:17<00:00,  4.10it/s, Epoch=26, LR=9.82e-6, Valid_Loss=0.0112, accuracy=0.995, f1=0.988]


Valid Loss : 0.011228679909188853
Validation Loss improved( 0.011264854829092195 ---> 0.011228679909188853  )
Model Saved


100%|██████████| 577/577 [03:32<00:00,  2.71it/s, Epoch=27, LR=9.57e-6, Train_Loss=0.0227, accuracy=0.991, f1=0.982]
100%|██████████| 73/73 [00:17<00:00,  4.12it/s, Epoch=27, LR=9.57e-6, Valid_Loss=0.0104, accuracy=0.994, f1=0.989]


Valid Loss : 0.01040499508485265
Validation Loss improved( 0.011228679909188853 ---> 0.01040499508485265  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.71it/s, Epoch=28, LR=7.29e-6, Train_Loss=0.0199, accuracy=0.993, f1=0.985]
100%|██████████| 73/73 [00:17<00:00,  4.11it/s, Epoch=28, LR=7.29e-6, Valid_Loss=0.00459, accuracy=0.998, f1=0.995]


Valid Loss : 0.004589949013407479
Validation Loss improved( 0.01040499508485265 ---> 0.004589949013407479  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=29, LR=4.11e-6, Train_Loss=0.0146, accuracy=0.995, f1=0.989]
100%|██████████| 73/73 [00:17<00:00,  4.06it/s, Epoch=29, LR=4.11e-6, Valid_Loss=0.00237, accuracy=0.999, f1=0.999]


Valid Loss : 0.0023688295129683564
Validation Loss improved( 0.004589949013407479 ---> 0.0023688295129683564  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=30, LR=1.63e-6, Train_Loss=0.0151, accuracy=0.995, f1=0.989]
100%|██████████| 73/73 [00:17<00:00,  4.13it/s, Epoch=30, LR=1.63e-6, Valid_Loss=0.006, accuracy=0.998, f1=0.997]


Valid Loss : 0.00599544918295452


100%|██████████| 577/577 [03:32<00:00,  2.72it/s, Epoch=31, LR=1.08e-6, Train_Loss=0.0174, accuracy=0.995, f1=0.989]
100%|██████████| 73/73 [00:17<00:00,  4.10it/s, Epoch=31, LR=1.08e-6, Valid_Loss=0.00508, accuracy=0.998, f1=0.996]


Valid Loss : 0.005076255401532696


100%|██████████| 577/577 [03:32<00:00,  2.71it/s, Epoch=32, LR=2.74e-6, Train_Loss=0.0174, accuracy=0.995, f1=0.988]
100%|██████████| 73/73 [00:17<00:00,  4.08it/s, Epoch=32, LR=2.74e-6, Valid_Loss=0.00397, accuracy=0.999, f1=0.998]


Valid Loss : 0.003973823239649145


100%|██████████| 577/577 [03:33<00:00,  2.71it/s, Epoch=33, LR=5.78e-6, Train_Loss=0.00917, accuracy=0.997, f1=0.995]
100%|██████████| 73/73 [00:17<00:00,  4.11it/s, Epoch=33, LR=5.78e-6, Valid_Loss=0.00192, accuracy=0.999, f1=0.997]


Valid Loss : 0.001923687078034437
Validation Loss improved( 0.0023688295129683564 ---> 0.001923687078034437  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=34, LR=8.68e-6, Train_Loss=0.0134, accuracy=0.996, f1=0.991]
100%|██████████| 73/73 [00:17<00:00,  4.11it/s, Epoch=34, LR=8.68e-6, Valid_Loss=0.00507, accuracy=0.999, f1=0.997]


Valid Loss : 0.005070347238051278


100%|██████████| 577/577 [03:32<00:00,  2.71it/s, Epoch=35, LR=9.99e-6, Train_Loss=0.00765, accuracy=0.997, f1=0.994]
100%|██████████| 73/73 [00:17<00:00,  4.10it/s, Epoch=35, LR=9.99e-6, Valid_Loss=0.00158, accuracy=0.999, f1=0.998]


Valid Loss : 0.00158095816158315
Validation Loss improved( 0.001923687078034437 ---> 0.00158095816158315  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=36, LR=9.06e-6, Train_Loss=0.0146, accuracy=0.995, f1=0.991]
100%|██████████| 73/73 [00:17<00:00,  4.07it/s, Epoch=36, LR=9.06e-6, Valid_Loss=0.000468, accuracy=1, f1=1]


Valid Loss : 0.00046832370520721715
Validation Loss improved( 0.00158095816158315 ---> 0.00046832370520721715  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=37, LR=6.34e-6, Train_Loss=0.00931, accuracy=0.998, f1=0.997]
100%|██████████| 73/73 [00:17<00:00,  4.09it/s, Epoch=37, LR=6.34e-6, Valid_Loss=8.4e-5, accuracy=1, f1=1]


Valid Loss : 8.403138977687294e-05
Validation Loss improved( 0.00046832370520721715 ---> 8.403138977687294e-05  )
Model Saved


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=38, LR=3.21e-6, Train_Loss=0.00862, accuracy=0.997, f1=0.993]
100%|██████████| 73/73 [00:18<00:00,  4.05it/s, Epoch=38, LR=3.21e-6, Valid_Loss=0.000222, accuracy=1, f1=1]


Valid Loss : 0.0002222036015795538


100%|██████████| 577/577 [03:32<00:00,  2.71it/s, Epoch=39, LR=1.22e-6, Train_Loss=0.0127, accuracy=0.997, f1=0.993]
100%|██████████| 73/73 [00:17<00:00,  4.10it/s, Epoch=39, LR=1.22e-6, Valid_Loss=0.00433, accuracy=0.999, f1=0.998]


Valid Loss : 0.004330208096478991


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=40, LR=1.37e-6, Train_Loss=0.00976, accuracy=0.997, f1=0.994]
100%|██████████| 73/73 [00:17<00:00,  4.10it/s, Epoch=40, LR=1.37e-6, Valid_Loss=0.00229, accuracy=0.999, f1=0.996]


Valid Loss : 0.002291378714184929


100%|██████████| 577/577 [03:33<00:00,  2.71it/s, Epoch=41, LR=3.58e-6, Train_Loss=0.00897, accuracy=0.998, f1=0.996]
100%|██████████| 73/73 [00:17<00:00,  4.06it/s, Epoch=41, LR=3.58e-6, Valid_Loss=0.0112, accuracy=0.998, f1=0.996]


Valid Loss : 0.011239966385259545


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=42, LR=6.76e-6, Train_Loss=0.00739, accuracy=0.998, f1=0.996]
100%|██████████| 73/73 [00:17<00:00,  4.07it/s, Epoch=42, LR=6.76e-6, Valid_Loss=0.00347, accuracy=0.999, f1=0.998]


Valid Loss : 0.0034676394619247977


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=43, LR=9.3e-6, Train_Loss=0.007, accuracy=0.998, f1=0.995]
100%|██████████| 73/73 [00:17<00:00,  4.10it/s, Epoch=43, LR=9.3e-6, Valid_Loss=0.00217, accuracy=0.998, f1=0.995]


Valid Loss : 0.002168353585859107


100%|██████████| 577/577 [03:32<00:00,  2.71it/s, Epoch=44, LR=9.94e-6, Train_Loss=0.00757, accuracy=0.998, f1=0.996]
100%|██████████| 73/73 [00:17<00:00,  4.08it/s, Epoch=44, LR=9.94e-6, Valid_Loss=0.00311, accuracy=0.998, f1=0.993]


Valid Loss : 0.003105276160048798


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=45, LR=8.37e-6, Train_Loss=0.015, accuracy=0.996, f1=0.993]
100%|██████████| 73/73 [00:17<00:00,  4.08it/s, Epoch=45, LR=8.37e-6, Valid_Loss=2.35e-5, accuracy=1, f1=1]


Valid Loss : 2.3525116055843903e-05
Validation Loss improved( 8.403138977687294e-05 ---> 2.3525116055843903e-05  )
Model Saved


100%|██████████| 577/577 [03:34<00:00,  2.70it/s, Epoch=46, LR=5.36e-6, Train_Loss=0.0154, accuracy=0.996, f1=0.993]
100%|██████████| 73/73 [00:17<00:00,  4.06it/s, Epoch=46, LR=5.36e-6, Valid_Loss=5.32e-5, accuracy=1, f1=1]


Valid Loss : 5.3243997351795455e-05


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=47, LR=2.42e-6, Train_Loss=0.00511, accuracy=0.998, f1=0.997]
100%|██████████| 73/73 [00:17<00:00,  4.07it/s, Epoch=47, LR=2.42e-6, Valid_Loss=4.89e-5, accuracy=1, f1=1]


Valid Loss : 4.885214592479602e-05


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=48, LR=1.02e-6, Train_Loss=0.00481, accuracy=0.998, f1=0.996]
100%|██████████| 73/73 [00:17<00:00,  4.08it/s, Epoch=48, LR=1.02e-6, Valid_Loss=5.36e-5, accuracy=1, f1=1]


Valid Loss : 5.3623909323754625e-05


100%|██████████| 577/577 [03:33<00:00,  2.71it/s, Epoch=49, LR=1.86e-6, Train_Loss=0.0176, accuracy=0.995, f1=0.99]
100%|██████████| 73/73 [00:17<00:00,  4.06it/s, Epoch=49, LR=1.86e-6, Valid_Loss=0.00149, accuracy=0.999, f1=0.998]


Valid Loss : 0.001488578301155681


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=50, LR=4.52e-6, Train_Loss=0.0078, accuracy=0.997, f1=0.993]
100%|██████████| 73/73 [00:17<00:00,  4.07it/s, Epoch=50, LR=4.52e-6, Valid_Loss=0.000258, accuracy=1, f1=1]


Valid Loss : 0.0002578216462487104


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=51, LR=7.67e-6, Train_Loss=0.00801, accuracy=0.998, f1=0.995]
100%|██████████| 73/73 [00:18<00:00,  4.05it/s, Epoch=51, LR=7.67e-6, Valid_Loss=0.0153, accuracy=0.997, f1=0.992]


Valid Loss : 0.015252053557007168


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=52, LR=9.73e-6, Train_Loss=0.0144, accuracy=0.997, f1=0.993]
100%|██████████| 73/73 [00:17<00:00,  4.06it/s, Epoch=52, LR=9.73e-6, Valid_Loss=0.00508, accuracy=0.999, f1=0.998]


Valid Loss : 0.005080828317941718


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=53, LR=9.68e-6, Train_Loss=0.00529, accuracy=0.998, f1=0.997]
100%|██████████| 73/73 [00:17<00:00,  4.08it/s, Epoch=53, LR=9.68e-6, Valid_Loss=7.99e-5, accuracy=1, f1=1]


Valid Loss : 7.992474331332506e-05


100%|██████████| 577/577 [03:34<00:00,  2.70it/s, Epoch=54, LR=7.54e-6, Train_Loss=0.00553, accuracy=0.999, f1=0.997]
100%|██████████| 73/73 [00:18<00:00,  4.04it/s, Epoch=54, LR=7.54e-6, Valid_Loss=0.00571, accuracy=0.997, f1=0.994]


Valid Loss : 0.005714325926991291


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=55, LR=4.38e-6, Train_Loss=0.0117, accuracy=0.997, f1=0.992]
100%|██████████| 73/73 [00:17<00:00,  4.08it/s, Epoch=55, LR=4.38e-6, Valid_Loss=1.68e-5, accuracy=1, f1=1]


Valid Loss : 1.677868719925156e-05
Validation Loss improved( 2.3525116055843903e-05 ---> 1.677868719925156e-05  )
Model Saved


100%|██████████| 577/577 [03:35<00:00,  2.67it/s, Epoch=56, LR=1.78e-6, Train_Loss=0.00762, accuracy=0.998, f1=0.997]
100%|██████████| 73/73 [00:18<00:00,  4.04it/s, Epoch=56, LR=1.78e-6, Valid_Loss=5.93e-5, accuracy=1, f1=1]


Valid Loss : 5.9253846042373085e-05


100%|██████████| 577/577 [03:33<00:00,  2.71it/s, Epoch=57, LR=1.04e-6, Train_Loss=0.0089, accuracy=0.998, f1=0.997]
100%|██████████| 73/73 [00:17<00:00,  4.09it/s, Epoch=57, LR=1.04e-6, Valid_Loss=0.00793, accuracy=0.999, f1=0.997]


Valid Loss : 0.007934271850344357


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=58, LR=2.52e-6, Train_Loss=0.0123, accuracy=0.996, f1=0.992]
100%|██████████| 73/73 [00:17<00:00,  4.08it/s, Epoch=58, LR=2.52e-6, Valid_Loss=0.0118, accuracy=0.996, f1=0.987]


Valid Loss : 0.011762693562600452


100%|██████████| 577/577 [03:33<00:00,  2.70it/s, Epoch=59, LR=5.5e-6, Train_Loss=0.00814, accuracy=0.998, f1=0.996]
100%|██████████| 73/73 [00:17<00:00,  4.06it/s, Epoch=59, LR=5.5e-6, Valid_Loss=0.0176, accuracy=0.985, f1=0.981]


Valid Loss : 0.01760747180647987
Training complete in 3h 15m 20s
Best Loss: 0.0000
