# Library

In [1]:
! pip install wandb opencv-python-headless==4.1.2.30 albumentations torch-summary timm==0.5.4 einops joblib icecream  -qq -U

In [2]:
from sklearn.metrics import f1_score
from glob import glob
import pathlib
from pathlib import Path
from torchsummary import summary
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from pprint import pprint
import urllib.request
import csv
import numpy as np
from einops import rearrange, reduce, repeat
from torch.cuda import amp
from tqdm import tqdm
import wandb
import time
import copy
from collections import defaultdict
from sklearn.metrics import mean_squared_error
import joblib
import gc
import os
from icecream import ic
from sklearn.model_selection import train_test_split
import gc
import cv2
import copy
import time
import random
from PIL import Image

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, KFold

import timm

import json

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
c_ = Fore.CYAN
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


from sklearn.metrics import f1_score

# ENV

In [3]:

ENV = 'COLAB'
# ENV = 'KAGGLE'
# ENV = 'SYSTEM'

# Option for Mixed Precision
# FP16 = True
FP16 = False


CONFIG = dict(
    seed=42,
    nickname='tf_efficientnetv2_m finecutmix',
    backbone='tf_efficientnetv2_m',
    embedder=None,
    train_batch_size=8,
    valid_batch_size=16,
    img_size=384,
    num_epochs=50,
    early_stopping=False,
    early_stopping_step=5,
    learning_rate=1e-4,
    scheduler='CosineAnnealingLR',
    min_lr=1e-6,
    T_max=100,
    num_classes=25,
    weight_decay=1e-6,
    device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    competition='lg',
    _wandb_kernel='deb'
)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# SET SEED 

In [4]:
def set_seed(seed=42):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)


set_seed(CONFIG['seed'])


# Read the Data


In [5]:
import wandb
run = wandb.init(project="lg",
                 entity="jiwon7258",
                 config=CONFIG,
                 job_type='train',
                 id='lwda8bn3',
                 resume='must',
                 )
dataset = wandb.run.use_artifact(
    'jiwon7258/lg/lg_train:v0', type='dataset')

run.name = CONFIG['nickname']

# Download the artifact's contents
dataset_dir = dataset.download()
dataset_dir = Path(dataset_dir)


[34m[1mwandb[0m: Currently logged in as: [33mjiwon7258[0m (use `wandb login --relogin` to force relogin)


[34m[1mwandb[0m: Downloading large artifact lg_train:v0, 918.24MB. 17301 files... Done. 0:0:0


In [6]:
TRAIN_PATH = dataset_dir
# TEST_PATH = dataset_dir / 'test'

# Augmentations

In [7]:
data_transforms = {
    "train": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1,
                           rotate_limit=90, p=0.5),
        A.RGBShift(r_shift_limit=15, g_shift_limit=15,
                   b_shift_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Normalize(),
        ToTensorV2()], p=1.),

    "valid": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.Normalize(),
        ToTensorV2()], p=1.)
}


# Dataset

In [8]:
train_csv = sorted(glob(str(TRAIN_PATH / '*/*.csv')))
train_jpg = sorted(glob(str(TRAIN_PATH / '*/*.jpg')))
train_json = sorted(glob(str(TRAIN_PATH / '*/*.json')))


crops = []
diseases = []
risks = []
labels = []

for i in range(len(train_json)):
    with open(train_json[i], 'r') as f:
        sample = json.load(f)
        crop = sample['annotations']['crop']
        disease = sample['annotations']['disease']
        risk = sample['annotations']['risk']
        label=f"{crop}_{disease}_{risk}"
    
        crops.append(crop)
        diseases.append(disease)
        risks.append(risk)
        labels.append(label)
        
label_unique = sorted(np.unique(labels))
label_unique = {key:value for key,value in zip(label_unique, range(len(label_unique)))}

train_labels = [label_unique[k] for k in labels] # len = train_len

In [9]:
train_jpg = np.array(train_jpg)
train_labels = np.array(train_labels)

In [10]:
class CustomDataset(Dataset):
    def __init__(self, train_img, train_label, transforms=None):
        self.imgs = train_img
        self.labels = train_label
        self.transforms = transforms
        
    def __len__(self):
        return len(self.imgs)
    
    def __getitem__(self, index):
        img_path = self.imgs[index]
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        target = self.labels[index]
        
        if self.transforms:
            img = self.transforms(image=img)["image"]
            
        return img, target
    
# trainDataset = CustomDataset(X_train, y_train, transforms = data_transforms['train'])
# trainDataloader = DataLoader(
#     trainDataset, batch_size=CONFIG['train_batch_size'], shuffle=True)

# validDataset = CustomDataset(X_val, y_val, transforms = data_transforms['valid'])
# validDataloader = DataLoader(validDataset, batch_size = CONFIG['valid_batch_size'], shuffle = True)

In [11]:
from sklearn.model_selection import StratifiedKFold
train_datasets = []
valid_datasets = []
train_dataloaders = []
valid_dataloaders = []

skf = StratifiedKFold(n_splits = 5)

for step, (train_index, val_index) in enumerate(skf.split(X = train_jpg, y= train_labels)):
    X_train = train_jpg[train_index]
    y_train = train_labels[train_index]
    X_val = train_jpg[val_index]
    y_val = train_labels[val_index]
    train_datasets.append(CustomDataset(
        X_train, y_train, transforms=data_transforms['train']))
    valid_datasets.append(CustomDataset(
        X_val, y_val, transforms=data_transforms['valid']))
    train_dataloaders.append(DataLoader(
        train_datasets[step], batch_size=CONFIG['train_batch_size'], shuffle=True)
    )
    valid_dataloaders.append(
        DataLoader(
            valid_datasets[step], batch_size=CONFIG['valid_batch_size'], shuffle=True)
    )


# Model

In [12]:
class Model(nn.Module):
    def __init__(self, backbone, embedder, pretrained=True):
        super(Model, self).__init__()
        self.backbone = timm.create_model(backbone, pretrained=pretrained)
        self.backbone.reset_classifier(0)
        self.n_features = self.backbone.num_features
        self.fc = nn.Linear(self.n_features, CONFIG['num_classes'])

    def forward(self, images):
        # features = (bs, embedding_size)
        features = self.backbone(images)
        # outputs  = (bs, num_classes)
        # if isinstance(features, tuple):
        #   features = features[0]
        output = self.fc(features)
        return output


model = Model(CONFIG['backbone'], CONFIG['embedder'], pretrained=True)
model.to(CONFIG['device']);


In [13]:
optimizer = torch.optim.Adam(
    params=model.parameters(), lr=CONFIG['learning_rate'], weight_decay=CONFIG['weight_decay'])


In [14]:
def criterion(logits: torch.tensor, targets: torch.tensor):
    return nn.CrossEntropyLoss()(logits.view(-1,CONFIG['num_classes']), targets.view(-1))

# CutMix

In [15]:
def cutmix(img, target):
    """ 
    img : (bs, C, H, W)
    target
        - (bs,)
        - integer scalar
    """
    batch_size, C, H, W, = img.shape
    # ic(img.shape)

    img_a = img
    target_a = target
    img_b = img
    target_b = target

    mask = np.arange(batch_size)
    mask = np.random.permutation(mask)
    # ic(mask)
    img_b = img_a[mask]
    target_b = target_a[mask]
    # ic(target, target_b)

    lam = np.random.uniform(low=0.3, high=0.7)
    r_x = np.random.uniform(low=0, high=W)
    r_y = np.random.uniform(low=0, high=H)
    r_w = W * np.sqrt(1 - lam)
    r_h = H * np.sqrt(1 - lam)
    ic(lam, r_x, r_y, r_w, r_h)
    x1 = np.int(np.clip((r_x - r_w) / 2, 0, W))
    x2 = np.int(np.clip((r_x + r_w) / 2, 0, W))
    y1 = np.int(np.clip((r_y - r_h) / 2, 0, H))
    y2 = np.int(np.clip((r_y + r_h) / 2, 0, H))
    ic(x1, x2, y1, y2)

    img_a[:, :, y1:y2, x1:x2] = img_b[:, :, y1:y2, x1:x2]

    # Adjust lambda to exact ratio

    lam = 1 - (x2 - x1) * (y2 - y1) / float(W * H)

    return img_a, target_b, lam


   # Training Function

In [16]:
class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    # train 모드로 변경
    model.train()

    # for the Mixed Precision
    # Pytorch 예제 : https://pytorch.org/docs/stable/notes/amp_examples.html#amp-examples
    if(FP16):
        scaler = amp.GradScaler()

    losses = AverageMeter()
    accuracy = AverageMeter()
    f1 = AverageMeter()

    bar = tqdm(enumerate(dataloader), total=len(dataloader))

    for step, (img, target) in bar:
        img, target_b, lam = cutmix(img, target)

        img = img.to(device)
        target = target.to(device)
        target_b = target_b.to(device)

        batch_size = img.shape[0]

        if(FP16):
            with amp.autocast(enabled=True):
                logits = model(img)
                loss = criterion(logits, target) * lam + \
                    criterion(logits, target_b) * (1-lam)

                # loss를 Scale
                # Scaled Grdients를 계산(call)하기 위해 scaled loss를 backward()
                scaler.scale(loss).backward()
                # scaler.step() first unscales the gradients of the optimizer's assigned params.
                # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
                # otherwise, optimizer.step() is skipped.
                scaler.step(optimizer)

                # Updates the scale for next iteration.
                scaler.update()

        else:
            logits = model(img)
            loss = criterion(logits, target) * lam + \
                criterion(logits, target_b) * (1-lam)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()

        # zero the parameter gradients
        optimizer.zero_grad()

        # change learning rate by Scheduler
        if scheduler is not None:
            scheduler.step()

        # loss.item()은 loss를 Python Float으로 반환
        losses.update(loss.item())

        # logits
        logits = logits.detach().cpu()

        # acc, f1
        probs = torch.softmax(logits, dim = -1)
        output = np.argmax(probs, axis=-1)
        output_b = np.argsort(probs)[:,-2]
        if (lam >= 0.5):
            step_acc = np.mean(
                output.view(-1).numpy() == target.view(-1).detach().cpu().numpy())
            step_f1 = f1_score(output.view(-1).numpy(),
                               target.view(-1).detach().cpu().numpy(), average='macro')
            step_acc_b = np.mean(
                output_b.view(-1).numpy() == target_b.view(-1).detach().cpu().numpy())
            step_f1_b = f1_score(output_b.view(-1).numpy(),
                                 target_b.view(-1).detach().cpu().numpy(), average='macro')
        else:
            step_acc = np.mean(
                output.view(-1).numpy() == target_b.view(-1).detach().cpu().numpy())
            step_f1 = f1_score(output.view(-1).numpy(),
                               target_b.view(-1).detach().cpu().numpy(), average='macro')
            step_acc_b = np.mean(
                output_b.view(-1).numpy() == target.view(-1).detach().cpu().numpy())
            step_f1_b = f1_score(output_b.view(-1).numpy(),
                                 target.view(-1).detach().cpu().numpy(), average='macro')

                                 
        step_acc = step_acc * lam + step_acc_b * (1-lam)
        step_f1 = step_f1 * lam + step_f1_b * (1-lam)

        accuracy.update(step_acc)
        f1.update(step_f1)

        # loss
        train_loss = losses.avg
        train_acc = accuracy.avg
        train_f1 = f1.avg

        bar.set_postfix(
            Epoch=epoch, Train_Loss=train_loss, LR=optimizer.param_groups[
                0]["lr"], accuracy=train_acc, f1=train_f1
        )

    # Garbage Collector
    gc.collect()

    return losses.avg, accuracy.avg, f1.avg


   # Validation Function

In [17]:
@torch.no_grad()
def valid_one_epoch(model, dataloader, device, epoch):
    model.eval()

    losses = AverageMeter()
    accuracy = AverageMeter()
    f1 = AverageMeter()

    bar = tqdm(enumerate(dataloader), total=len(dataloader))

    for step, (img, target) in bar:
        img = img.to(device)
        target = target.to(device)

        batch_size = img.shape[0]

        logits = model(img)
        loss = criterion(logits, target)

        # loss.item()은 loss를 Python Float으로 반환
        losses.update(loss.item())

        # logits
        logits = logits.detach().cpu()

        output = np.argmax(torch.softmax(logits, dim=-1), axis=-1)
        step_acc = np.mean(
            output.view(-1).numpy() == target.view(-1).detach().cpu().numpy())
        step_f1 = f1_score(output.view(-1).numpy(),
                           target.view(-1).detach().cpu().numpy(), average='macro')

        accuracy.update(step_acc)
        f1.update(step_f1)

        # loss
        val_loss = losses.avg
        val_acc = accuracy.avg
        val_f1 = f1.avg

        bar.set_postfix(
            Epoch=epoch, Valid_Loss=val_loss, LR=optimizer.param_groups[
                0]["lr"], accuracy=val_acc, f1=val_f1
        )

    gc.collect()

    return losses.avg, accuracy.avg, f1.avg


In [18]:

def run_training(
    model,
    optimizer,
    scheduler,
    device,
    num_epochs,
    metric_prefix="",
    file_prefix="",
    early_stopping=True,
    early_stopping_step=10,
    START_EPOCH = 0,
):
    # To automatically log graidents
    wandb.watch(model, log_freq=100)

    if torch.cuda.is_available():
        print("[INFO] Using GPU:{}\n".format(torch.cuda.get_device_name()))

    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = np.inf
    history = defaultdict(list)
    early_stop_counter = 0

    # num_epochs만큼, train과 val을 실행한다
    for epoch in range(START_EPOCH, START_EPOCH + num_epochs):
        gc.collect()

        fold_num = 5
        fold = epoch % fold_num

        # for fold in range(fold_num) :

        trainDataloader = train_dataloaders[fold]
        validDataloader = valid_dataloaders[fold]

        train_train_loss, train_accuracy, train_f1 = train_one_epoch(
            model,
            optimizer,
            scheduler,
            dataloader=trainDataloader,
            device=device,
            epoch=epoch,
        )

        val_loss, val_accuracy, val_f1 = valid_one_epoch(
            model, validDataloader, device=device, epoch=epoch
        )


        history[f"{metric_prefix}Train Loss"].append(train_train_loss)
        history[f"{metric_prefix}Train Accuracy"].append(train_accuracy)
        history[f"{metric_prefix}Train F1"].append(train_f1)
        history[f"{metric_prefix}Valid Loss"].append(val_loss)
        history[f"{metric_prefix}Valid Accuracy"].append(val_accuracy)
        history[f"{metric_prefix}Valid F1"].append(val_f1)

        # Log the metrics
        wandb.log(
            {
                f"{metric_prefix}Train Loss": train_train_loss,
                f"{metric_prefix}Valid Loss": val_loss,
                f"{metric_prefix}Train Accuracy": train_accuracy,
                f"{metric_prefix}Valid Accuracy": val_accuracy,
                f"{metric_prefix}Train F1": train_f1,
                f"{metric_prefix}Valid F1": val_f1,
            }
        )

        print(f"Valid Loss : {val_loss}")

        torch.save(model.state_dict(), f'{CONFIG["nickname"]}last.bin')
        wandb.save(f'{CONFIG["nickname"]}last.bin')

        # deep copy the model
        if val_loss <= best_loss:
            early_stop_counter = 0

            print(
                f"Validation Loss improved( {best_loss} ---> {val_loss}  )"
            )

            # Update Best Loss
            best_loss = val_loss

            # Update Best Model Weight
            # run.summary['Best RMSE'] = best_loss
            best_model_wts = copy.deepcopy(model.state_dict())

            PATH = "{}epoch{:.0f}_Loss{:.4f}.bin".format(
                file_prefix, epoch, best_loss)
            torch.save(model.state_dict(), PATH)
            # Save a model file from the current directory
            wandb.save(PATH)

            print(f"Model Saved")

        elif early_stopping:
            early_stop_counter += 1
            if early_stop_counter > early_stopping_step:
                break

        START_EPOCH = epoch + 1
        # break

    end = time.time()
    time_elapsed = end - start
    print(
        "Training complete in {:.0f}h {:.0f}m {:.0f}s".format(
            time_elapsed // 3600,
            (time_elapsed % 3600) // 60,
            (time_elapsed % 3600) % 60,
        )
    )
    print("Best Loss: {:.4f}".format(best_loss))

    return model, history


In [19]:
MODEL_NAME = 'last.bin'
RUN_PATH = 'jiwon7258/lg/koxrlhhf'
wandb.restore(MODEL_NAME, RUN_PATH, root='./')
model.load_state_dict(torch.load(
    MODEL_NAME, map_location=CONFIG['device']))


<All keys matched successfully>

# Fast Start : 5 epoch

In [20]:
for param in model.backbone.named_parameters():
    param[1].requires_grad = False

In [21]:
ic.disable()
run_training(
    model=model,
    optimizer=optimizer,
    scheduler=torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer=optimizer, T_max=CONFIG['T_max'], eta_min=CONFIG['min_lr']),
    device=device,
    num_epochs=5,
    metric_prefix="",
    file_prefix="",
    early_stopping=CONFIG['early_stopping'],
    early_stopping_step=CONFIG['early_stopping_step'],
    START_EPOCH=0,
);


[INFO] Using GPU:Tesla P100-PCIE-16GB



100%|██████████| 577/577 [02:28<00:00,  3.89it/s, Epoch=0, LR=8.76e-5, Train_Loss=1.97, accuracy=0.585, f1=0.529]
100%|██████████| 73/73 [00:24<00:00,  2.92it/s, Epoch=0, LR=8.76e-5, Valid_Loss=0.00463, accuracy=0.999, f1=0.998]


Valid Loss : 0.0046257363717639435
Validation Loss improved( inf ---> 0.0046257363717639435  )
Model Saved


100%|██████████| 577/577 [02:22<00:00,  4.04it/s, Epoch=1, LR=5.67e-5, Train_Loss=1.55, accuracy=0.601, f1=0.544]
100%|██████████| 73/73 [00:24<00:00,  2.94it/s, Epoch=1, LR=5.67e-5, Valid_Loss=0.0216, accuracy=0.997, f1=0.993]


Valid Loss : 0.02160020579571185


100%|██████████| 577/577 [02:21<00:00,  4.08it/s, Epoch=2, LR=2.27e-5, Train_Loss=1.33, accuracy=0.601, f1=0.546]
100%|██████████| 73/73 [00:24<00:00,  3.00it/s, Epoch=2, LR=2.27e-5, Valid_Loss=0.0897, accuracy=0.997, f1=0.99]


Valid Loss : 0.0896594715368462


100%|██████████| 577/577 [02:21<00:00,  4.07it/s, Epoch=3, LR=2.56e-6, Train_Loss=1.26, accuracy=0.609, f1=0.552]
100%|██████████| 73/73 [00:24<00:00,  3.00it/s, Epoch=3, LR=2.56e-6, Valid_Loss=0.0839, accuracy=1, f1=1]


Valid Loss : 0.08392594632220594


100%|██████████| 577/577 [02:21<00:00,  4.07it/s, Epoch=4, LR=6.4e-6, Train_Loss=1.29, accuracy=0.583, f1=0.523]
100%|██████████| 73/73 [00:24<00:00,  3.01it/s, Epoch=4, LR=6.4e-6, Valid_Loss=0.0811, accuracy=0.999, f1=0.999]


Valid Loss : 0.08105312093888244
Training complete in 0h 14m 5s
Best Loss: 0.0046


# 400 epochs

In [24]:
optimizer = torch.optim.Adam(
    params=model.parameters(), lr=1e-6, weight_decay=CONFIG['weight_decay'])

In [25]:
for param in model.backbone.named_parameters():
    param[1].requires_grad = True


In [None]:
ic.disable()
run_training(
    model=model,
    optimizer=optimizer,
    # scheduler=torch.optim.lr_scheduler.CosineAnnealingLR(
    #     optimizer=optimizer, T_max=CONFIG['T_max'], eta_min=CONFIG['min_lr']),
    scheduler=None,
    device=device,
    num_epochs=400,
    metric_prefix="",
    file_prefix="",
    early_stopping=CONFIG['early_stopping'],
    early_stopping_step=CONFIG['early_stopping_step'],
    START_EPOCH=17,
);


[INFO] Using GPU:Tesla P100-PCIE-16GB



100%|██████████| 577/577 [07:00<00:00,  1.37it/s, Epoch=17, LR=1e-6, Train_Loss=1.24, accuracy=0.614, f1=0.56]
100%|██████████| 73/73 [00:24<00:00,  2.99it/s, Epoch=17, LR=1e-6, Valid_Loss=0.0914, accuracy=0.997, f1=0.993]


Valid Loss : 0.0913543612887598
Validation Loss improved( inf ---> 0.0913543612887598  )
Model Saved


100%|██████████| 577/577 [07:00<00:00,  1.37it/s, Epoch=18, LR=1e-6, Train_Loss=1.2, accuracy=0.617, f1=0.562]
100%|██████████| 73/73 [00:24<00:00,  2.99it/s, Epoch=18, LR=1e-6, Valid_Loss=0.0842, accuracy=1, f1=1]


Valid Loss : 0.08419596450802015
Validation Loss improved( 0.0913543612887598 ---> 0.08419596450802015  )
Model Saved


100%|██████████| 577/577 [07:01<00:00,  1.37it/s, Epoch=19, LR=1e-6, Train_Loss=1.2, accuracy=0.612, f1=0.552]
100%|██████████| 73/73 [00:24<00:00,  2.99it/s, Epoch=19, LR=1e-6, Valid_Loss=0.0748, accuracy=1, f1=1]


Valid Loss : 0.0747992258059652
Validation Loss improved( 0.08419596450802015 ---> 0.0747992258059652  )
Model Saved


100%|██████████| 577/577 [07:01<00:00,  1.37it/s, Epoch=20, LR=1e-6, Train_Loss=1.17, accuracy=0.62, f1=0.564]
100%|██████████| 73/73 [00:24<00:00,  3.00it/s, Epoch=20, LR=1e-6, Valid_Loss=0.0821, accuracy=0.999, f1=0.997]


Valid Loss : 0.08207957038324173


100%|██████████| 577/577 [07:01<00:00,  1.37it/s, Epoch=21, LR=1e-6, Train_Loss=1.15, accuracy=0.633, f1=0.577]
100%|██████████| 73/73 [00:24<00:00,  2.99it/s, Epoch=21, LR=1e-6, Valid_Loss=0.116, accuracy=0.997, f1=0.99]


Valid Loss : 0.11624851475839745


100%|██████████| 577/577 [07:00<00:00,  1.37it/s, Epoch=22, LR=1e-6, Train_Loss=1.16, accuracy=0.635, f1=0.574]
100%|██████████| 73/73 [00:24<00:00,  3.00it/s, Epoch=22, LR=1e-6, Valid_Loss=0.112, accuracy=0.997, f1=0.992]


Valid Loss : 0.1117869397140529


100%|██████████| 577/577 [07:03<00:00,  1.36it/s, Epoch=23, LR=1e-6, Train_Loss=1.14, accuracy=0.639, f1=0.579]
100%|██████████| 73/73 [00:24<00:00,  2.98it/s, Epoch=23, LR=1e-6, Valid_Loss=0.0834, accuracy=1, f1=1]


Valid Loss : 0.08344332332888695


100%|██████████| 577/577 [07:03<00:00,  1.36it/s, Epoch=24, LR=1e-6, Train_Loss=1.11, accuracy=0.643, f1=0.584]
100%|██████████| 73/73 [00:24<00:00,  2.98it/s, Epoch=24, LR=1e-6, Valid_Loss=0.119, accuracy=0.999, f1=0.999]


Valid Loss : 0.11867698156976536


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=25, LR=1e-6, Train_Loss=1.11, accuracy=0.65, f1=0.592]
100%|██████████| 73/73 [00:24<00:00,  2.97it/s, Epoch=25, LR=1e-6, Valid_Loss=0.111, accuracy=0.999, f1=0.999]


Valid Loss : 0.1107062189138099


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=26, LR=1e-6, Train_Loss=1.09, accuracy=0.664, f1=0.606]
100%|██████████| 73/73 [00:24<00:00,  2.97it/s, Epoch=26, LR=1e-6, Valid_Loss=0.107, accuracy=0.997, f1=0.994]


Valid Loss : 0.10667512652604547


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=27, LR=1e-6, Train_Loss=1.11, accuracy=0.647, f1=0.59]
100%|██████████| 73/73 [00:24<00:00,  2.99it/s, Epoch=27, LR=1e-6, Valid_Loss=0.109, accuracy=0.997, f1=0.989]


Valid Loss : 0.1086614359527418


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=28, LR=1e-6, Train_Loss=1.1, accuracy=0.643, f1=0.584]
100%|██████████| 73/73 [00:24<00:00,  2.97it/s, Epoch=28, LR=1e-6, Valid_Loss=0.11, accuracy=1, f1=1]


Valid Loss : 0.10978255824071087


100%|██████████| 577/577 [07:03<00:00,  1.36it/s, Epoch=29, LR=1e-6, Train_Loss=1.08, accuracy=0.651, f1=0.593]
100%|██████████| 73/73 [00:24<00:00,  2.97it/s, Epoch=29, LR=1e-6, Valid_Loss=0.11, accuracy=0.998, f1=0.995]


Valid Loss : 0.11034604641672684


100%|██████████| 577/577 [07:03<00:00,  1.36it/s, Epoch=30, LR=1e-6, Train_Loss=1.09, accuracy=0.643, f1=0.583]
100%|██████████| 73/73 [00:24<00:00,  2.98it/s, Epoch=30, LR=1e-6, Valid_Loss=0.108, accuracy=1, f1=1]


Valid Loss : 0.10751546327381918


100%|██████████| 577/577 [07:03<00:00,  1.36it/s, Epoch=31, LR=1e-6, Train_Loss=1.08, accuracy=0.663, f1=0.607]
100%|██████████| 73/73 [00:24<00:00,  2.94it/s, Epoch=31, LR=1e-6, Valid_Loss=0.123, accuracy=0.995, f1=0.987]


Valid Loss : 0.12283941470596889


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=32, LR=1e-6, Train_Loss=1.07, accuracy=0.666, f1=0.608]
100%|██████████| 73/73 [00:24<00:00,  2.98it/s, Epoch=32, LR=1e-6, Valid_Loss=0.103, accuracy=0.997, f1=0.992]


Valid Loss : 0.10283501109440032


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=33, LR=1e-6, Train_Loss=1.06, accuracy=0.66, f1=0.6]
100%|██████████| 73/73 [00:24<00:00,  2.97it/s, Epoch=33, LR=1e-6, Valid_Loss=0.118, accuracy=1, f1=1]


Valid Loss : 0.11758421106289511


100%|██████████| 577/577 [07:06<00:00,  1.35it/s, Epoch=34, LR=1e-6, Train_Loss=1.05, accuracy=0.657, f1=0.597]
100%|██████████| 73/73 [00:24<00:00,  2.96it/s, Epoch=34, LR=1e-6, Valid_Loss=0.111, accuracy=0.998, f1=0.995]


Valid Loss : 0.11148094651225494


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=35, LR=1e-6, Train_Loss=1.03, accuracy=0.664, f1=0.603]
100%|██████████| 73/73 [00:24<00:00,  2.96it/s, Epoch=35, LR=1e-6, Valid_Loss=0.105, accuracy=0.999, f1=0.997]


Valid Loss : 0.1050451397895813


100%|██████████| 577/577 [07:03<00:00,  1.36it/s, Epoch=36, LR=1e-6, Train_Loss=1.02, accuracy=0.668, f1=0.608]
100%|██████████| 73/73 [00:24<00:00,  2.99it/s, Epoch=36, LR=1e-6, Valid_Loss=0.115, accuracy=0.995, f1=0.986]


Valid Loss : 0.11500429173242556


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=37, LR=1e-6, Train_Loss=1.04, accuracy=0.668, f1=0.609]
100%|██████████| 73/73 [00:24<00:00,  2.97it/s, Epoch=37, LR=1e-6, Valid_Loss=0.121, accuracy=0.996, f1=0.991]


Valid Loss : 0.12103959310748806


100%|██████████| 577/577 [07:05<00:00,  1.36it/s, Epoch=38, LR=1e-6, Train_Loss=1.02, accuracy=0.673, f1=0.613]
100%|██████████| 73/73 [00:24<00:00,  2.98it/s, Epoch=38, LR=1e-6, Valid_Loss=0.0767, accuracy=1, f1=1]


Valid Loss : 0.07668992320764555


100%|██████████| 577/577 [07:05<00:00,  1.36it/s, Epoch=39, LR=1e-6, Train_Loss=1.01, accuracy=0.669, f1=0.606]
100%|██████████| 73/73 [00:24<00:00,  2.96it/s, Epoch=39, LR=1e-6, Valid_Loss=0.0958, accuracy=0.997, f1=0.992]


Valid Loss : 0.09577062169778837


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=40, LR=1e-6, Train_Loss=1.01, accuracy=0.676, f1=0.616]
100%|██████████| 73/73 [00:24<00:00,  2.97it/s, Epoch=40, LR=1e-6, Valid_Loss=0.0855, accuracy=1, f1=1]


Valid Loss : 0.08552178219981389


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=41, LR=1e-6, Train_Loss=1.01, accuracy=0.668, f1=0.607]
100%|██████████| 73/73 [00:24<00:00,  2.97it/s, Epoch=41, LR=1e-6, Valid_Loss=0.0859, accuracy=0.995, f1=0.984]


Valid Loss : 0.0858684115826267


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=42, LR=1e-6, Train_Loss=1, accuracy=0.683, f1=0.623]
100%|██████████| 73/73 [00:24<00:00,  2.97it/s, Epoch=42, LR=1e-6, Valid_Loss=0.12, accuracy=0.996, f1=0.989]


Valid Loss : 0.12007827689386394


100%|██████████| 577/577 [07:03<00:00,  1.36it/s, Epoch=43, LR=1e-6, Train_Loss=0.984, accuracy=0.681, f1=0.621]
100%|██████████| 73/73 [00:24<00:00,  2.98it/s, Epoch=43, LR=1e-6, Valid_Loss=0.0838, accuracy=1, f1=1]


Valid Loss : 0.08384198491295723


100%|██████████| 577/577 [07:05<00:00,  1.36it/s, Epoch=44, LR=1e-6, Train_Loss=0.996, accuracy=0.687, f1=0.627]
100%|██████████| 73/73 [00:24<00:00,  2.94it/s, Epoch=44, LR=1e-6, Valid_Loss=0.0701, accuracy=0.998, f1=0.995]


Valid Loss : 0.0700818670097075
Validation Loss improved( 0.0747992258059652 ---> 0.0700818670097075  )
Model Saved


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=45, LR=1e-6, Train_Loss=0.978, accuracy=0.673, f1=0.612]
100%|██████████| 73/73 [00:24<00:00,  2.96it/s, Epoch=45, LR=1e-6, Valid_Loss=0.0781, accuracy=0.999, f1=0.997]


Valid Loss : 0.07809016680064267


100%|██████████| 577/577 [07:05<00:00,  1.36it/s, Epoch=46, LR=1e-6, Train_Loss=0.971, accuracy=0.694, f1=0.635]
100%|██████████| 73/73 [00:24<00:00,  2.96it/s, Epoch=46, LR=1e-6, Valid_Loss=0.0889, accuracy=0.996, f1=0.988]


Valid Loss : 0.08887619683391428


100%|██████████| 577/577 [07:05<00:00,  1.36it/s, Epoch=47, LR=1e-6, Train_Loss=0.973, accuracy=0.699, f1=0.64]
100%|██████████| 73/73 [00:24<00:00,  2.96it/s, Epoch=47, LR=1e-6, Valid_Loss=0.128, accuracy=0.997, f1=0.991]


Valid Loss : 0.1281316402329974


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=48, LR=1e-6, Train_Loss=0.962, accuracy=0.688, f1=0.627]
100%|██████████| 73/73 [00:24<00:00,  2.99it/s, Epoch=48, LR=1e-6, Valid_Loss=0.0788, accuracy=1, f1=1]


Valid Loss : 0.07879961014100134


100%|██████████| 577/577 [07:03<00:00,  1.36it/s, Epoch=49, LR=1e-6, Train_Loss=0.952, accuracy=0.696, f1=0.637]
100%|██████████| 73/73 [00:24<00:00,  2.99it/s, Epoch=49, LR=1e-6, Valid_Loss=0.0905, accuracy=0.997, f1=0.993]


Valid Loss : 0.09045899424650898


100%|██████████| 577/577 [07:03<00:00,  1.36it/s, Epoch=50, LR=1e-6, Train_Loss=0.956, accuracy=0.691, f1=0.629]
100%|██████████| 73/73 [00:24<00:00,  2.98it/s, Epoch=50, LR=1e-6, Valid_Loss=0.08, accuracy=0.999, f1=0.997]


Valid Loss : 0.07995206806553554


100%|██████████| 577/577 [07:03<00:00,  1.36it/s, Epoch=51, LR=1e-6, Train_Loss=0.961, accuracy=0.687, f1=0.623]
100%|██████████| 73/73 [00:24<00:00,  2.97it/s, Epoch=51, LR=1e-6, Valid_Loss=0.0664, accuracy=0.997, f1=0.992]


Valid Loss : 0.06643219873921512
Validation Loss improved( 0.0700818670097075 ---> 0.06643219873921512  )
Model Saved


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=52, LR=1e-6, Train_Loss=0.948, accuracy=0.694, f1=0.632]
100%|██████████| 73/73 [00:24<00:00,  2.97it/s, Epoch=52, LR=1e-6, Valid_Loss=0.103, accuracy=0.995, f1=0.988]


Valid Loss : 0.10285864336645767


100%|██████████| 577/577 [07:03<00:00,  1.36it/s, Epoch=53, LR=1e-6, Train_Loss=0.94, accuracy=0.708, f1=0.647]
100%|██████████| 73/73 [00:24<00:00,  2.99it/s, Epoch=53, LR=1e-6, Valid_Loss=0.0614, accuracy=1, f1=1]


Valid Loss : 0.06137495329731131
Validation Loss improved( 0.06643219873921512 ---> 0.06137495329731131  )
Model Saved


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=54, LR=1e-6, Train_Loss=0.946, accuracy=0.69, f1=0.626]
100%|██████████| 73/73 [00:24<00:00,  2.96it/s, Epoch=54, LR=1e-6, Valid_Loss=0.0799, accuracy=0.998, f1=0.996]


Valid Loss : 0.07989563445930611


100%|██████████| 577/577 [07:04<00:00,  1.36it/s, Epoch=55, LR=1e-6, Train_Loss=0.916, accuracy=0.705, f1=0.644]
100%|██████████| 73/73 [00:24<00:00,  2.98it/s, Epoch=55, LR=1e-6, Valid_Loss=0.0575, accuracy=0.999, f1=0.997]


Valid Loss : 0.05752313489170924
Validation Loss improved( 0.06137495329731131 ---> 0.05752313489170924  )
Model Saved


 58%|█████▊    | 336/577 [04:08<02:57,  1.36it/s, Epoch=56, LR=1e-6, Train_Loss=0.93, accuracy=0.7, f1=0.639]