# Trial of Pseudo-labeling by Cassava Model [Train with pseudo-label]

## Purpose: Improve the generalization of the model by using a pseudo-labeled dataset

In this notebook, I try training an EfficientNet with pseudo-labeled data from the [New Plant Diseases Dataset](https://www.kaggle.com/vipoooool/new-plant-diseases-dataset).  

By adding the pseudo-labeled data, I can enlarge the training set and possibly improve the model's generalization.  
The motivation is based on [Noisy Student](https://arxiv.org/abs/1911.04252).


This notebook is based on <https://www.kaggle.com/yasufuminakama/cassava-resnext50-32x4d-starter-training>.

In [None]:
# !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
# !python pytorch-xla-env-setup.py

In [None]:
pip install timm

In [None]:
pip install torch_optimizer

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import glob
import os

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

For the training, I use a pre-trained model (from `timm`) and the RAdam optimizer (from `torch_optimizer`), which is why these libraries are installed above.

In [None]:
# import torch_xla
# import torch_xla.core.xla_model as xm

# Definition of hyper parameters

In [None]:
# Experiment configuration shared by the rest of the notebook.
# NOTE(review): 'fold_num', 'seed', 'valid_bs', 'T_0', 'min_lr', 'weight_decay',
# 'verbose_step', 'device', 'gradient_accumulation_steps' and 'pre-train' are not
# read by any active code visible in this notebook; 'model_average' is only
# referenced from commented-out code.
CFG = {
    'fold_num': 5,
    'seed': 719,
    'model_arch': 'tf_efficientnet_b4_ns',  # timm architecture name passed to the model wrapper
    'img_size': 512,  # side length (pixels) used by the train/validation transforms
    'epochs': 10,
    'train_bs': 10,  # training batch size
    'valid_bs': 32,
    'T_0': 10,
    'lr': 5e-3,  # optimizer learning rate, also passed to OneCycleLR as its max_lr argument
    'min_lr': 1e-6,
    'weight_decay':1e-6,
    'num_workers': 4,  # DataLoader worker processes
    'accum_iter': 2, # number of batches whose gradients are accumulated before each optimizer step
    'verbose_step': 1,
    'device': 'cuda',
    'target_size': 5,  # number of output classes of the classifier head
    "gradient_accumulation_steps": 1, 
    "max_grad_norm": 5,  # gradient-norm clipping threshold
    "print_freq": 10,  # print a progress line every N batches
    "label_smoothing": 0.0,  # only used by the bi-tempered loss
    "t1": 1.0,  # bi-tempered loss temperature 1
    "t2": 1.0,  # bi-tempered loss temperature 2
    "loss": "bce", # one of: bi_tempered_loss, logloss, KLDivLoss, bce
    "optimizer": "AdamW", # one of: RAdam, AdamW
    "scheduler": "OneCycleLR",  # any other value disables the LR scheduler
    "model_average": 3,
    "pre-train": True,
    "use_2019": True,  # when True, the 2019 images and pseudo-labeled rows are appended to the training data
}
# Folds that are actually trained (each listed fold is used once as the validation fold).
TRAIN_FOLDS = [0]

# gpu run
device = "cuda"
# device = xm.xla_device()

# Load dataframes

In [None]:
# Paths of all .JPG files of the augmented New Plant Diseases dataset.
data_dirs = glob.glob("/kaggle/input/new-plant-diseases-dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/*/*/*.JPG")
# Pseudo-labels produced beforehand; columns "0".."4" presumably hold the
# per-class scores predicted by the Cassava model (verify against the CSV).
plants_df = pd.read_csv("../input/cassava-pseudo-labeling-of-plants-disease/plants_df.csv")

# Keep only the rows where at least one class score exceeds the confidence
# threshold `th` (0.7), i.e. where the largest of the five scores is above it.
th = 0.7
class_columns = ["0", "1", "2", "3", "4"]
confident_rows = plants_df[class_columns].max(axis=1) > th
plants_df = plants_df[confident_rows].reset_index(drop=True)

In [None]:
# Notebook display only: summary statistics of the pseudo-labeled dataframe.
plants_df.describe()

In [None]:
# Competition metadata: training labels, the sample submission (used as the
# test listing) and the mapping from label number to disease name.
train = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')
test = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')
label_map = pd.read_json('../input/cassava-leaf-disease-classification/label_num_to_disease_map.json', 
                         orient='index')

In [None]:
# Root of the merged Cassava dataset and its image folder; TRAIN_DIR_2019 is
# prepended to image ids when the training dataframe is built.
DATA_PATH_2019 = '../input/cassava-leaf-disease-merged/'
TRAIN_DIR_2019 = DATA_PATH_2019 + 'train/'

In [None]:
# Label file of the merged dataset; its "source" column is later used to select the 2019 rows.
train_merged = pd.read_csv("../input/cassava-leaf-disease-merged/merged.csv")

In [None]:
# Directory that receives the log file, checkpoints and the OOF csv.
OUTPUT_DIR = './'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Image folders of the Cassava Leaf Disease Classification competition.
TRAIN_PATH = '../input/cassava-leaf-disease-classification/train_images'
TEST_PATH = '../input/cassava-leaf-disease-classification/test_images'

In [None]:
import os
import math
import time
import random
import shutil
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter
import matplotlib.pyplot as plt

import scipy as sp
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau, OneCycleLR
from skimage import io, transform

import torch_optimizer as optim
import timm
from torchvision.transforms import transforms

from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose
    )
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

import warnings 
warnings.filterwarnings('ignore')

# Dataloader

In [None]:
class TrainDataset(Dataset):
    """Dataset yielding ``(image, soft_label)`` pairs for training/validation.

    Args:
        df: DataFrame with an "image_id" column holding the full image path
            and the label columns "0".."4" (one-hot or soft labels).
        transform: optional callable invoked as ``transform(image=...)`` whose
            result exposes the transformed image under the "image" key.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df["image_id"].values
        # `astype` returns a new array, so the result has to be assigned.
        # Without the assignment mixed bool/float label columns stay an
        # object array, which `torch.tensor` cannot convert.
        self.labels = df[['0', '1', '2', '3', '4']].values.astype("float32")
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # "image_id" already contains the complete path of the image file.
        image = io.imread(self.file_names[idx])
        if self.transform:
            image = self.transform(image=image)["image"]
        label = torch.tensor(self.labels[idx])
        return image.float(), label.float()

class TestDataset(Dataset):
    """Dataset yielding unlabeled images read from the ``TEST_PATH`` folder.

    Args:
        df: DataFrame with an "image_id" column holding file names relative
            to ``TEST_PATH``.
        transform: optional callable invoked as ``transform(image=...)`` whose
            result exposes the transformed image under the "image" key.
    """

    def __init__(self, df, transform=None):
        self.transform = transform
        self.df = df
        self.file_names = df["image_id"].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        image = io.imread(f'{TEST_PATH}/{file_name}')
        if not self.transform:
            # No transform configured: hand back the raw image as read.
            return image
        return self.transform(image=image)["image"]

In [None]:
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout, ShiftScaleRotate, CenterCrop, Resize, RGBShift
)

In [None]:
def get_transform():
    """Build the training augmentation pipeline.

    The pipeline crops/resizes to ``CFG['img_size']``, applies random
    geometric and colour augmentations, normalises with mean/std
    ``[0.485, 0.456, 0.406]`` / ``[0.229, 0.224, 0.225]``, applies random
    dropout-style occlusions and finally converts the image to a tensor.
    """
    size = CFG['img_size']
    geometric = [
        RandomResizedCrop(size, size),
        Transpose(p=0.5),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        ShiftScaleRotate(p=0.5),
    ]
    colour = [
        RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),
        HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    ]
    finishing = [
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        CoarseDropout(p=0.5),
        Cutout(p=0.5),
        ToTensorV2(),
    ]
    return Compose(geometric + colour + finishing, p=1.)
    

def val_transform():
    """Build the deterministic validation pipeline: resize, normalise, to tensor."""
    size = CFG['img_size']
    steps = [
        Resize(size, size),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(),
    ]
    return Compose(steps, p=1.)

In [None]:
class EfficientNet(nn.Module):
    """timm backbone whose classifier is replaced by a ``CFG["target_size"]``-way linear head.

    Args:
        model_name: architecture name passed to ``timm.create_model``.
        pretrained: whether timm should load pre-trained weights. Defaults to
            ``True`` (the previous hard-coded behaviour); pass ``False`` when
            the weights are restored from a checkpoint afterwards.
    """

    def __init__(self, model_name="tf_efficientnet_b0_ns", pretrained=True):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        # Swap the original classification layer for one with the task's class count.
        n_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(n_features, CFG["target_size"])

    def forward(self, x):
        """Return the raw class logits of the wrapped network for batch ``x``."""
        return self.model(x)

# Bi-tempered-logistic-loss
It makes the model more robust to outliers and noisy labels.  
Reference: https://github.com/mlpanda/bi-tempered-loss-pytorch

In [None]:
## Bi-tempered-logistic-loss 

def log_t(u, t):
    """Tempered logarithm of `u`; reduces to the natural log when `t` is 1.0."""
    if t != 1.0:
        exponent = 1.0 - t
        return (u ** exponent - 1.0) / exponent
    return torch.log(u)


def exp_t(u, t):
    """Tempered exponential of `u`; reduces to `exp` when `t` is 1.0."""
    if t != 1.0:
        # The base is clamped at zero before it is raised to the power.
        base = torch.relu(1.0 + (1.0 - t) * u)
        return base ** (1.0 / (1.0 - t))
    return torch.exp(u)


def compute_normalization_fixed_point(activations, t, num_iters=5):
    """Returns the normalization value for each example (t > 1.0).

    Uses a fixed-point iteration. Reductions keep the last dimension
    (``keepdim=True``) instead of reshaping with ``view(-1, 1)``, so inputs
    of any rank broadcast correctly and non-contiguous tensors are accepted;
    for 2-D input the result is identical to the previous implementation.

    Args:
    activations: A multi-dimensional tensor with last dimension `num_classes`.
    t: Temperature 2 (> 1.0 for tail heaviness).
    num_iters: Number of iterations to run the method.
    Return: A tensor of same rank as activation with the last dimension being 1.
    """

    # Shift by the per-example maximum before iterating.
    mu = torch.max(activations, dim=-1, keepdim=True).values
    normalized_activations_step_0 = activations - mu

    normalized_activations = normalized_activations_step_0
    for _ in range(num_iters):
        logt_partition = torch.sum(exp_t(normalized_activations, t), dim=-1, keepdim=True)
        normalized_activations = normalized_activations_step_0 * (logt_partition ** (1.0 - t))

    logt_partition = torch.sum(exp_t(normalized_activations, t), dim=-1, keepdim=True)

    return -log_t(1.0 / logt_partition, t) + mu


def compute_normalization(activations, t, num_iters=5):
    """Returns the normalization value for each example.

    Args:
    activations: A multi-dimensional tensor with last dimension `num_classes`.
    t: Temperature 2 (< 1.0 for finite support, > 1.0 for tail heaviness).
    num_iters: Number of iterations to run the method.
    Return: A tensor of same rank as activation with the last dimension being 1,
    or None when `t` < 1.0 (that case is not implemented).
    """

    finite_support = t < 1.0
    if finite_support:
        # not implemented as these values do not occur in the authors experiments...
        return None
    return compute_normalization_fixed_point(activations, t, num_iters)


def tempered_softmax(activations, t, num_iters=5):
    """Tempered softmax function.

    Args:
    activations: A multi-dimensional tensor with last dimension `num_classes`.
    t: Temperature tensor > 0.0.
    num_iters: Number of iterations to run the method.
    Returns:
    A probabilities tensor with the same shape as `activations`.
    """

    if t == 1.0:
        # With t == 1 the tempered softmax is the ordinary softmax. The former
        # code subtracted a log-sum-exp of shape (batch,) from activations of
        # shape (batch, classes), which either raised a broadcasting error or
        # silently normalised along the wrong axis.
        return torch.softmax(activations, dim=-1)

    normalization_constants = compute_normalization(activations, t, num_iters)
    return exp_t(activations - normalization_constants, t)


def bi_tempered_logistic_loss(activations, labels, t1, t2, label_smoothing=0.0, num_iters=5):
    """Bi-Tempered Logistic Loss.

    Args:
    activations: A multi-dimensional tensor with last dimension `num_classes`.
    labels: A tensor with shape and dtype as activations.
    t1: Temperature 1 (< 1.0 for boundedness).
    t2: Temperature 2 (> 1.0 for tail heaviness, < 1.0 for finite support).
    label_smoothing: Label smoothing parameter between [0, 1).
    num_iters: Number of iterations to run the method.
    Returns:
    A loss tensor with the last (class) dimension summed out, i.e. one value
    per example; the caller is responsible for reducing it to a scalar.
    """

    if label_smoothing > 0.0:
        num_classes = labels.shape[-1]
        labels = (1 - num_classes / (num_classes - 1) * label_smoothing) * labels + label_smoothing / (num_classes - 1)

    probabilities = tempered_softmax(activations, t2, num_iters)

    # Small epsilon keeps log_t finite for labels that are exactly zero.
    log_term = (log_t(labels + 1e-10, t1) - log_t(probabilities, t1)) * labels
    power_term = (1 / (2 - t1)) * (torch.pow(labels, 2 - t1) - torch.pow(probabilities, 2 - t1))

    return torch.sum(log_term - power_term, dim=-1)

Definition of utilities

In [None]:
class AverageMeter:
    """Tracks the latest value together with its running weighted average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, the weighted sum, the weight count and the average."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` observed with weight `n` and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration of `s` seconds as '<minutes>m <seconds>s'."""
    minutes = math.floor(s / 60)
    seconds = s - minutes * 60
    return '%dm %ds' % (minutes, seconds)


def timeSince(since, percent):
    """Return 'elapsed (remain estimate)' for work started at time `since`.

    `percent` is the completed fraction of the work (must be non-zero); the
    remaining time is extrapolated linearly from the elapsed time.
    """
    elapsed = time.time() - since
    estimated_total = elapsed / (percent)
    remaining = estimated_total - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))


def init_logger(log_file=OUTPUT_DIR+'train.log'):
    """Create the module logger writing plain messages to the console and to `log_file`.

    The logger is looked up by name, so calling this function again (for
    example when the notebook cell is re-run) returns the same logger object.
    Handlers left over from an earlier call are removed and closed first;
    otherwise every message would be emitted once per previous call.

    Args:
        log_file: path of the log file that receives a copy of every message.

    Returns:
        The configured ``logging.Logger`` at INFO level.
    """
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)

    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    for handler in (StreamHandler(), FileHandler(filename=log_file)):
        handler.setFormatter(Formatter("%(message)s"))
        logger.addHandler(handler)
    return logger

LOGGER = init_logger()


def seed_torch(seed=42):
    """Seed Python, NumPy and PyTorch (CPU and CUDA) and request deterministic cuDNN."""
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)
    torch.backends.cudnn.deterministic = True

seed_torch(seed=42)

In [None]:
def load_model(path, model):
    """Load the weights stored under the "model" key of checkpoint `path` into `model`.

    The checkpoint is deserialised onto the CPU; ``load_state_dict`` then
    copies the values into the model's own parameters, so the model stays on
    whatever device it already lives on.

    Returns:
        The same `model` object, updated in place.
    """
    model.load_state_dict(torch.load(path, map_location="cpu")["model"])
    return model


def average_snapshots(list_of_snapshots_paths, original_model):
    """Average the parameters of several checkpoints into `original_model`.

    Every snapshot is loaded into the same model object, so the parameter
    values are cloned right after each load. The previous implementation
    stored references to the live parameters, which meant every "snapshot"
    aliased the last one loaded and the in-place accumulation corrupted the
    weights instead of averaging them.

    Only ``named_parameters()`` are averaged; buffers keep the values of the
    last snapshot loaded.

    Args:
        list_of_snapshots_paths: checkpoint paths readable by `load_model`.
        original_model: model whose architecture matches the checkpoints.

    Returns:
        `original_model`, holding the averaged parameters and set to eval mode.

    Raises:
        ValueError: if `list_of_snapshots_paths` is empty.
    """
    snapshot_paths = list(list_of_snapshots_paths)
    if not snapshot_paths:
        raise ValueError("average_snapshots needs at least one snapshot path")

    param_sums = {}
    model = original_model
    for snapshot_path in snapshot_paths:
        model = load_model(snapshot_path, original_model)
        for name, param in model.named_parameters():
            snapshot_value = param.detach().clone()
            if name in param_sums:
                param_sums[name] += snapshot_value
            else:
                param_sums[name] = snapshot_value

    count = len(snapshot_paths)
    with torch.no_grad():
        for name, param in model.named_parameters():
            param.copy_(param_sums[name] / count)

    model.eval()

    return model

In [None]:
class Train:
    """Couples a model with its optimizer/scheduler and the train/validate/inference loops.

    Hyper-parameters are read from the module-level ``CFG`` dict and batches
    are moved to the module-level ``device``.
    """

    def __init__(self, model, step_per_epoch):
        """Create the optimizer and (optionally) the OneCycleLR scheduler.

        Args:
            model: network to optimise.
            step_per_epoch: number of batches per epoch; together with
                ``CFG["epochs"]`` it defines the length of the OneCycleLR schedule.

        Raises:
            ValueError: if ``CFG["optimizer"]`` is neither "RAdam" nor "AdamW".
        """
        self.model = model
        if CFG["optimizer"] == "RAdam":
            self.optimizer = optim.RAdam(
                model.parameters(),
                lr=CFG["lr"],
                betas=(0.9, 0.999),
                eps=1e-8,
                weight_decay=0,
            )
        elif CFG["optimizer"] == "AdamW":
            self.optimizer = torch.optim.AdamW(
                model.parameters(),
                lr=CFG["lr"],
                betas=(0.9, 0.999),
                eps=1e-08,
                weight_decay=0.0,
                amsgrad=False)
        else:
            # Previously an unknown name left `self.optimizer` undefined and
            # failed later with an unrelated AttributeError.
            raise ValueError(f'Unsupported optimizer: {CFG["optimizer"]!r}')
        if CFG["scheduler"] == "OneCycleLR":
            self.scheduler = OneCycleLR(
                self.optimizer,
                CFG["lr"],
                epochs=CFG["epochs"],
                steps_per_epoch=step_per_epoch,
                pct_start=0.3,
                anneal_strategy='cos',
                )
        else:
            self.scheduler = None
        # Not used by the loops below (mixed precision is disabled); kept so
        # the attribute remains available.
        self.scaler = torch.cuda.amp.GradScaler()

    def _compute_loss(self, y_preds, labels):
        """Return the scalar loss selected by ``CFG["loss"]``.

        Args:
            y_preds: raw model outputs (logits).
            labels: float targets with the same shape as `y_preds`.

        Raises:
            ValueError: if ``CFG["loss"]`` names no known loss.
        """
        loss_name = CFG["loss"]
        if loss_name == "bi_tempered_loss":
            loss = bi_tempered_logistic_loss(activations=y_preds, labels=labels, t1=CFG["t1"], t2=CFG["t2"], label_smoothing=CFG["label_smoothing"])
        elif loss_name == "logloss":
            loss = F.cross_entropy(y_preds, labels)
        elif loss_name == "KLDivLoss":
            prob_m = F.log_softmax(y_preds, dim=1)
            # dim=1 is what the implicit-dim call resolved to for 2-D labels.
            prob_t = F.softmax(labels, dim=1)
            loss = - (prob_m * prob_t).sum(dim=1)
        elif loss_name == "bce":
            # Numerically stable equivalent of sigmoid followed by BCE.
            loss = F.binary_cross_entropy_with_logits(y_preds, labels)
        else:
            raise ValueError(f'Unsupported loss: {loss_name!r}')
        # The bi-tempered loss is per example; reduce so backward() gets a
        # scalar. This is a no-op for losses that are already scalars.
        return loss.mean()

    def train(self, train_loader, epoch):
        """Run one training epoch and return the sample-weighted mean loss.

        Gradients are accumulated over ``CFG["accum_iter"]`` batches, clipped
        to ``CFG["max_grad_norm"]`` and then applied. The scheduler (if any)
        is stepped once per batch.

        Args:
            train_loader: iterable of ``(images, labels)`` batches.
            epoch: zero-based epoch index, used for progress output only.
        """
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()

        self.model.train()
        num_steps = len(train_loader)
        # Defined up front so the progress line cannot hit an unbound name
        # when it prints before the first optimizer step.
        grad_norm = 0.0
        start = end = time.time()
        for step, (images, labels) in enumerate(train_loader):
            # measure data loading time
            data_time.update(time.time() - end)
            images = images.to(device)
            labels = labels.to(device)
            batch_size = labels.size(0)

            y_preds = self.model(images)
            loss = self._compute_loss(y_preds, labels)
            loss.backward()

            # Also step on the last batch so a partial accumulation group does
            # not leak its gradients into the next epoch.
            if (step + 1) % CFG["accum_iter"] == 0 or (step + 1) == num_steps:
                grad_norm = torch.nn.utils.clip_grad_norm_(self.model.parameters(), CFG["max_grad_norm"])
                self.optimizer.step()
                self.optimizer.zero_grad()

            losses.update(loss.item(), batch_size)
            if self.scheduler is not None:
                self.scheduler.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if (step + 1) % CFG["print_freq"] == 0 or step == (num_steps - 1):
                print('Epoch: [{0}][{1}/{2}] '
                      'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                      'Elapsed {remain:s} '
                      'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                      'Grad: {grad_norm:.4f}  '
                      'LR: {lr:.6f}  '
                      .format(
                       epoch+1, step, num_steps, batch_time=batch_time,
                       data_time=data_time, loss=losses,
                       remain=timeSince(start, float(step+1)/num_steps),
                       grad_norm=grad_norm,
                       lr=self.scheduler.get_last_lr()[0] if self.scheduler is not None else CFG["lr"],
                       ))

        return losses.avg

    def validate(self, valid_loader):
        """Evaluate the model without gradient tracking.

        Args:
            valid_loader: iterable of ``(images, labels)`` batches.

        Returns:
            Tuple ``(mean_loss, predictions)`` where `predictions` is a NumPy
            array of softmax probabilities, one row per sample in loader order.
        """
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        # switch to evaluation mode
        self.model.eval()
        preds = []
        num_steps = len(valid_loader)
        start = end = time.time()
        for step, (images, labels) in enumerate(valid_loader):
            # measure data loading time
            data_time.update(time.time() - end)
            images = images.to(device)
            labels = labels.to(device)
            batch_size = labels.size(0)
            # compute loss
            with torch.no_grad():
                y_preds = self.model(images)
                loss = self._compute_loss(y_preds, labels)
            losses.update(loss.item(), batch_size)
            # record class probabilities for the accuracy computation
            preds.append(y_preds.softmax(1).to('cpu').numpy())
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if (step + 1) % CFG["print_freq"] == 0 or step == (num_steps - 1):
                print('EVAL: [{0}/{1}] '
                      'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                      'Elapsed {remain:s} '
                      'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                      .format(
                       step, num_steps, batch_time=batch_time,
                       data_time=data_time, loss=losses,
                       remain=timeSince(start, float(step+1)/num_steps),
                       ))

        predictions = np.concatenate(preds)
        return losses.avg, predictions

    def inference(self, states, test_loader):
        """Predict class probabilities averaged over several checkpoints.

        Args:
            states: iterable of checkpoint dicts holding a state dict under "model".
            test_loader: iterable of image batches (no labels).

        Returns:
            NumPy array of softmax probabilities averaged over `states`.
        """
        self.model.to(device)
        tk0 = tqdm(enumerate(test_loader), total=len(test_loader))
        probs = []
        for _, images in tk0:
            images = images.to(device)
            avg_preds = []
            for state in states:
                self.model.load_state_dict(state['model'])
                self.model.eval()
                with torch.no_grad():
                    y_preds = self.model(images)
                avg_preds.append(y_preds.softmax(1).to('cpu').numpy())
            avg_preds = np.mean(avg_preds, axis=0)
            probs.append(avg_preds)
        probs = np.concatenate(probs)
        return probs

# 5 folds and start training

In [None]:
# One-hot encode the competition labels into the columns "0".."4" while keeping
# the integer "label" column (restored by merging the original frame back in).
# The one-hot columns are created as float32 so they concatenate cleanly with
# the float pseudo-label columns of `plants_df` instead of becoming object dtype.
train_tmp = pd.get_dummies(train, columns=["label"], dtype="float32")
train_tmp = train_tmp.merge(train, on="image_id")
train_tmp = train_tmp.rename({f"label_{i}": f"{i}" for i in range(5)}, axis=1)
# Turn the image ids into full paths; the dataset class reads them as-is.
train_tmp["image_id"] = train["image_id"].map(lambda x: TRAIN_DIR_2019+x)
folds = train_tmp.copy()

# Same encoding for the rows of the merged dataset whose source is 2019.
extra = train_merged[train_merged["source"] == 2019][["image_id", "label"]].reset_index(drop=True).copy()
extra_tmp = extra.copy()
extra = pd.get_dummies(extra, columns=["label"], dtype="float32")
extra = extra.merge(extra_tmp, on="image_id")
extra = extra.rename({f"label_{i}": f"{i}" for i in range(5)}, axis=1)
extra["image_id"] = extra["image_id"].map(lambda x: TRAIN_DIR_2019+x)

# Append the pseudo-labeled rows to the extra data.
extra = pd.concat([extra, plants_df]).reset_index(drop=True)
Fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for n, (train_index, val_index) in enumerate(Fold.split(folds, folds["label"])):
    folds.loc[val_index, 'fold'] = int(n)
# Fold id 6 is never selected as a validation fold, so the extra rows are
# always part of the training split. Assigned in one vectorised step instead
# of one `.loc` write per row.
extra["fold"] = 6
if CFG["use_2019"]:
    folds = pd.concat([folds, extra]).reset_index(drop=True)
folds['fold'] = folds['fold'].astype(int)
print(folds.groupby(['fold', "label"]).size())

In [None]:
# ====================================================
# model & optimizer
# ====================================================
efmodel = EfficientNet(CFG["model_arch"])
efmodel.to(device)
# The wrapper's forward() only delegates to `efmodel.model`, so an attribute set
# on the wrapper itself is never read. Set the dropout rate on the wrapped timm
# network instead (timm's EfficientNet is expected to apply dropout before its
# classifier according to its own `drop_rate` attribute).
efmodel.model.drop_rate = 0.3

In [None]:
# ====================================================
# Train loop
# ====================================================
def train_loop(folds, fold, efmodel):
    """Train `efmodel` with `fold` held out and return that fold's predictions.

    After every epoch a snapshot checkpoint is written; whenever the
    validation accuracy improves, a "best" checkpoint is written as well.

    Args:
        folds: DataFrame with "image_id", "label", the label columns
            "0".."4" and an integer "fold" column.
        fold: fold id used for validation; every other row is used for training.
        efmodel: model to train (updated in place).

    Returns:
        The validation rows, with columns "0".."4" overwritten by the
        best-epoch class probabilities and a new "preds" column holding the
        arg-max class.

    Raises:
        ValueError: if ``CFG["epochs"]`` is 0, so no prediction exists.
    """

    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['fold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['fold'] == fold].reset_index(drop=True)

    train_dataset = TrainDataset(train_folds, transform=get_transform())
    valid_dataset = TrainDataset(valid_folds, transform=val_transform())

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG["train_bs"],
                              shuffle=True,
                              num_workers=CFG["num_workers"], pin_memory=True, drop_last=True)
    # Validation uses its own batch size; it previously reused `train_bs`,
    # leaving CFG["valid_bs"] unused.
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG["valid_bs"],
                              shuffle=False,
                              num_workers=CFG["num_workers"], pin_memory=True, drop_last=False)

    # With drop_last=True this equals len(train_dataset) // CFG["train_bs"].
    trainer = Train(efmodel, len(train_loader))

    # 1-D label vector for accuracy_score.
    valid_labels = valid_folds["label"].values
    # Start below any reachable accuracy so the first epoch is always stored,
    # even when its score is 0.
    best_score = float("-inf")
    best_preds = None

    for epoch in range(CFG["epochs"]):

        start_time = time.time()

        # train
        avg_loss = trainer.train(train_loader, epoch)

        # eval
        avg_val_loss, preds = trainer.validate(valid_loader)

        # scoring
        score = accuracy_score(valid_labels, preds.argmax(1))

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Accuracy: {score}')

        if score > best_score:
            best_score = score
            best_preds = preds
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': efmodel.state_dict(),
                        'preds': preds},
                        OUTPUT_DIR+f'{CFG["model_arch"]}_fold{fold}_best.pth')
        torch.save({'model': efmodel.state_dict(),
                    'preds': preds},
                    OUTPUT_DIR+f'{CFG["model_arch"]}_fold{fold}_snapshot{epoch}.pth')

    # Optional snapshot averaging (disabled):
#     trainer.model = average_snapshots([OUTPUT_DIR+f'{CFG["model_arch"]}_fold{fold}_snapshot{e}.pth' for e in range(CFG["epochs"] - CFG["model_average"], CFG["epochs"])], efmodel)
#     trainer.model.to(device)
#     avg_val_loss, preds = trainer.validate(valid_loader)
#     score = accuracy_score(valid_labels, preds.argmax(1))
#     LOGGER.info(f"averaged model score: {score:.4f}")
#     if score > best_score:
#         LOGGER.info("averaged model is selected!")
#         best_preds = preds
#         torch.save({'model': efmodel.state_dict(),
#                     'preds': preds},
#                      OUTPUT_DIR+f'{CFG["model_arch"]}_fold{fold}_best.pth')

    if best_preds is None:
        raise ValueError('CFG["epochs"] must be at least 1 to produce predictions')

    # The best predictions are kept in memory, which avoids re-reading the
    # checkpoint just to recover the array that was saved into it.
    valid_folds[[str(c) for c in range(5)]] = best_preds
    valid_folds['preds'] = best_preds.argmax(1)

    return valid_folds

In [None]:
"""
Prepare: 1.train  2.test  3.submission  4.folds
"""

def get_result(result_df):
    """Log the accuracy of the "preds" column against the "label" column of `result_df`."""
    score = accuracy_score(result_df["label"].values, result_df['preds'].values)
    LOGGER.info(f'Score: {score:<.5f}')
    
# Train every requested fold and collect its out-of-fold predictions.
# NOTE(review): the same `efmodel` instance is passed to every fold, so with
# more than one entry in TRAIN_FOLDS later folds start from the weights of the
# previous fold - confirm whether that is intended.
oof_parts = []
for fold in TRAIN_FOLDS:
    _oof_df = train_loop(folds, fold, efmodel)
    oof_parts.append(_oof_df)
    LOGGER.info(f"========== fold: {fold} result ==========")
    # Score each fold inside the loop; the call used to sit after the loop, so
    # only the last fold was scored and an empty TRAIN_FOLDS raised NameError.
    get_result(_oof_df)
oof_df = pd.concat(oof_parts) if oof_parts else pd.DataFrame()

# CV result
LOGGER.info("========== CV ==========")
if not oof_df.empty:
    get_result(oof_df)
# save result
oof_df.to_csv(OUTPUT_DIR+'oof_df.csv', index=False)