In [None]:
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

In [None]:
# Preliminaries
from pathlib import Path
from tqdm import tqdm
tqdm.pandas()
import json
import random
import os
import pandas as pd
import numpy as np

# Visuals and CV2
import seaborn as sn
import matplotlib.pyplot as plt
import cv2

# albumentations for augs
import albumentations
from albumentations.pytorch.transforms import ToTensorV2

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score


#torch
import torch
import timm
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset,DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

# Configuration

In [None]:
# Target image size; passed to cv2.resize in the dataset and to
# albumentations.Resize in the training transforms.
DIM = (512,512)

# DataLoader and optimisation settings.
NUM_WORKERS = 4
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 16
EPOCHS = 10
SEED = 2020
LR = 1e-4

# Root folder of the image data; images are read from BASE_DIR/'train'.
BASE_DIR = Path('../input/cassava-leaf-disease-merged')

DEVICE = "cuda"

# Per-channel statistics handed to albumentations.Normalize.
# NOTE(review): these look like the usual ImageNet mean/std - confirm.
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

# When True, the 2019 fold file is appended to the 2020 fold file and
# validation is restricted to rows whose `source` is 2020.
USE_FULL_DATA = True


################################################# MODEL ####################################################################

# NOTE(review): in the visible code run() builds CassavaModel(out_features=5)
# without passing this constant, so the model's own default name is what is
# actually used - confirm whether that is intended.
MODEL_NAME = 'se_resnext50_32x4d' #resnext50_32x4d #resnext101_32x4d #efficientnet_b3 #efficientnetb5 #efficientnetb7


################################################ Loss and its params #######################################################
# Key consumed by fetch_loss(); the remaining constants are forwarded to the
# corresponding loss constructor.
LOSS = 'bi_tempered_loss' #'label_smoothing' #'focal_cosine_loss' #'bi_tempered_loss' #'CE'
SMOOTHING = 0.05  # LabelSmoothingLoss
ALPHA = 1  # FocalCosineLoss
GAMMA = 2  # FocalCosineLoss
XENT = 0.1  # FocalCosineLoss
t1=0.3 # bi-tempered-loss https://www.kaggle.com/c/cassava-leaf-disease-classification/discussion/202017
t2=1.0 # bi-tempered-loss https://www.kaggle.com/c/cassava-leaf-disease-classification/discussion/202017
SMOOTHING_TEMP = 0.0  # label smoothing passed to BiTemperedLogisticLoss

####################################### Scheduler and its params ############################################################
# Key consumed by fetch_scheduler(); the remaining constants are forwarded to
# the corresponding scheduler constructor.
SCHEDULER = 'CosineAnnealingWarmRestarts' #'CosineAnnealingLR'
factor=0.2 # ReduceLROnPlateau
patience=4 # ReduceLROnPlateau
eps=1e-6 # ReduceLROnPlateau
T_max=10 # CosineAnnealingLR
T_0=10 # CosineAnnealingWarmRestarts
min_lr=1e-6  # eta_min for both cosine schedulers

# Seed

In [None]:
def seed_everything(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe to call on CPU-only machines: the seed is applied lazily once CUDA
    # is initialised.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The auto-tuner may pick a different convolution algorithm on every run,
    # which defeats `deterministic = True`; it has to be switched off.
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

# Utils

In [None]:
class AverageMeter(object):
    """Tracks the latest value and the weighted running mean of a scalar."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget everything recorded so far."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val`, weighted as if it had been observed `n` times."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [None]:
class AccuracyMeter(object):
    """Running classification accuracy over all batches seen since `reset`.

    Accuracy is maintained from running counts of correct and total samples,
    so an update costs O(batch) instead of re-concatenating the full history.
    The per-batch label arrays are still kept in `prediction` / `target`.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the stored batches and the running counts."""
        self.prediction = []
        self.target = []
        self.accuracy = 0.0
        self._correct = 0
        self._total = 0

    def update(self, y_true, y_pred):
        """Add one batch.

        Args:
            y_true: Tensor of integer class labels, one per sample.
            y_pred: Tensor of per-class scores with classes along dim 1.
        """
        y_true = y_true.detach().cpu().numpy().astype(int)
        # argmax of the raw scores equals argmax of their softmax, so the
        # softmax is skipped.
        y_pred = y_pred.detach().argmax(dim=1).cpu().numpy().astype(int)

        self.prediction.append(y_pred)
        self.target.append(y_true)

        self._correct += int((y_pred == y_true).sum())
        self._total += int(y_true.shape[0])
        if self._total:
            self.accuracy = self._correct / self._total

    @property
    def avg(self):
        """Fraction of correctly classified samples seen so far."""
        return self.accuracy

In [None]:
def fetch_scheduler(optimizer):
    """Build the learning-rate scheduler selected by the `SCHEDULER` constant.

    Args:
        optimizer: Optimizer whose learning rate will be scheduled.

    Returns:
        A `ReduceLROnPlateau`, `CosineAnnealingLR` or
        `CosineAnnealingWarmRestarts` instance configured from the
        module-level scheduler constants.

    Raises:
        ValueError: If `SCHEDULER` is not one of the supported names.
    """
    if SCHEDULER =='ReduceLROnPlateau':
        return ReduceLROnPlateau(optimizer, mode='min', factor=factor, patience=patience, verbose=True, eps=eps)
    if SCHEDULER =='CosineAnnealingLR':
        return CosineAnnealingLR(optimizer, T_max=T_max, eta_min=min_lr, last_epoch=-1)
    if SCHEDULER =='CosineAnnealingWarmRestarts':
        return CosineAnnealingWarmRestarts(optimizer, T_0=T_0, T_mult=1, eta_min=min_lr, last_epoch=-1)
    # Previously an unknown name fell through to `return scheduler` and
    # surfaced as an UnboundLocalError.
    raise ValueError(f'Unknown SCHEDULER: {SCHEDULER!r}')

In [None]:
def fetch_loss():
    """Build the loss module selected by the `LOSS` constant.

    Returns:
        A `LabelSmoothingLoss`, `BiTemperedLogisticLoss`, `FocalCosineLoss`
        or `nn.CrossEntropyLoss` instance configured from the module-level
        loss constants.

    Raises:
        ValueError: If `LOSS` is not one of the supported names.
    """
    if LOSS=='label_smoothing':
        return LabelSmoothingLoss(smoothing=SMOOTHING)
    if LOSS=='bi_tempered_loss':
        return BiTemperedLogisticLoss(t1=t1, t2=t2, smoothing=SMOOTHING_TEMP)
    if LOSS=='focal_cosine_loss':
        return FocalCosineLoss(alpha=ALPHA,gamma=GAMMA,xent=XENT)
    if LOSS =='CE':
        return nn.CrossEntropyLoss()
    # Previously an unknown name fell through to `return loss` and surfaced
    # as an UnboundLocalError.
    raise ValueError(f'Unknown LOSS: {LOSS!r}')

# Transforms

In [None]:
def get_train_transforms():
    """Return the albumentations pipeline applied to training images.

    The steps run in order: random flips and rotation, brightness jitter, an
    optional 384x384 centre crop, resize to `DIM`, cutout, shift/scale,
    normalisation with `MEAN`/`STD` and conversion to a tensor.
    """
    out_height, out_width = DIM[0], DIM[1]

    steps = [
        albumentations.HorizontalFlip(p=0.5),
        albumentations.VerticalFlip(p=0.5),
        albumentations.Rotate(limit=120, p=0.8),
        albumentations.RandomBrightness(limit=(0.09, 0.6), p=0.5),
        albumentations.CenterCrop(384, 384, always_apply=False, p=0.5),
        albumentations.Resize(out_height, out_width),
        albumentations.Cutout(
            num_holes=8,
            max_h_size=8,
            max_w_size=8,
            fill_value=0,
            always_apply=False,
            p=0.5,
        ),
        albumentations.ShiftScaleRotate(shift_limit=0.25, scale_limit=0.1, rotate_limit=0),
        albumentations.Normalize(MEAN, STD, max_pixel_value=255.0, always_apply=True),
        ToTensorV2(p=1.0),
    ]
    return albumentations.Compose(steps)

def get_valid_transforms():
    """Return the albumentations pipeline applied to validation images.

    Only normalisation with `MEAN`/`STD` and tensor conversion are applied;
    no random augmentation is involved.
    """
    normalize = albumentations.Normalize(MEAN, STD, max_pixel_value=255.0, always_apply=True)
    to_tensor = ToTensorV2(p=1.0)
    return albumentations.Compose([normalize, to_tensor])

# Dataset

In [None]:
class CassavaDataset(Dataset):
    """Dataset yielding `{'image', 'target'}` dicts for Cassava leaf images.

    Images are read from ``BASE_DIR/'train'/<image_id>``.

    Args:
        image_ids: Sequence of image file names.
        labels: Sequence of integer class labels aligned with `image_ids`.
        dimension: Optional size tuple forwarded to `cv2.resize`.
        augmentations: Optional callable invoked as `augmentations(image=img)`
            that returns a dict with an ``'image'`` entry.
    """

    def __init__(self,image_ids,labels,dimension=None,augmentations=None):
        super().__init__()
        self.image_ids = image_ids
        self.labels = labels
        self.dim = dimension
        self.augmentations = augmentations

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self,idx):
        """Load, resize and augment the sample at position `idx`.

        Returns:
            Dict with ``'image'`` (the augmented image, or the RGB array when
            no augmentations were given) and ``'target'`` (a long tensor).

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_path = BASE_DIR/'train'/self.image_ids[idx]
        image = cv2.imread(str(image_path))
        # cv2.imread signals a missing or unreadable file by returning None
        # rather than raising; fail here with the offending path.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.dim:
            image = cv2.resize(image,self.dim)

        # Without augmentations the RGB array is returned as-is; previously
        # this path raised UnboundLocalError because `image` was never set.
        if self.augmentations:
            image = self.augmentations(image=image)['image']

        return {
            'image': image,
            'target': torch.tensor(self.labels[idx],dtype=torch.long)
        }

# Model

In [None]:
class CassavaModel(nn.Module):
    """timm backbone whose final linear layer is replaced for this task.

    Args:
        model_name: Name passed to `timm.create_model`.
        out_features: Number of output classes of the new head.
        pretrained: Whether timm should load pretrained weights.
    """

    def __init__(self, model_name='seresnext50_32x4d',out_features=5,pretrained=True):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)

        if model_name in ['efficientnet_b3','efficientnet_b5','efficientnet_b7']:
            # These backbones expose their head as `classifier`.
            n_features = self.model.classifier.in_features
            self.model.classifier = nn.Linear(n_features, out_features)
        else:
            # All other names are expected to expose their head as `fc`.
            # Previously this ran unconditionally, so the EfficientNet
            # variants failed on the missing `fc` attribute.
            n_features = self.model.fc.in_features
            self.model.fc = nn.Linear(n_features, out_features)

    def forward(self, x):
        """Return the backbone's output for the batch `x`."""
        return self.model(x)

# Train Function

In [None]:
def train_fn(dataloader,model,criterion,optimizer,device,scheduler,epoch):
    """Train `model` for one epoch.

    Args:
        dataloader: Iterable of batches shaped as dicts with ``'image'`` and
            ``'target'`` entries.
        model: Network to optimise; switched to training mode.
        criterion: Loss called as `criterion(outputs, targets)`.
        optimizer: Optimizer stepped once per batch.
        device: Device the batch tensors are moved to.
        scheduler: Optional LR scheduler, stepped once at the end of the
            epoch (may be None).
        epoch: Epoch index, only shown in the progress bar.

    Returns:
        Tuple `(loss_meter, accuracy_meter)` for the epoch.
    """
    model.train()
    loss_score = AverageMeter()
    accuracy = AccuracyMeter()

    progress = tqdm(enumerate(dataloader), total=len(dataloader))
    for _, batch in progress:
        images = batch['image'].to(device)
        targets = batch['target'].to(device)
        batch_size = images.shape[0]

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs,targets)
        loss.backward()
        optimizer.step()

        accuracy.update(targets, outputs)
        loss_score.update(loss.detach().item(), batch_size)

        progress.set_postfix(Train_Loss=loss_score.avg,Train_accuracy = accuracy.avg,Epoch=epoch)

    # The scheduler periods (T_0 / T_max) are configured in epochs, so the
    # scheduler advances once per epoch. Stepping it per batch restarted the
    # cosine cycle every T_0 batches.
    if scheduler is not None:
        if isinstance(scheduler, ReduceLROnPlateau):
            # ReduceLROnPlateau needs a metric; only the training loss is
            # available inside this function.
            scheduler.step(loss_score.avg)
        else:
            scheduler.step()

    return loss_score, accuracy

# Evaluation Function

In [None]:
def eval_fn(data_loader,model,criterion,device):
    """Evaluate `model` on `data_loader` without computing gradients.

    Returns:
        Tuple `(loss_meter, accuracy_meter)` accumulated over all batches.
    """
    model.eval()
    running_loss = AverageMeter()
    running_accuracy = AccuracyMeter()

    progress = tqdm(enumerate(data_loader), total=len(data_loader))
    with torch.no_grad():
        for _, batch in progress:
            images = batch['image'].to(device)
            targets = batch['target'].to(device)

            logits = model(images)
            batch_loss = criterion(logits,targets)

            running_accuracy.update(targets, logits)
            running_loss.update(batch_loss.detach().item(), images.shape[0])

            progress.set_postfix(
                Valid_Loss=running_loss.avg,
                Valid_accuracy=running_accuracy.avg,
            )

    return running_loss, running_accuracy

# Loss

In [None]:
class FocalCosineLoss(nn.Module):
    """Cosine-embedding loss plus a weighted focal cross-entropy term.

    Args:
        alpha: Scale of the focal term.
        gamma: Focusing exponent applied to `(1 - pt)`.
        xent: Weight of the focal term in the returned sum.
    """

    def __init__(self, alpha=1, gamma=2, xent=.1):
        super(FocalCosineLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma

        self.xent = xent

        # Target of +1 for the cosine loss. Registered as a non-persistent
        # buffer so it follows `.to(device)` without entering the state dict;
        # the previous `.cuda()` call made the loss unusable without a GPU.
        self.register_buffer('y', torch.ones(1), persistent=False)

    def forward(self, input, target, reduction="mean"):
        """Compute the loss.

        Args:
            input: Per-class scores with classes on the last dimension.
            target: Integer class indices.
            reduction: Passed to the cosine loss; the focal term is averaged
                only when it equals ``"mean"``.
        """
        # One-hot targets are cast to the score dtype so both inputs of the
        # cosine loss share a dtype.
        one_hot = F.one_hot(target, num_classes=input.size(-1)).to(input.dtype)
        y = self.y.to(input.device)
        cosine_loss = F.cosine_embedding_loss(input, one_hot, y, reduction=reduction)

        # `reduction='none'` replaces the deprecated `reduce=False`.
        cent_loss = F.cross_entropy(F.normalize(input), target, reduction='none')
        pt = torch.exp(-cent_loss)
        focal_loss = self.alpha * (1-pt)**self.gamma * cent_loss

        if reduction == "mean":
            focal_loss = torch.mean(focal_loss)

        return cosine_loss + self.xent * focal_loss

In [None]:
def log_t(u, t):
    """Tempered logarithm of `u` at temperature `t` (plain log when t == 1)."""
    if t != 1.0:
        one_minus_t = 1.0 - t
        return (u.pow(one_minus_t) - 1.0) / one_minus_t
    return u.log()

def exp_t(u, t):
    """Tempered exponential of `u` at temperature `t` (plain exp when t == 1)."""
    if t != 1:
        one_minus_t = 1.0 - t
        # relu clamps the base at zero before the power is taken.
        return (1.0 + one_minus_t * u).relu().pow(1.0 / one_minus_t)
    return u.exp()

def compute_normalization_fixed_point(activations, t, num_iters):
    """Normalization value per example via fixed-point iteration (t > 1.0).

    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      t: Temperature 2 (> 1.0 for tail heaviness).
      num_iters: Number of fixed-point iterations to run.
    Return: A tensor shaped like `activations` but with a last dimension of 1.
    """
    row_max = activations.max(dim=-1, keepdim=True)[0]
    shifted = activations - row_max

    current = shifted
    for _ in range(num_iters):
        partition = exp_t(current, t).sum(dim=-1, keepdim=True)
        current = shifted * partition.pow(1.0 - t)

    # One more partition evaluation on the final iterate.
    partition = exp_t(current, t).sum(dim=-1, keepdim=True)
    return -log_t(1.0 / partition, t) + row_max

def compute_normalization_binary_search(activations, t, num_iters):

    """Returns the normalization value for each example (t < 1.0).

    Bisects, per example, for the shift at which the tempered exponentials
    of the shifted activations sum to one.

    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      t: Temperature 2 (< 1.0 for finite support).
      num_iters: Number of bisection iterations to run.
    Return: A tensor of same rank as activation with the last dimension being 1.
    """

    # Shift by the per-example maximum so the largest activation becomes 0.
    mu, _ = torch.max(activations, -1, keepdim=True)
    normalized_activations = activations - mu

    # Per example, count the classes whose shifted activation lies above
    # -1 / (1 - t).
    effective_dim = \
        torch.sum(
                (normalized_activations > -1.0 / (1.0-t)).to(torch.int32),
            dim=-1, keepdim=True).to(activations.dtype)

    # Initial bracket for the search: [0, -log_t(1 / effective_dim)].
    shape_partition = activations.shape[:-1] + (1,)
    lower = torch.zeros(shape_partition, dtype=activations.dtype, device=activations.device)
    upper = -log_t(1.0/effective_dim, t) * torch.ones_like(lower)

    for _ in range(num_iters):
        # Evaluate the probability mass at the midpoint of the bracket.
        logt_partition = (upper + lower)/2.0
        sum_probs = torch.sum(
                exp_t(normalized_activations - logt_partition, t),
                dim=-1, keepdim=True)
        # update == 1 where the mass is below one: the midpoint becomes the
        # new upper bound there, otherwise it becomes the new lower bound.
        update = (sum_probs < 1.0).to(activations.dtype)
        lower = torch.reshape(
                lower * update + (1.0-update) * logt_partition,
                shape_partition)
        upper = torch.reshape(
                upper * (1.0 - update) + update * logt_partition,
                shape_partition)

    # Return the midpoint of the final bracket, undoing the max shift.
    logt_partition = (upper + lower)/2.0
    return logt_partition + mu

class ComputeNormalization(torch.autograd.Function):
    """
    Autograd function wrapping the normalization solvers with a hand-written
    backward pass. See compute_normalization.
    """
    @staticmethod
    def forward(ctx, activations, t, num_iters):
        # Binary search handles t < 1.0; every other t uses fixed-point iteration.
        solver = (
            compute_normalization_binary_search
            if t < 1.0
            else compute_normalization_fixed_point
        )
        constants = solver(activations, t, num_iters)

        ctx.save_for_backward(activations, constants)
        ctx.t=t
        return constants

    @staticmethod
    def backward(ctx, grad_output):
        activations, constants = ctx.saved_tensors
        temperature = ctx.t

        # Gradient w.r.t. the activations: probabilities raised to t and
        # renormalised, scaled by the incoming gradient.
        escorts = exp_t(activations - constants, temperature).pow(temperature)
        escorts = escorts / escorts.sum(dim=-1, keepdim=True)

        # No gradients for `t` and `num_iters`.
        return escorts * grad_output, None, None

def compute_normalization(activations, t, num_iters=5):
    """Differentiable normalization value for each example.

    Delegates to `ComputeNormalization`, which supplies the backward pass.

    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      t: Temperature 2 (> 1.0 for tail heaviness, < 1.0 for finite support).
      num_iters: Number of iterations to run the method.
    Return: A tensor of same rank as activation with the last dimension being 1.
    """
    normalization_constants = ComputeNormalization.apply(activations, t, num_iters)
    return normalization_constants

def tempered_sigmoid(activations, t, num_iters = 5):
    """Tempered sigmoid, computed as a two-class tempered softmax.

    Args:
      activations: Activations for the positive class for binary classification.
      t: Temperature tensor > 0.0.
      num_iters: Number of iterations to run the method.
    Returns:
      A probabilities tensor for the positive class.
    """
    # The second class gets an activation of zero.
    zeros = torch.zeros_like(activations)
    two_class = torch.stack((activations, zeros), dim=-1)
    return tempered_softmax(two_class, t, num_iters)[..., 0]

def tempered_softmax(activations, t, num_iters=5):
    """Tempered softmax over the last dimension.

    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      t: Temperature > 1.0.
      num_iters: Number of iterations to run the method.
    Returns:
      A probabilities tensor.
    """
    if t != 1.0:
        shift = compute_normalization(activations, t, num_iters)
        return exp_t(activations - shift, t)
    # t == 1.0 is the ordinary softmax.
    return activations.softmax(dim=-1)

def bi_tempered_binary_logistic_loss(activations,
        labels,
        t1,
        t2,
        label_smoothing = 0.0,
        num_iters=5,
        reduction='mean'):
    """Bi-Tempered binary logistic loss.

    Expands the binary problem into two classes and delegates to
    `bi_tempered_logistic_loss`.

    Args:
      activations: A tensor containing activations for class 1.
      labels: A tensor with shape as activations, containing probabilities for class 1
      t1: Temperature 1 (< 1.0 for boundedness).
      t2: Temperature 2 (> 1.0 for tail heaviness, < 1.0 for finite support).
      label_smoothing: Label smoothing
      num_iters: Number of iterations to run the method.
      reduction: Forwarded to `bi_tempered_logistic_loss`.
    Returns:
      A loss tensor.
    """
    # The second class gets an activation of zero and the complementary label.
    two_class_activations = torch.stack(
        (activations, torch.zeros_like(activations)), dim=-1)

    positive = labels.to(activations.dtype)
    two_class_labels = torch.stack((positive, 1.0 - positive), dim=-1)

    return bi_tempered_logistic_loss(
        two_class_activations,
        two_class_labels,
        t1,
        t2,
        label_smoothing=label_smoothing,
        num_iters=num_iters,
        reduction=reduction,
    )

def bi_tempered_logistic_loss(activations,
        labels,
        t1,
        t2,
        label_smoothing=0.0,
        num_iters=5,
        reduction = 'mean'):

    """Bi-Tempered Logistic Loss.
    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      labels: A tensor with shape and dtype as activations (onehot),
        or a long tensor of one dimension less than activations (pytorch standard)
      t1: Temperature 1 (< 1.0 for boundedness).
      t2: Temperature 2 (> 1.0 for tail heaviness, < 1.0 for finite support).
      label_smoothing: Label smoothing parameter between [0, 1). Default 0.0.
      num_iters: Number of iterations to run the method. Default 5.
      reduction: ``'none'`` | ``'mean'`` | ``'sum'``. Default ``'mean'``.
        ``'none'``: No reduction is applied, return shape is shape of
        activations without the last dimension.
        ``'mean'``: Loss is averaged over all examples; a scalar is returned.
        ``'sum'``: Loss is summed over all examples; a scalar is returned.
    Returns:
      A loss tensor.
    Raises:
      ValueError: If `reduction` is not one of the supported values.
    """
    # Reject an unknown reduction up front; previously the function ran to
    # the end and silently returned None.
    if reduction not in ('none', 'mean', 'sum'):
        raise ValueError(
            "reduction must be 'none', 'mean' or 'sum', got {!r}".format(reduction))

    if len(labels.shape)<len(activations.shape): #not one-hot
        labels_onehot = torch.zeros_like(activations)
        # Scatter along the class (last) dimension so activations with more
        # than two dimensions work too; for 2-D input this equals dim 1.
        labels_onehot.scatter_(-1, labels[..., None], 1)
    else:
        labels_onehot = labels

    if label_smoothing > 0:
        num_classes = labels_onehot.shape[-1]
        labels_onehot = ( 1 - label_smoothing * num_classes / (num_classes - 1) ) \
                * labels_onehot + \
                label_smoothing / (num_classes - 1)

    probabilities = tempered_softmax(activations, t2, num_iters)

    # The 1e-10 offset keeps log_t finite where a label is exactly zero.
    loss_values = labels_onehot * log_t(labels_onehot + 1e-10, t1) \
            - labels_onehot * log_t(probabilities, t1) \
            - labels_onehot.pow(2.0 - t1) / (2.0 - t1) \
            + probabilities.pow(2.0 - t1) / (2.0 - t1)
    loss_values = loss_values.sum(dim = -1) #sum over classes

    if reduction == 'none':
        return loss_values
    if reduction == 'sum':
        return loss_values.sum()
    return loss_values.mean()
    
class BiTemperedLogisticLoss(nn.Module):
    """`nn.Module` wrapper around `bi_tempered_logistic_loss`.

    Args:
        t1: Temperature 1 forwarded to the loss.
        t2: Temperature 2 forwarded to the loss.
        smoothing: Label smoothing forwarded to the loss.
    """

    def __init__(self, t1, t2, smoothing=0.0):
        super().__init__()
        self.t1, self.t2 = t1, t2
        self.smoothing = smoothing

    def forward(self, logit_label, truth_label):
        """Return the mean per-example bi-tempered loss."""
        per_example = bi_tempered_logistic_loss(
            logit_label,
            truth_label,
            t1=self.t1,
            t2=self.t2,
            label_smoothing=self.smoothing,
            reduction='none',
        )
        return per_example.mean()

In [None]:
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a smoothed one-hot target distribution.

    The true class receives `1 - smoothing`; every other class receives
    `smoothing / (classes - 1)`.
    """

    def __init__(self, classes=5, smoothing=0.0, dim=-1):
        super().__init__()
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the mean smoothed cross-entropy of `pred` against `target`."""
        log_probs = pred.log_softmax(dim=self.dim)

        # The target distribution is a constant; keep it out of the graph.
        with torch.no_grad():
            off_value = self.smoothing / (self.cls - 1)
            true_dist = torch.full_like(log_probs, off_value)
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)

        per_example = (-true_dist * log_probs).sum(dim=self.dim)
        return per_example.mean()

# Plotter

In [None]:
def print_history(fold,history,num_epochs=EPOCHS):
    """Plot per-epoch accuracy and loss curves for one fold.

    Accuracy is drawn on the left y-axis and loss on a twin right y-axis.
    The best validation accuracy and the lowest validation loss are marked.

    Args:
        fold: Zero-based fold index; the title shows `fold + 1`.
        history: Dict with the keys ``'train_history_accuracy'``,
            ``'val_history_accuracy'``, ``'train_history_loss'`` and
            ``'val_history_loss'``, each a sequence of `num_epochs` values.
        num_epochs: Number of epochs on the x-axis.
    """
    epochs = np.arange(num_epochs)
    # Explicit axes objects instead of pyplot's implicit "current axes", so
    # it is unambiguous which axis every call draws on.
    fig, ax_acc = plt.subplots(figsize=(15,5))

    ax_acc.plot(epochs, history['train_history_accuracy'], '-o', label='Train ACCURACY', color='#ff7f0e')
    ax_acc.plot(epochs, history['val_history_accuracy'], '-o', label='Val ACCURACY', color='#1f77b4')

    best_acc_epoch = np.argmax(history['val_history_accuracy'])
    best_acc = np.max(history['val_history_accuracy'])

    xdist = ax_acc.get_xlim()[1] - ax_acc.get_xlim()[0]
    ydist = ax_acc.get_ylim()[1] - ax_acc.get_ylim()[0]

    ax_acc.scatter(best_acc_epoch, best_acc, s=200, color='#1f77b4')
    # The curve is accuracy; the annotation used to read "max auc".
    ax_acc.text(
        best_acc_epoch-0.03*xdist,
        best_acc-0.13*ydist,
        'max acc\n%.2f'%best_acc,
        size=14
    )

    ax_acc.set_ylabel('ACCURACY', size=14)
    ax_acc.set_xlabel('Epoch', size=14)
    ax_acc.legend(loc=2)

    # Loss curves share the x-axis but use their own y-axis on the right.
    ax_loss = ax_acc.twinx()

    ax_loss.plot(epochs, history['train_history_loss'], '-o', label='Train Loss', color='#2ca02c')
    ax_loss.plot(epochs, history['val_history_loss'], '-o', label='Val Loss', color='#d62728')

    best_loss_epoch = np.argmin(history['val_history_loss'])
    best_loss = np.min(history['val_history_loss'])

    ydist = ax_loss.get_ylim()[1] - ax_loss.get_ylim()[0]

    ax_loss.scatter(best_loss_epoch, best_loss, s=200, color='#d62728')
    ax_loss.text(
        best_loss_epoch-0.03*xdist,
        best_loss+0.05*ydist,
        'min loss',
        size=14
    )

    ax_loss.set_ylabel('Loss', size=14)
    ax_acc.set_title(f'FOLD {fold + 1}',size=18)
    ax_loss.legend(loc=3)

    plt.show()

# Engine

In [None]:
# Load the 2020 fold assignments; with USE_FULL_DATA the 2019 fold file is
# appended and the 2020 rows are tagged so validation can be limited to them.
df = pd.read_csv('../input/cassava-train-folds/train_folds.csv')

if USE_FULL_DATA:
    df['source'] = 2020
    df_2019 = pd.read_csv('../input/cassava-2019-data-folds/folds_2019.csv')

    # DataFrame.append was removed in pandas 2.0; pd.concat gives the same
    # result (original indices are kept).
    df = pd.concat([df, df_2019])

print(df)

In [None]:
def run(fold):
    """Train and validate one cross-validation fold.

    Rows whose `kfold` differs from `fold` are used for training and the
    rest for validation (restricted to `source == 2020` when USE_FULL_DATA
    is set). The weights with the lowest validation loss are saved to
    ``model_best_loss_fold_{fold}.bin`` and the training curves are plotted.

    Args:
        fold: Index of the fold held out for validation.
    """
    df_train = df[df.kfold != fold].reset_index(drop=True)
    if USE_FULL_DATA:
        df_valid = df[(df['kfold'] == fold)&(df['source'] == 2020)].reset_index(drop=True)
    else:
        df_valid = df[df['kfold'] == fold].reset_index(drop=True)

    # Defining DataSet
    train_dataset = CassavaDataset(
        image_ids=df_train['image_id'].values,
        labels=df_train['label'].values,
        augmentations=get_train_transforms(),
        dimension = DIM
    )

    valid_dataset = CassavaDataset(
        image_ids=df_valid['image_id'].values,
        labels=df_valid['label'].values,
        augmentations=get_valid_transforms(),
        dimension = DIM
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=TRAIN_BATCH_SIZE,
        # Training batches must be reshuffled every epoch; the loader
        # previously kept the DataFrame order (shuffle defaults to False).
        shuffle=True,
        pin_memory=True,
        drop_last=True,
        num_workers=NUM_WORKERS
    )

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=VALID_BATCH_SIZE,
        num_workers=NUM_WORKERS,
        shuffle=False,
        pin_memory=True,
        drop_last=False,
    )

    # Defining Device (taken from the configuration cell)
    device = torch.device(DEVICE)

    # Defining Model for specific fold
    model = CassavaModel(out_features=5)
    model.to(device)

    # Defining criterion
    criterion = fetch_loss()
    criterion.to(device)

    # Defining Optimizer with weight decay to params other than bias and layer norms
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.001},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
    ]

    optimizer = torch.optim.Adam(optimizer_parameters, lr=LR)

    # Defining LR Scheduler
    scheduler = fetch_scheduler(optimizer)

    # History dictionary to store everything
    history = {
        'train_history_loss': [],
        'train_history_accuracy': [],
        'val_history_loss': [],
        'val_history_accuracy': [],
    }

    # THE ENGINE LOOP
    best_loss = float('inf')
    fold_val_accuracy = []
    for epoch in range(EPOCHS):
        train_loss,train_accuracy = train_fn(train_loader, model,criterion, optimizer, device,scheduler=scheduler,epoch=epoch)

        valid_loss,valid_accuracy = eval_fn(valid_loader, model, criterion,device)

        history['train_history_loss'].append(train_loss.avg)
        history['train_history_accuracy'].append(train_accuracy.avg)
        history['val_history_loss'].append(valid_loss.avg)
        history['val_history_accuracy'].append(valid_accuracy.avg)

        fold_val_accuracy.append(valid_accuracy.avg)

        # Checkpoint whenever the validation loss improves.
        if valid_loss.avg < best_loss:
            best_loss = valid_loss.avg
            torch.save(model.state_dict(),f'model_best_loss_fold_{fold}.bin')

    # Mean validation accuracy across the epochs of this fold.
    print('FOLD_CV : {}'.format(np.mean(fold_val_accuracy)))
    print_history(fold,history,num_epochs=epoch+1)

In [None]:
# Train and validate with fold 0 held out; the weights with the lowest
# validation loss are written to model_best_loss_fold_0.bin.
run(fold=0)

In [None]:
# Train and validate with fold 1 held out; the weights with the lowest
# validation loss are written to model_best_loss_fold_1.bin.
run(fold=1)