# GPU Info

In [None]:
# Capture the output lines of the `nvidia-smi` shell command (IPython `!` syntax)
# and join them into a single string.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
# The substring 'failed' in the command output is used as the signal that no
# GPU is reachable; otherwise the full report is printed.
if gpu_info.find('failed') >= 0:
    print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
    print('and then re-execute this cell.')
else:
    print(gpu_info)

# Dependencies and Imports

In [None]:
!pip install -q yamale==3.0.4
!pip install -q scikit-learn==0.23.2
!pip install -q torch==1.7.0
!pip install -q torchvision==0.8.1
!pip install -q albumentations==0.5.1
!pip install -q torchtoolbox==0.1.5

import datetime
import gc
import os
import random
import sys
import time
import warnings
from abc import ABC, abstractmethod
from collections import Counter
from glob import glob
from typing import *
from typing import List, Optional
import albumentations
import cv2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz
import seaborn as sns
import sklearn
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchtoolbox
import torchvision
import yamale
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import GroupKFold, KFold, StratifiedKFold
from torch.optim import *
from torch.utils.data import DataLoader, Dataset, Subset
from torchtoolbox.transform import Cutout
from tqdm import tqdm


from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

sys.path.append('../input/hongnangeffnet/gen-efficientnet-pytorch-master-hongnan')
#sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
sys.path.append('../input/autoaug')

import geffnet
from auto_augment import AutoAugment, Cutout



# Config

In [None]:
class GlobalConfig:
    """Class-level settings shared by the rest of the notebook.

    The attributes are plain class variables; the notebook reads them
    directly off the class (see ``config=GlobalConfig``), so no instance
    is ever created.
    """
    seed = 1992
    num_classes = 11
    class_list = [0,1,2,3,4,5,6,7,8,9,10]
    batch_size = 32
    n_epochs = 3
   
    # unpack the key dict
    # `scheduler` is the name of a class in torch.optim.lr_scheduler; the
    # matching entry of `scheduler_params` is unpacked as its keyword arguments.
    scheduler = 'CosineAnnealingLR'
    scheduler_params = {'StepLR': {'step_size':2, 'gamma':0.3, 'last_epoch':-1, 'verbose':True},
                
                'ReduceLROnPlateau': {'mode':'max', 'factor':0.5, 'patience':0, 'threshold':0.0001,
                                      'threshold_mode':'rel', 'cooldown':0, 'min_lr':1e-5,
                                      'eps':1e-08, 'verbose':True},
                
                'CosineAnnealingWarmRestarts': {'T_0':10, 'T_mult':1, 'eta_min':1e-6, 'last_epoch':-1,
                                                'verbose':True},
                'CosineAnnealingLR':{'T_max':6,'eta_min':1e-6, 'last_epoch':-1}}
    
    # do scheduler.step after optimizer.step
    # train_step_scheduler: step the scheduler after every training batch.
    # val_step_scheduler: step the scheduler once per epoch, after validation.
    train_step_scheduler = False  
    val_step_scheduler = True
    
    # optimizer
    # `optimizer` is the name of a class in torch.optim; the matching entry of
    # `optimizer_params` is unpacked as its keyword arguments.
    optimizer = 'AdamW'
    optimizer_params = {'AdamW':{'lr':1e-4, 'betas':(0.9,0.999), 'eps':1e-08,
                                 'weight_decay':1e-6,'amsgrad':False}, 
                        'Adam':{'lr':1e-4,'betas':(0.9,0.999), 'eps':1e-08,
                                 'weight_decay':1e-6,'amsgrad':False},}

    # criterion
    # `criterion` / `criterion_val` name the training / validation loss classes;
    # `criterion_params` holds the keyword arguments for each supported loss.
    # NOTE(review): 'LabelSmoothingLoss' is configured with classes=2 while
    # num_classes is 11 -- confirm before selecting that loss.
    criterion = 'BCEWithLogitsLoss'
    criterion_val = 'BCEWithLogitsLoss'
    criterion_params = {'BCEWithLogitsLoss': {'weight':None,'size_average':None,
                                             'reduce':None, 'reduction':'mean', 'pos_weight': None},
                        'CrossEntropyLoss': {'weight':None,'size_average':None,
                                             'ignore_index':-100,'reduce':None,
                                             'reduction':'mean'},
                        'LabelSmoothingLoss': {'classes':2, 'smoothing':0.05, 'dim':-1},
                        'FocalCosineLoss': {'alpha':1, 'gamma':2 , 'xent':0.1}}

    # Number of batches whose gradients are accumulated before an optimizer step.
    gradient_accumulation_steps=2
    # Upper bound handed to gradient-norm clipping in the training loop.
    max_grad_norm=1000
    # Column whose values define the groups for GroupKFold.
    group_kfold_split = 'PatientID'
    image_size = 256
    resize = 256
    crop_size = {128:110, 256:200, 512:400}
    verbose = 1
    verbose_step = 1
    num_folds = 5
    # Column holding the image identifier, and the list of target columns
    # (one 0/1 indicator column per class).
    image_col_name = 'StudyInstanceUID'
    class_col_name = ['ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
                 'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal', 
                 'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
                 'Swan Ganz Catheter Present']
    
    # NOTE(review): 'test_path' points at the SIIM-ISIC melanoma dataset while
    # every other path refers to RANZCR data -- looks like a leftover; confirm.
    paths = {'train_path': '../input/ranzcr-clip-trainset-256x256',
             'test_path': '../input/siim-isic-melanoma-classification/jpeg/test',
             'csv_path': '../input/ranzcr-clip-catheter-line-classification/train.csv',
             'log_path': './log.txt',
             'save_path': './',
             'model_weight_path_folder': '../input/efficientnet-weights'}

    # Backbone name passed to geffnet.create_model.
    effnet = 'tf_efficientnet_b5_ns'
    # Evaluated once, when the class body runs: CUDA if available, else CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Alias the configuration class itself (not an instance); later cells read
# settings as `config.<name>`.
config=GlobalConfig

In [None]:
# img = cv2.imread('../input/ranzcr-clip-catheter-line-classification/train/1.2.826.0.1.3680043.8.498.10000428974990117276582711948006105617.jpg') 
# filename = './testing1.jpg'
# cv2.imwrite(filename, img) 

# Seeding

In [None]:
def seed_all(seed: int = 1930):
    """Seed every random number generator used by the notebook.

    Sets ``PYTHONHASHSEED``, seeds PyTorch (CPU and CUDA), NumPy and the
    built-in ``random`` module, and configures cuDNN for deterministic
    behaviour (deterministic on, benchmark off, cuDNN disabled).

    Arguments:
        seed : The seed value applied to all generators.
    """
    print("Using Seed Number {}".format(seed))

    # Fixed hash seed, exported through the environment.
    os.environ["PYTHONHASHSEED"] = str(seed)

    # PyTorch generators: CPU first, then all CUDA devices, then the current one.
    for torch_seed_fn in (torch.manual_seed,
                          torch.cuda.manual_seed_all,
                          torch.cuda.manual_seed):
        torch_seed_fn(seed)

    # NumPy and the Python standard-library generator.
    np.random.seed(seed)
    random.seed(seed)

    # cuDNN switches that trade speed for reproducibility.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
    cudnn.enabled = False


def seed_worker(_worker_id):
    """Re-seed NumPy and ``random`` from the current PyTorch seed.

    The seed is ``torch.initial_seed()`` reduced modulo 2**32.  The
    ``_worker_id`` argument is accepted but not used.
    """
    derived_seed = torch.initial_seed() % (1 << 32)
    for reseed in (np.random.seed, random.seed):
        reseed(derived_seed)
    
# Seed everything once, using the seed stored in the configuration.
seed_all(seed=config.seed)

# CV Folds

In [None]:
def make_folds(train_csv: pd.DataFrame,
               config,
               cv_schema=None,
               use_skf=True,
               use_gkf=False) -> pd.DataFrame:
    """Return a copy of ``train_csv`` with a 1-based integer ``fold`` column.

    Arguments:
        train_csv : The training data frame; it is not modified.
        config : Object providing ``num_folds``, ``seed``, ``image_col_name``,
                 ``class_col_name`` (a column name or a list of column
                 names) and, for group k-fold, ``group_kfold_split``.
        cv_schema : Unused; kept for backward compatibility.
        use_skf : Use ``StratifiedKFold`` (takes precedence over ``use_gkf``).
        use_gkf : Use ``GroupKFold`` grouped by ``config.group_kfold_split``.

    Raises:
        ValueError : If neither ``use_skf`` nor ``use_gkf`` is set.
    """
    # TODO: add options for cv_scheme as it is cumbersome here.
    if not (use_skf or use_gkf):
        raise ValueError(
            "No cross-validation scheme selected: set use_skf or use_gkf.")

    df_folds = train_csv.copy()

    class_cols = config.class_col_name
    multi_label = not isinstance(class_cols, str)
    target_cols = list(class_cols) if multi_label else [class_cols]

    if use_skf:
        if multi_label:
            # StratifiedKFold only accepts a 1-D target, so stratify on the
            # combination of all label columns collapsed into a single key.
            stratify_on = df_folds[target_cols].astype(str).agg("-".join, axis=1)
        else:
            stratify_on = df_folds[class_cols]
        splitter = StratifiedKFold(n_splits=config.num_folds,
                                   shuffle=True,
                                   random_state=config.seed)
        splits = splitter.split(X=df_folds[config.image_col_name],
                                y=stratify_on)
    else:
        splitter = GroupKFold(n_splits=config.num_folds)
        splits = splitter.split(
            df_folds,
            df_folds[class_cols],
            groups=df_folds[config.group_kfold_split].values)

    # The splitters yield *positional* indices, so collect the fold ids in a
    # positional array instead of label-based ``.loc`` assignment, which is
    # only correct when the frame has a default RangeIndex.
    fold_ids = np.zeros(len(df_folds), dtype=int)
    for fold, (_train_idx, val_idx) in enumerate(splits):
        fold_ids[val_idx] = fold + 1
    df_folds["fold"] = fold_ids

    if use_skf:
        print(df_folds.groupby(["fold", *target_cols]).size())

    return df_folds

# Load the training labels and assign folds with GroupKFold
# (use_skf=False, use_gkf=True); the last line displays the resulting frame.
train_csv = pd.read_csv(config.paths['csv_path']) 
df_folds = make_folds(train_csv, config,use_skf=False, use_gkf=True)
df_folds

# Utilities

In [None]:
def get_file_type(image_folder_path: str,
                  allowed_extensions: Optional[List] = None):
    """Get the file type of images in a folder.

    Arguments:
        image_folder_path : Folder whose entries are inspected.
        allowed_extensions : Lower-case extensions (with leading dot) that
                             are accepted.  Defaults to .jpg, .png, .jpeg.

    Returns:
        The single lower-cased extension shared by every entry of the folder.

    Raises:
        AssertionError : If the folder does not contain exactly one distinct
                         extension, or that extension is not allowed.
    """
    if allowed_extensions is None:
        allowed_extensions = ['.jpg', '.png', '.jpeg']

    # Count the lower-cased extension of every directory entry.
    extension_counts = Counter(
        os.path.splitext(file_name)[-1].lower()
        for file_name in os.listdir(image_folder_path))

    # The message is parenthesised so both halves belong to the assert;
    # previously the second half was a detached no-op string statement.
    assert len(extension_counts) == 1, (
        "The extension in the folder should all be the same, "
        "but found {} extensions".format(sorted(extension_counts)))

    extension_type = next(iter(extension_counts))
    assert extension_type in allowed_extensions, (
        "Extension {!r} is not one of the allowed extensions {}".format(
            extension_type, allowed_extensions))
    return extension_type



def check_df_ext(df: pd.DataFrame,
                 col_name: str,
                 allowed_extensions: Optional[List] = None):
    """Report whether the image ids in a data frame column carry an extension.

    Arguments:
        df : Data frame holding the image ids.
        col_name : Name of the column with the image ids.
        allowed_extensions : Lower-case extensions (with leading dot) that
                             are accepted.  Defaults to .jpg, .png, .jpeg.

    Returns:
        True if every id ends with the same allowed extension, False if the
        ids carry no extension.

    Raises:
        AssertionError : If the ids mix different extensions, or share an
                         extension that is not allowed.
    """
    if allowed_extensions is None:
        allowed_extensions = ['.jpg', '.png', '.jpeg']

    extension_types = set()
    for image_id in df[col_name].tolist():
        # os.path.splitext always returns a 2-tuple; index 1 is the suffix
        # ("" when there is none).
        suffix = os.path.splitext(image_id)[1].lower()
        # Ids such as DICOM UIDs ("1.2.826.0.1...") contain dots themselves,
        # so a purely numeric trailing component is part of the id rather
        # than a file extension.
        if suffix not in allowed_extensions and suffix[1:].isdigit():
            suffix = ""
        extension_types.add(suffix)

    assert len(extension_types
               ) == 1, "The extension in the image id should all be the same"

    if "" in extension_types:
        return False

    assert next(iter(extension_types)) in allowed_extensions
    return True

# Augmentations

In [None]:
class Augmentation(ABC):
    """Common interface for the augmentation wrappers used by the dataset.

    Concrete subclasses wrap a transform pipeline and expose it through
    ``augment``.
    """

    @abstractmethod
    def augment(self, image):
        """Augment an image and return the transformed result."""
        
class AlbumentationsAugmentation(Augmentation):
    """Augmentation backed by an albumentations ``Compose`` pipeline."""

    def __init__(self, transforms: albumentations.core.composition.Compose):
        self.transforms = transforms

    def augment(self, image):
        """Run the pipeline on ``image`` and return its ``"image"`` output."""
        # The pipeline is called with the image as the ``image`` keyword and
        # returns a mapping; only the "image" entry is handed back.
        return self.transforms(image=image)["image"]
    
class TorchTransforms(Augmentation):
    """Augmentation backed by a torchvision ``Compose`` pipeline."""

    def __init__(self, transforms: torchvision.transforms.transforms.Compose):
        self.transforms = transforms

    def augment(self, image):
        """Apply the pipeline, converting NumPy arrays to PIL images first."""
        needs_pil = isinstance(image, np.ndarray)
        pipeline_input = (torchvision.transforms.ToPILImage()(image)
                          if needs_pil else image)
        return self.transforms(pipeline_input)
    
class TorchToolBoxTransforms(Augmentation):
    """Augmentation backed by a torchtoolbox ``Compose`` pipeline."""

    def __init__(self, transforms: torchtoolbox.transform.transforms.Compose):
        self.transforms = transforms

    def augment(self, image):
        """Apply the wrapped pipeline to ``image`` and return the result."""
        return self.transforms(image)

In [None]:
class augment_config:
    """Lists of albumentations transforms for training, validation and test.

    Each list is built once, when the class body runs, using
    ``config.image_size`` for the output height and width.
    """
    # Training pipeline: random crop/flip/shift-scale-rotate, occasional
    # JPEG-compression or downscale degradation, colour jitter, then
    # normalisation and random dropout before conversion to a tensor.
    # NOTE(review): CoarseDropout and Cutout are applied *after* Normalize --
    # confirm that ordering is intended.
    train_augmentations =  [albumentations.RandomResizedCrop(height=config.image_size, width=config.image_size),
                            #albumentations.Transpose(p=0.5),
                            albumentations.HorizontalFlip(p=0.5),
                            #albumentations.VerticalFlip(p=0.5),
                            albumentations.ShiftScaleRotate(p=0.5),
                            albumentations.OneOf([
                                                    albumentations.JpegCompression(),
                                                      albumentations.Downscale(scale_min=0.1, scale_max=0.15),
                                                      ], p=0.2),
                            albumentations.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
                            albumentations.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),                          
                            albumentations.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
                            albumentations.CoarseDropout(p=0.5),
                            albumentations.Cutout(p=0.5),
                            ToTensorV2(p=1.0)]

    # Validation pipeline: deterministic resize, normalisation, tensor conversion.
    val_augmentations = [albumentations.Resize(height=config.image_size, width=config.image_size, p=1.0),
                         albumentations.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
                         ToTensorV2(p=1.0)]

    # Test pipeline: resize and tensor conversion only.
    # NOTE(review): unlike val_augmentations there is no Normalize step here --
    # confirm this is intended.
    test_augmentations = [albumentations.Resize(height=config.image_size, width=config.image_size, p=1.0), ToTensorV2(p=1.0)]

In [None]:
def get_albu_transforms(config):
    """Build the albumentations train and validation pipelines.

    The steps come from ``augment_config``; ``config`` is accepted but not
    used.  Returns the pair ``(transforms_train, transforms_val)``.
    """
    train_steps = list(augment_config.train_augmentations)
    val_steps = list(augment_config.val_augmentations)
    return (albumentations.Compose(train_steps, p=1.0),
            albumentations.Compose(val_steps, p=1.0))
    
def get_transforms_torchvision(config):
    """Build the torchvision train and validation pipelines.

    Training applies a random horizontal flip and ``AutoAugment`` before the
    tensor conversion and normalisation that validation also uses.
    ``config`` is accepted but not used.  Returns the pair
    ``(transforms_train, transforms_val)``.
    """
    def _tensor_and_normalize():
        # Fresh transform objects on every call so the two pipelines do not
        # share instances.
        return [
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225]),
        ]

    train_steps = [
        torchvision.transforms.RandomHorizontalFlip(),
        # torchvision.transforms.RandomVerticalFlip(),
        AutoAugment(),
    ] + _tensor_and_normalize()

    transforms_train = torchvision.transforms.Compose(train_steps)
    transforms_val = torchvision.transforms.Compose(_tensor_and_normalize())
    return transforms_train, transforms_val

def get_torchtoolbox_transforms(config):
    """Build the torchtoolbox train and validation pipelines.

    Training applies a random resized crop (size 256, scale 0.8-1.0) and
    random horizontal/vertical flips before the tensor conversion and
    normalisation that validation also uses.  ``config`` is accepted but not
    used.  Returns the pair ``(transforms_train, transforms_val)``.
    """
    def _tensor_and_normalize():
        # Fresh transform objects on every call so the two pipelines do not
        # share instances.
        return [
            torchtoolbox.transform.ToTensor(),
            torchtoolbox.transform.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225]),
        ]

    train_steps = [
        #DrawHair(),
        torchtoolbox.transform.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
        torchtoolbox.transform.RandomHorizontalFlip(),
        torchtoolbox.transform.RandomVerticalFlip(),
        #Microscope(p=0.4),
    ] + _tensor_and_normalize()

    transforms_train = torchtoolbox.transform.Compose(train_steps)
    transforms_val = torchtoolbox.transform.Compose(_tensor_and_normalize())
    return transforms_train, transforms_val

# Dataset

Note: we use `BCEWithLogitsLoss` here because each target is a vector of independent 0/1 indicators, one per class (a multi-label problem rather than a one-hot multi-class one).

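TODO: `check_df_ext` raises an assertion error because the image IDs themselves contain dots (e.g. `1.2.826.0.1...`), so `os.path.splitext` mistakes the last dotted component of the ID for a file extension.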

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import albumentations
import torch

from typing import Optional
from tqdm import tqdm



class RANZCR(torch.utils.data.Dataset):

    """Image dataset for the RANZCR catheter/line classification data.

    Each item is ``(image_id, image, label)``, or
    ``(image_id, (image, meta), label)`` when ``meta_features`` is given.
    ``transforms`` is expected to expose an ``augment(image)`` method.
    """

    def __init__(self,
                 df: pd.DataFrame,
                 config: type,
                 transforms: type = None,
                 test: bool = False,
                 transform_norm: bool = False, meta_features=None):
        """Construct a RANZCR dataset.

        Arguments:
            df : Data frame with one row per image.
            config : Configuration providing ``paths``, ``image_col_name``
                     and ``class_col_name``.
            transforms : Optional object with an ``augment(image)`` method.
            test : Read images from ``paths['test_path']`` and return a
                   placeholder label instead of reading the label columns.
            transform_norm : When False, images are scaled to [0, 1] as
                             float32 before ``transforms`` is applied.
            meta_features : Optional column name(s) returned alongside the
                            image as a float32 array.
        """

        self.df = df
        self.config = config
        self.transforms = transforms
        self.test = test
        self.transform_norm = transform_norm
        self.meta_features = meta_features

        if self.transforms is None:
            assert self.transform_norm is False
            print('Transforms is None and Transform Normalization is not '
                  'initialized!')

        # The extension is always inferred from the training image folder.
        self.image_extension = get_file_type(
            image_folder_path=config.paths['train_path'], allowed_extensions=None)

        # TODO: detect ids that already carry an extension (check_df_ext) and
        # use an empty image_extension in that case.

    def __len__(self):
        """Get the dataset length."""
        return len(self.df)

    def __getitem__(self, idx: int):
        """Get a row from the dataset.

        Raises:
            FileNotFoundError : If the image file cannot be read.
        """

        image_id = self.df[self.config.image_col_name].values[idx]
        file_name = "{}{}".format(image_id, self.image_extension)

        if self.test:
            # The test frame may not have label columns, so a placeholder
            # label keeps the item structure identical to training.
            label = torch.zeros(1)
            image_path = os.path.join(self.config.paths['test_path'], file_name)
        else:
            label = self.df[self.config.class_col_name].values[idx]
            label = torch.as_tensor(data=label, dtype=torch.float32, device=None)
            image_path = os.path.join(self.config.paths['train_path'], file_name)

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None; fail here with
            # the offending path instead of a cryptic error further down.
            raise FileNotFoundError(
                "Could not read image at '{}'".format(image_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform_norm is False:
            image = image.astype(np.float32) / 255.0

        if self.transforms is not None:
            image = self.transforms.augment(image)
        else:
            image = torch.as_tensor(data=image,
                                    dtype=torch.float32,
                                    device=None)

        if self.meta_features is not None:
            meta = np.array(self.df.iloc[idx][self.meta_features].values, dtype=np.float32)
            return image_id, (image, meta), label

        return image_id, image, label


# Model

In [None]:
class CustomEfficientNet(nn.Module):
    """EfficientNet backbone from ``geffnet`` with a replaced classifier head.

    The backbone named by ``config.effnet`` is created and its ``classifier``
    layer is swapped for a linear layer with ``config.num_classes`` outputs.
    """

    def __init__(self, config: type, pretrained: bool=True):
        super().__init__()
        self.config = config

        backbone = geffnet.create_model(
            model_weight_path_folder=config.paths['model_weight_path_folder'],
            model_name=config.effnet,
            pretrained=pretrained)
        self.model = backbone

        # Keep the input width of the original head; only the output size changes.
        head_in_features = backbone.classifier.in_features
        self.model.classifier = nn.Linear(head_in_features, config.num_classes)

    def forward(self, input_neurons):
        """Return the backbone's output for ``input_neurons``."""
        # TODO: add dropout layers, or the likes.
        return self.model(input_neurons)





# Custom Loss Functions

https://towardsdatascience.com/what-is-label-smoothing-108debd7ef06

https://www.kaggle.com/c/cassava-leaf-disease-classification/discussion/203103

https://www.kaggle.com/c/cassava-leaf-disease-classification/discussion/203271

In [None]:
# ====================================================
# Label Smoothing
# ====================================================
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a label-smoothed target distribution.

    The true class receives probability ``1 - smoothing``; every other entry
    receives ``smoothing / (classes - 1)``.
    """

    def __init__(self, classes=2, smoothing=0.0, dim=-1):
        super().__init__()
        self.classes = classes
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing
        self.dim = dim

    def forward(self, input, target):
        """Return the mean smoothed cross-entropy of ``input`` vs ``target``."""
        log_probs = input.log_softmax(dim=self.dim)

        # The target distribution is a constant; no gradient flows through it.
        with torch.no_grad():
            off_value = self.smoothing / (self.classes - 1)
            smoothed = torch.full_like(log_probs, off_value)
            smoothed.scatter_(1, target.data.unsqueeze(1), self.confidence)

        per_sample = (-smoothed * log_probs).sum(dim=self.dim)
        return per_sample.mean()

In [None]:
class FocalCosineLoss(nn.Module):
    """Cosine-embedding loss plus a weighted focal cross-entropy term.

    Arguments:
        alpha : Multiplier of the focal term.
        gamma : Focusing exponent applied to ``1 - pt``.
        xent : Weight of the focal term in the final sum.
    """

    def __init__(self, alpha=1, gamma=2, xent=.1):
        super(FocalCosineLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma

        self.xent = xent

        # Target "1" for the cosine embedding loss (inputs should be similar).
        # Kept on the CPU here and moved to the input's device in forward(),
        # so the loss can be constructed on machines without CUDA.
        self.y = torch.Tensor([1])

    def forward(self, input, target, reduction="mean"):
        """Compute the loss for class-index ``target`` and logits ``input``.

        ``reduction`` may be "mean", "sum" or "none" and is applied to both
        terms.
        """
        one_hot = F.one_hot(target, num_classes=input.size(-1)).to(input.dtype)
        cosine_loss = F.cosine_embedding_loss(
            input, one_hot, self.y.to(input.device), reduction=reduction)

        # Per-sample cross-entropy on L2-normalised logits; reduction="none"
        # replaces the deprecated reduce=False.
        cent_loss = F.cross_entropy(F.normalize(input), target, reduction="none")
        pt = torch.exp(-cent_loss)
        focal_loss = self.alpha * (1-pt)**self.gamma * cent_loss

        if reduction == "mean":
            focal_loss = torch.mean(focal_loss)
        elif reduction == "sum":
            # Match the reduction already applied to the cosine term.
            focal_loss = torch.sum(focal_loss)

        return cosine_loss + self.xent * focal_loss

# Meters

In [None]:
class AverageLossMeter:
    """
    Computes and stores the average and current loss
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the running totals and the stored averages."""
        self.curr_batch_avg_loss = 0
        self.avg = 0
        self.running_total_loss = 0
        self.count = 0

    def update(self, curr_batch_avg_loss: float, batch_size: int):
        """Fold one batch into the running, sample-weighted average.

        Arguments:
            curr_batch_avg_loss : Mean loss of the batch.
            batch_size : Number of samples in the batch.
        """
        self.curr_batch_avg_loss = curr_batch_avg_loss
        self.running_total_loss += curr_batch_avg_loss * batch_size
        self.count += batch_size
        # Guard against an empty first batch, which would divide by zero.
        self.avg = self.running_total_loss / self.count if self.count else 0




class AccuracyMeter:
    """Tracks a sample-weighted running average of batch accuracies."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the last batch score and the running totals."""
        self.score = 0
        self.count = 0
        self.sum = 0

    def update(self, y_true, y_pred, batch_size=1):
        """Fold the accuracy of one batch into the running totals.

        Arguments:
            y_true : Ground-truth labels of the batch.
            y_pred : Predicted labels of the batch.
            batch_size : Weight of this batch in the running average.
        """
        self.batch_size = batch_size
        self.count += self.batch_size
        # accuracy_score is already a per-batch mean, so it is weighted by
        # the batch size rather than divided by it.
        self.score = sklearn.metrics.accuracy_score(y_true, y_pred)
        total_score = self.score * self.batch_size

        self.sum += total_score

    @property
    def avg(self):
        """Running average accuracy; 0 before any sample has been counted."""
        # Avoid ZeroDivisionError when the meter is read before any update.
        self.avg_score = self.sum / self.count if self.count else 0
        return self.avg_score

# Callbacks

In [None]:
from enum import Enum
from typing import Union



class Mode(Enum):
    """Direction of improvement; each value is the worst possible start score."""
    MIN = np.inf
    MAX = -np.inf


class EarlyStopping:

    """Class for Early Stopping."""

    # Legacy string-to-start-score lookup, kept for backward compatibility;
    # the Mode enum carries the same values.
    mode_dict = {'min': np.inf, 'max': -np.inf}

    def __init__(self,
                 patience: int = 5,
                 mode: Mode = Mode.MIN,
                 min_delta: float = 1e-5):
        """Construct an EarlyStopping instance.
        Arguments:
            patience : Number of consecutive epochs with no improvement
                       after which training will be stopped. (Default = 5)
            mode : ``Mode.MIN`` / ``Mode.MAX`` (the strings "min" / "max"
                   are accepted as well). In min mode, training will
                   stop when the quantity monitored has stopped
                   decreasing.  In max mode it will stop when the
                   quantity monitored has stopped increasing.
            min_delta : Minimum change in the monitored quantity to
                        qualify as an improvement.
        Raises:
            ValueError : If ``mode`` is a string other than "min" or "max".
        """
        if isinstance(mode, str):
            try:
                mode = Mode[mode.upper()]
            except KeyError:
                raise ValueError(
                    "mode must be 'min' or 'max', got {!r}".format(mode))

        self.patience = patience
        self.mode = mode
        self.min_delta = min_delta
        self.stopping_counter = 0
        self.early_stop = False
        self.best_score = mode.value

    def improvement(self, curr_epoch_score: Union[float, int],
                    curr_best_score: Union[float, int]):
        """Return True if the current score beats the best by ``min_delta``."""
        if self.mode == Mode.MIN:
            return curr_epoch_score <= (curr_best_score - self.min_delta)

        return curr_epoch_score >= (curr_best_score + self.min_delta)

    @property
    def monitor_op(self):
        """The start score associated with the configured mode."""
        return self.mode.value

    def should_stop(self, curr_epoch_score):
        """
        The actual algorithm of early stopping.
        Arguments:
            curr_epoch_score : The value of the metric or loss being
                               monitored for the current epoch.
        Returns:
            A ``(best_score, early_stop)`` tuple: the best score seen so far
            and whether training should stop.
        """
        if self.improvement(curr_epoch_score=curr_epoch_score,
                            curr_best_score=self.best_score):

            # update self.best_score
            self.best_score = curr_epoch_score
            # Patience counts *consecutive* epochs without improvement, so
            # an improvement starts the count over.
            self.stopping_counter = 0

        else:
            self.stopping_counter += 1
            print("Early Stopping Counter {} out of {}".format(
                self.stopping_counter, self.patience))

        if self.stopping_counter >= self.patience:

            print("Early Stopping and since it is early stopping, we will not "
                  "save the model since the metric has not improved for {} "
                  "epochs".format(self.patience))
            # The caller is expected to stop training once this flag is set.
            self.early_stop = True

        return self.best_score, self.early_stop

# Trainer

In [None]:
from torch.cuda.amp import autocast, GradScaler

# IMPORTANT

An important point: we apply a sigmoid to the logits, which is very different from a softmax. Softmax outputs sum to 1 across the classes, whereas sigmoid outputs for the 11 classes are independent and will not sum to 1. Be careful: the multi-class ROC function will give wrong results if you pass it softmax predictions instead of sigmoid predictions.

In [None]:
class Trainer:

    """A class to perform model training."""

    def __init__(self, model, config, early_stopping=None):
        """Construct a Trainer instance."""
        self.model = model
        
        self.config = config
        self.early_stopping = early_stopping
        self.epoch = 0
        self.best_auc = 0
        self.best_acc = 0
        self.best_loss = np.inf
        self.save_path = config.paths['save_path']
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)

        self.criterion = getattr(torch.nn, config.criterion_val)(**config.criterion_params[config.criterion_val])
        self.criterion_val = getattr(torch.nn, config.criterion_val)(**config.criterion_params[config.criterion_val])
        self.optimizer = getattr(torch.optim, config.optimizer)(self.model.parameters(), **config.optimizer_params[config.optimizer])
        self.scheduler = getattr(torch.optim.lr_scheduler, config.scheduler)(optimizer=self.optimizer, **config.scheduler_params[config.scheduler])


        self.val_predictions = None
        self.monitored_metrics = None
        self.date = datetime.datetime.now(pytz.timezone("Asia/Singapore")).strftime("%Y-%m-%d")
  

        self.log("Trainer prepared. We are using {} device.".format(
            self.config.device))

    def fit(self, train_loader, val_loader, fold: int):
        """Fit the model on the given fold."""
        self.log("Training on Fold {} and using {}".format(fold, config.effnet))

        for _epoch in range(self.config.n_epochs):
            # Getting the learning rate after each epoch!
            lr = self.optimizer.param_groups[0]["lr"]
            timestamp = datetime.datetime.now(pytz.timezone("Asia/Singapore")).strftime("%Y-%m-%d %H-%M-%S")
            # printing the lr and the timestamp after each epoch.
            self.log("\n{}\nLR: {}".format(timestamp, lr))

            # start time of training on the training set
            train_start_time = time.time()

            # train one epoch on the training set
            avg_train_loss = self.train_one_epoch(
                train_loader)
            # end time of training on the training set
            train_end_time = time.time()

            # formatting time to make it nicer
            train_elapsed_time = time.strftime(
                "%H:%M:%S", time.gmtime(train_end_time - train_start_time))
            self.log(
                "[RESULT]: Train. Epoch {} | Avg Train Summary Loss: {:.6f} | "
                "Train Accuracy: {:6f} | Time Elapsed: {}".format(
                    self.epoch + 1, avg_train_loss, avg_train_loss,
                    train_elapsed_time))

            val_start_time = time.time()
            # note here has val predictions... in actual fact it is
            # repeated because its same as avg_val_acc_score
            avg_val_loss, val_predictions, avg_roc_score, multi_class_roc_auc_score, score, scores= \
                self.valid_one_epoch(val_loader)
            # not sure if it is good practice to write it here
            self.val_predictions = val_predictions
            val_end_time = time.time()
            val_elapsed_time = time.strftime(
                "%H:%M:%S", time.gmtime(val_end_time - val_start_time))

            self.log("[RESULT]: Validation. Epoch: {} | "
                     "Avg Validation Summary Loss: {:.6f} | "
                     "Validation ROC Kaggle method: {:.6f} | Validation ROC: {:.6f} | Multiclass ROC: {} {} | Time Elapsed: {}".format(
                         self.epoch + 1, avg_val_loss, score,
                         avg_roc_score,multi_class_roc_auc_score,scores,
                         val_elapsed_time))

            # added this flag right before early stopping to let user
            # know which metric im monitoring.
            self.monitored_metrics = avg_roc_score

            if self.early_stopping is not None:

                best_score, early_stop = self.early_stopping.should_stop(
                    curr_epoch_score=self.monitored_metrics)
                self.best_loss = best_score
                self.save("{}_best_loss_fold_{}.pt".format(
                    self.config.effnet, fold))
                if early_stop:
                    break

            else:
                # note here we use avg_val_loss, not train_val_loss! It is
                # just right to use val_loss as benchmark
                if avg_val_loss < self.best_loss:
                    self.best_loss = avg_val_loss

#             if self.best_acc < avg_val_acc_score:
#                 self.best_acc = avg_val_acc_score

                
            if avg_roc_score > self.best_auc:
                self.best_auc = avg_roc_score
                self.save(os.path.join(self.save_path, "{}_{}_best_roc_fold_{}.pt".format(self.date,
                        self.config.effnet, fold)))



            if self.config.val_step_scheduler:
                if isinstance(self.scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                    self.scheduler.step(self.monitored_metrics)
                else:
                    self.scheduler.step()

            # end of training, epoch + 1 so that self.epoch can be updated.
            self.epoch += 1

        # this is where we end the epoch training for the current
        # fold/model, therefore we can call the final "best weight
        # saved" by this exact name that we saved earlier on.
        curr_fold_best_checkpoint = self.load(
            os.path.join(
                self.save_path, "{}_{}_best_roc_fold_{}.pt".format(self.date, self.config.effnet, fold)
            )
        )
        # return the checkpoint for further usage.
        return curr_fold_best_checkpoint

    def train_one_epoch(self, train_loader):
        """Train the model for one full pass over ``train_loader``.

        Every batch is run under CUDA automatic mixed precision. Gradients
        are accumulated over ``self.config.gradient_accumulation_steps``
        batches; at each accumulation boundary they are unscaled, clipped to
        ``self.config.max_grad_norm`` and applied. Gradients left over from a
        trailing, incomplete accumulation window are applied after the loop
        so they cannot leak into the next epoch (which uses a fresh scaler).

        Args:
            train_loader: Iterable yielding ``(image_ids, images, labels)``
                batches. ``len(train_loader)`` is only needed for the verbose
                progress output.

        Returns:
            The loss meter's ``avg`` value after the last batch. The meter is
            fed the per-batch loss *before* it is divided for accumulation.
        """
        # set to train mode
        self.model.train()

        accumulation_steps = self.config.gradient_accumulation_steps

        # log metrics
        summary_loss = AverageLossMeter()

        # timer
        start_time = time.time()

        # Scaler https://pytorch.org/docs/stable/notes/amp_examples.html
        scaler = torch.cuda.amp.GradScaler()

        def apply_accumulated_gradients():
            # Clipping has to see the real gradient magnitudes, so the loss
            # scaling is undone before the norm is measured.
            scaler.unscale_(self.optimizer)
            torch.nn.utils.clip_grad_norm_(
                self.model.parameters(), self.config.max_grad_norm
            )
            scaler.step(self.optimizer)
            scaler.update()
            self.optimizer.zero_grad()

        # True while backward() has produced gradients not yet applied.
        has_pending_gradients = False

        for step, (_image_ids, images, labels) in enumerate(train_loader):
            images = images.to(self.config.device).float()
            labels = labels.to(self.config.device)
            batch_size = labels.shape[0]

            with torch.cuda.amp.autocast():
                logits = self.model(images)
                loss = self.criterion(input=logits, target=labels)

            summary_loss.update(loss.item(), batch_size)

            if accumulation_steps > 1:
                loss = loss / accumulation_steps

            scaler.scale(loss).backward()
            has_pending_gradients = True

            if (step + 1) % accumulation_steps == 0:
                apply_accumulated_gradients()
                has_pending_gradients = False

            # not too sure yet KIV
            if self.config.train_step_scheduler:
                self.scheduler.step()

            # measure elapsed time
            end_time = time.time()

            if self.config.verbose:
                if (step % self.config.verbose_step) == 0:
                    # NOTE(review): the "acc" field below prints the running
                    # loss; no accuracy is tracked in this loop.
                    print(
                        f"Train Steps {step}/{len(train_loader)}, "
                        f"summary_loss: {summary_loss.avg:.3f}, "
                        f"acc: {summary_loss.avg:.3f} "
                        f"time: {(end_time - start_time):.3f}",
                        end="\r",
                    )

        # Flush a trailing partial accumulation window.
        if has_pending_gradients:
            apply_accumulated_gradients()

        return summary_loss.avg

    def valid_one_epoch(self, val_loader):
        """Evaluate the model on ``val_loader`` with gradient tracking disabled.

        Args:
            val_loader: Iterable yielding ``(image_ids, images, labels)``
                batches. ``len(val_loader)`` is only needed for the verbose
                progress output.

        Returns:
            A 6-tuple ``(avg_loss, softmax_preds, avg_roc_score,
            per_class_roc, score, scores)`` where

            * ``avg_loss`` is the loss meter's ``avg`` value,
            * ``softmax_preds`` is the row-wise softmax of the logits for all
              validation samples, concatenated along axis 0,
            * ``per_class_roc`` and ``avg_roc_score`` are the two values
              returned by ``multiclass_roc``,
            * ``score`` and ``scores`` are the two values returned by
              ``get_score``.

            NOTE(review): both metric helpers are fed the *sigmoid* of the
            logits, while the returned predictions are the *softmax* - confirm
            that this asymmetry is intended.
        """
        # set to eval mode
        self.model.eval()

        # log metrics
        summary_loss = AverageLossMeter()

        # timer
        start_time = time.time()

        gt_batches, softmax_batches, sigmoid_batches = [], [], []

        # Gradients are not needed for validation, so skip tracking them.
        with torch.no_grad():
            for step, (_image_ids, images, labels) in enumerate(val_loader):
                images = images.to(self.config.device).float()
                labels = labels.to(self.config.device)
                batch_size = labels.shape[0]

                logits = self.model(images)
                loss = self.criterion_val(input=logits, target=labels)
                summary_loss.update(loss.item(), batch_size)

                gt_batches.append(labels.cpu().numpy())
                softmax_batches.append(
                    torch.nn.Softmax(dim=1)(input=logits).to("cpu").numpy()
                )
                sigmoid_batches.append(logits.sigmoid().to("cpu").numpy())

                end_time = time.time()

                if self.config.verbose:
                    if (step % self.config.verbose_step) == 0:
                        # NOTE(review): "val_acc" prints the running loss; no
                        # accuracy is tracked in this loop.
                        print(
                            f"Validation Steps {step}/{len(val_loader)}, "
                            f"summary_loss: {summary_loss.avg:.3f}, val_acc: {summary_loss.avg:.6f} "
                            f"time: {(end_time - start_time):.3f}",
                            end="\r",
                        )

        val_gt_label_array = np.concatenate(gt_batches, axis=0)
        val_preds_softmax_array = np.concatenate(softmax_batches, axis=0)
        sigmoid_preds_array = np.concatenate(sigmoid_batches, axis=0)

        multi_class_roc_auc_score, avg_roc_score = multiclass_roc(
            y_true=val_gt_label_array,
            y_preds_softmax_array=sigmoid_preds_array,
            config=self.config,
        )
        score, scores = get_score(val_gt_label_array, sigmoid_preds_array)

        return summary_loss.avg, val_preds_softmax_array, avg_roc_score, multi_class_roc_auc_score, score, scores

    def save_model(self, path):
        """Write only the model weights (its ``state_dict``) to ``path``.

        The model is switched to evaluation mode first and is left in that
        mode afterwards.
        """
        network = self.model
        network.eval()
        weights = network.state_dict()
        torch.save(weights, path)

    # will save the weight for the best val loss and corresponding oof preds
    def save(self, path):
        """Write a full training checkpoint to ``path``.

        The checkpoint bundles the model, optimizer and scheduler state
        dicts with the tracked best metrics, the current epoch and the
        out-of-fold predictions held in ``self.val_predictions``. The model
        is switched to evaluation mode before its state is captured.
        """
        self.model.eval()
        checkpoint = {
            "model_state_dict": self.model.state_dict(),
            "optimizer_state_dict": self.optimizer.state_dict(),
            "scheduler_state_dict": self.scheduler.state_dict(),
            "best_acc": self.best_acc,
            "best_auc": self.best_auc,
            "best_loss": self.best_loss,
            "epoch": self.epoch,
            "oof_preds": self.val_predictions,
        }
        torch.save(checkpoint, path)

    def load(self, path):
        """Read the checkpoint stored at ``path`` and return it unchanged.

        The object is deserialized with ``torch.load``; nothing is restored
        into the model, optimizer or scheduler here.
        """
        return torch.load(path)


    def log(self, message):
        """Record ``message``: echo it when verbose, always append it to the log file.

        The file is ``self.config.paths['log_path']``; each message is
        written on its own line.
        """
        settings = self.config
        if settings.verbose:
            print(message)
        log_path = settings.paths['log_path']
        with open(log_path, "a+") as handle:
            handle.write(f"{message}\n")

In [None]:
def get_score(y_true, y_pred):
    """Compute a ROC AUC per target column and the mean over columns.

    Args:
        y_true: 2-D array of ground-truth values; column ``i`` is scored
            against column ``i`` of ``y_pred``.
        y_pred: 2-D array of prediction scores with matching columns.

    Returns:
        ``(avg_score, scores)``: the mean of the per-column AUCs and the
        list of per-column AUCs in column order.
    """
    n_targets = y_true.shape[1]
    scores = [
        sklearn.metrics.roc_auc_score(y_true[:, col], y_pred[:, col])
        for col in range(n_targets)
    ]
    avg_score = np.mean(scores)
    return avg_score, scores

In [None]:
def train_on_fold(df_folds: pd.DataFrame, config, fold: int):
    """Train on every fold except ``fold`` and validate on ``fold``.

    Args:
        df_folds: Frame with a ``"fold"`` column assigning each row to a fold.
        config: Run configuration; this function reads ``device``,
            ``batch_size`` and ``num_classes`` and passes the object on to
            the model, transforms, datasets and trainer.
        fold: Identifier of the fold held out for validation.

    Returns:
        The validation rows of ``df_folds`` (index reset) with the best
        checkpoint's out-of-fold predictions attached: one column per class
        named ``"0"`` .. ``str(num_classes - 1)`` plus ``"preds"`` holding
        the argmax over those columns.
    """
    network = CustomEfficientNet(config=config, pretrained=True)
    network.to(config.device)

    train_transforms, val_transforms = get_albu_transforms(config)

    # Rows outside the held-out fold train the model; the rest validate it.
    train_df = df_folds[df_folds["fold"] != fold].reset_index(drop=True)
    val_df = df_folds[df_folds["fold"] == fold].reset_index(drop=True)

    # Loader settings shared by the training and validation loaders.
    shared_loader_kwargs = dict(
        num_workers=4,
        worker_init_fn=seed_worker,
        pin_memory=True,
    )

    train_dataset = RANZCR(
        train_df,
        config,
        transforms=AlbumentationsAugmentation(transforms=train_transforms),
        transform_norm=True,
        meta_features=None,
    )
    train_loader = DataLoader(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        **shared_loader_kwargs,
    )

    val_dataset = RANZCR(
        val_df,
        config,
        transforms=AlbumentationsAugmentation(transforms=val_transforms),
        transform_norm=True,
        meta_features=None,
    )
    # Validation uses a fixed batch size of 32 and keeps sample order.
    val_loader = DataLoader(
        val_dataset,
        batch_size=32,
        shuffle=False,
        **shared_loader_kwargs,
    )

    trainer = Trainer(model=network, config=config)
    best_checkpoint = trainer.fit(train_loader, val_loader, fold)

    # Attach the out-of-fold predictions stored in the best checkpoint.
    oof_preds = best_checkpoint["oof_preds"]
    class_columns = [str(c) for c in range(config.num_classes)]
    val_df[class_columns] = oof_preds
    val_df["preds"] = oof_preds.argmax(1)

    return val_df


def multiclass_roc(y_true,y_preds_softmax_array,config):
    """Compute a ROC AUC per class and the mean of the computed AUCs.

    The ground truth is binarized once against ``config.class_list``;
    column ``i`` of the result is then scored against column ``i`` of
    ``y_preds_softmax_array`` with ``roc_curve`` + ``auc``.

    Args:
        y_true: Ground-truth labels in a form accepted by
            ``sklearn.preprocessing.label_binarize``.
        y_preds_softmax_array: 2-D array of per-class scores, one column per
            entry of ``config.class_list``.
        config: Object exposing ``class_list`` and ``num_classes``.

    Returns:
        ``(roc_auc, avg_roc_score)``: a dict mapping class position to its
        AUC, and the mean of the AUCs computed in the loop. When
        ``config.num_classes == 2`` only the first column is scored and
        ``roc_auc[config.class_list[1]]`` is set to ``1 - roc_auc[0]``.
    """
    # Binarization does not depend on the class being scored: do it once.
    y_true_binarized = sklearn.preprocessing.label_binarize(
        y_true, classes=config.class_list
    )

    roc_auc = dict()
    roc_scores = []
    for label_num in range(len(config.class_list)):
        fpr, tpr, _ = sklearn.metrics.roc_curve(
            y_true=y_true_binarized[:, label_num],
            y_score=y_preds_softmax_array[:, label_num],
            pos_label=1,
        )
        roc_auc[label_num] = sklearn.metrics.auc(fpr, tpr)
        roc_scores.append(roc_auc[label_num])
        if config.num_classes == 2:
            # Binary shortcut kept from the original implementation.
            roc_auc[config.class_list[1]] = 1 - roc_auc[label_num]
            break
    avg_roc_score = np.mean(roc_scores)
    return roc_auc, avg_roc_score
    
def get_acc_score(config, result_df):
    """Return the accuracy of the ``"preds"`` column against the label column.

    The label column is the one named by ``config.class_col_name``.
    """
    predicted = result_df["preds"].values
    expected = result_df[config.class_col_name].values
    return sklearn.metrics.accuracy_score(y_true=expected, y_pred=predicted)

def get_roc_score(config, result_df):
    """Return the ROC AUC of one score column against the label column.

    The score column is the one whose name is the string form of the
    largest value found in the label column (``config.class_col_name``).
    ``roc_auc_score`` is called with ``multi_class='ovo'``.
    """
    labels = result_df[config.class_col_name].values
    score_column = str(np.max(labels))
    class_scores = result_df[score_column].values
    return sklearn.metrics.roc_auc_score(
        y_true=labels, y_score=class_scores, multi_class='ovo'
    )

def train_loop(df_folds: pd.DataFrame,config,fold_num: int = None,train_one_fold=False):
    """Train on a single fold or on every fold and return the OOF predictions.

    Args:
        df_folds: Frame with a ``"fold"`` column, passed to ``train_on_fold``.
        config: Run configuration; ``config.num_folds`` is read when all
            folds are trained.
        fold_num: Fold to hold out when ``train_one_fold`` is true.
        train_one_fold: If true, train only with ``fold_num`` held out;
            otherwise train folds ``1 .. config.num_folds`` in turn.

    Returns:
        The out-of-fold prediction frame: the frame produced by
        ``train_on_fold`` in single-fold mode, or the row-wise concatenation
        of all per-fold frames otherwise. In all-folds mode the frame is
        also written to ``oof.csv`` in the working directory; single-fold
        mode writes no file.

    Raises:
        ValueError: If ``train_one_fold`` is true but ``fold_num`` is None.
    """
    if train_one_fold:
        if fold_num is None:
            # Comparing the fold column with None would silently yield an
            # empty validation split, so fail loudly instead.
            raise ValueError("fold_num must be provided when train_one_fold=True")
        return train_on_fold(df_folds=df_folds, config=config, fold=fold_num)

    # Folds are numbered from 1, not 0.
    fold_frames = [
        train_on_fold(df_folds=df_folds, config=config, fold=fold)
        for fold in range(1, config.num_folds + 1)
    ]
    # Concatenate once; fall back to an empty frame when there are no folds.
    oof_df = pd.concat(fold_frames) if fold_frames else pd.DataFrame()
    oof_df.to_csv("oof.csv")
    return oof_df
        #oof_df.to_csv(os.path.join(config.paths['save_path'], 'oof.csv'))


In [None]:
# train_five_folds = train_loop(df_folds=df_folds, config=config)

# Train with fold 1 held out for validation; use the commented call above
# instead to iterate over every fold.
train_one_fold = train_loop(
    df_folds=df_folds,
    config=config,
    fold_num=1,
    train_one_fold=True,
)