In [None]:
!pip install albumentations
!pip install ../input/timm031/timm-0.3.1-py3-none-any.whl


In [None]:
import os
import torch
from torch import nn
import torch.nn.functional as F
from torchvision.datasets import MNIST
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
import pytorch_lightning as pl

from sklearn.model_selection import GroupKFold, StratifiedKFold
from tqdm.auto import tqdm
import timm
from sklearn.metrics import accuracy_score

import albumentations as A
from albumentations import Compose
from albumentations.pytorch import ToTensorV2

from PIL import Image, ImageOps, ImageEnhance, ImageChops
import pandas as pd
import numpy as np
import random


from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning import loggers as pl_loggers

    
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [None]:
# os.environ["CUDA_VISIBLE_DEVICES"]="0"
# Notebook mode switch: cells guarded by `if Training:` build the fold file and
# fit the models; cells guarded by `if not Training:` load checkpoints, run
# inference and write submission.csv.
Training = False

In [None]:
# ====================================================
# Configurations
# ====================================================
class CONFIGURATION:
    """Container for every tunable used by this notebook.

    All values are assigned in ``__init__``: model / loss / optimizer /
    scheduler names, image geometry, training hyper-parameters, normalisation
    statistics and the albumentations pipelines built from them.
    """
    def __init__(self): 
        
        self.DEBUG = True

        #Model Params
        self.N_TTA = 8
        self.N_FOLDS = 5
        self.MODEL_NAME = 'tf_efficientnet_b4_ns' # Recommended : ['deit_base_patch16_384','vit_large_patch16_384','tf_efficientnet_b4_ns','resnext50_32x4d']
        self.pretrained = True
        self.N_CLASSES = 5
        self.TRAIN_FOLDS = [0,1,2,3,4]
        #self.TRAIN_FOLDS = [1] #Folds to be Trained

        self.scheduler_name = 'GradualWarmupSchedulerV2' # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts', 'OneCycleLR', 'GradualWarmupSchedulerV2','LambdaLR']
        self.scheduler_update = 'batch' #['batch','epoch']
        self.criterion_name = 'TaylorSmoothedLoss'        # ['CrossEntropyLoss', 'LabelSmoothingLoss', 'FocalLoss','FocalCosineLoss', 'SymmetricCrossEntropyLoss', 'BiTemperedLoss', 'TaylorCrossEntropyLoss', 'TaylorSmoothedLoss']
        self.optimizer_name = 'AdamW' #['Adam','AdamW','AdamP','Ranger'] -> AdamP doesn't work on TPUs
        self.LR_RAMPUP_EPOCHS = 1
        self.LR_SUSTAIN_EPOCHS = 0

        self.FREEZE = True #If you fine tune after START_FREEZE epochs
        self.START_FREEZE = 12

        #Image Size
        self.HEIGHT = 512 #If VIT or deit is chosen as model: need 384 x 384
        self.WIDTH = 512
        self.CHANNELS = 3
        
        
        #Training Params
        self.BATCH_SIZE = 16 # PER REPLICA FOR TPUS    #RECOMMENDED : effnet = 16 ; resnext = 8 ; vit = 4 ; deit = 4
        self.EPOCHS = 25 # more is definitely plausible and recommended around 10
        self.LR = 2e-5
        self.LR_START =1e-5
        self.LR_MIN = 5e-6
        self.weight_decay = 0
        self.eps = 1e-8
        self.PATIENCE = 3

        #BiTemperedLoss
        self.T1 = 0.2
        self.T2 = 1.1
        self.LABEL_SMOOTH = 0.2

        #CosineAnnealingWarmRestarts
        self.T_0 = self.EPOCHS

        #CosineAnnealingLR
        self.T_max = self.EPOCHS

        self.NUM_WORKERS = 4

        
        self.IMG_MEAN = [0.485, 0.456, 0.406] #Mean for normalization Transform cassava = [0.4303, 0.4967, 0.3134] imgnet = [0.485, 0.456, 0.406]
        self.IMG_STD = [0.229, 0.224, 0.225] #STD for normalization Transform cassava = [0.2142, 0.2191, 0.1954] imgnet = [0.229, 0.224, 0.225]

        self.USE_2019 = False #Use 2019 images?


        self.SEED = 42
        
        # Building blocks shared by several of the pipelines below.
        # Normalisation with the mean / std configured above.
        Aug_Norm = A.Normalize(mean=self.IMG_MEAN, std=self.IMG_STD, max_pixel_value=255.0, p=1.0)
        # Random rectangular dropout; hole sizes are expressed as 3%-11% of the image side.
        Drop_Rand = A.CoarseDropout(max_holes=12, max_height=int(0.11*self.HEIGHT), max_width=int(0.11*self.WIDTH),
                                    min_holes=1, min_height=int(0.03*self.HEIGHT), min_width=int(0.03*self.WIDTH),
                                    always_apply=False, p=0.5)
        Rand_Crop = A.RandomCrop(height= self.HEIGHT, width = self.WIDTH,always_apply=True, p=1.0)
        Resize_Crop = A.RandomResizedCrop(self.HEIGHT, self.WIDTH,p=1.0)
        # Full training pipeline: flips, affine and colour jitter, random resized
        # crop, coarse dropout, then normalisation and tensor conversion.
        self.train_transforms = Compose([
                    A.Transpose(p=0.5),
                    A.HorizontalFlip(p=0.5),
                    A.VerticalFlip(p=0.5),
                    A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=45, p=0.5),
                    A.HueSaturationValue(
                        hue_shift_limit=0.2, 
                        sat_shift_limit=0.2, 
                        val_shift_limit=0.2, 
                        p=0.5
                    ),
                    A.RandomBrightnessContrast(
                            brightness_limit=(-0.1,0.1), 
                            contrast_limit=(-0.1, 0.1), 
                            p=0.5
                        ),
                    Resize_Crop,
                    Drop_Rand,           
                    Aug_Norm,   
                    ToTensorV2(p=1.0),
                ], p=1.)

        # Same steps as train_transforms but without the CoarseDropout step.
        self.light_transforms = Compose([
                    A.Transpose(p=0.5),
                    A.HorizontalFlip(p=0.5),
                    A.VerticalFlip(p=0.5),
                    A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=45, p=0.5),
                    A.HueSaturationValue(
                                hue_shift_limit=0.2, 
                                sat_shift_limit=0.2, 
                                val_shift_limit=0.2, 
                                p=0.5),
                    A.RandomBrightnessContrast(
                                    brightness_limit=(-0.1,0.1), 
                                    contrast_limit=(-0.1, 0.1), 
                                    p=0.5),
                    Resize_Crop,
                    Aug_Norm,   
                    ToTensorV2(p=1.0),
                ], p=1.)

        # Strongest pipeline: plain resize (no crop) plus colour, channel, grid
        # and dropout perturbations. This is the pipeline the TTA inference
        # cell of this notebook applies to test images.
        self.heavy_transforms = Compose([
            A.HorizontalFlip(p=0.5),

            A.Resize(self.HEIGHT, self.WIDTH),

            A.Transpose(p=0.5),
            A.VerticalFlip(p=0.5),
            #A.augmentations.transforms.ColorJitter(brightness=0.10, contrast=0.2, saturation=0.2, hue=0.00, always_apply=False, p=0.5),
            A.ShiftScaleRotate(p=0.5),
            A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
            A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
            A.augmentations.transforms.RGBShift (r_shift_limit=20, g_shift_limit=20, b_shift_limit=20, always_apply=False, p=0.5),
            A.augmentations.transforms.ChannelDropout (channel_drop_range=(1, 1), fill_value=0, always_apply=False, p=0.5),

            A.augmentations.transforms.GridDistortion (num_steps=5, distort_limit=0.3, interpolation=1, border_mode=4, value=None, mask_value=None, always_apply=False, p=0.5),
            A.CoarseDropout(p=0.5),
            A.Cutout(p=0.5),
            Aug_Norm,
            ToTensorV2(p=1.0),])

        # Deterministic validation pipeline: centre crop, normalise, to tensor.
        self.valid_transforms = Compose([
                    A.CenterCrop(self.HEIGHT, self.WIDTH),
                    Aug_Norm,   
                    ToTensorV2(p=1.0),
                ], p=1.)

        # Flip / affine / random-crop pipeline intended for test-time augmentation.
        # NOTE(review): no cell visible in this notebook references test_aug.
        self.test_aug = Compose([
                    A.HorizontalFlip(p=0.5),
                    A.VerticalFlip(p=0.5),
                    A.ShiftScaleRotate(p = 1.0),
                    #A.ColorJitter(brightness=0.1, contrast=0.2, saturation=0.2, hue=0.00, always_apply=False, p=1.0),
                    Rand_Crop,
                    Aug_Norm,
                    ToTensorV2(p=1.0)
                ], p=1.)

#         image_net_post = Compose([
#                     Resize_Crop,
#                     Drop_Rand,
#                     Aug_Norm,    
#                     ToTensorV2(p=1.0)
#                 ], p=1.)

        # Aliases naming the pipelines selected for training and for validation.
        self.TRAIN_AUG_TYPE = self.light_transforms
        self.VALID_AUG_TYPE = self.valid_transforms

# Single shared configuration instance read by the rest of the notebook.
CFG = CONFIGURATION()

In [None]:
!ls ../input/effnet-resnet-weights


In [None]:
# ====================================================
# CV Split
# ====================================================
# Competition data: train.csv, sample_submission.csv and the image folders.
DATA_PATH = '../input/cassava-leaf-disease-classification/'
TRAIN_DIR = DATA_PATH + 'train_images/'

# Location of the merged 2019 + 2020 data (merged.csv and its images),
# read only when CFG.USE_2019 is set.
DATA_PATH_2019 = './'
TRAIN_DIR_2019 = DATA_PATH_2019 + 'train/'
# Images scored at inference time.
TEST_DIR = DATA_PATH + 'test_images/'


In [None]:
# Load the training table, optionally merged with the 2019 data, and print the
# per-class counts of a stratified K-fold split. Outputs used by later cells:
# `train_df_merged` (the table handed to _startify_and_save) and `DATA_FOLD`
# (the image directory the datasets read from).
if Training:
    
    #This guarantees that no images from 2019 contaminate the validation split
    # NOTE(review): the fold file actually used for training is built later from
    # train_df_merged by _startify_and_save, which re-splits every row, so the
    # guarantee above does not appear to carry over to training -- confirm.
    if CFG.USE_2019:
        train_df_merged = pd.read_csv(DATA_PATH_2019 + 'merged.csv')
        train_df = train_df_merged.loc[train_df_merged.source == 2020]
        if CFG.DEBUG:
            # Debug mode: work on a 500-row sample (unseeded, so it differs per run).
            train_df = train_df.sample(500).reset_index(drop=True)
        train_df_2019 = train_df_merged.loc[train_df_merged.source == 2019]
        skf = StratifiedKFold(n_splits=CFG.N_FOLDS, shuffle=True, random_state=CFG.SEED)
        skf.get_n_splits(np.arange(train_df.shape[0]), train_df['label'])
        # Stratified (train, valid) positional indices computed on the 2020 rows only.
        folds = [(idxT,idxV) for i,(idxT,idxV) in enumerate(skf.split(np.arange(train_df.shape[0]), train_df['label']))]
        if not CFG.DEBUG:
            # Each entry is train+valid concatenated, i.e. all 2019 rows in split order.
            folds_2019 = [np.concatenate((idxT,idxV)) for i,(idxT,idxV) in enumerate(skf.split(np.arange(train_df_2019.shape[0]), train_df_2019['label']))]
            for i in range(CFG.N_FOLDS):
                (idxT,idxV) = folds[i]
                # Append the 2019 rows to the training side only; validation stays 2020-only.
                folds[i] = (np.concatenate((idxT,train_df_2019.iloc[folds_2019[i]].index)),idxV)
                (idxT,idxV) = folds[i]
                print(np.bincount(train_df_merged['label'].iloc[idxT]),np.bincount(train_df['label'].iloc[idxV]))
        DATA_FOLD = TRAIN_DIR_2019
        del train_df_2019
    else:
        print("not using 2019")
        train_df = pd.read_csv(DATA_PATH + 'train.csv')
        if CFG.DEBUG:
            # Debug mode: work on a 500-row sample (unseeded, so it differs per run).
            train_df = train_df.sample(500).reset_index(drop=True)
        skf = StratifiedKFold(n_splits=CFG.N_FOLDS, shuffle=True, random_state=CFG.SEED)
        skf.get_n_splits(np.arange(train_df.shape[0]), train_df['label'])
        folds = [(idxT,idxV) for i,(idxT,idxV) in enumerate(skf.split(np.arange(train_df.shape[0]), train_df['label']))]
        # Print class counts of the train / valid side of every fold as a sanity check.
        for i in range(CFG.N_FOLDS):
            (idxT,idxV) = folds[i]
            print(np.bincount(train_df['label'].iloc[idxT]),np.bincount(train_df['label'].iloc[idxV]))

        train_df_merged = train_df
        DATA_FOLD = TRAIN_DIR

In [None]:
 def _startify_and_save(data):
        """Assign a stratified fold id to every row and write ``train_folds.csv``.

        The rows are shuffled with ``CFG.SEED`` (so the fold file is identical
        on every run), split into ``CFG.N_FOLDS`` folds stratified on the
        ``label`` column, and saved with an extra ``kfold`` column holding the
        fold in which each row is a validation sample.

        Args:
            data: DataFrame with at least a ``label`` column. It is not
                modified; the function works on a shuffled copy.
        """
        # Seeded shuffle; `sample` returns a new frame, so the caller's frame
        # is left untouched.
        data = data.sample(frac=1, random_state=CFG.SEED).reset_index(drop=True)

        # New column to hold the fold number
        data["kfold"] = -1

        # The rows are already shuffled above, so the splitter itself does not
        # shuffle. `random_state` must not be passed together with
        # shuffle=False: recent scikit-learn releases reject that combination.
        skf = StratifiedKFold(n_splits=CFG.N_FOLDS, shuffle=False)

        for fold_, (_, val_) in enumerate(skf.split(np.arange(data.shape[0]), data['label'])):
            # Only the validation indices are recorded; every other row of a
            # fold is training data by construction.
            data.loc[val_, "kfold"] = fold_

        # Saved to disk so that every process / GPU reads the same split.
        data.to_csv("train_folds.csv", index=False)

In [None]:
# Write train_folds.csv, which CassavaModel.setup() reads to build each fold.
if Training:
    _startify_and_save(train_df_merged)

In [None]:
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a smoothed one-hot target distribution.

    The true class receives probability ``1 - smoothing``; every other class
    receives ``smoothing / (classes - 1)``.

    ``forward`` applies ``log_softmax`` to ``pred`` only when
    ``CFG.criterion_name == 'LabelSmoothingLoss'``. For any other configured
    criterion ``pred`` is used as given, i.e. the caller is expected to pass
    log-probabilities already (as TaylorSmoothedLoss does).
    """

    def __init__(self, classes=5, smoothing=0.0, dim=-1):
        super().__init__()
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the mean smoothed cross-entropy of ``pred`` against ``target``.

        ``target`` holds class indices; they are scattered along dimension 1.
        """
        wants_log_softmax = CFG.criterion_name == 'LabelSmoothingLoss'
        log_probs = pred.log_softmax(dim=self.dim) if wants_log_softmax else pred

        # The target distribution is a constant: keep it out of the graph.
        with torch.no_grad():
            off_class_mass = self.smoothing / (self.cls - 1)
            soft_targets = torch.full_like(log_probs, off_class_mass)
            soft_targets.scatter_(1, target.data.unsqueeze(1), self.confidence)

        per_sample = torch.sum(-soft_targets * log_probs, dim=self.dim)
        return torch.mean(per_sample)
    
class TaylorSoftmax(nn.Module):
    '''
    Softmax variant in which exp(x) is replaced by its Taylor polynomial of
    even order n: 1 + x + x^2/2! + ... + x^n/n!, normalised along `dim`.
    This is the autograd version.
    '''
    def __init__(self, dim=1, n=2):
        super().__init__()
        # Only even orders are accepted.
        assert n % 2 == 0
        self.dim = dim
        self.n = n

    def forward(self, x):
        '''
        usage similar to nn.Softmax:
            >>> mod = TaylorSoftmax(dim=1, n=4)
            >>> inten = torch.randn(1, 32, 64, 64)
            >>> out = mod(inten)
        '''
        polynomial = torch.ones_like(x)
        factorial = 1.
        for order in range(1, self.n + 1):
            # Running factorial: after this line `factorial` equals order!
            factorial *= order
            polynomial = polynomial + x.pow(order) / factorial
        normaliser = polynomial.sum(dim=self.dim, keepdims=True)
        return polynomial / normaliser
    
class TaylorCrossEntropyLoss(nn.Module):
    '''
    Negative log-likelihood computed on TaylorSoftmax probabilities instead
    of ordinary softmax probabilities. This is the autograd version.
    '''
    def __init__(self, n=2, ignore_index=-1, reduction='mean'):
        super().__init__()
        # Only even orders are accepted.
        assert n % 2 == 0
        self.ignore_index = ignore_index
        self.reduction = reduction
        self.taylor_softmax = TaylorSoftmax(dim=1, n=n)

    def forward(self, logits, labels):
        '''
        usage similar to nn.CrossEntropyLoss:
            >>> crit = TaylorCrossEntropyLoss(n=4)
            >>> inten = torch.randn(1, 10, 64, 64)
            >>> label = torch.randint(0, 10, (1, 64, 64))
            >>> out = crit(inten, label)
        '''
        taylor_probs = self.taylor_softmax(logits)
        return F.nll_loss(
            taylor_probs.log(),
            labels,
            ignore_index=self.ignore_index,
            reduction=self.reduction,
        )
    
class TaylorSmoothedLoss(nn.Module):
    """Label-smoothed loss evaluated on TaylorSoftmax log-probabilities.

    ``forward`` takes the log of the TaylorSoftmax output and feeds it to a
    LabelSmoothingLoss built with ``CFG.N_CLASSES`` and ``CFG.LABEL_SMOOTH``.

    Note that the ``smoothing`` constructor argument is not consulted (the
    factor comes from ``CFG.LABEL_SMOOTH``), and that ``reduction`` and
    ``ignore_index`` are stored but not used by ``forward``.
    """

    def __init__(self, n=2, ignore_index=-1, reduction='mean', smoothing=0.2):
        super().__init__()
        # Only even orders are accepted.
        assert n % 2 == 0
        self.taylor_softmax = TaylorSoftmax(dim=1, n=n)
        self.ignore_index = ignore_index
        self.reduction = reduction
        self.lab_smooth = LabelSmoothingLoss(CFG.N_CLASSES, smoothing=CFG.LABEL_SMOOTH)

    def forward(self, logits, labels):
        """Return the smoothed loss of ``logits`` against class indices ``labels``."""
        taylor_log_probs = self.taylor_softmax(logits).log()
        return self.lab_smooth(taylor_log_probs, labels)
    

def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, the ``PYTHONHASHSEED`` environment variable,
    NumPy and PyTorch (CPU and all CUDA devices), and puts cuDNN into
    deterministic mode.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU; a no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and pick kernels per run, which can
    # select different algorithms between runs; it has to be off for
    # reproducible results.
    torch.backends.cudnn.benchmark = False

#Choose Criterions for the Training Loop
def GetCriterion(criterion_name):
    """Build the loss module registered under ``criterion_name``.

    Args:
        criterion_name: one of 'BiTemperedLoss', 'SymmetricCrossEntropyLoss',
            'CrossEntropyLoss', 'LabelSmoothingLoss', 'FocalLoss',
            'FocalCosineLoss', 'TaylorCrossEntropyLoss', 'TaylorSmoothedLoss'
            or 'CutMix'. 'CutMix' wraps the criterion named by
            ``CFG.criterion_name``.

    Returns:
        A freshly constructed criterion.

    Raises:
        ValueError: if ``criterion_name`` is not recognised, or if 'CutMix'
            is requested while ``CFG.criterion_name`` is itself 'CutMix'
            (which would otherwise recurse without end).
    """
    if criterion_name == 'BiTemperedLoss':
        criterion = BiTemperedLogistic()
    elif criterion_name == 'SymmetricCrossEntropyLoss':
        criterion = SymmetricCrossEntropy()
    elif criterion_name == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss()
    elif criterion_name == 'LabelSmoothingLoss':
        criterion = LabelSmoothingLoss()
    elif criterion_name == 'FocalLoss':
        criterion = FocalLoss()
    elif criterion_name == 'FocalCosineLoss':
        criterion = FocalCosineLoss()
    elif criterion_name == 'TaylorCrossEntropyLoss':
        criterion = TaylorCrossEntropyLoss()
    elif criterion_name == 'TaylorSmoothedLoss':
        criterion = TaylorSmoothedLoss()
    elif criterion_name == 'CutMix':
        if CFG.criterion_name == 'CutMix':
            raise ValueError("CFG.criterion_name must name the base loss wrapped by 'CutMix', not 'CutMix' itself")
        criterion = CutMixCriterion(GetCriterion(CFG.criterion_name))
    else:
        # Previously fell through to `return criterion` with the name unbound.
        raise ValueError(f"Unknown criterion_name: {criterion_name!r}")
    return criterion
    
    
def GetScheduler(scheduler_name,optimizer):
    """Build the learning-rate scheduler registered under ``scheduler_name``.

    Supported names: 'CosineAnnealingLR', 'ReduceLROnPlateau' and
    'GradualWarmupSchedulerV2'. 'OneCycleLR' and 'CosineAnnealingWarmRestarts'
    appear in the configuration's list of options but are not wired up here.

    Args:
        scheduler_name: name of the scheduler to build.
        optimizer: optimizer the scheduler will drive.

    Returns:
        The scheduler instance, or ``None`` when the name is not supported.
    """
    if scheduler_name == 'GradualWarmupSchedulerV2':
        return GradualWarmupSchedulerV2(optimizer=optimizer)

    if scheduler_name == 'ReduceLROnPlateau':
        return torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            factor=0.1,
            patience=1,
            threshold=0.0001,
            cooldown=0,
            min_lr=CFG.LR_MIN,
            eps=CFG.eps,
        )

    if scheduler_name == 'CosineAnnealingLR':
        return torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=CFG.T_max,
            eta_min=0,
            last_epoch=-1,
        )

    return None
    
def GetOptimizer(optimizer_name,parameters):
    """Build the optimizer registered under ``optimizer_name``.

    For 'Adam', 'AdamW' and 'AdamP' the initial learning rate is
    ``CFG.LR_START`` when the configured scheduler is
    'GradualWarmupSchedulerV2' (the warm-up ramps it up from there) and
    ``CFG.LR`` otherwise. 'Ranger' always starts at ``CFG.LR``.

    Args:
        optimizer_name: one of 'Adam', 'AdamW', 'AdamP', 'Ranger'.
        parameters: iterable of parameters (or parameter groups) to optimise.

    Returns:
        The optimizer instance, or ``None`` when the name is not supported.
    """
    uses_warmup = CFG.scheduler_name == 'GradualWarmupSchedulerV2'
    start_lr = CFG.LR_START if uses_warmup else CFG.LR

    if optimizer_name == 'Adam':
        return torch.optim.Adam(parameters, lr=start_lr, weight_decay=CFG.weight_decay, amsgrad=False)
    elif optimizer_name == 'AdamW':
        # Both scheduler cases must yield AdamW; the non-warm-up case used to
        # return plain Adam by mistake.
        return torch.optim.AdamW(parameters, lr=start_lr, weight_decay=CFG.weight_decay, amsgrad=False)
    elif optimizer_name == 'AdamP':
        return AdamP(parameters, lr=start_lr, weight_decay=CFG.weight_decay)
    elif optimizer_name == 'Ranger':
        return Ranger(parameters,lr = CFG.LR,alpha = 0.5, k = 6,N_sma_threshhold = 5,betas = (0.95,0.999),eps=CFG.eps,weight_decay=CFG.weight_decay)
    return None
    

# Seed all random number generators with the configured seed as soon as this
# cell runs.
SEED = CFG.SEED
seed_everything(SEED) 

In [None]:
# ====================================================
# Datasets
# ====================================================
class CasavaDataset(Dataset):
    """Image dataset that loads files by name from one directory.

    Args:
        directory: folder containing the image files.
        FNames: indexable sequence of file names inside ``directory``.
        labels: indexable sequence of labels aligned with ``FNames``.
        transform: optional albumentations-style callable, invoked as
            ``transform(image=array)['image']``; ``None`` disables it.

    Each item is the tuple ``(image, label, file_name)``.
    """

    def __init__(self, directory, FNames, labels,transform):
        self.dir = directory
        self.fnames = FNames
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.fnames)

    def __getitem__(self, index):
        """Load, optionally transform and return the sample at ``index``."""
        fname = self.fnames[index]
        # The context manager closes the file handle deterministically.
        # Converting to RGB guarantees three channels, which the 3-channel
        # normalisation expects, even for grayscale / palette / RGBA files.
        with Image.open(os.path.join(self.dir, fname)) as handle:
            img = np.asarray(handle.convert('RGB'), dtype=np.float32)
        if self.transform is not None:
            img = self.transform(image = img)['image']
        return img, self.labels[index], fname
        

In [None]:
class CassavaModel(pl.LightningModule):
    """LightningModule wrapping a timm / DeiT backbone with a ``CFG.N_CLASSES``-way head.

    Args:
        model_name: backbone name. DeiT names are loaded through ``torch.hub``,
            everything else through ``timm.create_model``.
        pretrained: whether to load pretrained backbone weights.
        fold: index of the fold used for validation; rows of
            ``train_folds.csv`` with a different ``kfold`` value are used for
            training. Only needed for training.
    """

    # Backbones whose classification layer is exposed as `.head`.
    _HEAD_MODELS = ('vit_large_patch16_384', 'deit_base_patch16_224', 'deit_base_patch16_384')

    def __init__(self,  model_name=CFG.MODEL_NAME, pretrained=CFG.pretrained, fold=None):
        super().__init__()
        self.accuracy = pl.metrics.Accuracy()
        self.val_accuracy = pl.metrics.Accuracy()
        self.model_name = model_name
        self.fold = fold
        if model_name == 'deit_base_patch16_224' or model_name == 'deit_base_patch16_384':
            self.model = torch.hub.load('facebookresearch/deit:main', model_name, pretrained=pretrained)
        else:
            self.model = timm.create_model(model_name, pretrained=pretrained)
        # Replace the backbone's classification layer with one sized for this task.
        if 'efficientnet' in model_name:
            self.n_features = self.model.classifier.in_features
            self.model.classifier = nn.Linear(self.n_features, CFG.N_CLASSES)
        elif model_name == 'vit_large_patch16_384' or model_name == 'deit_base_patch16_224' or model_name == 'deit_base_patch16_384':
            self.n_features = self.model.head.in_features
            self.model.head = nn.Linear(self.n_features, CFG.N_CLASSES)
        elif 'resnext' in model_name:
            self.n_features = self.model.fc.in_features
            self.model.fc = nn.Linear(self.n_features, CFG.N_CLASSES)
        self.criterion = GetCriterion(CFG.criterion_name)
        self.val_criterion = GetCriterion(CFG.criterion_name)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        return self.model(x)

    def freeze(self):
        """Freeze the backbone, leaving only the classification layer trainable."""
        for param in self.model.parameters():
            param.requires_grad = False

        # Pick the classification layer with the same rules __init__ uses.
        # The former test `... or 'deit_base_patch16_224'` was always true,
        # which made the resnext branch unreachable.
        if 'efficientnet' in self.model_name:
            head = self.model.classifier
        elif self.model_name in self._HEAD_MODELS:
            head = self.model.head
        elif 'resnext' in self.model_name:
            head = self.model.fc
        else:
            head = None

        if head is not None:
            for param in head.parameters():
                param.requires_grad = True

    def unfreeze(self):
        """Make every layer trainable again."""
        for param in self.model.parameters():
            param.requires_grad = True

    def prepare_data(self):
        # Even in multi-GPU training, this method is called only from a single GPU,
        # so it is the place for downloads, stratification etc. Nothing to do here:
        # the fold file is written by a separate notebook cell.
        pass

    def setup(self, stage=None):
        """Build the training / validation datasets for ``self.fold``.

        In multi-GPU training this runs on every GPU, so each process reads
        the fold file from disk.
        """
        data = pd.read_csv("train_folds.csv")

        training_data = data[data.kfold != self.fold].drop(['kfold'], axis=1)
        validation_data = data[data.kfold == self.fold].drop(['kfold'], axis=1)

        # Use the pipelines selected in the configuration. The module itself
        # has no `train_transforms` attribute, and validation must not reuse
        # the random training augmentation.
        self.train_dataset = CasavaDataset(DATA_FOLD, training_data['image_id'].values, training_data['label'].values, CFG.TRAIN_AUG_TYPE)
        self.valid_dataset = CasavaDataset(DATA_FOLD, validation_data['image_id'].values, validation_data['label'].values, CFG.VALID_AUG_TYPE)

    def training_step(self, batch, batch_index):
        """Run one training batch and return its loss and accuracy counts."""
        # CasavaDataset yields (image, label, file_name); only the first two
        # entries are needed here.
        images, labels = batch[0], batch[1]

        # Freeze the backbone once, at the first batch of the configured epoch.
        if CFG.FREEZE and batch_index == 0 and self.trainer.current_epoch == CFG.START_FREEZE:
            self.freeze()
            print('freeze the model')

        # forward pass on a batch
        logits = self(images)

        # number of correct predictions and number of samples in this batch
        correct = logits.argmax(dim=1).eq(labels).sum().item()
        total = len(labels)

        # calculating the loss
        train_loss = self.criterion(logits, labels)

        logs = {"train_loss": train_loss}

        batch_dictionary = {
            # REQUIRED: the trainer back-propagates the value stored under "loss"
            "loss": train_loss,
            # info to be used at epoch end
            "correct": correct,
            "total": total,
            "progress_bar": logs
        }

        return batch_dictionary

    def validation_step(self, batch, batch_index):
        """Run one validation batch and return its loss and accuracy counts."""
        # See training_step: the batch carries a third entry (file names).
        images, labels = batch[0], batch[1]
        logits = self(images)
        val_loss = self.val_criterion(logits, labels)

        # number of correct predictions and number of samples in this batch
        correct = logits.argmax(dim=1).eq(labels).sum().item()
        total = len(labels)

        logs = {"val_loss": val_loss}

        batch_dictionary = {
            "loss": val_loss,
            # info to be used at epoch end
            "correct": correct,
            "total": total,
            "progress_bar": logs
        }

        return batch_dictionary

    def configure_optimizers(self):
        """Return the optimizer selected by ``CFG.optimizer_name``."""
        optimizer = GetOptimizer(CFG.optimizer_name, self.model.parameters())
        return optimizer

    def train_dataloader(self):
        """Shuffled loader over the training split."""
        return DataLoader(self.train_dataset, CFG.BATCH_SIZE, num_workers=CFG.NUM_WORKERS, shuffle=True)

    def val_dataloader(self):
        """Sequential loader over the validation split."""
        return DataLoader(self.valid_dataset, CFG.BATCH_SIZE, num_workers=CFG.NUM_WORKERS, shuffle=False)

    def training_epoch_end(self, outputs):
        """Log the epoch's mean training loss and accuracy to TensorBoard."""
        # calculating average loss
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()

        # calculating correct and total predictions
        correct = sum([x["correct"] for x in outputs])
        total = sum([x["total"] for x in outputs])

        # logging using tensorboard logger
        self.logger.experiment.add_scalar("Loss/Train",
                                            avg_loss,
                                            self.current_epoch)

        self.logger.experiment.add_scalar("Accuracy/Train",
                                            correct/total,
                                            self.current_epoch)

        epoch_dictionary = {
            # required
            'loss': avg_loss
        }

        return epoch_dictionary

    def validation_epoch_end(self, outputs):
        """Log the epoch's mean validation loss and accuracy to TensorBoard."""
        # calculating average loss
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()

        # calculating correct and total predictions
        correct = sum([x["correct"] for x in outputs])
        total = sum([x["total"] for x in outputs])

        # logging using tensorboard logger
        self.logger.experiment.add_scalar("Loss/Val",
                                            avg_loss,
                                            self.current_epoch)

        self.logger.experiment.add_scalar("Accuracy/Val",
                                            correct/total,
                                            self.current_epoch)

        epoch_dictionary = {
            # required
            'loss': avg_loss,
            # Also exposed under the key that the training cell's
            # ModelCheckpoint monitors and uses in its file name.
            # NOTE(review): relies on the trainer picking up metrics from the
            # dict returned here -- confirm for the installed Lightning version.
            'val_loss': avg_loss
        }

        return epoch_dictionary

In [None]:
# Five fold training. 
if Training:

    # Train one model per fold listed in the configuration (previously a
    # hard-coded range(5), which ignored CFG.TRAIN_FOLDS).
    for fold in CFG.TRAIN_FOLDS:

        # Keep only the checkpoint with the lowest validation loss for this fold.
        checkpoint_callback = ModelCheckpoint(
            filepath='./ng_models_effnet/'+str(fold)+'/model_fold-{epoch:02d}-loss-{val_loss}', 
            monitor='val_loss', verbose=True, 
            save_last=False, save_top_k=1, save_weights_only=False, 
            mode='min', period=1, prefix='')

        tb_logger = pl_loggers.TensorBoardLogger('ng_logs_effnet/'+str(fold)+'/')

        trainer = pl.Trainer(logger=tb_logger, gpus=1, max_epochs=15, checkpoint_callback=checkpoint_callback) #, accelerator ='ddp')
        # `fold` selects which rows of train_folds.csv are held out for validation.
        model = CassavaModel(fold=fold)
        trainer.fit(model)

        print(checkpoint_callback.best_model_path)

In [None]:
# Directory holding the trained per-fold checkpoints loaded for inference.
MODEL_PATH = '../input/effnet-resnet-weights/'

In [None]:
## Inference
# Inference, part 1: rebuild one CassavaModel per checkpoint, load its weights
# and read the list of test images. Produces `models`, `device` and `test_df`
# for the TTA cell.
if not Training:
    # ====================================================
    # Model Loading
    # ====================================================
    models = []
    count = 0
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    MODEL_PATHS = ['tf_efficientnet_b4_ns_fold_0.ckpt','tf_efficientnet_b4_ns_fold_1.ckpt','tf_efficientnet_b4_ns_fold_2.ckpt','tf_efficientnet_b4_ns_fold_3.ckpt','tf_efficientnet_b4_ns_fold_4.ckpt']
    for model_fpath in MODEL_PATHS: #os.listdir(MODEL_PATH):
#         if count in CFG.TRAIN_FOLDS:
        print("Model Loaded:",model_fpath)
        # The backbone name is everything before the first '_f' (i.e. before '_fold_<k>.ckpt').
        model_name_split = model_fpath.split('_f')[0]
        # pretrained=False: the weights come from the checkpoint loaded below.
        model = CassavaModel(model_name=model_name_split,  pretrained=False) 
#         model = CassavaNet(model_name_split,pretrained = False)
        info = torch.load(MODEL_PATH + model_fpath,map_location = torch.device(device))
        print(info.keys())
#             model.load_from_checkpoint(checkpoint_path=MODEL_PATH + model_fpath)
        # The weights are stored under the checkpoint's 'state_dict' entry.
        model.load_state_dict(info['state_dict'])
        models.append(model)
        count+=1

    
#     submission = pd.DataFrame()
#     list_files = os.listdir(TRAIN_DIR) #TEST_DIR)
#     submission['image_id'] = pd.Series(list_files)
#     submission['label'] =0

    # The sample submission supplies the image ids (and placeholder labels) to score.
    test_df =  pd.read_csv(DATA_PATH + 'sample_submission.csv')
   
   

    print(test_df.shape)

In [None]:
# submission = submission.sample(50).reset_index(drop=True)

In [None]:
if not Training:
    # ====================================================
    # TTA
    # ====================================================
    # Inference, part 2: for every test image, build CFG.N_TTA augmented
    # copies, average the softmax outputs over all copies and all fold
    # models, and write the arg-max class to submission.csv.
    BATCH_SIZE = 1  # the loop below processes exactly one image per batch
    test_set = CasavaDataset(TEST_DIR, test_df['image_id'].values, test_df['label'].values, None)
    image_ids = []
    predicts = []
    test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=1,pin_memory = True)

    # Move the models to the device and switch them to eval mode once,
    # instead of once per image.
    for model in models:
        model.to(device)
        model.eval()

    with torch.no_grad():
        for images, labels, image_id in tqdm(test_loader):
            # The dataset was built without a transform, so the batch holds the
            # raw H x W x C image; take it at its own size rather than
            # assuming 600 x 800.
            img_np = images[0].numpy()

            # One independently augmented copy per TTA slot. (Previously only
            # the last slot was filled, because the assignment sat outside the
            # augmentation loop, leaving the other slots as all-zero images.)
            aug_images = torch.stack([
                CFG.heavy_transforms(image=img_np)['image']
                for _ in range(CFG.N_TTA)
            ]).to(torch.float32).to(device)

            voting = np.zeros((len(models), CFG.N_TTA, CFG.N_CLASSES))
            for model_no, model in enumerate(models):
                logits = model(aug_images)
                # dim=1 is the class dimension of the (N_TTA, N_CLASSES) logits.
                voting[model_no, :, :] = F.softmax(logits, dim=1).cpu().numpy()

            # Average over models and TTA copies, then take the most likely class.
            label = int(np.argmax(voting.mean(axis=(0, 1))))
            image_ids.extend(image_id)
            predicts.append(label)

    submission_df = pd.DataFrame(
                {'image_id': image_ids,
                 'label': predicts
                })

    submission_df.to_csv('submission.csv',index=False)


In [None]:
!cat submission.csv