Inference notebook: https://www.kaggle.com/krisho007/simple-pytorch-lightning-inference

In [None]:
!pip install timm

In [None]:
import os
import cv2
import pandas as pd
import numpy as np
import random
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.metrics.functional import accuracy
import torch
# from torchvision import models
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
import albumentations as albu
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import StratifiedKFold
import timm

In [None]:
import random
def seed_everything(seed=42):
    """Seed Python, NumPy and PyTorch (CPU and CUDA) RNGs with ``seed``.

    Also exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode
    with auto-tuning (benchmark) disabled.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Every seeding entry point takes the same integer seed.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed all RNGs with the default seed (42) before any data shuffling or model creation.
seed_everything()

In [None]:
# CSV listing the training images; read below via its `image_id` and `label` columns.
TRAIN_CSV = "../input/cassava-leaf-disease-classification/train.csv"
# Directory containing the image files named by `image_id`.
TRAIN_IMAGE_FOLDER = '../input/cassava-leaf-disease-classification/train_images'
# Number of target classes; sizes the model's classification head.
CLASSES = 5

### Symmetric Cross Entropy Loss
https://www.kaggle.com/c/cassava-leaf-disease-classification/discussion/208239

In [None]:
class SymmetricCrossEntropy(nn.Module):
    """Weighted sum of cross entropy and a second, clamped log-likelihood term.

    The loss is ``alpha * ce + beta * rce`` where ``ce`` is the standard
    ``F.cross_entropy`` and ``rce`` is ``-sum(onehot * log(clamp(softmax)))``.

    NOTE(review): as written, ``rce`` is cross entropy with clamped
    probabilities rather than the "reverse" term of the Symmetric Cross
    Entropy paper (which swaps prediction and label). The formula is kept
    unchanged here - confirm whether that is intended.

    Args:
        alpha: weight of the standard cross-entropy term.
        beta: weight of the ``rce`` term.
        num_classes: number of classes, i.e. width of the one-hot encoding.
    """

    def __init__(self, alpha=0.1, beta=1.0, num_classes= 5):
        super(SymmetricCrossEntropy, self).__init__()
        self.alpha = alpha
        self.beta = beta
        self.num_classes = num_classes

    def forward(self, logits, targets, reduction='mean'):
        """Compute the loss.

        Args:
            logits: raw scores indexed as ``(batch, num_classes)``.
            targets: integer class indices, one per sample.
            reduction: ``'mean'``, ``'sum'`` or anything else for per-sample
                values of the ``rce`` term (``'none'`` for both terms).
        """
        # Build the one-hot matrix on the same device as the logits instead of
        # unconditionally calling .cuda(), so the loss also works on CPU.
        device = logits.device
        targets = targets.to(device)
        onehot_targets = torch.eye(self.num_classes, device=device)[targets]

        ce_loss = F.cross_entropy(logits, targets, reduction=reduction)

        # Clamp before log to avoid -inf for vanishing probabilities.
        log_probs = logits.softmax(1).clamp(1e-7, 1.0).log()
        rce_loss = (-onehot_targets * log_probs).sum(1)
        if reduction == 'mean':
            rce_loss = rce_loss.mean()
        elif reduction == 'sum':
            rce_loss = rce_loss.sum()
        return self.alpha * ce_loss + self.beta * rce_loss

### Taylor Smooth cross entropy

In [None]:
# https://www.kaggle.com/c/siim-isic-melanoma-classification/discussion/173733#965200
# implementations reference - https://github.com/CoinCheung/pytorch-loss/blob/master/pytorch_loss/taylor_softmax.py
# paper - https://www.ijcai.org/Proceedings/2020/0305.pdf

class TaylorSoftmax(nn.Module):
    """Softmax variant that replaces ``exp(x)`` by its order-``n`` Taylor polynomial.

    Args:
        dim: dimension along which the outputs are normalised to sum to 1.
        n: order of the Taylor expansion; must be even (asserted).
    """

    def __init__(self, dim=1, n=2):
        super().__init__()
        assert n % 2 == 0
        self.dim = dim
        self.n = n

    def forward(self, x):
        """Return ``poly(x) / sum(poly(x), dim)`` with ``poly = sum_{i<=n} x^i / i!``."""
        series = torch.ones_like(x)  # zeroth-order term
        factorial = 1.
        order = 1
        while order <= self.n:
            factorial *= order
            series = series + x.pow(order) / factorial
            order += 1
        normaliser = series.sum(dim=self.dim, keepdims=True)
        return series / normaliser

class LabelSmoothingLoss(nn.Module):
    """Cross entropy against a label-smoothed target distribution.

    The true class receives ``1 - smoothing``; the remaining ``smoothing`` mass
    is shared equally by the other ``classes - 1`` classes.

    Args:
        classes: number of classes.
        smoothing: probability mass moved away from the true class.
        dim: dimension over which the per-sample loss is summed.
    """

    def __init__(self, classes, smoothing=0.0, dim=-1):
        super().__init__()
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the batch-mean loss; ``pred`` must already be log-probabilities."""
        off_value = self.smoothing / (self.cls - 1)
        with torch.no_grad():
            # Start from the smoothed background, then write the confidence
            # at each sample's true class (classes live on dimension 1).
            true_dist = torch.full_like(pred, off_value)
            true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        weighted = -true_dist * pred
        return weighted.sum(dim=self.dim).mean()
    

class TaylorCrossEntropyLoss(nn.Module):
    """Label-smoothed cross entropy computed on Taylor-softmax probabilities.

    Args:
        n: even order of the Taylor expansion used by ``TaylorSoftmax``.
        ignore_index: stored for API compatibility; not used by ``forward``.
        reduction: stored for API compatibility; ``forward`` always returns
            the batch mean produced by ``LabelSmoothingLoss``.
        smoothing: label-smoothing factor.
        num_classes: number of target classes (previously hard-coded to 5).
    """

    def __init__(self, n=2, ignore_index=-1, reduction='mean', smoothing=0.2,
                 num_classes=5):
        super(TaylorCrossEntropyLoss, self).__init__()
        assert n % 2 == 0
        self.taylor_softmax = TaylorSoftmax(dim=1, n=n)
        self.reduction = reduction
        self.ignore_index = ignore_index
        self.lab_smooth = LabelSmoothingLoss(num_classes, smoothing=smoothing)

    def forward(self, logits, labels):
        """Return the smoothed loss for ``logits`` against integer ``labels``."""
        # LabelSmoothingLoss expects log-probabilities.
        log_probs = self.taylor_softmax(logits).log()
        loss = self.lab_smooth(log_probs, labels)
        return loss

### Hyper parameters

In [None]:
# Number of stratified folds the training CSV is split into.
FOLDS = 10
BATCH_SIZE =24
LR = 0.0001
EPOCHS=4
# Label-smoothing factor passed to TaylorCrossEntropyLoss below.
SMOOTHING = 0.1

# Candidate losses. Each assignment overwrites the previous one, so only the
# last line (SymmetricCrossEntropy) is the loss actually used; reorder to switch.
LOSS_FUNCTION = nn.CrossEntropyLoss()
LOSS_FUNCTION = TaylorCrossEntropyLoss(n=2, smoothing=SMOOTHING)
LOSS_FUNCTION = SymmetricCrossEntropy()

# Candidate square input sizes; only the last uncommented value (512) is effective.
IMG_SIZE = 128
IMG_SIZE = 240
IMG_SIZE = 512
# IMG_SIZE = 380

# When True, train() adds an EarlyStopping callback.
EARLY_STOPPING = True

# Candidate timm architectures; only the last assignment is effective.
MODEL_ARCH = 'resnet50'
MODEL_ARCH = 'tf_efficientnet_b1_ns'
MODEL_ARCH = 'efficientnet_b3'
MODEL_ARCH = 'tf_efficientnet_b4_ns'

In [None]:
# # These are the available model architectures in timm
# from pprint import pprint
# model_names = timm.list_models(pretrained=True)
# pprint(model_names)

### Dataset

In [None]:
class CassavaDataset(Dataset):
    """Dataset that loads images named in a DataFrame from ``TRAIN_IMAGE_FOLDER``.

    Args:
        train: DataFrame with an ``image_id`` column and, in train mode, one
            or more columns whose names start with ``label``.
        train_mode: when False, items carry only the image (no ``"y"``).
        transforms: optional callable invoked as ``transforms(image=...)``
            that returns a mapping with an ``"image"`` entry.
    """

    def __init__(self, train, train_mode=True, transforms=None):
        self.train = train
        self.transforms = transforms
        self.train_mode = train_mode

    def __len__(self):
        """Number of rows (images) in the DataFrame."""
        return self.train.shape[0]

    def __getitem__(self, index):
        """Return ``{"x": image}`` plus ``"y"`` (float64 label tensor) in train mode.

        Raises:
            FileNotFoundError: if the image file is missing or unreadable.
        """
        image_path = os.path.join(TRAIN_IMAGE_FOLDER, self.train.iloc[index].image_id)
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of inside cvtColor.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        # OpenCV decodes to BGR; convert to RGB channel order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms:
            image = self.transforms(image=image)["image"]

        if not self.train_mode:
            return {"x": image}

        # Select every column whose name starts with 'label' for this row.
        label_columns = self.train.columns.str.startswith('label')
        return {
            "x": image,
            "y": torch.tensor(self.train.iloc[index, label_columns], dtype=torch.float64)
        }

### Transforms

In [None]:
def get_augmentations():
    """Build the albumentations pipelines for training and validation.

    Returns:
        Tuple ``(train_augmentations, valid_augmentations)``. Both resize to
        ``IMG_SIZE`` x ``IMG_SIZE``, normalise with the mean/std below and
        convert to a tensor; the training pipeline additionally applies a
        random resized crop, transpose, horizontal flip and coarse dropout.
    """
    # Per-channel normalisation statistics. These equal Normalize's defaults
    # and are now passed explicitly instead of being left unused.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    train_augmentations = albu.Compose([
        albu.RandomResizedCrop(IMG_SIZE, IMG_SIZE, p=1.0),
        albu.Transpose(p=0.5),
        albu.HorizontalFlip(p=0.5),
#         albu.VerticalFlip(0.5),
        albu.CoarseDropout(p=0.5),
        albu.Normalize(mean=mean, std=std, always_apply=True),
        ToTensorV2(p=1.0)
    ], p=1.0)

    valid_augmentations = albu.Compose([
        albu.Resize(IMG_SIZE, IMG_SIZE),
        albu.Normalize(mean=mean, std=std, always_apply=True),
        ToTensorV2(p=1.0)
    ], p=1.0)

    return train_augmentations, valid_augmentations

# Module-level train/validation pipelines, built once when this cell runs.
train_augs, val_augs = get_augmentations()

### NN Model

In [None]:
class Model(nn.Module):
    """Pretrained timm backbone (``MODEL_ARCH``) with a ``CLASSES``-way linear head.

    BatchNorm2d layers are frozen: their affine parameters do not receive
    gradients and the layers are kept in eval mode (running statistics are
    not updated), including after later ``.train()`` calls.
    """

    def __init__(self):
        super().__init__()
        self.model = timm.create_model(MODEL_ARCH, pretrained=True)

        # EfficientNets expose the head as `classifier`, ResNets as `fc`;
        # pick whichever the backbone has so both families work unchanged.
        head_name = 'classifier' if hasattr(self.model, 'classifier') else 'fc'
        n_features = getattr(self.model, head_name).in_features
        setattr(self.model, head_name, nn.Linear(n_features, CLASSES))

        self._freeze_batchnorm()

    def _batchnorm_layers(self):
        """Yield every BatchNorm2d module inside the backbone."""
        for module in self.model.modules():
            if isinstance(module, nn.BatchNorm2d):
                yield module

    def _freeze_batchnorm(self):
        """Disable gradients for BatchNorm2d affine params and put the layers in eval mode."""
        for module in self._batchnorm_layers():
            # weight/bias always exist as attributes but are None when the
            # layer was built with affine=False.
            for param in (module.weight, module.bias):
                if param is not None:
                    param.requires_grad_(False)
            module.eval()

    def train(self, mode=True):
        """Set training mode while keeping the frozen BatchNorm2d layers in eval mode.

        ``nn.Module.train`` flips every submodule back to training mode, which
        would silently undo ``_freeze_batchnorm``; re-apply eval afterwards.
        """
        super().train(mode)
        if mode:
            for module in self._batchnorm_layers():
                module.eval()
        return self

    def forward(self, x):
        """Return the backbone's class logits for the image batch ``x``."""
        x = self.model(x)
        return x

### K-Fold CV

In [None]:
# Assign every training row to one of FOLDS stratified validation folds and
# persist the result so each training run can re-read the same split.
traincsv = pd.read_csv(TRAIN_CSV)
# -1 marks rows not yet assigned to a fold.
traincsv['kfold'] = -1
# Shuffle all rows. No random_state is passed here, so reproducibility
# presumably relies on the global NumPy seed set earlier - TODO confirm.
traincsv = traincsv.sample(frac=1).reset_index(drop=True)
stratifier = StratifiedKFold(n_splits=FOLDS)

# Each row's `kfold` is the index of the split in which it is validation data.
for fold, (train_index, val_index) in enumerate(stratifier.split(X=traincsv.image_id.values, y=traincsv.label.values)):
    traincsv.loc[val_index, "kfold"] = fold

# CassavaDataModule.setup reads this file back.
traincsv.to_csv("train_folds.csv", index=False)

### PL Data module

In [None]:
class CassavaDataModule(pl.LightningDataModule):
    """Lightning data module serving one train/validation split of ``train_folds.csv``.

    Args:
        fold: fold index used for validation; all other folds are used for
            training.
    """

    def __init__(self, fold):
        super().__init__()
        self.train_aug, self.valid_aug = get_augmentations()
        self.fold = fold
        self.batch_size = BATCH_SIZE

    def setup(self, stage=None):
        """Read the fold assignments and build the train/validation datasets."""
        folds = pd.read_csv('./train_folds.csv')
#         folds = pd.get_dummies(folds, columns=['label'])
        is_validation = folds["kfold"] == self.fold
        train_fold = folds.loc[~is_validation]
        val_fold = folds.loc[is_validation]

        # Use the pipelines created in __init__ rather than module-level
        # globals, so the data module does not depend on cell execution order.
        self.train_ds = CassavaDataset(train_fold, transforms=self.train_aug)
        self.val_ds = CassavaDataset(val_fold, transforms=self.valid_aug)

    def train_dataloader(self):
        """Shuffled loader over the training folds."""
        return DataLoader(self.train_ds, self.batch_size, num_workers=4, shuffle=True)

    def val_dataloader(self):
        """Unshuffled loader over the validation fold."""
        return DataLoader(self.val_ds, self.batch_size, num_workers=4, shuffle=False)
        

### PL Module

In [None]:
class CassavaPLModule(pl.LightningModule):
    """Lightning module wrapping ``model`` with ``LOSS_FUNCTION`` and accuracy logging.

    Args:
        hparams: mapping that must provide ``lr`` (read in
            ``configure_optimizers``).
        model: the network whose parameters are optimised.
    """

    def __init__(self, hparams, model):
        super(CassavaPLModule, self).__init__()
        self.hparams = hparams
        self.model = model
        self.criterion = LOSS_FUNCTION
        self.accuracy = pl.metrics.Accuracy()

    def forward(self, x):
        """Return the wrapped model's output for ``x``."""
        return self.model(x)

#     def configure_optimizers(self):
#         optimizer = torch.optim.Adam(self.model.parameters(), lr=self.hparams.lr)
# #         optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.hparams.lr, weight_decay=0.001)
#         scheduler = {
#             'scheduler': 
#                 torch.optim.lr_scheduler.ReduceLROnPlateau(
#                     optimizer, 
#                     patience=2,
#                     factor=0.25,
#                     threshold=0.01,
#                     mode='min', verbose=True
#                 ),
#             'interval': 'epoch',
#             'monitor' : 'val_loss'
#         }
#         return [optimizer], [scheduler]

    def configure_optimizers(self):
        """Adam with a cosine-annealing warm-restart schedule stepped every batch."""
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.hparams.lr)
        scheduler = {
            'scheduler':
                torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
                    optimizer,
                    15,  # T_0: scheduler steps until the first restart
                    verbose=False
                ),
            'interval': 'step',
            'monitor' : 'train_loss'
        }
        return [optimizer], [scheduler]

    def _shared_step(self, batch, stage):
        """Run one batch, log ``{stage}_loss`` / ``{stage}_accuracy``, return the loss."""
        features = batch['x']
        # Flatten the labels to one class index per sample. reshape(-1)
        # (unlike squeeze()) keeps a 1-element batch one-dimensional, which
        # the loss functions require.
        # assumes batch['y'] holds a single label column per sample - TODO confirm
        targets = batch['y'].reshape(-1).long()
        out = self(features)
        loss = self.criterion(out, targets)
        self.log(f"{stage}_loss", loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        metric_acc = accuracy(out, targets)
        self.log(f"{stage}_accuracy", metric_acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def training_step(self, batch, batch_index):
        """Process one training batch; logs ``train_loss`` and ``train_accuracy``."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_index):
        """Process one validation batch; logs ``val_loss`` and ``val_accuracy``."""
        self._shared_step(batch, "val")

### Training

In [None]:
def train(fold):
    """Train one cross-validation fold and keep its best checkpoint.

    The checkpoint with the lowest ``val_loss`` is written to ``checkpoints/``
    with the fold index in its file name, so checkpoints of different folds
    can be told apart. When ``EARLY_STOPPING`` is set, training stops after 3
    epochs without ``val_accuracy`` improvement.

    Args:
        fold: index of the fold used for validation.

    Returns:
        Tuple ``(best_model_path, best_model_score)`` from the checkpoint
        callback.
    """
    use_gpu = torch.cuda.is_available()

    checkpoint_callback = ModelCheckpoint(
        dirpath='checkpoints/',
        # Only the fold is substituted here; `{val_loss:.2f}` is left for
        # Lightning to fill in when it saves.
        filename=f'model_fold{fold}_' + '{val_loss:.2f}',
        monitor='val_loss', verbose=True,
        save_last=False, save_top_k=1, save_weights_only=False,
        mode='min', period=1, prefix=''
    )
    callbacks = [checkpoint_callback]

    if EARLY_STOPPING:
        callbacks.append(
            EarlyStopping('val_accuracy', patience=3, verbose=True, mode='max')
        )

    trainer = pl.Trainer(
                        gpus=-1 if use_gpu else None,
                        # Half precision is only requested when a GPU is used.
                        precision=16 if use_gpu else 32,
                        max_epochs=EPOCHS,
#                         accumulate_grad_batches=4, # NEW NEW NEW NEW NEW NEW NEW NEW NEW
                        callbacks=callbacks)
    model = Model()
    pl_dm = CassavaDataModule(fold=fold)
    pl_module = CassavaPLModule(hparams={'lr':LR, 'batch_size':BATCH_SIZE}, model=model)

    # NOTE(review): overrides a Trainer attribute after construction; kept
    # from the original - confirm it is still needed with precision=16.
    trainer.use_native_amp = False
    trainer.fit(pl_module, pl_dm)

    print(checkpoint_callback.best_model_path, checkpoint_callback.best_model_score)
    return checkpoint_callback.best_model_path, checkpoint_callback.best_model_score
    

In [None]:
# Train folds 0-4 one after another.
for fold_index in range(5):
    train(fold_index)

RandAugment 

1. Remove error images  
2. https://www.kaggle.com/khyeh0719/pytorch-efficientnet-baseline-inference-tta - Augments