In [None]:
import os
import gc
import cv2
import sys
import math
import time
import copy
import numpy as np
import pandas as pd
from PIL import Image
from pathlib import Path
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
# Monkey-patch pathlib.Path with an `ls()` helper that returns a directory's entries as a list.
Path.ls = lambda x: list(x.iterdir())

import albumentations
from albumentations.pytorch import ToTensor, ToTensorV2

import torch
from torch import nn, optim
from torchvision import transforms, models
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Add the directory holding the EfficientNet package to the module search path
# so that the import below can be resolved; the append must happen before the import.
effnet_path = '../input/efficientnet-pytorch/'
sys.path.append(effnet_path)
from efficientnet_pytorch import EfficientNet

## Loading the dataframe and filenames

In [None]:
# Root of the competition data, the label CSV and the directory of training images.
path = Path("../input/cassava-leaf-disease-classification")
df_path = path/"train.csv"
train_path = path/"train_images"
# `ls()` is the helper patched onto Path in the imports cell; it lists the directory entries.
train_fnames = train_path.ls()

In [None]:
# Load the label table; `num_classes` is later passed to the model as its output size.
df = pd.read_csv(df_path)
num_classes = df['label'].nunique() # number of classes (5 in our case)
df.head()

In [None]:
# Number of rows (images) per class label.
df['label'].value_counts()

It seems like we have a huge class imbalance here! We should take care of it.

## Train/Validation Split

I'll split the main dataframe into train_df and val_df with stratification based on the labels. We are using 20% of the data for validation.

In [None]:
# Stratified 80/20 train/validation split with a fixed seed for reproducibility.
# Only one split is needed, so request exactly one and take it directly; the
# previous version generated two splits and silently kept only the last one.
# NOTE: because of that, the rows assigned to each split differ from the old version.
df = df.reset_index(drop=True)  # guarantee a 0..n-1 index without mutating in place
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, val_idx = next(sss.split(X=df, y=df['label']))
# split() yields positional indices, so select the rows by position.
train_df = df.iloc[train_idx]
val_df = df.iloc[val_idx]

In [None]:
# Sanity check: (rows, columns) of the training and validation frames.
train_df.shape, val_df.shape

## Normalization

One thing that is really common but doesn't sound right is that people usually use the ImageNet means and standard deviations to normalize this data. But I think it would be better to normalize this data using its own statistics to make sure that the resulting mean of the images will be zero and their std will be 1. This would not necessarily be the case if ImageNet stats are used, because this competition's images are quite different from the ImageNet data (they contain far more greenish colors). So using ImageNet stats is not guaranteed to yield good classification results: the meaning of the colors in the different channels would differ if they are not properly centered on zero with a std of 1!

Here, I'm using the stats of this very dataset and not the ImageNet stats. I've iterated through all training images and calculated the means and stds of the channels, which are the numbers below.

In [None]:
# Per-channel mean and standard deviation passed to the Normalize transforms below.
# The values are on a 0-1 scale (Normalize is called with max_pixel_value=255.0).
# Channel order is presumably RGB, matching the BGR->RGB flip done when images are loaded — confirm.
mean = [0.4589, 0.5314, 0.3236]
std = [0.2272, 0.2297, 0.2200]

I removed the code to calculate these numbers from the notebook in order to shorten it, but it would be something like the following. Note that the **train_dl** here must be a dataloader that does not do augmentation or normalization (it should just resize the images and convert them to tensors).

In [None]:
# Sketch of how the statistics above were obtained (intentionally left commented out).
# NOTE(review): averaging per-batch stds only approximates the dataset-wide std,
# and a smaller final batch gets the same weight as a full one.
# means, stds = [], []
# for batch, _ in train_dl:
#     means.append(batch.mean(dim=(0, 2, 3)))
#     stds.append(batch.std(dim=(0, 2, 3)))
    
# mean = torch.stack(means).mean(0)
# std = torch.stack(stds).mean(0)

## Dataset and DataLoaders

I've borrowed some code from [this notebook](https://www.kaggle.com/abhishek/using-tez-in-leaf-disease-classification) here to define the augmentations that I'll use, but I've modified it and removed some of the augmentations that did not seem to be good in our case. We are using an image size of 256.

In [None]:
# Training-time augmentation pipeline; LeafData calls it as `train_tfms(image=img)['image']`.
train_tfms = albumentations.Compose([
            # Random crop of the source image, resized to 256x256.
            albumentations.RandomResizedCrop(256, 256),
            albumentations.HorizontalFlip(p=0.5),
            albumentations.ShiftScaleRotate(p=0.5),
            # NOTE(review): albumentations documents these limits on an integer scale
            # (e.g. default hue_shift_limit=20); 0.2 may make this transform
            # close to a no-op on uint8 images — confirm against the installed version.
            albumentations.HueSaturationValue(
                hue_shift_limit=0.2, 
                sat_shift_limit=0.2, 
                val_shift_limit=0.2, 
                p=0.5
            ),
            albumentations.RandomBrightnessContrast(
                brightness_limit=(-0.1,0.1), 
                contrast_limit=(-0.1, 0.1), 
                p=0.5
            ),
            # Scale pixels by 1/255, then standardise with the dataset's own channel statistics.
            albumentations.Normalize(
                mean=mean, 
                std=std, 
                max_pixel_value=255.0, 
                p=1.0
            ),
            # The two dropout-style transforms run after Normalize, so whatever fill value
            # they use is applied in normalized space (presumably 0, i.e. the channel mean — confirm).
            albumentations.CoarseDropout(p=0.5),
            # NOTE(review): Cutout is deprecated in newer albumentations releases in favour of
            # CoarseDropout — confirm it exists in the installed version.
            albumentations.Cutout(p=0.5),
            # Convert the HWC numpy image to a CHW torch tensor.
            ToTensorV2()], p=1.)

        
# Deterministic validation pipeline (no random augmentation).
# NOTE(review): CenterCrop(256, 256) already outputs a 256x256 image, so the Resize(256, 256)
# that follows leaves the size unchanged. Validation therefore sees only a central 256-pixel
# patch of each image, whereas training uses RandomResizedCrop — confirm this is intended.
valid_tfms = albumentations.Compose([
            albumentations.CenterCrop(256, 256, p=1.),
            albumentations.Resize(256, 256),
            # Same normalization statistics as the training pipeline.
            albumentations.Normalize(
                mean=mean, 
                std=std, 
                max_pixel_value=255.0, 
                p=1.0
            ),
            # Convert the HWC numpy image to a CHW torch tensor.
            ToTensorV2()], p=1.)

class LeafData(Dataset):
    """Dataset of leaf images and their labels, described by a dataframe.

    Args:
        df: dataframe with an ``image_id`` column (file names located under
            ``train_path``) and a ``label`` column.
        split: ``"train"`` selects the augmenting ``train_tfms`` pipeline,
            ``"val"`` selects the deterministic ``valid_tfms`` pipeline.

    Raises:
        ValueError: if ``split`` is neither ``"train"`` nor ``"val"``.
    """

    def __init__(self, df, split="train"):
        if split == "train":
            self.transforms = train_tfms
        elif split == "val":
            self.transforms = valid_tfms
        else:
            # Fail fast: previously an unknown split left `self.transforms`
            # unset and only surfaced later as an AttributeError while loading.
            raise ValueError(f"split must be 'train' or 'val', got {split!r}")

        self.paths = [train_path/id_ for id_ in df['image_id'].values]
        self.labels = df['label'].values

    def __getitem__(self, idx):
        """Return the transformed image and its label for position ``idx``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        img_path = self.paths[idx]
        img = cv2.imread(str(img_path))
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising, which would otherwise fail on the slice below.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = img[..., ::-1]  # cv2 loads images as BGR; reverse the channel axis to get RGB
        img = self.transforms(image=img)['image']
        label = self.labels[idx]

        return img, label

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.paths)

def make_dataloaders(batch_size=32, num_workers=4, pin_memory=True, **kwargs):
    """Build a DataLoader over a LeafData dataset.

    Args:
        batch_size: number of samples per batch.
        num_workers: number of worker processes used for loading.
        pin_memory: forwarded to ``DataLoader``.
        **kwargs: forwarded to ``LeafData`` (``df`` and, optionally, ``split``).

    Returns:
        A ``DataLoader`` that shuffles only when the split is ``"train"``.
    """
    dataset = LeafData(**kwargs)
    # LeafData defaults `split` to "train"; mirror that default here instead of
    # raising KeyError when the caller omits it.
    is_train = kwargs.get('split', 'train') == "train"
    dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers,
                            pin_memory=pin_memory, shuffle=is_train)
    return dataloader

In [None]:
# Build the training (shuffled, augmented) and validation (deterministic) loaders.
train_dl = make_dataloaders(df=train_df, split="train")
val_dl = make_dataloaders(df=val_df, split="val")
# Pull one training batch and inspect its shapes and per-channel mean/std.
xb, yb = next(iter(train_dl))
xb.shape, yb.shape, xb.mean(dim=(0, 2, 3)), xb.std(dim=(0, 2, 3))

By looking at the stats of this random batch, we can make sure that the normalization is being done properly.

## Training helper functions

Here are some utility functions which help us easily train the model. The details are not important: all they do is train and validate the model properly and call the lr_scheduler at the right time during training. Also, the best weights of the model (according to the validation loss) get saved after each epoch if the loss improves.

In [None]:
def accuracy(preds, target):
    """Fraction of rows whose highest-scoring class matches `target`.

    `preds` carries per-class scores along dim 1 and `target` the class
    indices; the result is a 0-dim float tensor.
    """
    predicted = preds.argmax(dim=1)
    is_correct = predicted.eq(target)
    return is_correct.float().mean()
    
def one_epoch(model, dl, loss_func, opt=None, lr_schedule=None):
    """Run one pass over `dl`; train when `opt` is given, otherwise only evaluate.

    Args:
        model: model called on each input batch.
        dl: iterable of ``(inputs, targets)`` batches.
        loss_func: callable ``loss_func(preds, targets)`` returning a scalar loss.
            The per-sample averaging below assumes it returns the batch mean.
        opt: optimizer; when None no backward pass or parameter update is done.
        lr_schedule: optional scheduler stepped after every optimizer step.

    Returns:
        ``(mean_loss, mean_accuracy)`` as Python floats, averaged over samples.

    Raises:
        ValueError: if `dl` yields no samples.
    """
    total_loss = 0.
    total_acc = 0.
    n_samples = 0

    for xb, yb in tqdm(dl):
        xb, yb = xb.to(device), yb.to(device)
        preds = model(xb)
        loss = loss_func(preds, yb)

        if opt is not None:
            opt.zero_grad()
            loss.backward()
            opt.step()
            if lr_schedule is not None:
                lr_schedule.step()

        # Weight every batch by its size so that a smaller final batch does not
        # count as much as a full one (averaging batch means would bias the result).
        batch_size = yb.size(0)
        total_acc += accuracy(preds, yb).item() * batch_size
        total_loss += loss.item() * batch_size
        n_samples += batch_size

    if n_samples == 0:
        # Previously this surfaced as an unexplained ZeroDivisionError.
        raise ValueError("one_epoch received a dataloader that yielded no samples")

    return total_loss / n_samples, total_acc / n_samples

In [None]:
def get_lr(opt):
    """Learning rate of the optimizer's first parameter group.

    Evaluates to None when the optimizer has no parameter groups.
    """
    group_lrs = (group['lr'] for group in opt.param_groups)
    return next(group_lrs, None)
    
def train_val(model, params):
    """Train `model`, validate after every epoch and keep the best weights.

    Args:
        model: the model to train.
        params: dict with the keys
            ``"num_epochs"``   - number of epochs to run,
            ``"loss_func"``    - loss callable,
            ``"optimizer"``    - optimizer,
            ``"train_dl"`` / ``"val_dl"`` - training / validation dataloaders,
            ``"path2weights"`` - file the best ``state_dict`` is saved to,
            ``"lr_scheduler"`` - optional scheduler (may be missing or None),
            ``"one_cycle"``    - optional, default False. When True the scheduler
                is stepped after every training batch; when False it is stepped
                once per epoch with the validation loss.

    Returns:
        ``(model, loss_history, metric_history)``. The model holds the weights
        with the lowest validation loss; each history is a dict with ``"train"``
        and ``"val"`` lists containing one value per epoch.
    """
    num_epochs = params["num_epochs"]
    loss_func = params["loss_func"]
    opt = params["optimizer"]
    train_dl = params["train_dl"]
    val_dl = params["val_dl"]
    path2weights = params["path2weights"]
    # Scheduling is optional: a missing/None scheduler simply disables it
    # instead of crashing on `None.step(...)`.
    lr_scheduler = params.get("lr_scheduler")
    one_cycle = params.get("one_cycle", False)

    loss_history = {
        "train": [],
        "val": [],
    }

    metric_history = {
        "train": [],
        "val": [],
    }

    # Start from the current weights so there is always something to restore.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')

    for epoch in range(num_epochs):
        start = time.time()
        current_lr = get_lr(opt)
        print(f'Epoch {epoch + 1}/{num_epochs}, current lr = {current_lr:5f}')

        model.train()
        # Per-batch scheduler stepping only applies to the one-cycle policy.
        train_loss, train_metric = one_epoch(model, train_dl, loss_func, opt, lr_scheduler if one_cycle else None)

        loss_history["train"].append(train_loss)
        metric_history["train"].append(train_metric)

        model.eval()
        with torch.no_grad():
            val_loss, val_metric = one_epoch(model, val_dl, loss_func, opt=None)

        # Checkpoint in memory and on disk whenever the validation loss improves.
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), path2weights)
            print("Copied best model weights!")

        loss_history["val"].append(val_loss)
        metric_history["val"].append(val_metric)

        if not one_cycle and lr_scheduler is not None:
            # Epoch-level scheduler driven by the validation loss.
            lr_scheduler.step(val_loss)
            if current_lr != get_lr(opt):
                # The scheduler changed the learning rate: continue from the best weights so far.
                print("Loading best model weights!")
                model.load_state_dict(best_model_wts)

        print(f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}\n"
              f"Train Acc: {train_metric:.4f}, Val Acc: {val_metric:.4f}\n"
              f"Completed in {time.time() - start:.3f}")

        print("-"*10)

    # Hand back the model with its best-performing weights, not the last epoch's.
    model.load_state_dict(best_model_wts)

    return model, loss_history, metric_history

## EfficientNet + Building a powerful head for it

In the following code, you can see that I am using the EfficientNet model as the base (feature extractor) and then I append a powerful head to the backbone to be able to classify images into the needed classes (5 in our case). I'm using both Adaptive Average Pooling and Adaptive Max Pooling and then concatenating the two (this was suggested by Jeremy Howard in a fastai course). Then by using a Linear layer I map the features to 512 activations and then after that with another Linear layer I make the final 5 predictions for each class.

In [None]:
class EfficientNetModel(nn.Module):
    """EfficientNet backbone followed by a concat-pooling classification head.

    The backbone's feature map is reduced with both adaptive average pooling
    and adaptive max pooling; the two results are concatenated and passed
    through BatchNorm -> Dropout -> Linear(512) -> BatchNorm -> ReLU -> Linear.

    Args:
        arch: EfficientNet variant suffix, e.g. ``"b4"``.
        dropout: dropout probability applied to the pooled features.
        n_out: number of output classes.
        pretrained: load pretrained backbone weights when True.
        freeze: when `pretrained` is True, disable gradients for the backbone.
    """

    def __init__(self, arch="b4", dropout=0.2, n_out=5, 
                 pretrained=True, freeze=True):
        super().__init__()
        if pretrained:
            self.model = EfficientNet.from_pretrained(f"efficientnet-{arch}")
            if freeze:
                for p in self.model.parameters():
                    p.requires_grad = False
        else:
            self.model = EfficientNet.from_name(f"efficientnet-{arch}")

        # Channel count of the backbone's final feature map, read from the input
        # width of its own classifier layer (1792 for b4) instead of hard-coding
        # it, so that other `arch` values work too.
        n_features = self.model._fc.in_features
        # The factor of two is because avg-pooled and max-pooled features are concatenated.
        self.lin1 = nn.Linear(n_features * 2, 512)
        self.lin2 = nn.Linear(512, n_out)
        self.bn1 = nn.BatchNorm1d(n_features * 2)
        self.bn2 = nn.BatchNorm1d(512)
        self.dropout = dropout

    def forward(self, x):
        """Return class logits of shape (batch, n_out) for a batch of images."""
        x = self.model.extract_features(x)
        # flatten(1) keeps the batch dimension even for a batch of one,
        # whereas squeeze() would drop it and break the concatenation.
        avg = F.adaptive_avg_pool2d(x, 1).flatten(1)
        max_ = F.adaptive_max_pool2d(x, 1).flatten(1)
        cat = torch.cat((avg, max_), dim=1)
        x = self.bn1(cat)
        # F.dropout defaults to training=True; tie it to the module's mode so
        # that dropout is switched off under model.eval().
        x = F.dropout(x, self.dropout, training=self.training)
        x = F.relu(self.bn2(self.lin1(x)))
        x = self.lin2(x)
        return x

## Training with OneCycle Policy

Here we put everything together and train the model with the OneCycle policy. You can refer to the PyTorch documentation or the original paper by Leslie Smith to learn about the OneCycle policy, but the brief explanation is that it starts the training with a low learning rate, increases it until 25% of the iterations have passed, and then reduces the learning rate until the training is finished (notice that one single cycle spans the whole training run; it is not repeated for each epoch separately).

In [None]:
# Build the model, loss, optimizer and one-cycle schedule, then run training.
model = EfficientNetModel(pretrained=True, freeze=False, 
                          arch="b4", n_out=num_classes, dropout=0.2).to(device) # I'm using pretrained weights but not freezing the backbone

# NOTE(review): the loss is unweighted although the label counts above were described as imbalanced — confirm this is intended.
criterion = nn.CrossEntropyLoss()
opt = optim.Adam(model.parameters())
epochs = 15
# The schedule covers epochs * len(train_dl) steps in total, which matches train_val
# stepping it once per training batch when "one_cycle" is True.
lr_sch = optim.lr_scheduler.OneCycleLR(opt, max_lr=1e-3, epochs=epochs,
                                       steps_per_epoch=len(train_dl), pct_start=0.25,)

# Settings consumed by train_val().
params_train = {
 "num_epochs": epochs,
 "optimizer": opt,
 "loss_func": criterion,
 "train_dl": train_dl,
 "val_dl": val_dl,
 "lr_scheduler": lr_sch,
 "path2weights": "/kaggle/working/effnet.pt",
 "one_cycle": True
}

model, loss_hist, metric_hist = train_val(model, params_train)

In [None]:
# NOTE(review): train_val() loads the best-validation weights into the model before returning,
# so despite the file name this saves those best weights rather than the final epoch's.
torch.save(model.state_dict(), "last_epoch.pt")