In [None]:
import os
import gc
import cv2
import sys
import math
import time
import copy
import numpy as np
import pandas as pd
from PIL import Image
from pathlib import Path
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
Path.ls = lambda x: list(x.iterdir())

import albumentations
from albumentations.pytorch import ToTensor, ToTensorV2

import torch
from torch import nn, optim
from torchvision import transforms, models
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#efficientnet from https://github.com/lukemelas/EfficientNet-PyTorch
effnet_path = '../input/efficientnet/EfficientNet-PyTorch/'
sys.path.append(effnet_path)
from efficientnet_pytorch import EfficientNet

# Overview

### This notebook builds heavily on https://www.kaggle.com/moeinshariatnia/pytorch-better-normalization-onecycle-lr-train
### The core training logic comes from that notebook; the goal here is to explore rapid iteration over training configs

### Next steps of this exploration would be to bring the optimization method and learning rate scheduler into the experiment

In [None]:
# Define Your Experiment

# To keep each run fast, only a fraction of the original training set is used.
rapid_train_data_percent = 0.1  # i.e. 10%

# Baseline settings shared by every run; an experiment overrides exactly one key.
base_configs = dict(
    # model output / input geometry
    num_classes=5,
    crop_height=256,
    crop_width=256,
    # augmentation probabilities and limits
    horiz_flip=0.5,
    rotate=0.5,
    hue=0.2,
    sat=0.2,
    val=0.2,
    hue_sat_val_p=0.5,
    brightness_limit_min=-0.1,
    brightness_limit_max=0.1,
    contrast_limit_min=-0.1,
    contrast_limit_max=0.1,
    brightness_contrast_p=0.5,
    coarse_dropout=0.5,
    cutout=0.5,
    # optimisation
    batch_size=32,
    dropout=0.2,
    epochs=7,
    max_lr=1e-3,
    pct_start=0.25,
)

# A run with the unmodified base configs always happens first, so list here only
# the values that differ from base_configs (one extra run per listed value).
experiment_registry = dict(
    dropout=[0.3, 0.1],
    pct_start=[0.5, 0.1],
)

## The following is a larger code block for the core training logic. See linked notebook to learn much more about this logic

In [None]:
# Locate the competition data and build the (sub-sampled) train/validation frames.
path = Path("../input/cassava-leaf-disease-classification")
df_path = path/"train.csv"
train_path = path/"train_images"
train_fnames = train_path.ls()

df = pd.read_csv(df_path)
num_classes = df['label'].nunique() # number of classes (5 in our case)


df.reset_index(inplace=True, drop=True)
# Stratified 80/20 split. With n_splits=2 the loop runs twice and the frames from
# the second generated split are the ones that remain bound afterwards.
sss = StratifiedShuffleSplit(n_splits=2, test_size=0.2, random_state=42)
for train_idx, val_idx in sss.split(X=df, y=df['label']):
    train_df_pre = df.loc[train_idx]
    val_df_pre = df.loc[val_idx]

# Keep every k-th row so that roughly `rapid_train_data_percent` of each split
# survives. The stride is the reciprocal of the fraction (0.1 -> every 10th row,
# 0.25 -> every 4th, 1.0 -> every row); multiplying the fraction by 100 is only
# correct for exactly 10% and yields a zero stride for fractions below 1%.
if not 0 < rapid_train_data_percent <= 1:
    raise ValueError(
        f"rapid_train_data_percent must be in (0, 1], got {rapid_train_data_percent!r}"
    )
subsample_stride = max(1, round(1 / rapid_train_data_percent))
train_df = train_df_pre.iloc[::subsample_stride, :]
val_df = val_df_pre.iloc[::subsample_stride, :]


# Per-channel normalisation statistics passed to albumentations.Normalize.
mean = [0.4589, 0.5314, 0.3236]
std = [0.2272, 0.2297, 0.2200]


def accuracy(preds, target):
    """Return the fraction of rows whose highest-scoring class equals `target`.

    `preds` is reduced with argmax over dim 1 and compared element-wise with
    `target`; the result is a 0-dim float tensor.
    """
    predicted_labels = preds.argmax(dim=1)
    hits = predicted_labels.eq(target)
    return hits.float().mean()

def one_epoch(model, dl, loss_func, opt=None, lr_schedule=None):
    """Run `model` over every batch of `dl` once.

    When `opt` is given, each batch also performs a backward pass and an
    optimizer step, records the first param group's learning rate, and steps
    `lr_schedule` if one was supplied. Without `opt` the pass is forward-only.

    Returns a tuple `(mean batch loss, mean batch accuracy, recorded lrs)`; both
    means are taken over the number of batches, and the lr list is empty when
    `opt` is None.
    """
    loss_total = 0.
    acc_total = 0
    lr_trace = []
    is_training = opt is not None

    for inputs, targets in tqdm(dl):
        inputs = inputs.to(device)
        targets = targets.to(device)
        logits = model(inputs)
        batch_loss = loss_func(logits, targets)

        if is_training:
            opt.zero_grad()
            batch_loss.backward()
            opt.step()
            # Log the lr that was just used, before the scheduler moves it.
            lr_trace.append(opt.param_groups[0]["lr"])
            if lr_schedule is not None:
                lr_schedule.step()

        acc_total += accuracy(logits, targets).item()
        loss_total += batch_loss.item()

    n_batches = len(dl)
    return loss_total / n_batches, acc_total / n_batches, lr_trace


def get_lr(opt):
    """Return the 'lr' of the optimizer's first param group (None if it has none)."""
    first_group = next(iter(opt.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

def train_val(model, params):
    """Train `model` for a number of epochs, validating after each one.

    `params` must provide the keys "num_epochs", "loss_func", "optimizer",
    "train_dl", "val_dl", "lr_scheduler", "path2weights" and "one_cycle".

    Whenever validation loss improves, the weights are copied in memory and
    saved to `path2weights`. With "one_cycle" true the scheduler is stepped per
    batch inside `one_epoch`; otherwise it is stepped once per epoch with the
    validation loss, and the best weights are reloaded whenever that step
    changed the learning rate.

    Returns `(model with best weights loaded, loss_history, metric_history,
    lrs_by_epoch)`, where the histories are dicts with "train"/"val" lists and
    `lrs_by_epoch` is a list of `{epoch: [lr, ...]}` dicts.
    """
    (total_epochs, criterion, optimizer, train_loader,
     val_loader, scheduler, weights_path, use_one_cycle) = (
        params[key] for key in (
            "num_epochs", "loss_func", "optimizer", "train_dl",
            "val_dl", "lr_scheduler", "path2weights", "one_cycle",
        )
    )

    loss_history = {split: [] for split in ("train", "val")}
    metric_history = {split: [] for split in ("train", "val")}
    lrs_by_epoch = []

    best_weights = copy.deepcopy(model.state_dict())
    lowest_val_loss = float('inf')

    for epoch in range(total_epochs):
        started_at = time.time()
        lr_at_start = get_lr(optimizer)
        print(f'Epoch {epoch + 1}/{total_epochs}, current lr = {lr_at_start:5f}')

        # --- training pass ---
        model.train()
        per_batch_scheduler = scheduler if use_one_cycle else None
        train_loss, train_metric, epoch_lrs = one_epoch(
            model, train_loader, criterion, optimizer, per_batch_scheduler)
        lrs_by_epoch.append({epoch: epoch_lrs})
        loss_history["train"].append(train_loss)
        metric_history["train"].append(train_metric)

        # --- validation pass (no gradients, no optimizer) ---
        model.eval()
        with torch.no_grad():
            val_loss, val_metric, _ = one_epoch(model, val_loader, criterion, opt=None)

        if val_loss < lowest_val_loss:
            lowest_val_loss = val_loss
            best_weights = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), weights_path)
            print("Copied best model weights!")

        loss_history["val"].append(val_loss)
        metric_history["val"].append(val_metric)

        if not use_one_cycle:
            # Per-epoch scheduler driven by validation loss; if it changed the
            # lr, restart from the best weights seen so far.
            scheduler.step(val_loss)
            if lr_at_start != get_lr(optimizer):
                print("Loading best model weights!")
                model.load_state_dict(best_weights)

        elapsed = time.time() - started_at
        print(f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}\n"
              f"Train Acc: {train_metric:.4f}, Val Acc: {val_metric:.4f}\n"
              f"Completed in {elapsed:.3f}")

        print("----------")

    model.load_state_dict(best_weights)

    return model, loss_history, metric_history, lrs_by_epoch

## We wrap the actual training step in a function so the experiment loop can call it once per config; the logic is otherwise mostly unchanged from the source notebook

In [None]:
def train_cassava_model(base_configs):
    """Build the data pipeline and an EfficientNet-B4 classifier from a config, then train it.

    Args:
        base_configs: dict of settings. Keys read here: 'crop_height',
            'crop_width', 'horiz_flip', 'rotate', 'hue', 'sat', 'val',
            'hue_sat_val_p', 'brightness_limit_min', 'brightness_limit_max',
            'contrast_limit_min', 'contrast_limit_max', 'brightness_contrast_p',
            'coarse_dropout', 'cutout', 'batch_size', 'dropout', 'epochs',
            'max_lr' and 'pct_start'.

    Also reads the module-level `train_df`, `val_df`, `train_path`, `mean`,
    `std`, `num_classes` and `device`.

    Returns:
        `(model, loss_hist, metric_hist, lrs_by_epoch)` exactly as returned by
        `train_val`.
    """
    crop_h = base_configs['crop_height']
    crop_w = base_configs['crop_width']

    train_tfms = albumentations.Compose([
                albumentations.RandomResizedCrop(crop_h, crop_w),
                albumentations.HorizontalFlip(p=base_configs['horiz_flip']),
                albumentations.ShiftScaleRotate(p=base_configs['rotate']),
                albumentations.HueSaturationValue(
                    hue_shift_limit=base_configs['hue'],
                    sat_shift_limit=base_configs['sat'],
                    val_shift_limit=base_configs['val'],
                    p=base_configs['hue_sat_val_p']
                ),
                albumentations.RandomBrightnessContrast(
                    brightness_limit=(base_configs['brightness_limit_min'],base_configs['brightness_limit_max']),
                    contrast_limit=(base_configs['contrast_limit_min'], base_configs['contrast_limit_max']),
                    p=base_configs['brightness_contrast_p']
                ),
                albumentations.Normalize(
                    mean=mean,
                    std=std,
                    max_pixel_value=255.0,
                    p=1.0
                ),
                albumentations.CoarseDropout(p=base_configs['coarse_dropout']),
                albumentations.Cutout(p=base_configs['cutout']),
                ToTensorV2()], p=1.)

    # Validation uses the configured crop size too, so that changing
    # crop_height/crop_width affects train and validation inputs alike.
    valid_tfms = albumentations.Compose([
                albumentations.CenterCrop(crop_h, crop_w, p=1.),
                albumentations.Resize(crop_h, crop_w),
                albumentations.Normalize(
                    mean=mean,
                    std=std,
                    max_pixel_value=255.0,
                    p=1.0
                ),
                ToTensorV2()], p=1.)

    class LeafData(Dataset):
        """Dataset yielding `(transformed image, label)` for each row of a dataframe."""

        def __init__(self, df, split="train"):
            if split == "train":
                self.transforms = train_tfms
            elif split == "val":
                self.transforms = valid_tfms
            else:
                # Fail here rather than with an AttributeError on first item access.
                raise ValueError(f"split must be 'train' or 'val', got {split!r}")

            self.paths = [train_path/id_ for id_ in df['image_id'].values]
            self.labels = df['label'].values

        def __getitem__(self, idx):
            img = cv2.imread(str(self.paths[idx]))
            if img is None:
                # cv2.imread signals an unreadable/missing file by returning None.
                raise FileNotFoundError(f"Could not read image: {self.paths[idx]}")
            img = img[..., ::-1] # ::-1 is here because cv2 loads the images in BGR rather than RGB
            img = self.transforms(image=img)['image']
            label = self.labels[idx]

            return img, label

        def __len__(self):
            return len(self.paths)

    def make_dataloaders(batch_size=base_configs['batch_size'], num_workers=4, pin_memory=True, **kwargs):
        """Wrap a LeafData built from **kwargs in a DataLoader; only the train split is shuffled."""
        dataset = LeafData(**kwargs)
        dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers,
                                pin_memory=pin_memory, shuffle=kwargs['split'] == "train")
        return dataloader

    train_dl = make_dataloaders(df=train_df, split="train")
    val_dl = make_dataloaders(df=val_df, split="val")

    class EfficientNetModel(nn.Module):
        """EfficientNet backbone followed by a small two-layer classification head."""

        def __init__(self, arch="b4", dropout=base_configs['dropout'], n_out=5,
                     pretrained=True, freeze=True):
            super().__init__()
            if pretrained:
                self.model = EfficientNet.from_pretrained(f"efficientnet-{arch}")
                if freeze:
                    for p in self.model.parameters():
                        p.requires_grad = False
            else:
                self.model = EfficientNet.from_name(f"efficientnet-{arch}")

            self.lin1 = nn.Linear(1792 * 2, 512) # 1792 is the final output shape of the efficientnet backbone.
            self.lin2 = nn.Linear(512, n_out)    # I'm multiplying by two because we are concatenating the avg pool
            self.bn1 = nn.BatchNorm1d(1792 * 2)  # and max pool layers.
            self.bn2 = nn.BatchNorm1d(512)
            self.dropout = dropout

        def forward(self, x):
            x = self.model.extract_features(x)
            avg = F.adaptive_avg_pool2d(x, 1)
            max_ = F.adaptive_max_pool2d(x, 1)
            # flatten(1) keeps the batch dimension even for a batch of one,
            # whereas squeeze() would drop it and break the concat on dim=1.
            cat = torch.cat((avg.flatten(1), max_.flatten(1)), dim=1)
            x = self.bn1(cat)
            # F.dropout is always active unless told otherwise; tie it to the
            # module's train/eval mode so validation runs without dropout.
            x = F.dropout(x, self.dropout, training=self.training)
            x = F.relu(self.bn2(self.lin1(x)))
            x = self.lin2(x)
            return x


    model = EfficientNetModel(pretrained=True, freeze=False,
                              arch="b4", n_out=num_classes, dropout=base_configs['dropout']).to(device) # I'm using pretrained weights but not freezing the backbone

    criterion = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters())
    epochs = base_configs['epochs']
    lr_sch = optim.lr_scheduler.OneCycleLR(opt, max_lr=base_configs['max_lr'], epochs=epochs,
                                           steps_per_epoch=len(train_dl), pct_start=base_configs['pct_start'],)

    params_train = {
     "num_epochs": epochs,
     "optimizer": opt,
     "loss_func": criterion,
     "train_dl": train_dl,
     "val_dl": val_dl,
     "lr_scheduler": lr_sch,
     "path2weights": "/kaggle/working/effnet.pt",
     "one_cycle": True
    }

    model, loss_hist, metric_hist, lrs_by_epoch = train_val(model, params_train)
    return model, loss_hist, metric_hist, lrs_by_epoch

## Basic functions to plot performance and basic model evaluation

In [None]:
def plot_model_performance(results_dict, experiment_registry):
    """Plot loss/accuracy curves and the learning-rate trace for every recorded run.

    Args:
        results_dict: nested dict `{param_name: {setting: run}}` where each run
            holds 'loss_hist' and 'metric_hist' (dicts of per-epoch lists) and
            'lrs_by_epoch' (list of `{epoch_index: [lr, ...]}` dicts).
        experiment_registry: kept for interface compatibility. The runs plotted
            are the ones actually present in `results_dict`, so this argument
            is not consulted.

    Two figures are shown per run: per-epoch loss and accuracy, then learning
    rate per optimisation step on a fractional-epoch axis.
    """
    for exp_param, runs in results_dict.items():
        for exp_setting, run in runs.items():
            experiment = exp_param + ': ' + str(exp_setting)
            print('Model Performance With Base Configs and Experimentation: {}'.format(experiment))

            loss_hist = run['loss_hist']
            metric_hist = run['metric_hist']
            lrs_by_epoch = run['lrs_by_epoch']

            legend_keys = []
            for split, series in loss_hist.items():
                plt.plot(list(range(len(series))), series)
                legend_keys.append(split + '_loss')

            for split, series in metric_hist.items():
                plt.plot(list(range(len(series))), series)
                legend_keys.append(split + '_accuracy')
            plt.xlabel('Epoch')
            plt.title(experiment)
            plt.legend(legend_keys, loc='upper left')
            plt.show()

            x_axis = []
            rates = []
            for epoch, entry in enumerate(lrs_by_epoch):
                epoch_rates = entry[epoch]
                # Steps per epoch comes from the data itself, so the axis stays
                # correct for any batch size or dataset fraction.
                steps_in_epoch = len(epoch_rates)
                for step, rate in enumerate(epoch_rates):
                    rates.append(rate)
                    x_axis.append(epoch + step / steps_in_epoch)
            plt.plot(x_axis, rates)
            plt.xlabel('Epoch')
            plt.title(experiment)
            plt.legend(['Learning Rate'], loc='upper left')
            plt.show()


def evaluate_best_model(results_dict, experiment_registry):
    """Return the label of the run with the best final-epoch validation score.

    Each run is scored as `val_loss + (1 - val_accuracy)` using the last entry
    of its validation loss and accuracy histories; lower is better.

    Args:
        results_dict: nested dict `{param_name: {setting: run}}` where each run
            holds 'loss_hist' and 'metric_hist' dicts with a 'val' list.
        experiment_registry: kept for interface compatibility. The runs scored
            are the ones actually present in `results_dict`, so this argument
            is not consulted.

    Returns:
        A string `"<param_name>: <setting>"` for the lowest-scoring run, or an
        empty string when there are no runs. On ties the first run encountered
        wins.
    """
    best_model = ''
    best_performance = float('inf')

    for exp_param, runs in results_dict.items():
        for exp_setting, run in runs.items():
            val_loss = run['loss_hist']['val'][-1]
            val_accuracy = run['metric_hist']['val'][-1]

            performance = val_loss + (1 - val_accuracy)

            if performance < best_performance:
                # Remember the score as well as the label; without this update
                # every later run would also pass the comparison and the last
                # run would always be reported.
                best_performance = performance
                best_model = exp_param + ': ' + str(exp_setting)
    return best_model

## Run our experiment

In [None]:
# Collected outputs of every run, keyed as results_dict[param_name][setting].
results_dict = {}

# Baseline run with the untouched base configs.
model, loss_hist, metric_hist, lrs_by_epoch = train_cassava_model(base_configs)
results_dict['base'] = {
    'base': {
        'loss_hist': loss_hist,
        'metric_hist': metric_hist,
        'lrs_by_epoch': lrs_by_epoch,
        'configs': base_configs,
    }
}

# One additional run per (parameter, value) pair listed in the registry, each
# overriding a single key of the base configs.
for param_name, candidate_values in experiment_registry.items():
    results_dict[param_name] = {}
    for param_value in candidate_values:
        run_record = {}
        results_dict[param_name][param_value] = run_record

        adjusted_configs = {**base_configs, param_name: param_value}
        print('Configs:')
        print(adjusted_configs)

        model, loss_hist, metric_hist, lrs_by_epoch = train_cassava_model(adjusted_configs)
        run_record.update({
            'loss_hist': loss_hist,
            'metric_hist': metric_hist,
            'lrs_by_epoch': lrs_by_epoch,
            'configs': adjusted_configs,
        })


plot_model_performance(results_dict, experiment_registry)
best_model = evaluate_best_model(results_dict, experiment_registry)
print('Basic Model Evaliation Shows Top Performing Experiment Config: {}'.format(best_model))