Based on [Notebook - Cassava classification - EDA & fastai starter](https://www.kaggle.com/tanlikesmath/cassava-classification-eda-fastai-starter)

# Setup

In [None]:
# Install the PyTorch Image Models package (timm) from a wheel file stored in a
# local input dataset instead of from PyPI
# (presumably because the notebook runs without internet access — TODO confirm).
!pip install ../input/timm031/timm-0.3.1-py3-none-any.whl

In [None]:
import numpy as np
import os
import pandas as pd
import time

from fastai.vision.all import *

In [None]:
# Wall-clock start of the notebook; the final cell subtracts it to print total runtime.
nb_start = time.time()

In [None]:
# Notebook variables
data_dir = Path('../input/cassava-leaf-disease-classification')  # root directory of the competition input data
sample_fraction = 1  # fraction of training rows kept by `DataFrame.sample` (1 keeps every row)
seed = 999  # passed to `set_seed` and used as the train/validation split seed

In [None]:
# Seed the random number generators through fastai's `set_seed` helper for reproducibility.
set_seed(seed)

# Preprocess

In [None]:
# Read the training metadata table (`train.csv` inside `data_dir`) into a DataFrame.
train_df = pd.read_csv(data_dir/'train.csv')

In [None]:
# Process: add a `path` column holding the full location of each training image,
# drop the now-redundant `image_id` column, then draw `sample_fraction` of the
# rows and renumber the index from 0.
train_df = train_df.assign(
    path=lambda df: df['image_id'].map(lambda image_id: data_dir/'train_images'/image_id)
)
train_df = train_df.drop(columns=['image_id'])
train_df = train_df.sample(frac=sample_fraction).reset_index(drop=True)

In [None]:
# Showcase: print the number of rows, then display the first few of them
print(len(train_df))
train_df.head()

# EDA

In [None]:
from PIL import Image

# Open the image referenced by the row labelled 0, print its size, and display it
im = Image.open(train_df.loc[0, 'path'])
width, height = im.size
print(width, height)
im

# Data Loader

In [None]:
# Create data loader
bs = 32

# Item transform: random resized crop to 460 with the aspect ratio fixed at 1:1
item_tfms = RandomResizedCrop(460, ratio=(1.,1.), min_scale=0.75)

# Batch transforms: fastai augmentations at size 224 with warping switched off,
# followed by normalization with the ImageNet statistics
batch_tfms = [
    *aug_transforms(size=224, max_warp=0),
    Normalize.from_stats(*imagenet_stats),
]

dls = ImageDataLoaders.from_df(
    train_df,
    label_col=0,    # the label sits in the first DataFrame column
    fn_col=1,       # the image path sits in the second DataFrame column
    valid_pct=0.2,  # hold out 20% of the rows for validation
    seed=seed,
    bs=bs,
    item_tfms=item_tfms,
    batch_tfms=batch_tfms)

In [None]:
# Showcase data loader: display a sample batch from `dls` as a visual sanity check
dls.show_batch()

# Model Training

## Setup

In [None]:
# Needed for making pretrained weights work without needing to find the default filename
# EfficientNet-B3 model
if not os.path.exists('/root/.cache/torch/hub/checkpoints/'):
        os.makedirs('/root/.cache/torch/hub/checkpoints/')
!cp '../input/timmefficientnet/tf_efficientnet_b3_ns-9d44bf68.pth' '/root/.cache/torch/hub/checkpoints/tf_efficientnet_b3_ns-9d44bf68.pth'

Functions from: [walkwithfastai - Utilizing the timm Library Inside of fastai](https://walkwithfastai.com/vision.external.timm)

In [None]:
# Utilities
from timm import create_model
from fastai.vision.learner import _update_first_layer


def create_timm_body(arch:str, pretrained=True, cut=None, n_in=3):
    """Creates a body from any model in the `timm` library.

    The model is created with `num_classes=0` and `global_pool=''`, its first
    layer is adapted to `n_in` input channels, and it is then truncated at `cut`.

    Args:
        arch: architecture name handed to `timm.create_model`.
        pretrained: forwarded to `create_model` and `_update_first_layer`.
        cut: where to truncate the model. `None` cuts at the last child module
            for which `has_pool_type` is true; an int keeps `children()[:cut]`;
            a callable is called with the model and its result is returned.
        n_in: number of input channels, forwarded to `_update_first_layer`.

    Returns:
        An `nn.Sequential` of the kept children, or whatever `cut(model)` returns.

    Raises:
        NameError: if `cut` is neither `None`, an int, nor a callable.
    """
    model = create_model(arch, pretrained=pretrained, num_classes=0, global_pool='')
    _update_first_layer(model, n_in, pretrained)
    if cut is None:
        # Scan the children from the end to find the last pooling layer.
        ll = list(enumerate(model.children()))
        cut = next(i for i,o in reversed(ll) if has_pool_type(o))
    if isinstance(cut, int): return nn.Sequential(*list(model.children())[:cut])
    elif callable(cut): return cut(model)
    # `NameError` keeps the exception type that reached callers before (the old
    # code referenced an undefined `NamedError`), but now carries the real message.
    else: raise NameError("cut must be either integer or function")

        
def create_timm_model(arch:str, n_out, cut=None, pretrained=True, n_in=3, init=nn.init.kaiming_normal_, custom_head=None,
                     concat_pool=True, **kwargs):
    """Create custom architecture using `arch`, `n_in` and `n_out` from the `timm` library.

    Args:
        arch: architecture name handed to `create_timm_body`.
        n_out: number of outputs of the head.
        cut: cut point forwarded to `create_timm_body` (`None`, int or callable).
        pretrained: forwarded to `create_timm_body`.
        n_in: number of input channels, forwarded to `create_timm_body`.
        init: initializer applied to the head via `apply_init`; `None` skips it.
        custom_head: module used as the head instead of one built by `create_head`.
        concat_pool: forwarded to `create_head`; doubles the feature count fed to it.
        **kwargs: extra arguments for `create_head` (unused when `custom_head` is given).

    Returns:
        `nn.Sequential(body, head)`.
    """
    # Forward `cut` to the body builder so a caller-supplied cut point takes effect.
    body = create_timm_body(arch, pretrained, cut, n_in)
    if custom_head is None:
        nf = num_features_model(nn.Sequential(*body.children())) * (2 if concat_pool else 1)
        head = create_head(nf, n_out, concat_pool=concat_pool, **kwargs)
    else: head = custom_head
    model = nn.Sequential(body, head)
    # Only the head (index 1) is initialized; the body keeps the weights it was created with.
    if init is not None: apply_init(model[1], init)
    return model


def timm_learner(dls, arch:str, loss_func=None, pretrained=True, cut=None, splitter=None,
                y_range=None, config=None, n_out=None, normalize=True, **kwargs):
    """Build a convnet style learner from `dls` and `arch` using the `timm` library.

    Args:
        dls: data loaders handed to `Learner`; also used to infer `n_out` via `get_c`.
        arch: architecture name handed to `create_timm_model`.
        loss_func: loss function handed to `Learner`.
        pretrained: forwarded to `create_timm_model`; when truthy the learner is frozen.
        cut: cut point forwarded to `create_timm_model`.
        splitter: parameter-group splitter for `Learner`; defaults to `default_split`.
        y_range: forwarded to `create_timm_model`; falls back to `config['y_range']`.
        config: extra keyword arguments for `create_timm_model`; it is copied, not mutated.
        n_out: number of outputs; inferred from `dls` when `None`.
        normalize: accepted for signature compatibility; not used by this function.
        **kwargs: extra keyword arguments for `Learner`.

    Returns:
        The constructed `Learner`.

    Raises:
        AssertionError: if `n_out` is not given and cannot be inferred from `dls`.
    """
    # Copy so that popping `y_range` below never alters the caller's dict.
    config = {} if config is None else dict(config)
    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be inferred from data, set `dls.c` or pass `n_out`"
    if y_range is None and 'y_range' in config: y_range = config.pop('y_range')
    # The third positional argument is the cut point, so pass `cut` here (not the splitter).
    model = create_timm_model(arch, n_out, cut, pretrained, y_range=y_range, **config)
    if splitter is None: splitter = default_split
    learn = Learner(dls, model, loss_func=loss_func, splitter=splitter, **kwargs)
    if pretrained: learn.freeze()
    return learn

## Train (Stage 1)
Train the frozen pre-trained model for a single epoch

In [None]:
# Define learner: build it around the `tf_efficientnet_b3_ns` architecture with
# label-smoothing cross entropy, the `ranger` optimizer and accuracy as the metric,
# then switch it to native fp16
learn = timm_learner(
    dls,
    'tf_efficientnet_b3_ns',
    loss_func=LabelSmoothingCrossEntropy(),
    metrics=[accuracy],
    opt_func=ranger,
)
learn = learn.to_native_fp16()

In [None]:
# # Find optimal learning rate for pre-trained model
# start = time.time()
# learn.lr_find()
# print("{:.2f}min".format(int(time.time() - start) / 60))

In [None]:
# Stage 1: with the learner frozen, run one epoch of `fit_flat_cos` using MixUp,
# then print the elapsed time in minutes (whole seconds / 60)
start = time.time()
learn.freeze()
learn.fit_flat_cos(1, 10e-2, wd=0.5, cbs=[MixUp()])
print(f"{int(time.time() - start) / 60:.2f}min")

In [None]:
# Save stage-1 model under the name 'stage-1' so it can be reloaded later
learn.save('stage-1')

In [None]:
# Read stage-1 model back from the checkpoint saved as 'stage-1'
learn = learn.load('stage-1')

In [None]:
# Plot the losses recorded by the learner during stage-1 training
# (NOTE(review): fastai's `plot_loss` is not limited to validation loss — confirm which curves are wanted)
learn.recorder.plot_loss()

## Train (Stage 2)
Train the entire (unfrozen) model for several epochs

In [None]:
# # Find optimal learning rate for model
# start = time.time()
# learn.unfreeze()
# learn.lr_find()
# print("{:.2f}min".format(int(time.time() - start) / 60))

In [None]:
# Stage 2: unfreeze the learner, run five epochs of `fit_flat_cos` using MixUp,
# then print the elapsed time in minutes (whole seconds / 60)
start = time.time()
learn.unfreeze()
learn.fit_flat_cos(5, 2e-3, pct_start=0, cbs=[MixUp()])
print(f"{int(time.time() - start) / 60:.2f}min")

In [None]:
# Plot the losses recorded by the learner during stage-2 training
learn.recorder.plot_loss()

In [None]:
# Save the fully trained model under the name 'stage-2'
learn.save('stage-2')

# Analyze Model

In [None]:
# Plot confusion matrix
# Convert the learner to native fp32 before building the interpretation object.
# NOTE(review): `to_native_fp32` is called on `learn` itself, so `learn` may be
# converted in place and `learn_32` may be the same object — confirm if that matters.
learn_32 = learn.to_native_fp32()
interp = ClassificationInterpretation.from_learner(learn_32)
interp.plot_confusion_matrix()

# Inference

In [None]:
# Read sample dataset: the sample submission file lists the test `image_id`s
# and serves as the template for the final submission
sample_df = pd.read_csv(data_dir/'sample_submission.csv')
sample_df.head()

In [None]:
# Create submission dataset: a separate frame (leaving `sample_df` untouched) in
# which `image_id` is replaced by the full path of the test image
_sample_df = (sample_df
    .assign(path=sample_df['image_id'].map(lambda image_id: data_dir/'test_images'/image_id))
    .drop(columns=['image_id']))

In [None]:
# Create test set data loader from the submission frame, derived from the training `dls`
test_dl = dls.test_dl(_sample_df)

In [None]:
# Showcase test set data loader: display a sample batch as a visual sanity check
test_dl.show_batch()

In [None]:
# Create predictions with test-time augmentation (`n=8`, `beta=0`); the second
# returned value is discarded.
# NOTE(review): with `beta=0` fastai's `tta` presumably relies only on the
# augmented passes and gives no weight to the un-augmented prediction — confirm.
preds, _ = learn.tta(dl=test_dl, n=8, beta=0)

# Submission

In [None]:
# Create and save submission file
# `argmax` yields a class *index*; translate it to the actual label through the
# data loaders' vocab instead of assuming that labels are exactly 0..n-1 in order.
sample_df['label'] = [dls.vocab[i] for i in preds.argmax(dim=-1).tolist()]
sample_df.to_csv('submission.csv',index=False)
sample_df.head()

In [None]:
# Total notebook runtime in minutes (whole seconds since `nb_start`, divided by 60)
print(f"{int(time.time() - nb_start) / 60:.2f}min")