In [None]:
!pip3 install --upgrade fastai

The aim of this notebook is to get started as quickly as possible and make a submission to Kaggle.

* Loading Data
* Data Augmentation (Mixup)
* Mixed Precision training
* Model Training
* Model Confusion Matrix
* Predictions

In [None]:
from fastai.vision.all import *
from fastai.callback.mixup import MixUp
import torch
import numpy as np
import random, os

In [None]:
# Preview up to ten training images in a 2x5 grid.
# Sorting makes the preview reproducible: glob returns files in arbitrary order.
files = sorted(glob.glob('/kaggle/input/cassava-leaf-disease-classification/train_images/*.jpg'))
f, plots = plt.subplots(2, 5, sharex='col', sharey='row', figsize=(19, 7),  constrained_layout=True)

# Hide the axes on every panel, including any that end up without an image.
for ax in plots.flat:
    ax.axis('off')

# zip stops at the shorter sequence, so fewer than ten files no longer raises.
for ax, image_file in zip(plots.flat, files[:10]):
    # The context manager closes the file handle once the pixels are drawn.
    with Image.open(image_file) as im:
        ax.imshow(im)

In [None]:
def seed_everything(seed=0):
    """Seed every random number generator this notebook relies on.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices) and
    switches cuDNN to deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators. Defaults to 0.
    """
    random.seed(seed)
    # Exported for child processes; the hash seed of the already-running
    # interpreter is fixed at startup and is not changed by this assignment.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, so it
    # has to be disabled for reproducible results.
    torch.backends.cudnn.benchmark = False

seed_everything()

In [None]:
# Root directory of the competition dataset.
path = Path('/kaggle/input/cassava-leaf-disease-classification')

In [None]:
# Load the label-number -> disease-name mapping shipped with the dataset.
# Built from `path` so the cell does not depend on the current working directory.
with open(path / 'label_num_to_disease_map.json', 'r') as handle:
    parsed = json.load(handle)
parsed

In [None]:
# Location of the training labels CSV, then the parsed table.
train = path.joinpath("train.csv")
train_df = pd.read_csv(filepath_or_buffer=train)

In [None]:
# Peek at the first five rows of the training labels.
train_df.head(n=5)

Looks like we have a class imbalance problem.

In [None]:
# Number of training images per label.
train_df['label'].value_counts()

In [None]:
# Build the DataLoaders from the labels table; images are read from the
# train_images folder and every item is resized to 256 before batching.
dls = ImageDataLoaders.from_df(
    train_df,
    path / "train_images",
    label_col="label",
    item_tfms=Resize(256),
    bs=64,
    num_workers=4,
)

In [None]:
# Visual sanity check of a batch of images and their labels.
dls.show_batch()

In [None]:
# Class vocabulary, followed by the number of classes.
print(dls.vocab)
print(dls.c)

In [None]:
# ResNet-34 learner with MixUp augmentation, converted to fp16 training.
# Checkpoints are written under /tmp/model/.
learn = cnn_learner(
    dls,
    resnet34,
    metrics=[error_rate, accuracy],
    cbs=MixUp,
    model_dir="/tmp/model/",
).to_fp16()

In [None]:
# Run the learning-rate finder to help choose lr_max for the fits below.
learn.lr_find()

In [None]:
# Stage 1: up to 15 one-cycle epochs before unfreezing, with early stopping
# (patience of 3 epochs).
learn.fit_one_cycle(
    15,
    lr_max=1e-2,
    cbs=EarlyStoppingCallback(patience=3),
)

In [None]:
# Make all layers trainable for the second training stage.
learn.unfreeze()

In [None]:
# Stage 2: train the unfrozen model with learning rates spread across the
# layer groups by the slice, again with early stopping.
# The closing parenthesis of the call was missing, so this cell could not run.
learn.fit_one_cycle(
    15,
    lr_max=slice(1e-7, 1e-3),
    cbs=EarlyStoppingCallback(patience=3),
)

In [None]:
# Submission template provided with the dataset.
sample_df = pd.read_csv(path / 'sample_submission.csv')

In [None]:
# Build the test file list in the row order of the submission template.
# The labels are later written into sample_df by position, and a directory
# listing is not guaranteed to come back in that order.
# Assumes the template keeps its file names in an `image_id` column - a
# different schema fails loudly with a KeyError here.
test_items = [path/"test_images"/image_id for image_id in sample_df['image_id']]
# rm_type_tfms=1 is kept from the original call; presumably it drops the
# DataFrame column reader so plain file paths can be fed in - verify against
# the fastai test_dl documentation.
dl = learn.dls.test_dl(test_items, rm_type_tfms=1, bs=64)
y_pred, _ = learn.get_preds(dl=dl)

In [None]:
# Take the highest-scoring class index for each row of predictions and store
# it as the submitted label, then preview the result.
predicted_class = y_pred.argmax(dim=-1)
sample_df['label'] = predicted_class.numpy()
sample_df.head()

In [None]:
# Write the submission file without the DataFrame index column.
sample_df.to_csv('submission.csv', index=False)