In [None]:
# !pip install comet_ml

In [None]:
# import comet_ml #at the top of your file
# from comet_ml import Experiment

# # Create an experiment with your api key:
# experiment = Experiment(
#     api_key=None,  # never hardcode the key; comet_ml falls back to the COMET_API_KEY environment variable
#     project_name="leaf-disease-classification",
#     workspace="kaggle",
#     log_code=True,
# )

In [None]:
import json
import os
import pathlib as pt

import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd 

from fastai.vision.all import *
from fastai.data.core import DataLoaders

from tqdm import tqdm

import torch.cuda
# Report up front whether PyTorch can see a CUDA device.
print('PyTorch found cuda' if torch.cuda.is_available() else 'PyTorch could not find cuda')

## Prepare data

In [None]:
# Locations of the competition files, all resolved relative to the dataset root.
ROOT = pt.Path('../input/cassava-leaf-disease-classification')
LABEL_JSON = ROOT.joinpath("label_num_to_disease_map.json")
TRAIN_CSV = ROOT.joinpath("train.csv")
TRAIN_DIR = ROOT.joinpath("train_images")
TEST_DIR = ROOT.joinpath("test_images")

In [None]:
# Collect every entry of the train / test image folders and report how many were found.
train_imgs = [*TRAIN_DIR.glob("*")]
test_imgs = [*TEST_DIR.glob("*")]
print(f"Train: # {len(train_imgs)}")
print(f"Test: # {len(test_imgs)}")

In [None]:
# Load the training table; later cells read its `image_id` and `label` columns.
train_df = pd.read_csv(filepath_or_buffer=TRAIN_CSV)
train_df.head(n=3)

In [None]:
# Load the mapping that is used below to turn numeric labels into disease names.
# The encoding is given explicitly: JSON text is UTF-8, and relying on the
# platform's default encoding could mis-decode the file on some systems.
with open(LABEL_JSON, encoding="utf-8") as fp:
    label_dict = json.load(fp)
label_dict

In [None]:
def create_path(row):
    """Return the location of the image file named `row` inside `TRAIN_DIR`."""
    return TRAIN_DIR.joinpath(row)
def get_label_name(row):
    """Look up the entry of `label_dict` stored under the string form of `row`."""
    label_key = str(row)
    return label_dict[label_key]

# Derive two helper columns: the path of every training image and the
# disease name that corresponds to its numeric label.
train_df['img_path'] = train_df['image_id'].map(create_path)
train_df['disease_name'] = train_df['label'].map(get_label_name)
train_df.head(n=3)

In [None]:
# Bar chart of how many training samples each disease has.
_df = train_df['disease_name'].value_counts().sort_index()
fig, ax = plt.subplots()
_df.plot(kind='bar', ax=ax)
ax.set_xlabel("Disease")
ax.set_ylabel("Frequency")
ax.set_title("Nr of samples / disease")
# experiment.log_figure(figure_name="Leaf Diseases", figure=fig)

In [None]:
# Batch-level augmentation pipeline. The keyword values are unchanged; they are
# only grouped by what they control.
aug_tfms = aug_transforms(
    # global multiplier and application mode
    mult=1.5,
    batch=False,
    xtra_tfms=None,
    # flips
    do_flip=True,
    flip_vert=False,
    # affine changes: rotation, zoom, warp
    max_rotate=20.0,
    min_zoom=1.0,
    max_zoom=1.5,
    max_warp=0.2,
    p_affine=0.75,
    # lighting changes
    max_lighting=0.3,
    p_lighting=0.65,
    # output size and resampling
    size=224,
    min_scale=1.0,
    mode='bilinear',
    pad_mode='reflection',
    align_corners=True,
)

# Build the train/validation loaders straight from the dataframe: images come
# from `img_path`, targets from `label`, and 20% of the rows are held out for
# validation using a fixed seed.
data_loaders = ImageDataLoaders.from_df(
    train_df,
    path="",
    fn_col='img_path',
    label_col='label',
    valid_pct=0.2,
    seed=42,
    item_tfms=Resize(460),  # alternative tried: RandomResizedCrop(460, min_scale=0.3)
    batch_tfms=aug_tfms,
)

In [None]:
# Visual sanity check of the loaders: display up to 8 samples on 2 rows.
# NOTE(review): with unique=True fastai is expected to repeat a single image so
# that the effect of the augmentations can be compared — confirm in the fastai docs.
data_loaders.show_batch(max_n=8, nrows=2, unique=True)

## Setup the resnet architecture

In [None]:
# creating directories and copying the models to those directories
# The weights are taken from the notebook's input datasets and stored in torch's
# hub checkpoint directory under checkpoint-style file names — presumably so the
# pretrained architectures can be built without downloading them; TODO confirm.
# NOTE(review): the target directory is hard-coded to /root, so this cell assumes
# the notebook runs as root. Only resnet152 is used by the learner in this notebook.
!mkdir -p /root/.cache/torch/hub/checkpoints/
!cp ../input/resnet34/resnet34.pth /root/.cache/torch/hub/checkpoints/resnet34-333f7ec4.pth
!cp ../input/resnet50/resnet50.pth /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth
!cp ../input/resnet152/resnet152.pth /root/.cache/torch/hub/checkpoints/resnet152-b121ed2d.pth

In [None]:
# Create a resnet152-based learner that tracks error rate and accuracy and
# optimizes with Adam, then run the learning-rate finder on it.
learn = cnn_learner(
    data_loaders,
    resnet152,
    opt_func=Adam,
    metrics=[error_rate, accuracy],
)
learn.lr_find()

In [None]:
# experiment.log_parameters(hyperparams)

`cnn_learner` calls `.freeze()` by default. This means it freezes all the layers except the last ones, which are added for the new classification task. When `.fit_one_cycle()` is called, only these last layers are trained.

In [None]:
# Interactive helper, kept commented out like the other `??` lookup in this
# notebook so the notebook also runs outside an interactive IPython session.
# learn.fine_tune??

In [None]:
# First training stage: one epoch with learning rates spread from
# base_lr / 100 up to base_lr.
base_lr = 0.003
learn.fit_one_cycle(
    n_epoch=1,
    lr_max=slice(base_lr / 100, base_lr),
)

In [None]:
# Undo the default freeze so that the following fits are no longer restricted
# to the newly added last layers.
learn.unfreeze()

In [None]:
# Run the learning-rate finder again, this time on the unfrozen model.
learn.lr_find()

In [None]:
# learn.fit_one_cycle??

In [None]:
# Second training stage on the unfrozen model: one epoch with fit_one_cycle's
# default learning rate, followed by 20 epochs with rates between 1e-6 and 1e-4.
# NOTE(review): the first call passes no lr_max — confirm the default is intended here.
learn.fit_one_cycle(1)
learn.fit_one_cycle(
    n_epoch=20,
    lr_max=slice(1e-6, 1e-4),
)

In [None]:
# experiment.log_model(name="resnet34_model_v0", file_or_folder="./resnet34_model.pkl")

## Look at some predictions

In [None]:
# Display a sample of the learner's results for a quick visual check.
learn.show_results()

In [None]:
# Build an interpretation object from the trained learner and plot the
# 9 top-loss samples on a 15x10 figure.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_top_losses(9, figsize=(15,10))

In [None]:
# Draw the confusion matrix and save it as an image next to the notebook.
interp.plot_confusion_matrix()
# `pad_inches` is the savefig keyword for the margin kept around a tight
# bounding box; the previous `padding=0` is not a savefig option and is either
# ignored or rejected with a TypeError, depending on the matplotlib version.
plt.savefig("confusion_matrix.png", bbox_inches='tight', pad_inches=0)
# experiment.log_image("confusion_matrix.png")

In [None]:
# List the class pairs that the model mixes up most often.
interp.most_confused() # (actual, predicted, nr of occurrences)

In [None]:
# Print the classification report computed by the interpretation object.
interp.print_classification_report()

## Create submission file

In [None]:
def get_image_id(row):
    """Return the file name (final path component) of the image path `row`."""
    return pt.Path(row).name
# One row per test image: its path and the file name that serves as image id.
test_df = pd.DataFrame({'img_path': test_imgs})
test_df['image_id'] = test_df['img_path'].apply(get_image_id)
test_df.head()

In [None]:
# Create a test DataLoader from the test dataframe and run the model on it.
test_dl = data_loaders.test_dl(test_df)
# with_decoded=True returns (predictions, _, predicted label)
res_preds = learn.get_preds(dl=test_dl, with_decoded=True)
preds_values, preds_labels = res_preds[0], res_preds[2]

In [None]:
print("Generating submission file...")

# Pair image ids and predicted labels by position. The ids are read from the
# column in one go instead of one `test_df.iloc[idx]` lookup per prediction,
# which gives the same values in the same order without the per-row overhead.
# If the two lists ever differ in length, the DataFrame constructor raises a
# ValueError rather than writing a misaligned file.
submission_data = {
    'image_id': test_df['image_id'].tolist(),
    'label': [pred_label.item() for pred_label in preds_labels],
}

submission_df = pd.DataFrame(data=submission_data)
submission_df.to_csv("submission.csv", index=False)
# experiment.log_table("submission.csv")
# Show the first lines of submission.csv as a quick check of its contents.
!head submission.csv

In [None]:
# experiment.end()