In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
%matplotlib inline
%reload_ext autoreload
%autoreload 2

from fastai.vision.all import *

# Input data files are available in the read-only "../input/" directory.
# (The stock Kaggle snippet that lists those files was removed; the `os` import below is kept because seed_torch uses it.)

import os


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np

In [None]:
SEED = 47


def seed_torch(seed=SEED):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    all CUDA devices), exports ``PYTHONHASHSEED`` and configures cuDNN for
    deterministic behaviour.

    Args:
        seed: Integer seed shared by all generators. Defaults to ``SEED``.
    """
    random.seed(seed)
    # The running interpreter fixed its hash seed at startup, so this can only
    # influence processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one (ignored without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may select different kernels from run to run; switch it off.
    torch.backends.cudnn.benchmark = False


seed_torch(seed=SEED)

In [None]:
# Training labels for the competition. Later cells read the `image_id` and
# `label` columns of this frame.
image_to_category_num = pd.read_csv('/kaggle/input/cassava-leaf-disease-classification/train.csv')

In [None]:
# Loaded as a pandas Series (typ="series"). Judging by the file name it maps each
# label number to a disease name; it is not referenced again in the cells shown here.
category_num_to_disease = pd.read_json('/kaggle/input/cassava-leaf-disease-classification/label_num_to_disease_map.json', typ="series")

In [None]:
# Share of training images per label, expressed as a percentage.
image_to_category_num["label"].value_counts(normalize=True).mul(100)

In [None]:
# Both image folders live under the same competition input directory.
COMPETITION_DIR = "/kaggle/input/cassava-leaf-disease-classification"
TRAIN_PATH = f"{COMPETITION_DIR}/train_images"
TEST_PATH = f"{COMPETITION_DIR}/test_images"

In [None]:
def get_category_num_from_path(path, labels=None):
    """Return the integer class label of the image stored at ``path``.

    Args:
        path: Location of the image (``str`` or path-like). Only its final
            component, the file name, is used for the lookup.
        labels: Optional DataFrame with ``image_id`` and ``label`` columns.
            Defaults to the notebook-level ``image_to_category_num`` frame.

    Returns:
        The label as a plain Python ``int``.

    Raises:
        KeyError: if no row of ``labels`` has this file name as ``image_id``.
        ValueError: if more than one row matches, so the label is ambiguous.
    """
    if labels is None:
        labels = image_to_category_num
    img_name = os.path.basename(str(path))
    matches = labels.loc[labels["image_id"] == img_name, "label"]
    if matches.empty:
        raise KeyError(f"no label found for image {img_name!r}")
    if len(matches) > 1:
        raise ValueError(f"{len(matches)} labels found for image {img_name!r}")
    # Pull the scalar out explicitly: calling int() on a one-element Series is
    # deprecated in recent pandas releases.
    return int(matches.iloc[0])

In [None]:
def get_image_files_debugging(path, max_files=10000):
    """Return a truncated listing of the image files under ``path`` for faster iteration.

    Args:
        path: Directory handed to ``get_image_files``.
        max_files: Maximum number of files to keep. Defaults to 10000;
            ``None`` keeps every file.

    Returns:
        The first ``max_files`` entries of ``get_image_files(path)``, in
        whatever order that call returns them.
    """
    return get_image_files(path)[:max_files]

In [None]:
# Data pipeline: image inputs with categorical targets looked up by
# get_category_num_from_path. Each item is resized to 512; batches then get the
# default aug_transforms() followed by normalisation with the ImageNet statistics.
diseases = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    get_y=get_category_num_from_path,
    splitter=RandomSplitter(seed=SEED),
    item_tfms=Resize(512),
    batch_tfms=[
        *aug_transforms(),
        Normalize.from_stats(*imagenet_stats),
    ],
)
dls = diseases.dataloaders(TRAIN_PATH, bs=12)

In [None]:
# Preview up to 9 samples as a visual sanity check of the data pipeline.
dls.show_batch(max_n = 9)

In [None]:
!pip install efficientnet_pytorch --quiet
from efficientnet_pytorch import EfficientNet
model = EfficientNet.from_pretrained('efficientnet-b4', num_classes=5)
model.train()

In [None]:
# Per-class weights handed to the loss function, one entry per class.
# NOTE(review): presumably chosen to offset the class imbalance shown by the
# value_counts cell above; confirm how they were derived.
weights = [20, 10, 10, 1.6, 10]
# Use the GPU when one is available, otherwise the CPU, so this cell also runs
# in CPU-only sessions.
class_weights = torch.tensor(
    weights,
    dtype=torch.float32,
    device="cuda" if torch.cuda.is_available() else "cpu",
)



In [None]:
# Kept as named objects so they can be inspected after training.
mixup = MixUp()
rocauc = RocAuc()

# Learner with a class-weighted cross-entropy loss. Early stopping and the
# LR-on-plateau reduction both monitor 'accuracy'.
# NOTE(review): fine_tune drives the learning rate with its own schedule;
# confirm ReduceLROnPlateau still has an effect alongside it.
learn = Learner(
    dls,
    model,
    loss_func=CrossEntropyLossFlat(weight=class_weights),
    metrics=[accuracy, rocauc],
    cbs=[
        ShowGraphCallback(),
        EarlyStoppingCallback(monitor='accuracy', patience=5),
        mixup,
        ReduceLROnPlateau(monitor="accuracy", patience=3),
    ],
)

In [None]:
# NOTE(review): `learn` is a plain Learner created without a `splitter`
# argument, so confirm that freeze() actually freezes any layers here.
learn.freeze()

In [None]:
# Run the learning-rate finder right after the freeze() call above.
learn.lr_find()

In [None]:
# Unfreeze the model and run the learning-rate finder again for comparison.
learn.unfreeze()
learn.lr_find()

In [None]:
# Train with fine_tune(20, 1e-3).
# NOTE(review): fine_tune presumably applies its own freeze/unfreeze phases, in
# which case the explicit unfreeze() in the previous cell does not carry over; confirm.
learn.fine_tune(20, 1e-3)

In [None]:
# Export the trained Learner to a .pkl file.
# NOTE(review): the file name presumably encodes the run configuration
# (EfficientNet-B4, fp32, full data, weighted loss, MixUp, ReduceLROnPlateau); confirm.
learn.export("B4_fp32_full_weightedloss_mixup_rlrp.pkl")

Models tried:
1. resnet34: validation loss starts diverging after a couple of epochs. Best val accuracy 0.82, best train loss 0.34. Train loss keeps going down while validation loss and accuracy stop improving after a while. Could try mixed precision to see whether it helps as regularization.
1. resnet34, fp16, pretrained: using just 5000 images for train/valid and a batch size of 256, training is much faster (1:17 minutes per epoch). Best val accuracy 0.82; again seems to hit some limit (12 epochs).
1. resnet50, fp16, pretrained: 5000 images, val accuracy 0.83.
1. VGG16, pretrained, 224, fp16: reaches 0.8575 val accuracy (20 epochs).
1. VGG16, pretrained, 448, fp16, half of the examples: starts learning quickly. Best val accuracy 0.8775.
1. VGG16, pretrained, 448, fp16, all examples: starts learning quickly. Best val accuracy ~0.87, no noticeable improvement.
1. VGG19, pretrained, 448, fp16, half of the examples, 10 epochs: best accuracy 0.8835.
1. ResNet101, pretrained, 448, fp16, half of the examples, 10 epochs: best accuracy 0.878.
