In [None]:
import numpy as np
import pandas as pd

In [None]:
from fastai.vision.all import *

In [None]:
import sys
# Offline copies of timm and wwf, made importable by adding them to the module search path.
# NOTE(review): the second entry is a wheel file rather than a directory — presumably it is
# imported as a zip archive; confirm.
OFFLINE_PACKAGE_PATHS = (
    '../input/timm-pytorch-image-models/pytorch-image-models-master',
    "../input/fastai2-offline/wwf-0.0.3-py3-none-any.whl",
)
sys.path.extend(OFFLINE_PACKAGE_PATHS)

In [None]:
import timm
from wwf.vision.timm import *

In [None]:
# Root folder of the competition data; list its contents as a quick sanity check.
path = Path("../input") / "cassava-leaf-disease-classification"
path.ls()

In [None]:
# Seed the random number generators up front so repeated runs start from the same state.
SEED = 2021
set_seed(SEED)

In [None]:
# Load the training labels and preview the first rows.
train_csv = path / "train.csv"
train_df = pd.read_csv(train_csv)
train_df.head()

In [None]:
# Per-item augmentation: random square crop (min_scale=0.75) resized to 224 px.
item_tfms = RandomResizedCrop(224, min_scale=0.75, ratio=(1.,1.))
# Batch-level augmentations with warping disabled, followed by ImageNet normalisation.
batch_tfms = [*aug_transforms(size=224, max_warp=0), Normalize.from_stats(*imagenet_stats)]

# `ImageDataLoaders.from_df` builds its own RandomSplitter from `valid_pct`.
# It has no `splitter` parameter: passing one leaves the default 20% hold-out in place,
# so the intended 10% validation split must be requested through `valid_pct`.
dls = ImageDataLoaders.from_df(
    df=train_df,
    folder=path/"train_images",
    item_tfms=item_tfms,
    batch_tfms=batch_tfms,
    valid_pct=0.1,
)
dls.show_batch()

This competition does not allow Internet access from the notebook.  
However, `cnn_learner()` automatically tries to download the pretrained ResNet weights from the Internet.  
To avoid this, add the ResNet weights as a dataset (from https://www.kaggle.com/pytorch/resnet18 or https://www.kaggle.com/pytorch/resnet34),  
and copy the file to `/root/.cache/torch/hub/checkpoints/resnet34-333f7ec4.pth`. The same approach is used below for the timm EfficientNet-B3 checkpoint.  

In [None]:
# Create the torch hub checkpoint cache directory (no error if it already exists).
!mkdir -p /root/.cache/torch/hub/checkpoints
# Copies for other backbones, currently disabled:
# !cp ../input/pytorch-pretrained-models/xception-43020ad28.pth /root/.cache/torch/hub/checkpoints/xception-43020ad28.pth
# !cp ../input/pytorch-pretrained-models/inceptionresnetv2-520b38e4.pth /root/.cache/torch/hub/checkpoints/inception_resnet_v2-940b1cd6.pth
# !cp ../input/pytorch-pretrained-models/densenet121-fbdb23505.pth /root/.cache/torch/hub/checkpoints/densenet121_ra-50efcf5c.pth
# !cp ../input/pytorch-pretrained-models/inceptionv4-8e4777a0.pth /root/.cache/torch/hub/checkpoints/inceptionv4-8e4777a0.pth
# Copy the EfficientNet-B3 checkpoint into the cache under a different file name.
# NOTE(review): the source file is "efficientnet_b3_ra-a5e2fbc7.pth" but it is stored as
# "efficientnet_b3_ra2-cf984f9c.pth" — presumably the name the installed timm looks up for
# "efficientnet_b3"; confirm against the timm version in use.
!cp ../input/timm-pretrained-efficientnet/efficientnet/efficientnet_b3_ra-a5e2fbc7.pth /root/.cache/torch/hub/checkpoints/efficientnet_b3_ra2-cf984f9c.pth

In [None]:
# Number of training images per label.
label_counts = train_df["label"].value_counts()
label_counts


In [None]:
from sklearn.utils import class_weight

# Per-class sample counts ordered by label id 0..4.
# Despite the name, these are raw counts; the loss weights are derived from them later.
# value_counts() is computed once instead of once per class.
label_counts = train_df["label"].value_counts()
class_weights = [label_counts[i] for i in range(5)]
class_weights


In [None]:
# Inverse-frequency weights: total number of training rows divided by each class count,
# so classes with fewer samples get a larger weight in the loss.
# The tensor is placed on the DataLoaders' device instead of a hard-coded `.cuda()`,
# so this cell also runs in a CPU-only session.
weights = len(train_df) / torch.tensor(class_weights).to(dls.device)
weights

In [None]:
# learn = cnn_learner(
#     dls, 
#     resnet34, 
#     metrics=accuracy,
#     loss_func=CrossEntropyLossFlat(weight=weights),
#     opt_func=ranger,    
#     ps=0.8,
# ).to_fp16()

In [None]:
# List the model names matching "efficientnet*" that the installed timm provides.
timm.list_models("efficientnet*")

In [None]:
# Cross-entropy weighted per class with the `weights` tensor computed above.
weighted_ce = CrossEntropyLossFlat(weight=weights)

# timm "efficientnet_b3" wrapped in a fastai Learner and switched to mixed precision.
# Left-over alternatives that are not active: LabelSmoothingCrossEntropy() as loss, ranger as optimizer.
learn = timm_learner(dls, "efficientnet_b3", loss_func=weighted_ce, metrics=accuracy)
learn = learn.to_fp16()

In [None]:
import gc 
# Run a garbage-collection pass; the cell displays the value gc.collect() returns.
gc.collect()

In [None]:
# Run the learning-rate finder on the learner.
# NOTE(review): the fine_tune call in this notebook passes a fixed 1e-1, so this result is informational only.
learn.lr_find()

In [None]:
# Callbacks handed to fine_tune (MixUp is left disabled).
training_cbs = [
    SaveModelCallback(),
    EarlyStoppingCallback(patience=10),
    GradientAccumulation(),
    # MixUp(),
]

# 5 frozen epochs followed by 25 epochs, with a base learning rate of 1e-1.
# NOTE(review): 1e-1 is far above fastai's documented default base_lr and the lr_find
# output is not consulted — confirm this value is intended.
learn.fine_tune(25, base_lr=1e-1, freeze_epochs=5, cbs=training_cbs)

In [None]:
# def fit_with(wd):
#     learn = cnn_learner(
#         dls, 
#         resnet50, 
#         metrics=accuracy,
#         opt_func=ranger,    
#         wd=wd,
#         loss_func=CrossEntropyLossFlat(weight=weights)
#     ).to_fp16()
            
#     learn.fit_one_cycle(
#         10, 
#         slice(1e-6, 1e-4), 
#     )
        
#     acc = learn.validate()[1]
        
#     return acc

# pbounds = {
#     "wd": (0.01, 10)
# }

# optimizer = BayesianOptimization(
#     f=fit_with,
#     pbounds=pbounds,
#     verbose=1,
# )

# optimizer.maximize()

# optimizer.max

In [None]:
# Save the learner under the name "fine_tune_5+25" (matching freeze_epochs=5 and 25 epochs in the fine_tune call).
learn.save("fine_tune_5+25")

In [None]:
# Reload the state saved under the name "fine_tune_5+25".
learn.load("fine_tune_5+25")

In [None]:
# Plot the losses tracked by the learner's recorder.
learn.recorder.plot_loss()

In [None]:
# Display a sample of the model's predictions for a visual check.
learn.show_results()

In [None]:
# Build a ClassificationInterpretation object from the learner for the error-analysis plots.
interp = ClassificationInterpretation.from_learner(learn)

In [None]:
# Show the 9 samples with the highest loss.
interp.plot_top_losses(9)

In [None]:
# Plot the confusion matrix of the interpreted predictions.
interp.plot_confusion_matrix()

In [None]:
# The sample submission lists the test image ids and serves as the template for the output file.
sample_csv = path / "sample_submission.csv"
submission_df = pd.read_csv(sample_csv)
submission_df.head()

In [None]:
# Turn every image id into the full path of the file inside the test image folder.
test_dir = path / 'test_images'
test_image_path_series = submission_df["image_id"].apply(lambda image_id: test_dir / image_id)
test_image_path_series.head()

In [None]:
# Create a test DataLoader over the test image paths from the learner's DataLoaders.
test_dl = learn.dls.test_dl(test_image_path_series)

In [None]:
# Predict on the test DataLoader; only the first returned element (the predictions) is kept.
preds, _ = learn.get_preds(dl=test_dl)

In [None]:
# Predicted class index = position of the highest score in each row of `preds`.
# The tensor's own argmax is used and pandas receives a plain NumPy array,
# instead of routing a torch tensor through np.argmax.
# NOTE(review): the index is written out directly as the label, which assumes the
# DataLoaders' vocab is exactly [0, 1, 2, 3, 4] — confirm.
result = preds.argmax(dim=1).numpy()
submission_df["label"] = result
submission_df.head()

In [None]:
# Write the submission file to the working directory, without the DataFrame index column.
submission_df.to_csv("submission.csv", index=False)