## Creating a fastai pipeline

Adapted from: https://www.kaggle.com/slm37102/cassava-leaf-disease-classification-fastai

## Initial setup

In [None]:
# Upgrade fastai to the latest release (-U) with pip output silenced (-qq).
!pip install -Uqq fastai

In [None]:
import pandas as pd
from fastai.vision.all import *

import os

In [None]:
# Best-effort GPU probe: print the name of CUDA device 0, or — if it cannot be
# queried — print the error plus a hint instead of stopping the notebook.
try:
    gpu_name = torch.cuda.get_device_name(0)
    print(gpu_name)
except Exception as err:
    print(err)
    print("Please enable gpu to run. Or comment this cell.")


## Import train data

In [None]:
# Root folder of the plant-pathology-2021-fgvc8 dataset, relative to the
# notebook's working directory; the other cells build their paths from it.
data_path = Path("../input/plant-pathology-2021-fgvc8")

In [None]:
# Show the dataset root as a plain string in the cell output.
str(data_path)

In [None]:
# Load the training table from the dataset root held in `data_path`, so the
# directory is written in one place only.
train_df = pd.read_csv(data_path/'train.csv')
# Display the table as the cell output.
train_df

The labels are verbose strings, and some images carry more than one label.

## Create Dataloaders

In [None]:
def get_x(r):
    """Return the path of the training image named in row ``r``.

    ``r`` must support ``r['image']``; the file name found there is resolved
    inside the ``train_images`` folder under ``data_path``.
    """
    image_name = r['image']
    return data_path / 'train_images' / image_name

def get_y(r):
    """Return the list of label names stored in row ``r``.

    ``r['labels']`` is a whitespace-separated string of label names. Splitting
    on arbitrary whitespace (rather than on a single space) keeps leading,
    trailing or repeated spaces from producing empty-string labels.
    """
    return r['labels'].split()

In [None]:
def get_data(size=224, bs=128, data_df=train_df, item_size=128):
    """Build fastai ``DataLoaders`` for multi-label image classification.

    Args:
        size: Image size produced by the batch-level augmentations.
        bs: Batch size.
        data_df: Table whose rows are read by ``get_x`` / ``get_y``. The
            default is the ``train_df`` object that existed when this
            function was defined.
        item_size: Size of the per-item random crop applied before batching.
            NOTE(review): the default (128) is smaller than the default
            ``size`` (224), so the batch transforms enlarge the crops;
            pass a value >= ``size`` to avoid upscaling.

    Returns:
        The ``DataLoaders`` created from ``data_df`` with batch size ``bs``.
    """
    item_crop = RandomResizedCrop(item_size, min_scale=0.35)
    # Augment on the batch (vertical flips enabled), then normalise with the
    # ImageNet statistics.
    batch_steps = [
        *aug_transforms(size=size, flip_vert=True),
        Normalize.from_stats(*imagenet_stats),
    ]
    dblock = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
                       splitter=RandomSplitter(seed=42),  # fixed seed: repeatable split
                       get_x=get_x,
                       get_y=get_y,
                       item_tfms=item_crop,
                       batch_tfms=batch_steps)
    return dblock.dataloaders(data_df, bs=bs)

## Let's take a look

In [None]:
# Build the DataLoaders with get_data's default arguments, then display a
# sample batch for a visual sanity check.
dls = get_data()
dls.show_batch()

## Creating a learner

In [None]:
# Create a learner on `dls` with a resnet18 architecture, tracking multi-label
# accuracy at a 0.2 decision threshold. `vision_learner` is the current name
# of the deprecated `cnn_learner`.
learn = vision_learner(dls, resnet18, metrics=partial(accuracy_multi, thresh=0.2))

In [None]:
# Move the model to the GPU only when CUDA is usable, so this cell does not
# raise on a CPU-only runtime.
if torch.cuda.is_available():
    learn.model = learn.model.cuda()

In [None]:
# learn.lr_find()

In [None]:
# Fine-tune with freeze_epochs=4 and 2 further epochs at base_lr=1e-2.
# Any failure is only printed, so the notebook keeps running.
# NOTE(review): after a failure the later cells use whatever state the model
# was left in — check this cell's output before trusting the submission.
try:
    learn.fine_tune(2, freeze_epochs=4, base_lr=1e-2)
except Exception as err:
    print(err)

## Make Submission file

In [None]:
# Load the sample submission from the dataset root as the template for the
# final submission (the file is spelled `sample_submission.csv`).
submission_df = pd.read_csv(data_path/'sample_submission.csv')
# Display the table as the cell output.
submission_df

## Prediction using TTA (test-time augmentation)

In [None]:
# Resolve every image name in the submission table to its file under
# `test_images`, and wrap the paths in a test DataLoader.
test_data_path = submission_df["image"].apply(lambda x: data_path/'test_images'/x)
tst_dl = learn.dls.test_dl(test_data_path)
# Test-time augmentation with n=10 passes; `predictions[0]` holds the
# per-class scores for each test image.
predictions = learn.tta(dl=tst_dl, n=10, beta=0)

print(predictions)

# Multi-label decoding: keep every class whose score exceeds the threshold
# (0.2, the same value the accuracy_multi metric is configured with) and map
# the class indices back to label names through the DataLoaders vocabulary.
thresh = 0.2
vocab = learn.dls.vocab
decoded_labels = []
for scores in predictions[0].tolist():
    picked = [vocab[i] for i, score in enumerate(scores) if score > thresh]
    if not picked:
        # Nothing cleared the threshold: fall back to the top-scoring class
        # so that no row is submitted with an empty label string.
        picked = [vocab[int(np.argmax(scores))]]
    decoded_labels.append(' '.join(picked))

# Write space-separated label names into the `labels` column.
submission_df['labels'] = decoded_labels
submission_df

In [None]:
# Write the submission table to submission.csv, leaving out the DataFrame index.
submission_df.to_csv('submission.csv', index=False)

In [None]:
# Display the learner's results view for a visual check of its predictions.
learn.show_results()