# Classifying rice diseases

Solving the Kaggle competition _Paddy Disease Classification_ with fastai.

## Setup

In [None]:
from fastai.vision.all import *
from fastcore.parallel import *
from pathlib import Path

In [None]:
# Resolve all data locations relative to the parent of the current working
# directory; the competition files live under <root>/data/paddy_disease.
ROOT_PATH = Path.cwd().parent
DATA_PATH = ROOT_PATH.joinpath("data", "paddy_disease")
TRAIN_DATA_PATH = DATA_PATH.joinpath("train_images")
TEST_DATA_PATH = DATA_PATH.joinpath("test_images")

## Process data

In [None]:
# Collect the paths of all image files found under the training directory.
trn_files = get_image_files(TRAIN_DATA_PATH)

In [None]:
# Sanity check: open the last training image and show it as a thumbnail
# (256 passed to to_thumb).
img = PILImage.create(trn_files[-1])
img.to_thumb(256)

In [None]:
def get_img_size(img):
    """Open the image referenced by ``img`` and return its ``size``.

    ``img`` is whatever ``PILImage.create`` accepts (the notebook passes
    file paths). The result is the opened image's ``size`` attribute,
    which PIL reports as ``(width, height)``.
    """
    opened = PILImage.create(img)
    return opened.size

In [None]:
# Read the size of every training image using 8 parallel workers, then
# tally how often each distinct size occurs.
img_sizes = parallel(get_img_size, trn_files, n_workers=8)
size_counts = pd.Series(img_sizes).value_counts()

In [None]:
# Show the size -> count table to see whether the images share one size.
size_counts

In [None]:
# Per-item transform: squish each image to 480px so items can be batched.
resize_tfm = Resize(480, method="squish")
# Per-batch transform: augment and bring the batch down to 128px.
augment_tfms = aug_transforms(size=128, min_scale=0.75)

# Build the loaders from the training folder, holding out 20% of the images
# for validation; the fixed seed keeps that split reproducible.
data_loaders = ImageDataLoaders.from_folder(
    TRAIN_DATA_PATH,
    valid_pct=0.2,
    seed=1337,
    item_tfms=resize_tfm,
    batch_tfms=augment_tfms,
)

In [None]:
# Eyeball up to three samples from a batch to verify the transforms.
data_loaders.show_batch(max_n=3)

## Train model

In [None]:
# Create a learner on the "resnet26d" architecture that reports error rate,
# then switch it to half-precision (fp16) training.
learner = vision_learner(data_loaders, "resnet26d", metrics=error_rate)
learner = learner.to_fp16()

In [None]:
# Run the learning-rate finder and ask for both the `valley` and `slide`
# suggestions to help pick a base learning rate.
learner.lr_find(suggest_funcs=(valley, slide))

In [None]:
# Fine-tune for 4 epochs at a base learning rate of 0.008.
learner.fine_tune(epochs=4, base_lr=0.008)

## Generate predictions

In [None]:
# Collect the test image paths in sorted order so predictions come out in a
# deterministic order.
# NOTE(review): the submission step below appears to rely on this order
# matching the rows of sample_submission.csv — confirm.
tst_files = get_image_files(TEST_DATA_PATH).sorted()

In [None]:
# Build a test DataLoader from the training loaders, fed with the test files.
tst_data_loaders = data_loaders.test_dl(tst_files)

In [None]:
# Predict on the test set. With with_decoded=True three values come back;
# the second is discarded and the third is later used as class indices.
probs, _, idxs = learner.get_preds(dl=tst_data_loaders, with_decoded=True)

In [None]:
# Lookup table from class index to the category at that position in the vocab.
idx_cat_mapping = {idx: cat for idx, cat in enumerate(data_loaders.vocab)}

In [None]:
# Turn the predicted class indices into category names via the lookup table.
pred_idxs = pd.Series(idxs.numpy())
labels = pred_idxs.map(idx_cat_mapping)

## Submit to Kaggle

In [None]:
# Load the sample submission file from the data directory to use as a template.
sample_sub = pd.read_csv(DATA_PATH / "sample_submission.csv")

In [None]:
# Fill in the predicted labels. Assigning a Series aligns on index, so row i
# receives labels[i].
# NOTE(review): assumes the template rows are in the same order as the
# sorted test files — verify against sample_submission.csv.
sample_sub["label"] = labels

In [None]:
# Preview the first rows of the submission before writing it out.
sample_sub.head()

In [None]:
# Write the submission next to the data, without the DataFrame index column.
sample_sub.to_csv(DATA_PATH / "resnet26d_sub.csv", index=False)