## Beginner notebook for a baseline using fastai 

* [EDA](#EDA)
* [DataBlock & dataloaders](#section-two)
* [Training a model](#section-3)
* [Predict & submit](#submit)

In [None]:
#!pip install -qq fastai --upgrade

In [None]:
# Star-import the fastai vision API; later cells rely on names it brings into
# scope (presumably including `torch`, `pd` and `Path` — confirm).
from fastai.vision.all import *
# Report the PyTorch version in use.
print(torch.__version__)
# Only query the device name when CUDA is available.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0), torch.cuda.is_available())

# Report the fastai version in use.
import fastai; print(fastai.__version__)
# Shell command: print the CUDA compiler version.
!nvcc --version

### EDA <a id="EDA"></a>

In [None]:
import os

# Print the full path of every directory below /kaggle/input; with
# topdown=False the walk yields the deepest directories first.
for parent, subdirs, _files in os.walk('/kaggle/input', topdown=False):
    for subdir in subdirs:
        print(os.path.join(parent, subdir))

In [None]:
# Display the current working directory (the relative paths used below are resolved against it).
Path.cwd()

In [None]:
# Shell command: show the directory tree under ../input/ (-d lists directories only).
!tree ../input/ -d

In [None]:
# Root folder of the competition data and its train/test image sub-folders.
DATA_DIR = Path("../input/cassava-leaf-disease-classification")
TRAIN_DIR, TEST_DIR = DATA_DIR.joinpath("train_images"), DATA_DIR.joinpath("test_images")
# Presumably used by fastai to display paths relative to the data root — confirm.
Path.BASE_PATH = DATA_DIR
# Display the three locations.
(DATA_DIR, TRAIN_DIR, TEST_DIR)

In [None]:
# Load the training annotations and preview the first two rows.
train_df = pd.read_csv(DATA_DIR/"train.csv")
train_df.head(2)

In [None]:
# Prefix every file name with its folder so that `image_id` can be joined
# directly onto DATA_DIR.
train_df['image_id'] = train_df['image_id'].map('train_images/{}'.format)
train_df.head(5)

In [None]:
# Human-readable disease names keyed by the integer class id in `train_df.label`.
idx2lbl = {
    0: "Cassava Bacterial Blight (CBB)",
    1: "Cassava Brown Streak Disease (CBSD)",
    2: "Cassava Green Mottle (CGM)",
    3: "Cassava Mosaic Disease (CMD)",
    4: "Healthy",
}

# Assign the result back instead of calling `.replace(..., inplace=True)` on
# the selected column: the in-place form is a chained assignment that warns in
# pandas 2.x and leaves `train_df` unchanged under Copy-on-Write.
train_df['label'] = train_df['label'].replace(idx2lbl)
train_df.head(5)

In [None]:
# Number of training rows per label (shows the class balance).
train_df.label.value_counts()

In [None]:
# List the contents of the training image folder.
TRAIN_DIR.ls()

In [None]:
# Collect the image files found under the training folder.
get_image_files(TRAIN_DIR)

In [None]:
# (rows, columns) of the training annotations.
train_df.shape

## Creating DataBlock and a dataloader <a id="section-two"></a>

In [None]:
# Labelling helper for the case where the DataBlock items are image file paths.
def get_label(path):
    """Return the label that `train_df` records for the image file at `path`.

    The lookup key is the file name prefixed with "train_images/", which is
    the format of `train_df.image_id`. The label of the first matching row is
    returned; an IndexError is raised when no row matches.
    """
    image_id = "train_images/" + path.name
    matches = train_df.loc[train_df.image_id == image_id]
    return matches.label.values[0]

# When using a pandas DataFrame as input
def get_x(row):
    """Return the image path for a `train_df` row: `DATA_DIR` joined with its `image_id`."""
    return DATA_DIR.joinpath(row.image_id)
def get_y(row):
    """Return the target label stored in the `label` field of a `train_df` row."""
    target = row.label
    return target

In [None]:
# Sanity check: look up the labels of the first five training image files.
get_image_files(TRAIN_DIR)[0:5].map(get_label)

In [None]:
# Recipe for turning one `train_df` row into an (image, category) sample.
datablock = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    # Use the named module-level helpers rather than lambdas: lambdas cannot
    # be pickled, which breaks exporting the DataLoaders/Learner.
    # (For file-path items, `get_image_files` / `get_label` are the
    # alternative noted by the original author.)
    get_x=get_x,
    get_y=get_y,
    # Fixed seed so the 80/20 train/validation split is reproducible.
    splitter=RandomSplitter(valid_pct=0.2, seed=42),
    # Resize each item to 460 before batching; batches are then cropped to 224.
    item_tfms=Resize(460),
    # NOTE(review): aug_transforms(min_scale=0.75) appears to add its own
    # random resized crop, so the explicit RandomResizedCropGPU may be
    # redundant — confirm before removing, as it changes the augmentation.
    batch_tfms=[
        RandomResizedCropGPU(224),
        *aug_transforms(size=224, min_scale=0.75),
        Normalize.from_stats(*imagenet_stats),
    ],
)

In [None]:
# Build the train/validation DataLoaders from the annotation DataFrame and
# display the size of each split.
dls = datablock.dataloaders(train_df)
len(dls.train.dataset), len(dls.valid.dataset)

In [None]:
# Visual sanity check of one batch from the DataLoaders.
dls.show_batch(figsize=(15,10))

## Training  <a id="section-3"></a>

In [None]:
if not os.path.exists('/root/.cache/torch/hub/checkpoints/'):
        os.makedirs('/root/.cache/torch/hub/checkpoints/')
!cp '../input/resnet50/resnet50.pth' '/root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth'

In [None]:
# Build the learner on a ResNet backbone. `resnet151` is not an architecture
# provided by fastai/torchvision (it raises NameError); `resnet152` is the
# closest real model.
# NOTE(review): the checkpoint copied into the torch hub cache in this
# notebook is for resnet50, so resnet152 weights must be downloaded or
# supplied separately — switch to `resnet50` if running without internet.
learner = cnn_learner(dls, resnet152, loss_func=CrossEntropyLossFlat(), metrics=accuracy)
# Show the callbacks and the loss function attached to the learner.
print("cbs: ", learner.cbs)
print("loss: ", learner.loss_func)

In [None]:
#learner.lr_find(num_it=100)

In [None]:
# Shell command: install or upgrade Weights & Biases quietly (needs internet access).
! pip install wandb --upgrade -qq

In [None]:
# Start a Weights & Biases run used to track this training experiment.
from fastai.callback.wandb import *
import wandb
# NOTE(review): the run name mentions "resnet151"; keep it in sync with the
# architecture actually passed to `cnn_learner`.
run_name = "resnet151 5-10 wd+CutMix"
# NOTE(review): `silent` is given as the string "True" rather than a boolean — confirm wandb accepts this.
settings = wandb.Settings(silent="True")
run = wandb.init(name=run_name, project='Kaggle - Cassava Leaf Disease Classification', settings=settings)

In [None]:
# Training hyperparameters.
lr = 0.01             # passed to fine_tune as base_lr
wd = 0.01             # weight decay
freeze_epochs = 5     # presumably epochs trained with the pretrained body frozen — confirm
epochs = 10           # presumably epochs trained after unfreezing — confirm

# Log to Weights & Biases and apply CutMix during training.
cbs = [WandbCallback(), CutMix(1.)]
#cbs = [WandbCallback()]

learner.fine_tune(epochs, base_lr = lr, wd=wd, freeze_epochs=freeze_epochs, cbs=cbs)

In [None]:
#learner.summary()

## Predict and submit <a id="submit"></a>

In [None]:
# Collect the image files found under the test folder.
get_image_files(TEST_DIR)

In [None]:
# Load the sample submission; it is filled with predictions and written out below.
sample_df = pd.read_csv(DATA_DIR/'sample_submission.csv')
sample_df.head()

In [None]:
# Work on a copy so `sample_df` keeps the bare file names for the submission,
# and prefix the copy's `image_id` with the test folder.
sample_copy = sample_df.copy()
sample_copy['image_id'] = sample_copy['image_id'].map('test_images/{}'.format)
sample_copy.head()

In [None]:
# Build a test DataLoader from the learner's DataLoaders using the prefixed
# test rows, then preview a batch.
test_dl = learner.dls.test_dl(sample_copy)
test_dl.show_batch()

In [None]:
# Predict on the test DataLoader with test-time augmentation (`tta`); the
# second returned value is discarded.
preds, _ = learner.tta(dl=test_dl)
preds

In [None]:
# `argmax` yields a position in `learner.dls.vocab`, which is not guaranteed
# to equal the competition's integer class id. Translate position -> label
# name -> id explicitly, so the submission stays correct even if the vocab
# order ever differs from the key order of `idx2lbl`.
lbl2idx = {name: idx for idx, name in idx2lbl.items()}
pred_names = [learner.dls.vocab[i] for i in preds.argmax(dim=-1).tolist()]
sample_df['label'] = [lbl2idx[name] for name in pred_names]

In [None]:
# Preview the filled-in submission.
sample_df.head()

In [None]:
# Write the submission file without the DataFrame index column.
sample_df.to_csv('submission.csv',index=False)