<a href="https://www.kaggle.com/code/arminatc/sorghum-fastai?scriptVersionId=94274462" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
!pip install -Uqq fastbook
import fastbook

In [None]:
from fastbook import *

In [None]:
# Pin fastai to the 2.6 release.
# NOTE(review): `from fastbook import *` ran in an earlier cell; if that already
# imported fastai into this kernel, the pinned version presumably only takes
# effect after a kernel restart — confirm against the version printed below.
!pip install fastai==2.6

In [None]:
import fastai

In [None]:
# Show which fastai version is actually loaded in this session.
print(fastai.__version__ )

In [None]:
from fastai.vision.all import *

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import tqdm
# Default size for every matplotlib figure created in this notebook.
mpl.rcParams.update({"figure.figsize": (18, 12)})

In [None]:
# Let pandas show up to 150 columns and 150 rows before truncating output.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, 150)

In [None]:
# Make GPU index 0 the current CUDA device for this process.
# NOTE(review): presumably assumes a CUDA-capable GPU is present — confirm
# the behaviour on CPU-only hosts.
torch.cuda.set_device(0)

In [None]:
def seed_everything(seed=0):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and puts cuDNN into a reproducible
    configuration.

    Args:
        seed: Integer seed applied to all generators. Defaults to 0.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. subprocesses);
    # the hash seed of the running interpreter is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one. This is a no-op when
    # CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run, which
    # would defeat the deterministic flag above, so it is switched off.
    torch.backends.cudnn.benchmark = False


seed_everything()

In [None]:
# Root directory of the dataset, relative to the current working directory.
# The mapping CSV, `train_images` and `test` are all read from under it.
path = Path('./sorghum-id-fgvc-9');
# Last expression of the cell: lists the directory contents.
path.ls()

# Data exploration

In [None]:
# Load the CSV that maps each training image to its cultivar label
# (the `image` and `cultivar` columns are the ones used later on).
sorghum_df = pd.read_csv(path/'train_cultivar_mapping.csv');
# Preview the first rows.
sorghum_df.head()

In [None]:
# Keep only the file names in train_images that also appear in the mapping CSV.
# The known names are collected into a set once, so every membership test is
# O(1) instead of rebuilding and linearly scanning a list for each file.
known_images = set(sorghum_df.image)
allowed_image = [img.name for img in (path/'train_images').ls() if img.name in known_images]

In [None]:
# Drop mapping rows whose image name is not listed in `allowed_image`.
sorghum_df = sorghum_df[sorghum_df.image.isin(allowed_image)]

In [None]:
# Bar chart of the number of rows per cultivar, to eyeball class balance.
sorghum_df.cultivar.value_counts().plot(kind = 'bar')

# DataLoaders

In [None]:
# Build the DataLoaders from the mapping DataFrame:
#   - 10% of the rows are held out for validation,
#   - per item: resize to 460,
#   - per batch: augment down to 224 and normalise with the ImageNet statistics.
_item_tfms = Resize(460)
_batch_tfms = [
    *aug_transforms(size=224, min_scale=0.75),
    Normalize.from_stats(*imagenet_stats),
]
dls = ImageDataLoaders.from_df(
    sorghum_df,
    path/'train_images',
    valid_pct=0.10,
    item_tfms=_item_tfms,
    batch_tfms=_batch_tfms,
    bs=64,
    num_workers=4,
    label_col="cultivar",
)

In [None]:
# Visual sanity check of one batch produced by the DataLoaders.
dls.show_batch()

In [None]:
# Class vocabulary and number of classes held by the DataLoaders.
print(dls.vocab)
print(dls.c)

# Create the Learner

In [None]:
# ResNet-50 learner switched to mixed precision (fp16), reporting both error
# rate and accuracy; `model_dir` points model files at /tmp/model/.
# NOTE(review): presumably starts from pretrained weights (library default) — confirm.
learn = cnn_learner(dls, resnet50, metrics=[error_rate, accuracy], model_dir="/tmp/model/").to_fp16()

In [None]:
def find_appropriate_lr(model:Learner, lr_diff:int = 15, loss_threshold:float = .05, adjust_value:float = 1, plot:bool = False) -> float:
    """Pick a learning rate from an LR-finder sweep by locating where the loss curve settles.

    Runs ``model.lr_find()`` and then slides a window that is ``lr_diff``
    steps wide from the end of the recorded sweep towards its start. The
    window keeps moving while the loss gradient at its two ends differs by
    more than ``loss_threshold``. The learning rate at the left end of the
    last window that still exceeded the threshold is selected.

    Args:
        model: Learner to run the sweep on; its ``recorder.losses`` and
            ``recorder.lrs`` are read afterwards.
        lr_diff: Width of the sliding window, in recorded steps.
        loss_threshold: Gradient difference above which the curve is still
            considered to be changing.
        adjust_value: Multiplier applied to the selected learning rate.
        plot: When True, plot the loss gradient and the loss curve with the
            stopping point / selected learning rate marked in red.

    Returns:
        The selected learning rate multiplied by ``adjust_value``.

    Raises:
        ValueError: If ``lr_diff`` is not smaller than the number of recorded
            losses, or if no window exceeds ``loss_threshold`` (so there is
            nothing to select).
    """
    model.lr_find()

    losses = np.array(model.recorder.losses)
    lrs = model.recorder.lrs
    n_steps = len(losses)
    if lr_diff >= n_steps:
        raise ValueError(
            f"lr_diff ({lr_diff}) must be smaller than the number of recorded losses ({n_steps})"
        )
    loss_grad = np.gradient(losses)

    # Negative indices: the window starts at the very end of the sweep and
    # walks left one step at a time.
    r_idx = -1
    l_idx = r_idx - lr_diff
    local_min_lr = None
    while (l_idx >= -n_steps) and (abs(loss_grad[r_idx] - loss_grad[l_idx]) > loss_threshold):
        local_min_lr = lrs[l_idx]
        r_idx -= 1
        l_idx -= 1

    if local_min_lr is None:
        # The very first window was already below the threshold, so the scan
        # never selected anything. Fail with an actionable message instead of
        # an UnboundLocalError.
        raise ValueError(
            "no window of the LR sweep exceeded loss_threshold; "
            "try a smaller loss_threshold or a different lr_diff"
        )

    lr_to_use = local_min_lr * adjust_value

    if plot:
        # When the scan consumed the whole sweep, l_idx ends one step before
        # the first element; clamp it so the marker stays inside the array.
        marker_idx = max(l_idx, -n_steps)
        plt.plot(loss_grad)
        plt.plot(n_steps + marker_idx, loss_grad[marker_idx], markersize=10, marker='o', color='red')
        plt.ylabel("Loss")
        plt.xlabel("Index of LRs")
        plt.show()

        log_lrs = np.log10(lrs)
        log_lr_to_use = np.log10(lr_to_use)
        plt.plot(log_lrs, losses)
        plt.ylabel("Loss")
        plt.xlabel("Log 10 Transform of Learning Rate")
        # Interpolate so the marker sits on the curve even when adjust_value
        # moves the learning rate between two recorded points.
        loss_coord = np.interp(log_lr_to_use, log_lrs, losses)
        plt.plot(log_lr_to_use, loss_coord, markersize=10, marker='o', color='red')
        plt.show()

    return lr_to_use

In [None]:
# Run the learning-rate search once (timed with the %time magic) and keep the
# result for the training cell.
%time lr_to_use = find_appropriate_lr(learn)

In [None]:
# Display the selected learning rate.
lr_to_use

# Training

In [None]:
# Fine-tune with 15 epochs at the selected learning rate (timed).
# NOTE(review): presumably preceded by the library's default frozen epoch(s) — confirm.
%time learn.fine_tune(15, lr_to_use)

In [None]:
# Show a sample of model results.
learn.show_results()

In [None]:
# Interpretation helper built from the trained learner; used by the two plots below.
interp = ClassificationInterpretation.from_learner(learn)

In [None]:
# Plot the 9 top-loss samples.
interp.plot_top_losses(9, figsize=(15,10))

In [None]:
# Confusion matrix over all classes.
interp.plot_confusion_matrix(figsize=(20,20))

# Predictions

In [None]:
# Test DataLoader over the image files found under <path>/test.
test_dl = dls.test_dl(get_image_files(path/'test'))

In [None]:
# get_preds returns (predictions, targets); only the predictions are needed.
# fastai applies the loss function's activation by default, so for this
# classifier they are expected to already be per-class probabilities.
test_probs = learn.get_preds(dl=test_dl)[0]
# Index of the most likely class for every test image.
log_preds_test = np.argmax(test_probs, axis = 1)
preds_classes = [dls.vocab[i] for i in log_preds_test]
# Full probability matrix (one row per image, one column per class). It is
# taken from the model output directly: exponentiating is not needed, and
# must never be applied to the argmax indices above.
probs = np.asarray(test_probs)

In [None]:
# Take the file names from the test DataLoader's own item list, so that row i
# of the submission is the image that produced prediction i. A separate
# os.listdir() call is not guaranteed to return the same files in the same
# order (and would also include any non-image files in the folder).
submission = pd.DataFrame({ 'filename': [Path(item).name for item in test_dl.items], 'cultivar': preds_classes })

In [None]:
# Inspect the submission table before writing it out.
submission

In [None]:
# Write the submission file without the DataFrame index column.
submission.to_csv('submission.csv', index=False)