# **This is my very first Kaggle competition and notebook**

I reworked this notebook to train on a custom resized dataset, since the kernel was very slow on the full-sized images. I also experimented on GCP with many different architectures, including deeper ResNets, ResNeXt, EfficientNet and others, both pretrained and not, but nothing performed any better than plain old ResNet-50.

In [None]:
import pandas as pd
from fastai.vision.all import *

In [None]:
#check gpu install and availability
import torch
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.cuda.current_device())
!nvidia-smi

In [None]:
# The kernel has no internet access, so the pretrained resnet50 weights are copied
# from an attached input dataset into the torch hub checkpoint cache folder, which
# (per the original author's note) fastai searches by default when training.
# Remove any previous copy of the checkpoint first.
!rm -rf /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth
# Make sure the cache directory (and its parents) exists before copying.
Path('/root/.cache/torch/hub/checkpoints/').mkdir(exist_ok=True, parents=True)
!cp '../input/resnet50-no-internet/resnet50-19c8e357.pth' '/root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth'
# List the cache directory to confirm the checkpoint is in place.
!ls -ltr /root/.cache/torch/hub/checkpoints/

In [None]:
# Notebook configuration: every input/output location used below is defined here.
# (A habit that is arguably more useful in plain Python scripts than in a notebook.)
INPUT_DIR = "/kaggle/input/plant-pathology-2021-fgvc8"
OUTPUT_DIR = "/kaggle/working"

# Files written by this notebook live under OUTPUT_DIR.
PICKLE_LEARNER = f"{OUTPUT_DIR}/96acc-resnet50-lr3e-2.pkl"
SUBMISSION = f"{OUTPUT_DIR}/submission.csv"

# Training images are read from the resized 600x400 dataset; the test images
# and the labels CSV are read from INPUT_DIR.
TRAINING_DATA_DIRECTORY = "/kaggle/input/plant-pathology-2021-fgvc8-resized-600x400"
TEST_DATA_DIRECTORY = f"{INPUT_DIR}/test_images"
LABELS_FILE = f"{INPUT_DIR}/train.csv"

# Echo the two key locations so a wrong path is visible immediately.
print(f"config path: {INPUT_DIR}")
print(f"labels file: {LABELS_FILE}")

In [None]:
# Read the training labels CSV into a DataFrame so it can be inspected below.
df = pd.read_csv(LABELS_FILE)

In [None]:
# Check the labels: display the first rows of the labels DataFrame.
df.head()

In [None]:
# Load the data. Images are read from the custom resized dataset for faster training,
# while the labels come from the competition CSV (space-delimited labels per image).
per_item_tfms = Resize(460)
per_batch_tfms = [*aug_transforms(size=224), Normalize.from_stats(*imagenet_stats)]
dls = ImageDataLoaders.from_csv(
    INPUT_DIR,
    LABELS_FILE,
    folder=TRAINING_DATA_DIRECTORY,
    delimiter=',',
    label_delim=' ',
    item_tfms=per_item_tfms,
    batch_tfms=per_batch_tfms,
    bs=32,
)

In [None]:
# Look at the classes (the label vocabulary) held by the DataLoaders.
dls.vocab

In [None]:
# Look at a batch of images with their labels (an image can carry multiple classes).
dls.show_batch()

In [None]:
# Create a trainer on the resnet50 architecture; the pretrained weights come from
# the checkpoint cached locally, since the kernel has no internet access.
# The reported metric is multi-label accuracy with a 0.5 threshold.
multi_label_accuracy = partial(accuracy_multi, thresh=0.5)
learn = cnn_learner(
    dls,
    resnet50,
    metrics=multi_label_accuracy,
    model_dir='/kaggle/working',
)

In [None]:
#find a reasonable learning rate via this helpful fastai learning rate finder 
#learn.lr_find()

In [None]:
# Train with fine_tune, passing 4 epochs and a base learning rate of 3e-2.
# Per the original author's note, fine_tune trains the head initially and then
# all the other layers for the four epochs.
learn.fine_tune(4, 3e-2)

In [None]:
# Save (export) the trained learner to PICKLE_LEARNER so it can be reloaded for inference later.
learn.export(fname=PICKLE_LEARNER)

In [None]:
# Load the pickled learner back from PICKLE_LEARNER for inference.
inf_learn = load_learner(PICKLE_LEARNER)

In [None]:
# Check the classes (vocab) carried by the reloaded inference learner.
inf_learn.dls.vocab

In [None]:
# Collect the test image files from TEST_DATA_DIRECTORY using the fastai convenience function.
test_files = get_image_files(TEST_DATA_DIRECTORY)

In [None]:
# See how many test images were found.
len(test_files)

In [None]:
# Build a test DataLoader over the test files from the inference learner's DataLoaders.
test_dl = inf_learn.dls.test_dl(test_files)

In [None]:
# Show a batch from the test DataLoader as a visual sanity check.
test_dl.show_batch()

In [None]:
# Get predictions on the test set.
# with_decoded=True adds the decoded predictions (dec_preds) to the result;
# the middle return value is not needed here and is discarded.
preds,_,dec_preds = inf_learn.get_preds(dl=test_dl, with_decoded=True)

In [None]:
# Check out the predictions and the decoded results.
preds, dec_preds

In [None]:
#look at the first class predicted
#inf_learn.dls.vocab[dec_preds[0]]

In [None]:
# Create an empty DataFrame that will hold the submission.
sub_df = pd.DataFrame()

In [None]:
# Fill the submission DataFrame: one image file name and one space-separated
# label string per test image.
# Iterate over test_dl.items (the files the predictions were computed for) rather
# than inf_learn.dl, which is not the test DataLoader and may be reset to None by
# the learner once get_preds has finished.
vocab = inf_learn.dls.vocab
img = []
lab = []
for item, decoded in zip(test_dl.items, dec_preds):
    # Index the vocab with this image's decoded prediction to get its class names.
    # A dedicated name is used so the `preds` tensor returned by get_preds is not
    # overwritten, and join() avoids a trailing space in the label string.
    label_str = ' '.join(vocab[decoded])
    print(f"{item.name} : {label_str}")
    img.append(item.name)
    lab.append(label_str)
# NOTE(review): an image with no predicted class gets an empty label string;
# confirm that the competition's submission format accepts this.
sub_df['image'] = img
sub_df['labels'] = lab
sub_df.head()

In [None]:
# Save the submission CSV to SUBMISSION, leaving out the DataFrame index column.
sub_df.to_csv(SUBMISSION, index=False)