In [None]:
# Notebook setup: (re)load the autoreload extension and put it in mode 2 so
# edited modules are picked up live, then select the inline matplotlib backend
# so figures render in the cell output.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from fastai.vision import *

In [None]:
# Root of the competition data; later cells build labels.csv, train/ and test/
# paths from it.
path = Path('/kaggle/input/dog-breed-identification')

In [None]:
# Show what the dataset directory contains.
path.ls()

In [None]:
# Read the label table and preview its first rows.
# NOTE: the name `df` is reassigned further down when the submission frame is built.
df = pd.read_csv(path/'labels.csv')
df.head()

In [None]:
# Seed NumPy's global RNG first so the random validation split is the same on every run.
np.random.seed(42)

# Build the data source one stage at a time.
# 1. Items come from labels.csv; files live under train/ and get '.jpg' appended.
src = ImageList.from_csv(path, 'labels.csv', folder='train', suffix='.jpg')
# 2. Hold out a random 20% for validation.
src = src.split_by_rand_pct(0.2)
# 3. Take the label from the 'breed' column.
src = src.label_from_df(cols='breed')
# 4. Attach the images under test/ as the test set.
src = src.add_test_folder(test_folder='test')

In [None]:
# Augmentation settings handed to get_transforms; affine and lighting
# transforms are both applied with probability 1.
tfms = get_transforms(
    max_rotate=20,
    max_zoom=1.3,
    max_lighting=0.4,
    max_warp=0.4,
    p_affine=1.,
    p_lighting=1.,
)

In [None]:
# Batch size and image side length for this pass: the smaller 224x224
# resolution comes first, before the full size 352x352.
bs = 64
size = 224

In [None]:
# Apply the augmentations at the chosen size, batch the result into a
# databunch, and normalize with the ImageNet statistics.
data = (
    src.transform(tfms, size=size)
       .databunch(bs=bs)
       .normalize(imagenet_stats)
)

In [None]:
# Preview a batch as a 3-row image grid to sanity-check images and labels.
data.show_batch(rows=3, figsize=(12, 9))

In [None]:
# Visualizing transformations
def _plot(i, j, ax):
    """Draw training item 4, with its label, on `ax`.

    `i` and `j` (the grid position passed by plot_multi) are accepted but unused:
    every cell shows the same item.
    """
    sample = data.train_ds[4]
    image, label = sample
    image.show(ax, y=label)

# 3x3 grid of the same item; each cell fetches it from train_ds again
# (presumably yielding a different random augmentation each time -- confirm).
plot_multi(_plot, 3, 3, figsize=(8,8))

In [None]:
# Model architecture and the metrics passed to the learner below.
arch = models.resnet50
metrics = [error_rate, accuracy]

In [None]:
# Build the learner from the databunch, the chosen architecture and the metrics.
learn = cnn_learner(data, arch, metrics=metrics)

In [None]:
# Directory the learner uses for model files (presumably where the learn.save
# calls below write their checkpoints -- TODO confirm).
learn.model_dir = '/kaggle/working'

In [None]:
# Run the learning-rate finder, then plot what the recorder captured with a
# suggested learning rate marked.
learn.lr_find()
learn.recorder.plot(suggestion=True)

In [None]:
# Read the suggested learning rate off the recorder (presumably populated by the
# plot(suggestion=True) call above -- run that cell first) and display it.
min_grad_lr = learn.recorder.min_grad_lr
print(min_grad_lr)

In [None]:
# First training stage: 3 one-cycle epochs, passing the suggested rate as the
# stop value of the slice (slice(x) is slice(None, x)).
learn.fit_one_cycle(3, slice(min_grad_lr))

In [None]:
# Checkpoint the learner after stage 1 (224px, resnet50).
learn.save('stage-1-224-rn50')

In [None]:
# Unfreeze the learner before the second training stage (presumably makes all
# layers trainable rather than only the head -- confirm against fastai docs).
learn.unfreeze()

In [None]:
# Re-run the learning-rate finder on the unfrozen learner and plot the result
# with a suggested learning rate marked.
learn.lr_find()
learn.recorder.plot(suggestion=True)

In [None]:
# Overwrite min_grad_lr with the new suggestion from the recorder and display it.
# NOTE: this replaces the stage-1 value held in the same variable.
min_grad_lr = learn.recorder.min_grad_lr
print(min_grad_lr)

In [None]:
# Second training stage on the unfrozen learner: 3 one-cycle epochs, again with
# the suggested rate as the slice's stop value.
learn.fit_one_cycle(3, slice(min_grad_lr))

In [None]:
# Checkpoint the learner after stage 2 (224px, resnet50).
learn.save('stage-2-224-rn50')

In [None]:
# Plot the losses the recorder collected during the most recent training run.
learn.recorder.plot_losses()

In [None]:
# Interpret the result: build a ClassificationInterpretation from the trained
# learner; the next cells use it to inspect mistakes.
interp = ClassificationInterpretation.from_learner(learn)

In [None]:
# Show the 4 examples with the highest loss.
interp.plot_top_losses(4, figsize=(12, 9))

In [None]:
# List the most-confused class pairs, filtered with min_val=2
# (presumably pairs confused at least twice -- confirm).
interp.most_confused(min_val=2)

In [None]:
# Make predictions for the test folder.
# `targets` is returned alongside the predictions but is not used in the cells shown here.
predictions, targets = learn.get_preds(ds_type=DatasetType.Test)

In [None]:
# Turn the first 9 test predictions into breed names and collect the
# matching image file names.
classes = predictions.argmax(1)  # index of the highest-scoring class per row
class_dict = {idx: breed for idx, breed in enumerate(learn.data.classes)}
first_nine = classes[:9].tolist()
labels = [class_dict[idx] for idx in first_nine]
test_images = [item.name for item in learn.data.test_ds.items[:9]]

In [None]:
# Show the first 9 test images in a 3x3 grid, each titled with its predicted breed.
fig, axes = plt.subplots(3, 3, figsize=(12, 9))

for ax, fn, label in zip(axes.flat, test_images, labels):
    # The second positional parameter of plt.imread is `format` (a string such
    # as 'png'), not an OpenCV-style read flag, so the stray `0` is dropped and
    # matplotlib infers the format from the file extension.
    ax.imshow(plt.imread(path/'test'/fn))
    ax.set_title(label)

# Turn the axes off on every cell, including any left empty when fewer than
# 9 images are available.
for ax in axes.flat:
    ax.axis("off")

In [None]:
from sklearn.metrics import log_loss

# Score the model with test-time augmentation on TTA's default dataset
# (the labelled validation split in fastai v1 -- confirm).
preds, y = learn.TTA()
print(accuracy(preds, y))

# Pass the full label set explicitly. Without `labels`, log_loss infers the
# classes from `y` and raises if the split happens to miss any class, because
# the number of inferred classes no longer matches the columns of `preds`.
print(log_loss(y, preds, labels=list(range(preds.shape[1]))))

In [None]:
# Save the predictions into "submission.csv" file

# Test-time-augmented predictions for the test set; y_test is not used below.
preds_test, y_test = learn.TTA(ds_type=DatasetType.Test)

# One column per class, named in the databunch's class order.
df = pd.DataFrame(array(preds_test), columns=data.classes)

# The id is the file name without its extension. Path.stem is independent of
# how deep the dataset directory sits and of the extension's length, unlike
# splitting the path string on '/' at a fixed index and chopping 4 characters.
df.insert(0, "id", [Path(pth).stem for pth in data.test_ds.items])

df.to_csv("submission.csv", index=False)

In [None]:
# Double check the submission: preview the first rows of the frame that was
# just written to submission.csv (`df` now refers to it, not the label table).
df.head()