Load libraries

In [None]:
import numpy as np
import pandas as pd
import os
from fastai.vision import *

Look at dataset

In [None]:
# Root directory of the input data, given relative to the working directory.
path = Path('../input')

In [None]:
# List the contents of the data root to see what is available.
path.ls()

In [None]:
# Directory holding the training images, located under the data root.
path_img = path.joinpath('train')

In [None]:
# Peek at the first five image files found in the training directory.
get_image_files(path_img)[:5]

Create datablock

In [None]:
# Configuration shared by the cells that build the data bunch.
np.random.seed(42)  # fix NumPy's global RNG so the random validation split is repeatable
size = 224          # image size passed to the transform step
bs = 64             # batch size
num_workers = 0  # set this to 0 to prevent kernel from crashing
# Captures the label from file names shaped like '<label>.<number>.jpg'.
# The dots are escaped so they match a literal '.' only; unescaped, '.' matches
# any character and a name such as 'dog12.jpg' would be silently labelled 'dog'.
pat = r'/([^/.]+)\.\d+\.jpg$'

In [None]:
# Standard data augmentation transforms.
tfms = get_transforms()

# Stage 1: gather the images, split them, label them and attach the test set.
src = (
    ImageItemList.from_folder(path_img)  # training images from the train dir
    .random_split_by_pct()               # randomly hold out 20% as the validation set
    .label_from_re(pat)                  # label = regex capture applied to the file name
    .add_test_folder('../test')          # test dir (presumably resolved relative to path_img)
)

# Stage 2: apply the augmentation, build the ImageDataBunch and normalize
# it with the ImageNet statistics.
data = (
    src.transform(tfms, size=size)
    .databunch(bs=bs, num_workers=num_workers)
    .normalize(imagenet_stats)
)

Verify that the datasets loaded properly. We should have the following counts:
* train: 20,000
* valid: 5,000
* test: 12,500

In [None]:
# Print the number of items in the train, validation and test sets, one per line.
print(len(data.train_ds), len(data.valid_ds), len(data.test_ds), sep='\n')

In [None]:
# Show the class labels produced by the regex labelling step.
data.classes

Visualize data

In [None]:
# Plot a sample batch (3 rows) as a visual sanity check of images and labels.
data.show_batch(rows=3, figsize=(7,6))

Create learner using pre-trained model

In [None]:
# Build a CNN learner on a ResNet-50 backbone that reports accuracy.
# Saved models are written under /tmp/models.
learn = create_cnn(
    data,
    models.resnet50,
    metrics=accuracy,
    model_dir='/tmp/models',
)

Fit the model with most layers frozen

In [None]:
# First training stage: 4 epochs of one-cycle training, run before the
# learner is unfrozen.
learn.fit_one_cycle(4)

In [None]:
# Checkpoint the model after the first stage under the name 'stage-1'.
learn.save('stage-1')

Unfreeze all layers, run learning rate finder, fit some more

In [None]:
# Unfreeze the learner so that all layers are trained from here on.
learn.unfreeze()

In [None]:
# Run the learning rate finder, then plot what the recorder captured so a
# learning rate range can be chosen for the next fit.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Second training stage on the unfrozen model: 2 epochs, with the maximum
# learning rate given as a range (slice) instead of a single value.
lr_range = slice(1e-6, 1e-4)
learn.fit_one_cycle(2, max_lr=lr_range)

In [None]:
# Checkpoint the model after the second stage under the name 'stage-2'.
learn.save('stage-2')

Examine prediction errors in validation set

In [None]:
# Build an interpretation object from the trained learner and fetch the
# losses together with their indices.
interp = ClassificationInterpretation.from_learner(learn)
losses,idxs = interp.top_losses()
# Sanity check (shown as the cell output): there should be exactly one loss
# and one index per validation item.
len(data.valid_ds)==len(losses)==len(idxs)

In [None]:
# Plot the 9 items with the highest loss.
interp.plot_top_losses(9, figsize=(15,11))

In [None]:
# Plot the confusion matrix for the interpreted predictions.
interp.plot_confusion_matrix(figsize=(6,6), dpi=60)

In [None]:
# List the most confused class pairs; min_val=2 presumably drops pairs that
# were confused fewer than two times -- confirm against the fastai docs.
interp.most_confused(min_val=2)

Create predictions using test set

In [None]:
# Run the model on the test set. `y` is returned alongside the predictions
# but is not used by the later cells shown in this notebook.
preds, y = learn.get_preds(ds_type=DatasetType.Test)

In [None]:
# Prediction column for the 'dog' class, one value per test image.
# The column is looked up by class name instead of hard-coding index 1, so the
# selection does not depend on the order in which the classes were discovered
# (assumes the columns of `preds` follow the order of `data.classes`).
dog_idx = list(data.classes).index('dog')
dog_preds = preds[:, dog_idx]

Generate competition submission using predictions

In [None]:
# Pair every prediction with the file it was computed from.
# The file names come from the test set's own item list so that they follow
# the same order as `dog_preds`. os.listdir() returns entries in arbitrary
# order, which is not guaranteed to match the order in which the test images
# were loaded and could silently attach predictions to the wrong ids.
test_fnames = [Path(f).name for f in data.test_ds.x.items]
# np.asarray turns the prediction tensor into a plain numeric column.
submission = pd.DataFrame({'id': test_fnames, 'label': np.asarray(dog_preds)})

In [None]:
# Keep only the part of each file name before the first '.'
# (e.g. '123.jpg' becomes '123').
submission['id'] = submission['id'].str.split('.').str[0]

In [None]:
# Convert the ids to integers so that the sort in the next cell is numeric.
submission['id'] = submission['id'].astype(int)

In [None]:
# Order the rows by ascending id.
submission = submission.sort_values(by='id')

In [None]:
# Write the submission file to the working directory, without the index column.
submission.to_csv('submission.csv', index=False)