In [None]:
import numpy as np
import pandas as pd
from fastai.vision import *

In [None]:
# Root directory of the competition data; the label CSV and the image folders
# used in later cells are all resolved relative to it.
path = Path('../input')

In [None]:
# Display the contents of the data directory (`ls` is presumably the Path
# helper brought in by the fastai star import).
path.ls()

In [None]:
# Peek at the first five image files found in the training image folder.
get_image_files(path/'train-jpg')[:5]

In [None]:
# Load the training labels and preview the first rows.
labels_csv = path/'train_v2.csv'
df = pd.read_csv(labels_csv)
df.head()

In [None]:
# Seed NumPy's global RNG before any later cell draws random numbers.
np.random.seed(42)

# Image size handed to the transforms, and batch size handed to the databunch.
size, bs = 224, 64

# Number of DataLoader workers handed to the databunch.
# NOTE(review): the original note reads "set this to 2 to prevent kernel from
# crashing", which does not match the value 0 used here - confirm which is intended.
num_workers = 0

In [None]:
# Augmentation settings handed to fastai's get_transforms.
tfms = get_transforms(
    flip_vert=True,
    max_lighting=0.1,
    max_zoom=1.05,
    max_warp=0.,  # warp magnitude set to zero
)

In [None]:
# Items come from the label CSV; file names are resolved inside 'train-jpg'
# with a '.jpg' suffix appended.
items = ImageItemList.from_csv(path, 'train_v2.csv', folder='train-jpg', suffix='.jpg')

# Random train/validation split, space-separated multi-label targets, then
# attach the unlabelled test folder.
src = items.random_split_by_pct().label_from_df(sep=' ').add_test_folder('test-jpg-v2')

In [None]:
# Apply the transforms at the configured size, batch the data and normalise
# with the ImageNet statistics.
loader_kwargs = dict(bs=bs, num_workers=num_workers)
data = src.transform(tfms, size=size).databunch(**loader_kwargs).normalize(imagenet_stats)

Verify that the datasets loaded properly. We should see the following item counts:
* train: 32,384
* valid: 8,095
* test: 61,191

In [None]:
# Report the number of items in the train, validation and test sets, in that order.
for split in (data.train_ds, data.valid_ds, data.test_ds):
    print(len(split))

In [None]:
# Display the label classes of the databunch; the same list is used later to
# turn prediction columns back into tag names.
data.classes

In [None]:
# Visual sanity check: show a 3-row grid of samples from a batch.
data.show_batch(rows=3, figsize=(7,6))

In [None]:
# Backbone architecture for the learner.
arch = models.resnet50

# Both metrics are given the same 0.2 threshold.
acc, f_score = (partial(metric_fn, thresh=0.2) for metric_fn in (accuracy_thresh, fbeta))

In [None]:
# Build the CNN learner on `data` with the chosen architecture and both
# thresholded metrics; model_dir points saved weights at /tmp/models
# (presumably because the input directory is not writable - confirm).
learn = create_cnn(data, arch, metrics=[acc, f_score], model_dir='/tmp/models')

In [None]:
# Run the learning-rate finder and plot what the recorder captured, to help
# choose the learning rate for the first training stage.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Learning rate for the first training stage.
lr = 1e-2

In [None]:
# Stage 1: four epochs of one-cycle training with `lr` as the upper bound.
learn.fit_one_cycle(4, max_lr=slice(lr))

In [None]:
# Checkpoint the learner after the first training stage.
learn.save('stage-1')

In [None]:
# Plot the losses recorded during the first training stage.
learn.recorder.plot_losses()

In [None]:
# Unfreeze the learner before the second training stage.
learn.unfreeze()

In [None]:
# Re-run the learning-rate finder on the unfrozen learner and plot the result.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Stage 2: two epochs of one-cycle training, with learning rates given as a
# slice from 1e-6 to 1e-4.
learn.fit_one_cycle(2, max_lr=slice(1e-6,1e-4))

In [None]:
# Checkpoint the learner after the second training stage.
learn.save('stage-2')

In [None]:
# Plot the losses recorded during the second training stage.
learn.recorder.plot_losses()

In [None]:
# Run inference on the test set.
# NOTE(review): `y` is not used by any later cell visible here.
preds, y = learn.get_preds(ds_type=DatasetType.Test)

In [None]:
# Inspect the first five rows of test predictions.
preds[:5]

In [None]:
# Cut-off above which a class counts as predicted for an image.
thresh = 0.2

# For every prediction row, keep the class names whose value exceeds the
# cut-off and join them into one space-separated tag string.
labelled_preds = [
    ' '.join(name for name, prob in zip(learn.data.classes, row) if prob > thresh)
    for row in preds
]

In [None]:
# Inspect the first five tag strings.
labelled_preds[:5]

In [None]:
# Pair every tag string with the image it was predicted for.
# `labelled_preds` follows the order of the test dataset's items, whereas
# os.listdir() returns directory entries in arbitrary order, so the file names
# must be taken from the test dataset itself to keep names and tags aligned.
# The names keep their file extension here.
test_file_names = [Path(item).name for item in learn.data.test_ds.x.items]
submission = pd.DataFrame({'image_name': test_file_names, 'tags': labelled_preds})

In [None]:
# Keep only the part of each file name before the first '.', i.e. the bare image name.
submission['image_name'] = submission['image_name'].str.split('.').str[0]

In [None]:
# Order the rows by image name.
submission = submission.sort_values(by='image_name')

In [None]:
# Preview the first five rows of the submission table.
submission.head(5)

In [None]:
# Write the submission file to the working directory, leaving out the
# DataFrame index column.
submission.to_csv('submission.csv', index=False)