In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

Loading the required libraries

Enabling the cuDNN benchmark mode lets cuDNN auto-tune its convolution algorithms, which speeds up training when the input size stays the same from batch to batch.

In [None]:
from fastai.script import *
from fastai.vision import *
from fastai.callbacks import *
from fastai.distributed import *
from fastprogress import fastprogress
from torchvision.models import *
from fastai.vision.models.xresnet import *
from fastai.vision.models.xresnet2 import *
torch.backends.cudnn.benchmark = True

Creating directory to save model weights later on

In [None]:
# Seed NumPy so the random train/validation split drawn later is reproducible.
np.random.seed(42)
# Create the weights directory in pure Python; `exist_ok=True` keeps this cell
# re-runnable (a plain `mkdir` errors once the directory already exists).
Path('../modeldata').mkdir(parents=True, exist_ok=True)

In [None]:
!ls ../input/planet-understanding-the-amazon-from-space/

In [None]:
path = Path('../input/planet-understanding-the-amazon-from-space')

In [None]:
path.ls()

This is a multi-label classification problem: each image can carry several class labels at once.

In [None]:
df = pd.read_csv(path/'train_v2.csv')
df.head()

Loading data using Fastai Dataloader

In [None]:
def get_data(path, size, bs, workers=None, valid_pct=0.2, seed=42):
    """Build a normalized multi-label image databunch from `train_v2.csv`.

    Args:
        path: Dataset root; `train_v2.csv`, `train-jpg/` and `test-jpg-v2/` are
            looked up relative to it.
        size: Image size handed to the transforms.
        bs: Batch size.
        workers: Number of data-loading workers; defaults to `min(8, num_cpus())`.
        valid_pct: Fraction of the training rows held out for validation.
        seed: Seed for the random train/validation split. A fixed seed makes
            every call produce the same split, so rebuilding the data at a
            different `size` does not move validation images into the training
            set. Pass `None` to split from the current global RNG state.

    Returns:
        The databunch with the test folder attached, normalized with
        `imagenet_stats`.
    """
    if workers is None:
        workers = min(8, num_cpus())
    # Vertical flips on, mild lighting/zoom jitter, warping disabled.
    tfms = get_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.)
    return (ImageList.from_csv(path, 'train_v2.csv', folder='train-jpg', suffix='.jpg')
            .split_by_rand_pct(valid_pct, seed=seed)
            .label_from_df(label_delim=' ')  # labels are space-delimited in the CSV
            .add_test_folder('test-jpg-v2')
            .transform(tfms, size=size)
            .databunch(bs=bs, num_workers=workers)
            .normalize(imagenet_stats))

In [None]:
# Initial databunch: image size 128, batch size 64 (the `size` and `bs` arguments).
data = get_data(path, 128, 64)

In [None]:
data.show_batch(rows=3, figsize=(12,9))

We use accuracy_thresh instead of accuracy. accuracy_thresh selects the classes that are above a certain threshold (0.5 by default) and compares them to the ground truth.

As for Fbeta, it's the metric that was used by Kaggle on this competition.

In [None]:
# Both metrics binarise the predictions at a 0.2 threshold before scoring.
acc_02, f_score = (partial(metric_fn, thresh=0.2) for metric_fn in (accuracy_thresh, fbeta))
metrics = [acc_02, f_score]

In [None]:
arch = resnet50

In [None]:
# Build the learner around xresnet50 with one output per label (`data.c`).
# A `cnn_learner(data, arch, ...)` used to be constructed here and was then
# overwritten straight away; that dead construction has been dropped.
m = xresnet50
learn = Learner(
    data,
    m(c_out=data.c),
    wd=1e-2,
    metrics=metrics,
    bn_wd=False,
    true_wd=True,
)
# fastai joins `model_dir` onto `learn.path` (the dataset folder) when saving,
# so a relative value would not point at the `../modeldata` directory created
# from the working directory. An absolute path removes that ambiguity.
learn.model_dir = Path("../modeldata").resolve()

We use the LR Finder to pick a good learning rate.

In [None]:
learn.lr_find()
learn.recorder.plot()

Now, we fit the head of our network

In [None]:
learn.fit_one_cycle(5, slice(0.02))

In [None]:
learn.unfreeze()

In [None]:
learn.lr_find()
learn.recorder.plot()

And fine-tune the whole model:

In [None]:
learn.fit_one_cycle(5, 1e-4)

In [None]:
learn.save('save1')

Now we increase the image size to allow the model to learn more details from the dataset

In [None]:
# Re-seed before rebuilding the data so the 256px databunch draws the same
# train/validation split as the 128px one; otherwise images the model has
# already trained on could end up in the new validation set.
np.random.seed(42)
learn.data = get_data(path, 256, 64)

In [None]:
# NOTE(review): `learn` was built with a plain `Learner(...)`, not `cnn_learner`,
# so it may have only a single layer group, in which case freeze() leaves every
# layer trainable — confirm this step does what is intended.
learn.freeze()

In [None]:
learn.lr_find()
learn.recorder.plot()

In [None]:
learn.fit_one_cycle(10, 1e-3)

In [None]:
learn.unfreeze()

In [None]:
learn.fit_one_cycle(5, 1e-4)

In [None]:
learn.save('save2')

Test-time augmentation (TTA) applies data augmentation at inference time and averages the predictions over the augmented copies of each image. In most cases this gives better class scores than predicting on the original image alone.

In [None]:
# Predict on the test set with test-time augmentation; the un-augmented
# alternative would be learn.get_preds(ds_type=DatasetType.Test).
preds, _ = learn.TTA(ds_type=DatasetType.Test)

When turning predictions into labels we use a threshold of 0.2, the same value used for the metrics during training.

In [None]:
# For each image, keep the classes scoring above the threshold and join them
# into one space-separated tag string.
thresh = 0.2
labelled_preds = [
    ' '.join(class_name
             for class_name, score in zip(learn.data.classes, pred)
             if score > thresh)
    for pred in preds
]

In [None]:
labelled_preds[:5]

In [None]:
# Image ids are the test file names without their extension; `.stem` drops the
# suffix whatever its length instead of assuming exactly four characters.
fnames = [f.stem for f in learn.data.test_ds.items]
# One row per test image, columns ordered as image_name, tags.
df = pd.DataFrame({'image_name': fnames, 'tags': labelled_preds}, columns=['image_name', 'tags'])

In [None]:
df.head()

In [None]:
df.to_csv('submission.csv', index=False)

In [None]:
from IPython.display import FileLinks
FileLinks('.')