In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Show which datasets are mounted in the input directory.
input_dir = "../input"
print(os.listdir(input_dir))

# Any results you write to the current directory are saved as output.

### Fast AI setup

In [None]:
# Notebook setup: (re)load the autoreload extension and set it to mode 2
# (reload modules before running code), and draw matplotlib figures inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from fastai import *
from fastai.vision import *
import torch

### First look at the Data

In [None]:
# Locate the competition data: the dataset root plus its train/test image folders.
path = Path('../input/aptos2019-blindness-detection')
path_train, path_test = (path/folder for folder in ('train_images', 'test_images'))
# Echo the three paths as the cell output.
path, path_train, path_test

In [None]:
# Read train.csv into a DataFrame and preview its first rows.
train_csv = path/'train.csv'
labels = pd.read_csv(train_csv)
labels.head()

In [None]:
# Open one training image, display it, and print its shape.
sample_image_path = path_train/'000c1434d8d7.png'
img = open_image(sample_image_path)
img.show(figsize=(7, 7))
print(img.shape)

In [None]:
# Distribution of the 5 diagnosis categories.
# Count how often each diagnosis value occurs in the training labels.
diagnosis_counts = labels['diagnosis'].value_counts()
# Keep the Axes returned by pandas so both axes can be labelled; the figure
# then stands on its own when the notebook is skimmed.
ax = diagnosis_counts.plot(kind='bar', title='Distribution of diagnosis categories')
ax.set(xlabel='diagnosis', ylabel='number of training images')
plt.show()

The chart makes the class imbalance easy to see: the diagnosis categories are far from uniformly distributed in our training set.

### Creating a DataBunch

In [None]:
# Data-augmentation settings for the images, gathered in one dict and
# unpacked into get_transforms.
augmentation_kwargs = dict(
    do_flip=True,
    flip_vert=True,
    max_rotate=360.,
    max_zoom=1.1,
    max_lighting=0.1,
    max_warp=0.1,
    p_lighting=0.5,
)
tfms = get_transforms(**augmentation_kwargs)

In [None]:
# Per-channel statistics used to normalize the APTOS 2019 images, taken from a
# pre-trained model found in a Kaggle kernel.
# NOTE(review): presumably ordered as (means, standard deviations), one value
# per colour channel -- confirm against the source kernel.
aptos19_stats = (
    [0.42, 0.22, 0.075],
    [0.27, 0.15, 0.081],
)

In [None]:
# Build the test image list from the rows of the sample submission file.
sample_submission_csv = path/'sample_submission.csv'
test_labels = pd.read_csv(sample_submission_csv)
test = ImageList.from_df(test_labels, path=path_test, suffix='.png')

In [None]:
# Assemble the labelled source one step at a time:
# image list -> random train/validation split (fixed seed) -> labels from the
# 'diagnosis' column -> attach the unlabelled test images.
train_images = ImageList.from_df(labels, path=path_train, suffix='.png')
train_valid_split = train_images.split_by_rand_pct(seed=42)
labelled_split = train_valid_split.label_from_df(cols='diagnosis')
src = labelled_split.add_test(test)

In [None]:
# Build the 512px DataBunch: apply the augmentations and squish-resize the
# images, batch them in groups of 8, then normalize with the APTOS statistics.
sized_src = src.transform(
    tfms,
    size=512,
    resize_method=ResizeMethod.SQUISH,
    padding_mode='zeros',
)
data = sized_src.databunch(bs=8).normalize(aptos19_stats)

In [None]:
# Display the DataBunch (its repr) as the cell output.
data

In [None]:
# Preview a 3-row grid of images from a batch.
data.show_batch(rows=3, figsize=(7, 7))

In [None]:
# Print the class labels, then the size of the train, validation and test sets.
print(data.classes)
for split_name in ('train_ds', 'valid_ds', 'test_ds'):
    print(len(getattr(data, split_name)))

### Setting up the Model

In [None]:
# Copy the pretrained weight files from the attached input datasets into a
# local torch checkpoint directory, renaming them to hash-suffixed file names.
# NOTE(review): presumably these are the names the pretrained-model loader
# looks for, so it finds the weights locally instead of downloading -- confirm.
# NOTE(review): only resnet152 is used by the learner in this notebook; the
# resnet34 copy looks unused here.
!mkdir -p /tmp/.cache/torch/checkpoints
!cp ../input/resnet34/resnet34.pth /tmp/.cache/torch/checkpoints/resnet34-333f7ec4.pth
!cp ../input/resnet152/resnet152.pth /tmp/.cache/torch/checkpoints/resnet152-b121ed2d.pth

In [None]:
# Kappa score metric configured with quadratic weights.
kappa = KappaScore(weights="quadratic")

In [None]:
# Create the ResNet-152 learner, tracking accuracy and the kappa score.
# NOTE(review): `model_dir` is given relative to `path`, so checkpoints would go
# to './../kaggle/working' relative to the working directory -- confirm that
# this is the intended location.
learner_metrics = [accuracy, kappa]
learn = cnn_learner(
    data,
    models.resnet152,
    metrics=learner_metrics,
    path=Path("."),
    model_dir=Path('../kaggle/working'),
)

In [None]:
# Stage 1 (512px images): 3 epochs of one-cycle training with the default
# learning rate, before unfreeze() is called later in the notebook.
learn.fit_one_cycle(3)

In [None]:
# Checkpoint the learner after the first training stage.
learn.save('resnet152-1')

### Unfreeze and Learn some more

In [None]:
# Restore the stage-1 checkpoint; the trailing ';' suppresses the cell output.
learn.load('resnet152-1');

In [None]:
# Unfreeze the model so that the following training also updates the earlier layers.
learn.unfreeze()

In [None]:
# Run the learning-rate finder on the unfrozen model and plot what it recorded,
# to help choose the learning rates for the next fit.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Stage 2: 3 more epochs of one-cycle training on the unfrozen model, with the
# learning rate given as the range 1e-6 .. 1e-5.
learn.fit_one_cycle(3, slice(1e-6,1e-5))

In [None]:
# Checkpoint the learner after the unfrozen 512px stage.
learn.save('resnet152-2')

In [None]:
# Restore the stage-2 checkpoint; the trailing ';' suppresses the cell output.
learn.load('resnet152-2');

In [None]:
# learn.export()

### Double the size of images

In [None]:
# Rebuild the DataBunch at 1024px (double the earlier 512px) with the same
# augmentations and normalization, and a smaller batch size of 4.
sized_src = src.transform(
    tfms,
    size=1024,
    resize_method=ResizeMethod.SQUISH,
    padding_mode='zeros',
)
data = sized_src.databunch(bs=4).normalize(aptos19_stats)

In [None]:
# Point the existing learner at the new 1024px DataBunch.
learn.data = data

In [None]:
# Freeze the model again (it was unfrozen earlier) before training on the larger images.
learn.freeze()

In [None]:
# Run the learning-rate finder for the frozen model on the 1024px data and plot
# what it recorded.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Stage 3 (1024px images, frozen): 4 epochs of one-cycle training with a
# learning rate of 2e-4.
learn.fit_one_cycle(4, 2e-4)

In [None]:
# Checkpoint the learner after the frozen 1024px stage.
learn.save('resnet152-3')

In [None]:
# Restore the stage-3 checkpoint; the trailing ';' suppresses the cell output.
learn.load('resnet152-3');

In [None]:
# Unfreeze the model again for the final fine-tuning pass at 1024px.
learn.unfreeze()

In [None]:
# Run the learning-rate finder for the unfrozen model on the 1024px data and
# plot what it recorded.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Stage 4 (1024px images, unfrozen): 6 epochs of one-cycle training with a
# learning rate of 2e-5.
learn.fit_one_cycle(6, 2e-5)

In [None]:
# Checkpoint the final model; this is the checkpoint loaded for prediction below.
learn.save('resnet152-4')

In [None]:
# Restore the stage-4 checkpoint; the trailing ';' suppresses the cell output.
learn.load('resnet152-4');

### Get Predictions

In [None]:
# Load the final checkpoint before predicting.
# NOTE(review): the same checkpoint is already loaded by the last cell of the
# previous section, so this looks redundant when running top to bottom.
learn.load('resnet152-4');

In [None]:
# Predict on the test set; only the first returned item (the predictions) is
# kept, the second one is not used.
test_outputs = learn.get_preds(ds_type=DatasetType.Test)
preds = test_outputs[0]

### Preparing Submission

In [None]:
# Start the submission from the sample submission file and preview it.
submission_template_csv = path/'sample_submission.csv'
submission = pd.read_csv(submission_template_csv)
submission.head()

In [None]:
# Reduce the prediction tensor to one class index per row (argmax over
# dimension 1) and store the result as a plain list of Python ints.
# Guarded so the cell can be re-run: after the first run `preds` is already a
# list, and calling .argmax on it again would raise AttributeError.
if torch.is_tensor(preds):
    preds = preds.argmax(dim=1).tolist()
# Show the first five predicted labels as the cell output.
preds[:5]

In [None]:
# Put the predicted labels into the 'diagnosis' column and preview the result.
submission = submission.assign(diagnosis=preds)
submission.head()

In [None]:
# Add a filter that ignores FutureWarning messages from this point on.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Write the submission to 'submission.csv' in the current directory, leaving
# out the DataFrame index.
submission.to_csv('submission.csv', index = False)