In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running `os.listdir("../input")` (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Any results you write to the current directory are saved as output.

In [None]:
from fastai import *
from fastai.vision import *

In [None]:
# Dataset location, its image sub-folders, and the Kaggle working/output directories.
kaggle_root = Path("/kaggle")
path = kaggle_root/"input"/"aptos2019-blindness-detection"
train_path, test_path = (path/sub_dir for sub_dir in ('train_images', 'test_images'))
working_path = kaggle_root/"working"
output_path = kaggle_root/"output"

In [None]:
import shutil

# Place the ResNet-152 weights from the attached `resnet152` dataset into the
# torch checkpoint cache directory (presumably so the learner finds them there
# instead of trying to download them - confirm against the torch/fastai version).
weights_name = 'resnet152-b121ed2d.pth'
checkpoint_dir = Path('/tmp/.cache/torch/checkpoints/')
checkpoint_dir.mkdir(exist_ok=True, parents=True)
model_path = str(checkpoint_dir/weights_name)

# shutil.copyfile raises if the source is missing, whereas a failing `!cp`
# only prints an error and lets the notebook carry on. The source is resolved
# from `path` so it does not depend on the current working directory.
shutil.copyfile(path.parent/'resnet152'/weights_name, model_path)

In [None]:
# Cohen's kappa with quadratic weighting, used as a training metric below.
kappa = KappaScore(weights="quadratic")

In [None]:
# 224px DataBunch: images listed in train.csv, 20% held out for validation
# (fixed seed so the split is repeatable), labels read from the CSV, with
# flip / full-rotation / mild-warp augmentation.
data = (ImageList.from_csv(path, csv_name='train.csv', folder='train_images', suffix='.png')
                .split_by_rand_pct(valid_pct=0.2, seed=42)
                .label_from_df()
                .transform(get_transforms(flip_vert=True, max_rotate=360.0, max_warp=0.1), size=224)
                .databunch(bs=16, num_workers=os.cpu_count())
                # Normalise with the ImageNet statistics the pretrained ResNet
                # weights expect; calling normalize() without stats would
                # estimate them from a single batch instead.
                .normalize(imagenet_stats))

In [None]:
# Visual sanity check: preview a grid of (augmented) samples from `data`.
data.show_batch(rows=3, figsize=(7,8))

In [None]:
# CNN learner on the resnet152 architecture, reporting error rate and the
# quadratic-weighted kappa metric defined above.
# NOTE(review): presumably initialised from the pretrained weights copied into
# the torch checkpoint cache earlier - confirm.
learn = cnn_learner(data, models.resnet152, metrics=[error_rate, kappa])

In [None]:
# Store checkpoints under /kaggle/working (the same directory as `working_path`).
# NOTE(review): presumably needed because the default location sits under the
# dataset directory, which may not be writable - confirm.
learn.model_dir = '/kaggle/working'

In [None]:
# Learning-rate finder sweep; the next cell plots its recorded losses.
learn.lr_find()

In [None]:
learn.recorder.plot()

In [None]:
# 5 epochs of one-cycle training with a maximum learning rate of 1e-3.
# unfreeze() has not been called yet, so presumably only the newly added head
# is trained here (fastai's default for cnn_learner - confirm).
learn.fit_one_cycle(5, 1e-3)

In [None]:
# Checkpoint the 224px stage-1 weights in learn.model_dir.
learn.save('stage-1-224')

In [None]:
# Reload the checkpoint just written. This looks redundant in a top-to-bottom
# run; presumably kept so the notebook can be resumed from this cell.
learn.load('stage-1-224')

In [None]:
# Make all layers trainable before fine-tuning.
learn.unfreeze()

In [None]:
# Re-run the learning-rate finder now that the whole network is trainable.
learn.lr_find()

In [None]:
learn.recorder.plot()

In [None]:
# 5 more one-cycle epochs. The slice gives a learning-rate range (1e-6 to 1e-4)
# rather than a single value; in fastai this is presumably spread across layer
# groups, lowest for the earliest layers - confirm against the fastai docs.
learn.fit_one_cycle(5, max_lr=slice(1e-6, 1e-4))

In [None]:
# Checkpoint the fine-tuned 224px weights; the 448px learner loads this file.
learn.save('stage-2-224')

In [None]:
# 448px DataBunch: same CSV, same seeded 80/20 split and same augmentation as
# the 224px pipeline, only the image size is doubled.
data = (ImageList.from_csv(path, csv_name='train.csv', folder='train_images', suffix='.png')
                .split_by_rand_pct(valid_pct=0.2, seed=42)
                .label_from_df()
                .transform(get_transforms(flip_vert=True, max_rotate=360.0, max_warp=0.1), size=448)
                .databunch(bs=16, num_workers=os.cpu_count())
                # Normalise with the ImageNet statistics the pretrained ResNet
                # weights expect; calling normalize() without stats would
                # estimate them from a single batch instead.
                .normalize(imagenet_stats))

In [None]:
# Fresh learner bound to the 448px `data`, same architecture and metrics as before.
learn = cnn_learner(data, models.resnet152, metrics=[error_rate, kappa])

In [None]:
# Point checkpoint I/O at /kaggle/working again: this is a new Learner object,
# so the setting made on the previous one does not carry over.
learn.model_dir = '/kaggle/working'

In [None]:
# Start from the weights fine-tuned at 224px (saved as 'stage-2-224').
learn.load('stage-2-224')

In [None]:
# Learning-rate finder sweep at the larger image size; plotted in the next cell.
learn.lr_find()

In [None]:
learn.recorder.plot()

In [None]:
# 3 one-cycle epochs at max LR 1e-3. unfreeze() has not been called on this
# new learner yet, so presumably only the head adapts here - confirm.
learn.fit_one_cycle(3, 1e-3)

In [None]:
# Checkpoint the 448px stage-1 weights.
learn.save('stage-1-448')

In [None]:
# Make all layers trainable for the final fine-tuning pass at 448px.
learn.unfreeze()

In [None]:
learn.lr_find()

In [None]:
# Plot the finder results and ask fastai to mark a suggested learning rate.
learn.recorder.plot(suggestion=True)

In [None]:
# 3 one-cycle epochs with a learning-rate range of 4e-6 to 4e-4.
# NOTE(review): the range is hard-coded; presumably read off the plot above - confirm.
learn.fit_one_cycle(3, max_lr=slice(4e-06, 4e-4))

In [None]:
# Final checkpoint; the predictions below come from this in-memory model.
learn.save('stage-2-448')

In [None]:
# Submission template. Read it through `path` so the location matches the one
# used for the training data and does not depend on the current working directory.
sample_df = pd.read_csv(path/'sample_submission.csv')
sample_df.head()

In [None]:
# Build the test item list from the submission template (one row per test
# image, in the template's order) and attach it to the learner's data.
test_items = ImageList.from_df(sample_df, path, folder='test_images', suffix='.png')
learn.data.add_test(test_items)

In [None]:
import numpy as np
import pandas as pd
import os
import scipy as sp
from functools import partial
from sklearn import metrics
from collections import Counter
import json

In [None]:
class OptimizedRounder(object):
    """Learn four cut-points that map scores onto the ordinal grades 0-4.

    ``fit`` searches (Nelder-Mead) for the thresholds that maximise the
    quadratic-weighted Cohen's kappa between the thresholded scores and the
    true labels; ``predict`` applies a set of thresholds to new scores.
    """

    def __init__(self):
        # 0 is the "not fitted yet" placeholder; fit() replaces it with the
        # scipy OptimizeResult returned by the optimiser.
        self.coef_ = 0

    @staticmethod
    def _apply_thresholds(X, coef):
        """Bucket every value of ``X`` into 0-4 using the four cut-points ``coef``.

        The conditions are evaluated in order and the first match wins, exactly
        like an ``if/elif`` chain, so the result is well defined even when the
        optimiser proposes unsorted cut-points. Anything matching no condition
        (including NaN) falls into grade 4.

        Returns a new array with the same dtype as ``X``; ``X`` is not modified.
        """
        scores = np.asarray(X)
        conditions = [
            scores < coef[0],
            (scores >= coef[0]) & (scores < coef[1]),
            (scores >= coef[1]) & (scores < coef[2]),
            (scores >= coef[2]) & (scores < coef[3]),
        ]
        grades = np.select(conditions, [0, 1, 2, 3], default=4)
        return grades.astype(scores.dtype, copy=False)

    def _kappa_loss(self, coef, X, y):
        """Return the negated quadratic-weighted kappa of ``X`` thresholded by ``coef``.

        Negated because the optimiser minimises while a higher kappa is better.
        """
        X_p = self._apply_thresholds(X, coef)
        return -metrics.cohen_kappa_score(y, X_p, weights='quadratic')

    def fit(self, X, y):
        """Optimise the cut-points on scores ``X`` against true labels ``y``.

        The optimiser result is stored in ``self.coef_``; nothing is returned.
        """
        # Import the submodule explicitly: a bare `import scipy` does not
        # guarantee that `scipy.optimize` is already loaded.
        from scipy import optimize

        # Convert once up front so each loss evaluation works on plain arrays.
        X = np.asarray(X)
        y = np.asarray(y)
        initial_coef = [0.5, 1.5, 2.5, 3.5]
        self.coef_ = optimize.minimize(self._kappa_loss, initial_coef,
                                       args=(X, y), method='nelder-mead')

    def predict(self, X, coef=None):
        """Threshold ``X`` into grades 0-4.

        ``coef`` holds the four cut-points; when omitted, the fitted
        coefficients are used (``fit`` must have been called first).
        """
        if coef is None:
            coef = self.coefficients()
        return self._apply_thresholds(X, coef)

    def coefficients(self):
        """Return the fitted cut-points.

        Raises:
            RuntimeError: if ``fit`` has not been called yet.
        """
        if isinstance(self.coef_, int):
            raise RuntimeError("OptimizedRounder has not been fitted yet; call fit(X, y) first.")
        return self.coef_['x']

In [None]:
# Model outputs and true labels for the validation split.
val_preds, targets = learn.get_preds(DatasetType.Valid)

In [None]:
# Keep only the index of the largest output per row, i.e. the predicted class.
# assumes val_preds is (n_samples, n_classes) - TODO confirm
_ , val_index = val_preds.max(1)

In [None]:
# Same for the test set attached via add_test; `y` is not used afterwards.
test_preds, y = learn.get_preds(DatasetType.Test)

In [None]:
_ , test_index = test_preds.max(1)

In [None]:
# Fit the rounding thresholds on the validation predictions against the true labels.
# NOTE(review): the inputs here are argmax class indices, not continuous scores,
# so moving a threshold can only remap whole classes - confirm this is intended.
optR = OptimizedRounder()
optR.fit(val_index, targets)
coefficients = optR.coefficients()
# Overwrites val_index with the re-thresholded labels; the result is not used
# again in the cells visible here.
val_index = optR.predict(val_index, coefficients)

In [None]:
# Apply the thresholds fitted on the validation set to the test predictions
# and write the submission file.
test_index = optR.predict(test_index, coefficients)
# Item assignment always writes the column; attribute assignment
# (`sample_df.diagnosis = ...`) would silently set a plain Python attribute
# if the column were missing, leaving the CSV unchanged.
sample_df['diagnosis'] = test_index.astype(int)
sample_df.to_csv('submission.csv',index=False)