In [None]:
import warnings
# Blanket filter: every Python warning raised after this line is suppressed,
# whatever its category or origin.
warnings.filterwarnings('ignore')

In [None]:
%matplotlib inline

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file under /kaggle/input, printing one full path per line.
# NOTE(review): the loop binds `_` at notebook level, which shadows IPython's
# "last output" variable for the rest of the session.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

In [None]:
%reload_ext autoreload
%autoreload 2

import os
import cv2
import random
import numpy as np
import pandas as pd
import scipy as sp
from sklearn.metrics import cohen_kappa_score

import torch
from fastai.vision import *

In [None]:
# Seed handed to seed_everything() and to the random train/validation split.
SEED = 1234
# Side length in pixels: used for cv2.resize in load_ben_color and as the fastai transform size.
SIZE = 224

# Root of the competition data; train.csv, sample_submission.csv and the image folders are read from here.
PATH = "../input/aptos2019-blindness-detection"

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and configures cuDNN so that it
    picks deterministic algorithms.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Exported for child processes; the running interpreter fixed its own
    # hash seed at start-up.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Seed every visible GPU, not only the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner can select a different algorithm on each run, which
    # would defeat the deterministic flag above, so switch it off explicitly.
    torch.backends.cudnn.benchmark = False

# Seed all generators once, up front, with the notebook-wide SEED constant.
seed_everything(SEED)

In [None]:
# Copy the ResNet-50 weights from the attached input dataset into the torch
# checkpoint cache directory under the file name resnet50-19c8e357.pth.
# NOTE(review): presumably this lets cnn_learner(..., pretrained=True) find the
# weights locally instead of downloading them — confirm against the torch cache path in use.
!mkdir -p /tmp/.cache/torch/checkpoints/
!cp ../input/resnet50/resnet50.pth /tmp/.cache/torch/checkpoints/resnet50-19c8e357.pth

In [None]:
# Training table; its 'id_code' and 'diagnosis' columns are used further down.
df_train = pd.read_csv(PATH+"/train.csv")
# The sample submission doubles as the test table: its 'id_code' column lists the
# test images and its 'diagnosis' column is later overwritten with predictions.
df_test = pd.read_csv(PATH+"/sample_submission.csv")

In [None]:
# Build fastai image lists from the tables: each 'id_code' plus the '.png' suffix
# names a file inside the given folder under PATH.
train = ImageList.from_df(df_train, path=PATH, cols='id_code', folder="train_images", suffix='.png')
test = ImageList.from_df(df_test, path=PATH, cols='id_code', folder="test_images", suffix='.png')

In [None]:
def quadratic_kappa(y_hat, y):
    """Quadratic-weighted Cohen's kappa between rounded predictions and targets.

    Args:
        y_hat: Tensor of continuous predictions; each value is rounded to the
            nearest integer before scoring.
        y: Tensor of ground-truth labels.

    Returns:
        A scalar tensor holding the kappa score, placed on the same device as
        ``y_hat``.
    """
    # scikit-learn operates on host memory, so detach from the graph, move to
    # the CPU and flatten before handing the values over.
    rounded = torch.round(y_hat).detach().cpu().numpy().ravel()
    targets = y.detach().cpu().numpy().ravel()
    score = cohen_kappa_score(rounded, targets, weights='quadratic')
    # Follow the input's device rather than a hard-coded 'cuda:0', so the
    # metric also works on CPU-only machines and on other GPUs.
    return torch.tensor(score, device=y_hat.device)

In [None]:
#preprocessing using ben's circular + local average pixel value subtraction

def load_ben_color(path, sigmaX=10):
    """Load an image and apply crop, resize and Gaussian-blur subtraction.

    The file is read with OpenCV, converted from BGR to RGB, passed through
    crop_image_from_gray, resized to SIZE x SIZE and finally combined as
    ``4 * image - 4 * GaussianBlur(image, sigmaX) + 128``.

    Args:
        path: Location of the image file (string or path-like).
        sigmaX: Standard deviation of the Gaussian blur.

    Returns:
        The processed image as returned by cv2.addWeighted.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``path``.
    """
    # str() lets pathlib.Path objects through; cv2.imread reports failure by
    # returning None instead of raising, so check explicitly.
    image = cv2.imread(str(path))
    if image is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)
    image = cv2.resize(image, (SIZE, SIZE))
    blurred = cv2.GaussianBlur(image, (0, 0), sigmaX)
    return cv2.addWeighted(image, 4, blurred, -4, 128)

#ben's preprocessing
### This function comes from a super helpful kernel;
### it crops the images to remove the unnecessary black borders.
def crop_image_from_gray(img,tol=7):
    """
    Trim the dark border of an image.

    Rows and columns that contain no pixel brighter than ``tol`` are dropped.
    A 2-D input is thresholded directly. A 3-D input is thresholded on its
    cv2.COLOR_BGR2GRAY conversion and its first three channels are cropped;
    if nothing is brighter than ``tol`` the 3-D input is returned unchanged.
    Inputs of any other rank fall through and yield None.

    Source: https://www.kaggle.com/ratthachat/aptos-updated-preprocessing-ben-s-cropping
    """
    if img.ndim == 2:
        bright = img > tol
        return img[np.ix_(bright.any(1), bright.any(0))]

    if img.ndim == 3:
        bright = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) > tol
        # Row/column selector shared by the emptiness check and every channel.
        keep = np.ix_(bright.any(1), bright.any(0))
        # No row survives the threshold: hand the image back untouched.
        if img[:, :, 0][keep].shape[0] == 0:
            return img
        return np.stack([img[:, :, channel][keep] for channel in range(3)], axis=-1)


def circle_crop(img):
    """
    Black out everything outside a centred circle.

    The image is first passed through crop_image_from_gray. A filled circle
    is then drawn at the centre with a radius of half the shorter side, and
    the image is AND-ed with itself under that mask.
    """
    cropped = crop_image_from_gray(img)

    # Three-way unpack: the cropped image must be a 3-D array.
    height, width, _ = cropped.shape
    centre_x = width // 2
    centre_y = height // 2
    radius = int(min(centre_x, centre_y))

    mask = np.zeros((height, width), np.uint8)
    cv2.circle(mask, (centre_x, centre_y), radius, 1, thickness=-1)
    return cv2.bitwise_and(cropped, cropped, mask=mask)

def local_avg(img):
    """Subtract a 15x15 box-blurred copy of the image from the image itself.

    The result is ``4 * img - 4 * blur(img) + 128`` as computed by
    cv2.addWeighted; change the factor 4 to alter the contrast boost.
    """
    box_blurred = cv2.blur(img, ksize=(15, 15))
    return cv2.addWeighted(img, 4, box_blurred, -4, 128)

In [None]:
#def crop_image(img,tol=7):        
#    mask = img>tol
#    return img[np.ix_(mask.any(1),mask.any(0))]

#version 11 ----- ben's +  circular + local avg
def open_aptos2019_image(fn, convert_mode, after_open)->Image:
    """Open ``fn`` through the custom preprocessing chain and wrap it for fastai.

    The chain is load_ben_color -> local_avg -> circle_crop; the result is
    converted to a float32 tensor and scaled by 1/255. ``convert_mode`` and
    ``after_open`` are accepted but not used.
    """
    preprocessed = circle_crop(local_avg(load_ben_color(fn)))
    scaled = pil2tensor(preprocessed, np.float32).div_(255)
    return Image(scaled)

# Swap the image opener in fastai's vision.data module for the custom one above.
vision.data.open_image = open_aptos2019_image

In [None]:
# Assemble the DataBunch step by step:
#   1. hold out a random 20% of the training images for validation (seeded),
#   2. read the 'diagnosis' column as float labels (FloatList),
#   3. attach the test image list,
#   4. apply get_transforms() with its default arguments at SIZE x SIZE,
#   5. batch by 64 and normalise with imagenet_stats.
data = (
    train.split_by_rand_pct(0.20, seed=SEED)
    .label_from_df(cols='diagnosis', label_cls=FloatList)
    .add_test(test)
    .transform(get_transforms(), size=SIZE)
    .databunch(path=Path('.'), bs=64).normalize(imagenet_stats)
)
# Visual sanity check of a 3x3 grid of samples.
data.show_batch(rows=3, figsize=(7,6))

In [None]:
# Create a ResNet-50 learner with pretrained weights, tracking quadratic kappa,
# then run the learning-rate finder and plot its curve with a suggested value.
learn = cnn_learner(data, models.resnet50, metrics=[quadratic_kappa], pretrained=True)
learn.lr_find()
learn.recorder.plot(suggestion=True)

In [None]:
# Train for 20 epochs with the one-cycle policy at a maximum learning rate of 1e-2,
# then plot the recorded losses and metric values.
learn.fit_one_cycle(20, 1e-2)
learn.recorder.plot_losses()
learn.recorder.plot_metrics()

In [None]:
# Raw (continuous) predictions and targets for the validation set; these feed the threshold search.
valid_preds, valid_y = learn.get_preds(ds_type=DatasetType.Valid)
# Raw predictions for the test set; the second return value is discarded.
test_preds, _ = learn.get_preds(ds_type=DatasetType.Test)

In [None]:
class OptimizedRounder(object):
    """Learn thresholds that turn continuous predictions into classes 0-4.

    ``fit`` tunes four thresholds with Nelder-Mead so that the
    quadratic-weighted Cohen's kappa against the given labels is maximised;
    ``predict`` applies thresholds to new predictions.
    """

    def __init__(self):
        # Replaced by the scipy optimisation result once fit() has run.
        self.coef_ = 0

    @staticmethod
    def _to_classes(X, coef):
        """Map every value in ``X`` to a class according to ``coef``.

        Rules, evaluated in this order (first match wins):
        ``x < coef[0]`` -> 0, ``coef[0] <= x < coef[1]`` -> 1,
        ``coef[1] <= x < coef[2]`` -> 2, ``coef[2] <= x < coef[3]`` -> 3,
        anything else -> 4.

        Returns a new array with the shape and dtype of ``X``; ``X`` itself
        is not modified.
        """
        values = np.asarray(X)
        conditions = [
            values < coef[0],
            (values >= coef[0]) & (values < coef[1]),
            (values >= coef[1]) & (values < coef[2]),
            (values >= coef[2]) & (values < coef[3]),
        ]
        # np.select keeps the first matching condition, which mirrors an
        # if/elif chain even when the optimiser proposes unsorted thresholds.
        return np.select(conditions, [0, 1, 2, 3], default=4).astype(values.dtype)

    def _kappa_loss(self, coef, X, y):
        """Negative quadratic kappa of ``X`` thresholded with ``coef`` against ``y``."""
        X_p = self._to_classes(X, coef)
        ll = cohen_kappa_score(y, X_p, weights='quadratic')
        return -ll

    def fit(self, X, y):
        """Search for the thresholds that maximise kappa on ``(X, y)``.

        Starts from ``[0.5, 1.5, 2.5, 3.5]`` and stores the optimisation
        result in ``self.coef_``.
        """
        # Imported here so the class does not depend on a star import for
        # `partial`, nor on scipy.optimize having been loaded elsewhere.
        from functools import partial
        from scipy import optimize

        loss_partial = partial(self._kappa_loss, X=X, y=y)
        initial_coef = [0.5, 1.5, 2.5, 3.5]
        self.coef_ = optimize.minimize(loss_partial, initial_coef, method='nelder-mead')

    def predict(self, X, coef=None):
        """Threshold ``X`` into classes.

        Args:
            X: Continuous predictions (array-like).
            coef: Four thresholds; when omitted, the fitted ones are used.

        Returns:
            Array with the shape and dtype of ``X`` holding class values 0-4.
        """
        if coef is None:
            coef = self.coefficients()
        return self._to_classes(X, coef)

    def coefficients(self):
        """Return the fitted thresholds (the ``'x'`` entry of the optimisation result)."""
        return self.coef_['x']

In [None]:
# Fit the class thresholds on the validation predictions and keep the tuned values.
optR = OptimizedRounder()
optR.fit(valid_preds, valid_y)
coefficients = optR.coefficients()

# Threshold both prediction sets; [:,0] takes the first column of the result
# before the cast to int.
valid_predictions = optR.predict(valid_preds, coefficients)[:,0].astype(int)
test_predictions = optR.predict(test_preds, coefficients)[:,0].astype(int)

# Quadratic kappa of the thresholded validation predictions. The thresholds were
# tuned on this same validation set, so the score is measured on the data they were fitted to.
valid_score = cohen_kappa_score(valid_y.numpy().astype(int), valid_predictions, weights="quadratic")

In [None]:
# Report the tuned thresholds and the resulting validation kappa.
print("coefficients:", coefficients)
print("validation score:", valid_score)

In [None]:
# Overwrite the existing 'diagnosis' column of the sample submission with the predicted classes.
df_test.diagnosis = test_predictions
# NOTE(review): index=None is falsy, so presumably no index column is written;
# index=False is the conventional spelling — confirm and consider switching.
df_test.to_csv("submission.csv", index=None)
df_test.head()

In [None]:
# Histogram of the predicted classes; the trailing ';' hides the plot object's text repr.
df_test.diagnosis.hist();