Credit to Lex and his references
https://www.kaggle.com/lextoumbourou/blindness-detection-resnet34-ordinal-targets

In [None]:
%matplotlib inline  
# Ignore  the warnings
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

# data visualisation and manipulation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns

# style.use('fivethirtyeight')
# sns.set(style='whitegrid',color_codes=True)

from sklearn.metrics import confusion_matrix
from fastai import *
from fastai.vision import *
from fastai.callbacks import *

# specifically for manipulating zipped images and getting numpy arrays of pixel values of images.
import cv2                  
import numpy as np  
from tqdm import tqdm
import os                   
from random import shuffle  
from zipfile import ZipFile
from PIL import Image
from sklearn.utils import shuffle

print(os.listdir("../input"))

In [None]:
# def seed_everything(seed):
#     random.seed(seed)
#     os.environ['PYTHONHASHSEED'] = str(seed)
#     np.random.seed(seed)
#     torch.manual_seed(seed)
#     torch.cuda.manual_seed(seed)
#     torch.backends.cudnn.deterministic = True

# SEED = 88
# seed_everything(SEED)

In [None]:
# Copy the pretrained resnet50 weights (the model used below is resnet50, not
# resnet34) into the checkpoint folder fastai will search by default, under the
# hashed file name 'resnet50-19c8e357.pth'.
# NOTE(review): presumably this avoids a download at model-creation time — confirm.
Path('/tmp/.cache/torch/checkpoints/').mkdir(exist_ok=True, parents=True)
!cp '../input/resnet50/resnet50.pth' '/tmp/.cache/torch/checkpoints/resnet50-19c8e357.pth'

In [None]:
# Load the competition's train and test tables.
df_train = pd.read_csv('../input/aptos2019-blindness-detection/train.csv')
df_test = pd.read_csv('../input/aptos2019-blindness-detection/test.csv')

# Convenience views on the training table: image ids and diagnosis grades.
x_train = df_train['id_code']
y_train = df_train['diagnosis']

In [None]:
df_train.head()

In [None]:
df_train.diagnosis.hist()

In [None]:
import torch
import torch.utils.data
import torchvision

In [None]:
print(os.listdir("../input/aptos2019-blindness-detection/")) 

In [None]:
def get_label(diagnosis):
    """Encode an ordinal grade as the comma-separated grades ``0..diagnosis``.

    For example grade 2 becomes ``'0,1,2'``, so a higher grade also carries
    every lower grade as a label.
    """
    grades = map(str, range(diagnosis + 1))
    return ','.join(grades)

In [None]:
df_train['label'] = df_train.diagnosis.apply(get_label)

In [None]:
df_train.head(10)
torch.cuda.manual_seed_all(13)

In [None]:
# Augmentation pipeline handed to ImageDataBunch.from_df via `ds_tfms`.
# `tfms` is a pair of lists:
#   * first list  - crop/pad with reflection padding, random RGB perturbation of
#                   channels 2 and 1, dihedral flips, rotation within +/-15
#                   degrees, zoom up to 1.05x, brightness and contrast changes;
#   * second list - a single crop/pad transform.
# NOTE(review): presumably the first list is applied to the training set and the
# second to the validation set (fastai `ds_tfms` convention) — confirm.
tfms = ([RandTransform(tfm=TfmCrop (crop_pad), kwargs={'row_pct': (0.4, 1), 'col_pct': (0.1, 0.9), 'padding_mode': 'reflection'}, p=1.0, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmPixel (rgb_randomize), kwargs={'channel':2, 'thresh':0.1}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmPixel (rgb_randomize), kwargs={'channel':1, 'thresh':0.1}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmAffine (dihedral_affine), kwargs={}, p=1.0, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmAffine (rotate), kwargs={'degrees': (-15.0, 15.0)}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmAffine (zoom), kwargs={'scale': (1.0, 1.05), 'row_pct': (0, 1), 'col_pct': (0, 1)}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmLighting (brightness), kwargs={'change': (0.4, 0.6)}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True),
  RandTransform(tfm=TfmLighting (contrast), kwargs={'scale': (0.8, 1.25)}, p=0.75, resolved={}, do_run=True, is_random=True, use_on_y=True)],
 [RandTransform(tfm=TfmCrop (crop_pad), kwargs={}, p=1.0, resolved={}, do_run=True, is_random=True, use_on_y=True)])

In [None]:
# Build the image DataBunch from the training table.
# Targets are multi-label: the comma-delimited 'label' column built by get_label.
data = ImageDataBunch.from_df(
    './',
    df=df_train,
    folder="../input/aptos2019-blindness-detection/train_images",
    suffix=".png",
    label_col='label',
    label_delim=',',
    valid_pct=0.25,
    ds_tfms=tfms,
    size=224,
    bs=156,
    num_workers=32,
)
# Normalise the images with the ImageNet statistics.
data = data.normalize(imagenet_stats)

In [None]:
# check classes
print(f'Classes: \n {data.classes}')

In [None]:
# show some sample images
# data.show_batch(rows=3, figsize=(7,6))

In [None]:
def get_preds(arr):
    """Decode ordinal multi-hot rows into integer grades.

    Each row of ``arr`` holds one on/off flag per grade (booleans or 0/1
    values). The decoded grade is the number of leading non-zero flags minus
    one, floored at 0: ``[1, 1, 0, 0, 1] -> 1``, ``[1, 1, 1, 1, 1] -> 4`` and
    ``[0, 0, 0, 0, 1] -> 0``. Flags after the first zero are ignored.

    The number of grades is read from ``arr.shape[1]`` rather than being
    hard-coded to 5, so the function also decodes inputs with a different
    number of label columns; behaviour for 5 columns is unchanged.

    Args:
        arr: 2-D array-like of shape (n_samples, n_grades).

    Returns:
        1-D integer ``numpy`` array of length n_samples with values in
        ``[0, n_grades - 1]``.
    """
    arr = np.asarray(arr)
    n_classes = arr.shape[1]
    mask = arr == 0
    # Position of the first "off" flag equals the count of leading "on" flags;
    # rows without any "off" flag have all n_classes flags on.
    leading_on = np.where(mask.any(axis=1), mask.argmax(axis=1), n_classes)
    return np.clip(leading_on - 1, 0, n_classes - 1)

In [None]:
last_output = torch.tensor([
    [1.7226, 1.7226, 1.7226, 1.7226, 1.7226],
    [0, 0, 0, 0, 1.7226],
    [0.12841, -7.6266, -6.3899, -2.1333, -0.48995],
    [0.68119, 1.7226, -1.9895, -0.097746, 0.53576]
])
arr = (torch.sigmoid(last_output) > 0.5).numpy(); arr

In [None]:
# Test output
assert (get_preds(arr) == np.array([4, 0, 0, 1])).all()

In [None]:
class ConfusionMatrix(Callback):
    "Accumulates a confusion matrix over an epoch from ordinal multi-label outputs."

    def on_train_begin(self, **kwargs):
        # 0 means "number of classes not yet known"; it is filled in on the first batch.
        self.n_classes = 0

    def on_epoch_begin(self, **kwargs):
        # Start every epoch with an empty accumulator.
        self.cm = None

    def on_batch_end(self, last_output:Tensor, last_target:Tensor, **kwargs):
        # Threshold the sigmoid activations at 0.5 and decode both predictions
        # and targets to integer grades with `get_preds`.
        fired = (torch.sigmoid(last_output) > 0.5).cpu().numpy()
        preds = torch.tensor(get_preds(fired))
        targs = torch.tensor(get_preds(last_target.cpu().numpy()))

        if self.n_classes == 0:
            # The class count is the size of the last output dimension.
            self.n_classes = last_output.shape[-1]
            self.x = torch.arange(0, self.n_classes)

        # Broadcast to (target class, predicted class, sample) and count samples.
        pred_is = preds == self.x[:, None]
        targ_is = targs == self.x[:, None, None]
        cm = (pred_is & targ_is).sum(dim=2, dtype=torch.float32)
        if self.cm is None:
            self.cm = cm
        else:
            self.cm += cm

    def on_epoch_end(self, **kwargs):
        self.metric = self.cm
        

@dataclass
class KappaScore(ConfusionMatrix):
    "Cohen's kappa (rate of agreement) computed from the epoch's confusion matrix."
    weights:Optional[str]=None      # None, `linear`, or `quadratic`

    def on_epoch_end(self, last_metrics, **kwargs):
        # Marginal totals of the confusion matrix and the counts expected by chance.
        totals0 = self.cm.sum(dim=0)
        totals1 = self.cm.sum(dim=1)
        expected = torch.einsum('i,j->ij', (totals0, totals1)) / totals0.sum()

        n = self.n_classes
        if self.weights is None:
            # Unweighted: every off-diagonal cell counts as one full disagreement.
            w = torch.ones((n, n))
            w[self.x, self.x] = 0
        elif self.weights in ("linear", "quadratic"):
            # Penalty grows with the distance between the two class indices.
            grid = torch.zeros((n, n)) + torch.arange(n, dtype=torch.float)
            delta = grid - grid.t()
            w = delta.abs() if self.weights == "linear" else delta ** 2
        else:
            raise ValueError('Unknown weights. Expected None, "linear", or "quadratic".')

        # Ratio of observed to chance-expected weighted disagreement.
        k = (w * self.cm).sum() / (w * expected).sum()
        return add_metrics(last_metrics, 1-k)

In [None]:
# from torch.utils.data.sampler import WeightedRandomSampler
# class OverSamplingCallback(LearnerCallback):
#     def __init__(self,learn:Learner):
#         super().__init__(learn)
#         self.labels = self.learn.data.train_dl.dataset.y.items
#         _, counts = np.unique(self.labels,return_counts=True)
#         self.weights = torch.DoubleTensor((1/counts)[self.labels])
#         self.label_counts = np.bincount([self.learn.data.train_dl.dataset.y[i].data \
#                                          for i in range(len(self.learn.data.train_dl.dataset))])
#         self.total_len_oversample = int(self.learn.data.c*np.max(self.label_counts))
        
#     def on_train_begin(self, **kwargs):
#         self.learn.data.train_dl.dl.batch_sampler = \
#         BatchSampler(WeightedRandomSampler(self.weights,self.total_len_oversample), self.learn.data.train_dl.batch_size,False)

In [None]:
import math
import torch
from torch.optim.optimizer import Optimizer, required
import itertools as it
#from torch.optim import Optimizer
#credit - Lookahead implementation from LonePatient - https://github.com/lonePatient/lookahead_pytorch/blob/master/optimizer.py
#credit2 - RAdam code by https://github.com/LiyuanLucasLiu/RAdam/blob/master/radam.py


class Ranger(Optimizer):
    """RAdam update rule combined with Lookahead slow-weight averaging.

    Every call to :meth:`step` performs one rectified-Adam (RAdam) update on
    the parameters. Every ``k``-th call additionally moves a stored copy of the
    parameters (the "slow" weights) a fraction ``alpha`` towards the current
    parameters and then overwrites the parameters with that copy.

    The slow weights live on the instance (``self.slow_weights``), not in
    ``self.state``.

    Args:
        params: iterable of parameters or of parameter-group dicts.
        lr: learning rate; must be > 0.
        alpha: Lookahead interpolation factor; must be in [0, 1].
        k: number of steps between Lookahead updates; must be >= 1.
        betas: decay rates of the first and second moment running averages.
        eps: term added to the denominator; must be > 0.
        weight_decay: decay factor; each step subtracts
            ``weight_decay * lr * p`` from every parameter ``p``.

    Raises:
        ValueError: if ``alpha``, ``k``, ``lr`` or ``eps`` is out of range.
    """

    def __init__(self, params, lr=1e-2, alpha=0.5, k=8, betas=(.9,0.999), eps=1e-8, weight_decay=0.1):
        # parameter checks
        if not 0.0 <= alpha <= 1.0:
            raise ValueError(f'Invalid slow update rate: {alpha}')
        if not 1 <= k:
            raise ValueError(f'Invalid lookahead steps: {k}')
        if not lr > 0:
            raise ValueError(f'Invalid Learning Rate: {lr}')
        if not eps > 0:
            raise ValueError(f'Invalid eps: {eps}')

        # prep defaults and init torch.optim base
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        super().__init__(params, defaults)

        # per-group counter used to decide when the Lookahead update fires
        for group in self.param_groups:
            group["step_counter"] = 0

        # look ahead params
        self.alpha = alpha
        self.k = k

        # RAdam cache: 10 slots of [key, N_sma, step_size], indexed by step % 10.
        # The key is (step, lr, beta1, beta2) so a cached step size is only
        # reused when every quantity it was derived from matches.
        self.radam_buffer = [[None, None, None] for _ in range(10)]

        # lookahead (slow) weights: one detached copy per parameter
        self.slow_weights = [[p.clone().detach() for p in group['params']]
                             for group in self.param_groups]

        # don't use grad for lookahead weights
        for w in it.chain(*self.slow_weights):
            w.requires_grad = False

    def __setstate__(self, state):
        print("set state called")
        super(Ranger, self).__setstate__(state)

    def step(self, closure=None):
        """Run one RAdam update and, every ``k`` calls, a Lookahead update.

        Args:
            closure: accepted for signature compatibility with
                ``Optimizer.step`` but intentionally never called.

        Returns:
            Always ``None``; no loss is evaluated here.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        # NOTE: the closure is deliberately ignored. The original author passes
        # the loss back as a float elsewhere, i.e. not as a callable closure.
        # Call `closure()` here if a real closure is ever needed.

        # ------------ radam
        for group in self.param_groups:
            lr = group['lr']
            beta1, beta2 = group['betas']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                # All arithmetic is done on an fp32 copy and written back at the end.
                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']

                # Running averages of the squared gradient and of the gradient.
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                state['step'] += 1
                step = state['step']

                # step_size depends on lr and the betas, which may differ between
                # parameter groups, so they are part of the cache key. Keying on
                # the step count alone would hand the first group's step size to
                # every other group.
                cache_key = (step, lr, beta1, beta2)
                buffered = self.radam_buffer[int(step % 10)]
                if buffered[0] == cache_key:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = cache_key
                    beta2_t = beta2 ** step
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * step * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma
                    if N_sma > 5:
                        # Rectified step size.
                        step_size = lr * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** step)
                    else:
                        step_size = lr / (1 - beta1 ** step)
                    buffered[2] = step_size

                if group['weight_decay'] != 0:
                    # Weight decay applied directly to the weights, scaled by lr.
                    p_data_fp32.add_(p_data_fp32, alpha=-group['weight_decay'] * lr)

                if N_sma > 5:
                    # Adaptive update using the second-moment estimate.
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(exp_avg, denom, value=-step_size)
                else:
                    # Un-adapted update using only the first-moment estimate.
                    p_data_fp32.add_(exp_avg, alpha=-step_size)

                p.data.copy_(p_data_fp32)

        # ---------------- end radam step

        # look ahead tracking and updating if latest batch = k
        for group, slow_weights in zip(self.param_groups, self.slow_weights):
            group['step_counter'] += 1
            if group['step_counter'] % self.k != 0:
                continue
            for p, q in zip(group['params'], slow_weights):
                if p.grad is None:
                    continue
                # slow <- slow + alpha * (fast - slow); fast <- slow
                q.data.add_(p.data - q.data, alpha=self.alpha)
                p.data.copy_(q.data)

        return loss

optar = partial(Ranger)

In [None]:
kappa = KappaScore(weights="quadratic")

# Build the learner on a resnet50 backbone with the Ranger optimizer.
# Early stopping and checkpointing both monitor 'kappa_score'; the best
# checkpoint is saved under the name 'bestordinal' in /tmp.
learn = cnn_learner(
    data,
    models.resnet50,
    metrics=[kappa, accuracy_thresh],
    opt_func=optar,
    callback_fns=[
        partial(EarlyStoppingCallback, monitor='kappa_score', min_delta=0.001, patience=3),
        partial(ReduceLROnPlateauCallback),
#       partial(GradientClipping, clip=0.2),
        partial(SaveModelCallback, every='improvement', monitor='kappa_score', name='bestordinal'),
    ],
    model_dir="/tmp",
)
# Switch to mixed-precision training, then enable mixup augmentation.
learn = learn.to_fp16()
learn = learn.mixup(stack_y=False)

In [None]:
# Run the learning-rate range test and plot the recorded loss curve.
learn.lr_find()
learn.recorder.plot()
lrs = learn.recorder.lrs
losses = learn.recorder.losses
# Index where the loss falls most steeply.
mg = (np.gradient(np.array(losses))).argmin()
# Index of the lowest loss, ignoring the very first recorded value.
# argmin runs over losses[1:], so add 1 to get an index that is valid for
# the full `lrs`/`losses` lists (previously the lr one step too early was used).
ml = np.argmin(losses[1:]) + 1
min_grad_lr = lrs[mg]
print(min_grad_lr)
# Heuristic: one tenth of the learning rate at which the loss bottomed out.
min_loss_lr = lrs[ml]/10
print(min_loss_lr)

In [None]:
# First training stage: 10 one-cycle epochs at the learning rate derived from
# the range test (min_loss_lr), run before the model is unfrozen.
lr = min_loss_lr
learn.fit_one_cycle(10, lr)
# Re-seed the CUDA random number generators on all devices.
torch.cuda.manual_seed_all(18)

In [None]:
# data = ImageDataBunch.from_df('./', 
#                               df=df_train, 
#                               valid_pct=0.2,
#                               folder="../input/aptos2019-blindness-detection/train_images",
#                               suffix=".png",
#                               ds_tfms=tfms,
#                               size=224,
#                               bs=128, 
#                               num_workers=32,
#                              label_col='label', label_delim=',').normalize(imagenet_stats)

In [None]:
# learn.data = data
# learn.to_fp16()
# Second training stage: unfreeze the model and search for a new learning rate.
learn.unfreeze()
# The original line was `learn.lr_find` without parentheses, which only
# references the method. No range test ran, and the values below were read from
# the recorder of the previous training run instead.
learn.lr_find()
lrs = learn.recorder.lrs
losses = learn.recorder.losses
# Index where the loss falls most steeply.
mg = (np.gradient(np.array(losses))).argmin()
# Index of the lowest loss, ignoring the very first recorded value.
# argmin runs over losses[1:], so add 1 to index the full `lrs` list correctly.
ml = np.argmin(losses[1:]) + 1
min_grad_lr = lrs[mg]
print(min_grad_lr)
# Heuristic: one tenth of the learning rate at which the loss bottomed out.
min_loss_lr = lrs[ml]/10
print(min_loss_lr)
lr2 = min_loss_lr
# Kept from the original: unfreeze again right before training.
# NOTE(review): presumably harmless/idempotent after the range test — confirm.
learn.unfreeze()
learn.fit_one_cycle(10, max_lr = lr2)

In [None]:
# #Adding Weight decay for regulization
# learn.fit_one_cycle(5,wd=1e-1)
# learn.save('stage-1')

In [None]:
# learn.unfreeze()
# learn.lr_find()
# learn.recorder.plot()

In [None]:
learn.load('bestordinal')

In [None]:
# Third training stage: freeze the model again and train 15 more one-cycle
# epochs at 1/50 of the stage-2 learning rate (lr2) with weight decay 0.1.
learn.freeze()
learn.fit_one_cycle(15, max_lr=lr2/50,wd=1e-1)

In [None]:
learn.load('bestordinal')
sample_df = pd.read_csv('../input/aptos2019-blindness-detection/sample_submission.csv')
sample_df.head()

In [None]:
learn.data.add_test(ImageList.from_df(sample_df,'../input/aptos2019-blindness-detection',folder='test_images',suffix='.png'))

In [None]:
preds, y = learn.get_preds(DatasetType.Test)

In [None]:
preds

In [None]:
sample_df.diagnosis = get_preds((preds > 0.5).cpu().numpy())
sample_df.diagnosis.value_counts()

In [None]:
sample_df.head(10)

In [None]:
sample_df.to_csv('submission.csv',index=False)

In [None]:
# Move the saved checkpoints (*.pth) from the learner's model directory into
# the current working directory, then list the working directory's contents.
!mv {learn.model_dir}/*.pth .
os.listdir()