In [None]:
import cv2
import matplotlib.pyplot as plt
from os.path import isfile
import torch.nn.init as init
import torch
import torch.nn as nn
import numpy as np
import pandas as pd 
import os
from PIL import Image, ImageFilter
#print(os.listdir("../input"))
from sklearn.model_selection import train_test_split, StratifiedKFold
from torch.utils.data import Dataset
from torchvision import transforms
from torch.optim import Adam, SGD, RMSprop
import time
from torch.autograd import Variable
import torch.functional as F
from tqdm import tqdm
from sklearn import metrics
import urllib
import pickle
import cv2
import torch.nn.functional as F
from torchvision import models
#import seaborn as sns
import random
from apex import amp
import sys

In [None]:
# Print the version of the Python interpreter running this kernel.
from platform import python_version
print(python_version())

In [None]:
# Show the kernel's working directory; relative paths used in this notebook
# (e.g. the checkpoint file name) resolve against it.
os.getcwd()

In [None]:
from efficientnet_pytorch import EfficientNet

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    every CUDA device), exports ``PYTHONHASHSEED`` and puts cuDNN into
    deterministic mode with autotuning disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so this only affects
    # processes launched from here, not the already-running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Cover multi-GPU machines as well as the current device.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode may pick different convolution algorithms from run to
    # run, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

In [None]:
# Global configuration for the run.
# One output unit: the network is trained further down with MSELoss, i.e. the
# diagnosis grade is predicted as a single continuous value.
num_classes = 1
# Seed all RNGs before any data splitting or model creation happens.
seed_everything(1234)
# Learning rate handed to the Adam optimizer.
lr          = 1e-4
# EfficientNet variant; also determines the input resolution below.
efficientnet_arch = 'efficientnet-b4'
# Side length (pixels) the library reports for this architecture; every image
# is resized to IMG_SIZE x IMG_SIZE in the dataset class.
IMG_SIZE    = EfficientNet.get_image_size(efficientnet_arch)

In [None]:
# Show the input resolution selected for the chosen architecture.
print(IMG_SIZE)

In [None]:
# Image directories. The trailing slash matters: expand_path builds file names
# by plain string concatenation (directory + id + extension).
# NOTE(review): absolute, machine-specific paths — consider a configurable base dir.
train      = '/data/kaggle/aptos/train/images/'
train_2015      = '/data/kaggle/aptos/trainold/resized_train_cropped/'
#valid      = '/data/aptos/train/images/'
test       = '/data/kaggle/aptos/test/images/'

# train_csv  = pd.read_csv('../input/aptos2019-blindness-detection/train.csv')

## train_csv = pd.read_csv("/data/aptos/train.csv")
# Labelled rows; the code below reads its `id_code` and `diagnosis` columns.
# Presumably "trainboth" combines the current and the 2015 data — confirm.
train_csv = pd.read_csv("/data/kaggle/aptos/trainboth.csv")
#valdata = pd.read_csv("/data/aptos/valboth.csv")
# Rows to predict for; only used to build the test dataset/loader below.
test_df = pd.read_csv("/data/kaggle/aptos/test.csv")

In [None]:
# train      = '../input/aptos2019-blindness-detection/train_images/'
# test       = '../input/aptos2019-blindness-detection/test_images/'
# train_csv  = pd.read_csv('../input/aptos2019-blindness-detection/train.csv')

In [None]:
# Hold out 10% of the labelled rows for validation, stratified on the
# diagnosis grade, and give both parts a fresh 0..n-1 index.
train_df, val_df = (
    part.reset_index(drop=True)
    for part in train_test_split(
        train_csv,
        test_size=0.1,
        random_state=2018,
        stratify=train_csv.diagnosis,
    )
)
train_df.head()

In [None]:
# Relative frequency of each diagnosis grade in the training split.
train_df['diagnosis'].value_counts(normalize=True)

In [None]:
# Relative frequency of each diagnosis grade in the validation split; should
# mirror the training split because the split was stratified.
val_df['diagnosis'].value_counts(normalize=True)

In [None]:
# Give the test frame a clean 0..n-1 index and preview its first rows.
test_df = test_df.reset_index(drop=True)
test_df.head()

In [None]:
def expand_path(p):
    """Resolve an image id to the path of its file on disk.

    The id is converted to ``str`` and looked up, in this order, as
    ``<id>.png`` under ``train``, ``<id>.jpeg`` under ``train_2015`` and
    ``<id>.png`` under ``test``. The first existing file wins.

    Args:
        p: Image id (anything ``str()`` accepts).

    Returns:
        The path of the first matching file, or the stringified id itself
        when none of the candidates exists.
    """
    stem = str(p)
    candidates = (
        train + stem + ".png",
        train_2015 + stem + ".jpeg",
        test + stem + ".png",
    )
    for candidate in candidates:
        if isfile(candidate):
            return candidate
    return stem

def p_show(imgs, label_name=None, per_row=3):
    """Plot a grid of images, each titled with its diagnosis from ``train_df``.

    Args:
        imgs: Sequence of image ids; each one is resolved with ``expand_path``.
        label_name: Not used; kept so existing calls keep working.
        per_row: Maximum number of images per grid row.
    """
    n = len(imgs)
    if n == 0:
        # plt.subplots cannot build a grid with zero columns.
        return
    rows = (n + per_row - 1)//per_row
    cols = min(per_row, n)
    # squeeze=False always returns a 2-D array of Axes, so a single image
    # (1x1 grid) no longer breaks the .flatten() call.
    fig, axes = plt.subplots(rows, cols, figsize=(15,15), squeeze=False)
    flat_axes = axes.flatten()
    for ax in flat_axes:
        ax.axis('off')
    for p, ax in zip(imgs, flat_axes):
        # Close the file handle as soon as the pixels have been handed over.
        with Image.open(expand_path(p)) as img:
            ax.imshow(img)
        # Title is the array of diagnosis values of the rows matching this id.
        ax.set_title(train_df[train_df.id_code == p].diagnosis.values)

In [None]:
# Preview the first 16 training images together with their labels.
imgs = train_df.id_code.head(16).tolist()
p_show(imgs)

In [None]:
#The Code from: https://www.kaggle.com/ratthachat/aptos-updated-albumentation-meets-grad-cam

def crop_image1(img,tol=7):
    """Keep only the rows and columns holding a pixel brighter than ``tol``.

    Args:
        img: 2-D image array.
        tol: Brightness threshold; a pixel counts as content when it is
            strictly greater than this value.

    Returns:
        The sub-array made of every row and every column that contains at
        least one content pixel.
    """
    bright = img > tol
    keep_rows = bright.any(axis=1)
    keep_cols = bright.any(axis=0)
    return img[np.ix_(keep_rows, keep_cols)]

def crop_image_from_gray(img,tol=7):
    """Drop the rows and columns of an image that are dark everywhere.

    A 2-D image is thresholded directly. A 3-D image is converted to gray
    with ``cv2.COLOR_RGB2GRAY`` to build the mask, and the same rows/columns
    are then selected from channels 0, 1 and 2.

    Args:
        img: Image array with 2 or 3 dimensions.
        tol: Brightness threshold; pixels strictly above it count as content.

    Returns:
        The cropped image. For a 3-D input where nothing exceeds ``tol`` the
        original image is returned unchanged. Inputs with any other number
        of dimensions yield ``None``.
    """
    if img.ndim == 2:
        bright = img > tol
        return img[np.ix_(bright.any(1), bright.any(0))]

    if img.ndim == 3:
        bright = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) > tol
        window = np.ix_(bright.any(1), bright.any(0))
        # No row survives: the image is so dark that cropping would remove
        # everything, so hand back the input untouched.
        if img[:, :, 0][window].shape[0] == 0:
            return img
        return np.stack([img[:, :, ch][window] for ch in range(3)], axis=-1)

    return None

In [None]:
class MyDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs for the rows of a dataframe.

    For each row the ``id_code`` is resolved to a file with ``expand_path``.
    The image is read with OpenCV, converted from BGR to RGB, cropped with
    ``crop_image_from_gray``, resized to ``IMG_SIZE`` x ``IMG_SIZE``,
    converted to a PIL image and finally passed through ``transform``.

    Args:
        dataframe: Frame with an ``id_code`` column and, optionally, a
            ``diagnosis`` column.
        transform: Optional callable applied to the PIL image.
    """

    # Placeholder label for frames that carry no ``diagnosis`` column.
    _MISSING_LABEL = -1

    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load, preprocess and return the sample at position ``idx``.

        Returns:
            ``(image, label)`` where ``label`` is a length-1 array holding the
            row's diagnosis, or ``_MISSING_LABEL`` when the frame has no
            ``diagnosis`` column.

        Raises:
            FileNotFoundError: If OpenCV cannot read the resolved path.
            RuntimeError: If the cropped image cannot be resized.
        """
        # Unlabelled frames would otherwise fail with an AttributeError here.
        if 'diagnosis' in self.df.columns:
            label = self.df.diagnosis.values[idx]
        else:
            label = self._MISSING_LABEL
        label = np.expand_dims(label, -1)

        p = self.df.id_code.values[idx]
        p_path = expand_path(p)
        image = cv2.imread(p_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError("unable to read image: {}".format(p_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = crop_image_from_gray(image)
        try:
            image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        except Exception as err:
            # Fail here with the offending path rather than letting a
            # wrongly sized image break batching later on.
            raise RuntimeError("unable to resize image: {}".format(p_path)) from err
#        image = cv2.addWeighted ( image,4, cv2.GaussianBlur( image , (0,0) , 30) ,-4 ,128)
        image = transforms.ToPILImage()(image)

        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
# Training-time augmentation: random horizontal flip and a random rotation
# within [-120, 120] degrees, followed by tensor conversion and per-channel
# normalisation.
# NOTE(review): the mean/std values look like the ImageNet statistics that go
# with the pretrained weights — confirm.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation((-120, 120)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

In [None]:
# Evaluation-time preprocessing: no random augmentation, only tensor
# conversion and the same normalisation as in training.
val_transform = transforms.Compose([
#    transforms.RandomHorizontalFlip(),
#    transforms.RandomRotation((-120, 120)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

In [None]:
# Training data: augmented and reshuffled every epoch.
trainset     = MyDataset(train_df, transform =train_transform)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=12)
#valset       = MyDataset(val_df, transform   =train_transform)
# Validation data: deterministic preprocessing and fixed order, so model
# outputs line up row-by-row with val_df.
valset       = MyDataset(val_df, transform   =val_transform)
val_loader   = torch.utils.data.DataLoader(valset, batch_size=32, shuffle=False, num_workers=12)

In [None]:
# Test data: same deterministic preprocessing as validation, fixed order.
# NOTE(review): MyDataset reads a `diagnosis` column for every item — confirm
# test_df has one before iterating this loader.
testset       = MyDataset(test_df, transform   =val_transform)
test_loader   = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=12)

In [None]:
#model = EfficientNet.from_name('efficientnet-b0')
#model = EfficientNet.from_name('efficientnet-b4')
#model.load_state_dict(torch.load('/data/models/efficientnet/efficientnet-b3-5fb5a3c3.pth'))
# model.load_state_dict(torch.load('../input/efficientnet-pytorch/efficientnet-b0-08094119.pth'))
#model.load_state_dict(torch.load('/data/models/efficientnet/efficientnet-b0-08094119.pth'))
#model.load_state_dict(torch.load('/data/models/efficientnet/efficientnet-b5-586e6cc6.pth'))
#model.load_state_dict(torch.load('/data/models/efficientnet/efficientnet-b4-e116e8b3.pth'))

#in_features = model._fc.in_features
#model._fc = nn.Linear(in_features, num_classes)
#model.cuda()
# Build the configured EfficientNet with pretrained weights and a head of
# `num_classes` outputs, then move it to the GPU.
model = EfficientNet.from_pretrained(efficientnet_arch, num_classes=num_classes)
model.cuda()

In [None]:
# Adam over all model parameters with a small weight decay.
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
# Mean squared error between the single model output and the diagnosis grade.
criterion = nn.MSELoss()
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
# Hand model and optimizer to apex AMP (opt_level "O1"); the training step
# must then scale the loss through amp.scale_loss before calling backward.
model, optimizer = amp.initialize(model, optimizer, opt_level="O1",verbosity=0)

In [None]:
def train_model(epoch):
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``optimizer`` and ``criterion`` and
    scales the loss through apex AMP before back-propagation. Progress is
    printed every 10 steps.

    Args:
        epoch: Index of the current epoch (accepted but not used).

    Returns:
        The mean of the per-batch losses over the pass.
    """
    model.train()

    n_batches = len(train_loader)
    running_loss = 0.
    optimizer.zero_grad()
    for step, (batch_imgs, batch_labels) in enumerate(train_loader):
        if step > 0 and step % 10 == 0:
            print("step: ", step, " / ", n_batches)

        inputs = batch_imgs.cuda()
        gt = batch_labels.float().cuda()
        loss = criterion(model(inputs), gt)

        # AMP scales the loss so that reduced-precision gradients do not underflow.
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        running_loss += loss.item() / n_batches

    return running_loss

def test_model():
    """Evaluate the module-level ``model`` on ``val_loader``.

    Runs in eval mode with gradients disabled.

    Returns:
        The mean of the per-batch ``criterion`` values over the loader.
    """
    model.eval()

    n_batches = len(val_loader)
    total = 0.
    with torch.no_grad():
        for batch_imgs, batch_labels in val_loader:
            inputs = batch_imgs.cuda()
            gt = batch_labels.float().cuda()
            total += criterion(model(inputs), gt).item() / n_batches

    return total

In [None]:
# Checkpoint file name; relative, so it is written to the working directory.
mfile = 'chkpoint.pt'

In [None]:
# Train for n_epochs and checkpoint the weights to `mfile` every time the
# validation loss improves on the best value seen so far.
# Starting value chosen so the first epoch is saved (assuming its loss is < 100).
best_avg_loss = 100.0
n_epochs      = 10

for epoch in range(n_epochs):
    
#     print('lr:', scheduler.get_lr()[0]) 
    # Prints the configured constant; no scheduler is active in this loop.
    print('lr:', lr) 
    start_time   = time.time()
    avg_loss     = train_model(epoch)
    avg_val_loss = test_model()
    elapsed_time = time.time() - start_time 
    print('Epoch {}/{} \t loss={:.4f} \t val_loss={:.4f} \t time={:.2f}s'.format(
        epoch + 1, n_epochs, avg_loss, avg_val_loss, elapsed_time))
    
    # Keep only the weights with the lowest validation loss.
    if avg_val_loss < best_avg_loss:
        best_avg_loss = avg_val_loss
        torch.save(model.state_dict(), mfile)
    
#    scheduler.step()

In [None]:
# Store the final-epoch weights under their own name. Writing them to `mfile`
# would overwrite the lowest-validation-loss checkpoint saved by the training
# loop, which the following cell reloads as "the best model".
torch.save(model.state_dict(), 'last_' + mfile)

In [None]:
# load the best model
# Restores the weights stored in `mfile` into the current model.
# NOTE(review): this is the best-validation checkpoint only if `mfile` has not
# been rewritten since the training loop finished — verify.
model.load_state_dict(torch.load(mfile))

In [None]:
# next time this needs to go
# Rebuild the network without pretrained weights, attach a fresh head with
# `num_classes` outputs and restore the checkpoint into it. The architecture
# comes from `efficientnet_arch` so it always matches the model that was
# trained and saved.
model = EfficientNet.from_name(efficientnet_arch)
in_features = model._fc.in_features
model._fc = nn.Linear(in_features, num_classes)
model.load_state_dict(torch.load(mfile))
model.cuda()

In [None]:
# confirm the best score
# test_model()
# Same computation as test_model(), but the raw per-batch outputs are also
# collected so that rounding thresholds can be fitted on them afterwards.
model.eval()
# One output tensor per validation batch, in loader order.
outputlist = []
avg_val_loss = 0.
with torch.no_grad():
        for batch_idx, (data, target) in enumerate(val_loader):
            # move to GPU
        
            data, target = data.cuda(), target.float().cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            outputlist.append(output)
            # Accumulate the mean of the per-batch losses.
            avg_val_loss += criterion(output, target).item() / len(val_loader)

In [None]:
# Validation loss of the reloaded model.
avg_val_loss

In [None]:
def format_outputs(preds):
    """Flatten a list of batched output tensors into a list of Python numbers.

    Args:
        preds: Iterable of batches; each batch iterates over rows and each
            row iterates over scalar tensors.

    Returns:
        One Python number per scalar, in batch, row, element order.
    """
    return [
        value.cpu().item()
        for batch in preds
        for row in batch
        for value in row
    ]
# Replace the list of batch tensors with a flat list of Python floats.
# NOTE(review): rebinding `outputlist` makes this cell non-idempotent — running
# it a second time passes plain floats to format_outputs, which will fail.
outputlist=format_outputs(outputlist)

In [None]:
# Display the raw predictions (one value per validation row).
outputlist

In [None]:
#import numpy as np
#import pandas as pd
#import os
#import scipy as sp
#from functools import partial
#from sklearn import metrics
#from collections import Counter
#import json

In [None]:
from functools import partial
import scipy as sp

In [None]:
class OptimizedRounder(object):
    """Learn four cut points that turn continuous predictions into grades 0-4.

    ``fit`` runs a Nelder-Mead search for the thresholds that maximise the
    quadratic weighted Cohen's kappa between the thresholded predictions and
    the true labels. ``predict`` applies a given set of thresholds.
    """

    def __init__(self):
        # Placeholder; replaced by the scipy optimisation result in `fit`.
        self.coef_ = 0

    @staticmethod
    def _apply_thresholds(X, coef):
        """Bucket every prediction using the four thresholds in ``coef``.

        A value below ``coef[0]`` becomes 0, a value in
        ``[coef[k-1], coef[k])`` becomes ``k`` for k = 1..3, and anything
        else becomes 4. The result keeps the dtype of ``np.asarray(X)``.
        """
        values = np.asarray(X)
        conditions = [values < coef[0]]
        conditions += [
            (values >= coef[k - 1]) & (values < coef[k]) for k in (1, 2, 3)
        ]
        # np.select takes the first matching condition, like an if/elif chain.
        graded = np.select(conditions, [0, 1, 2, 3], default=4)
        return graded.astype(values.dtype)

    def _kappa_loss(self, coef, X, y):
        """Return the negated quadratic kappa of ``X`` thresholded by ``coef``."""
        X_p = self._apply_thresholds(X, coef)
        ll = metrics.cohen_kappa_score(y, X_p, weights='quadratic')
        return -ll

    def fit(self, X, y):
        """Search for the thresholds maximising kappa of ``X`` against ``y``.

        The optimisation result is stored in ``self.coef_``.
        """
        # Import the submodule explicitly: a plain `import scipy` does not
        # guarantee that `scipy.optimize` is available as an attribute.
        from scipy import optimize

        loss_partial = partial(self._kappa_loss, X=X, y=y)
        initial_coef = [0.5, 1.5, 2.5, 3.5]
        self.coef_ = optimize.minimize(loss_partial, initial_coef, method='nelder-mead')

    def predict(self, X, coef):
        """Return ``X`` converted to grades 0-4 using the thresholds ``coef``."""
        return self._apply_thresholds(X, coef)

    def coefficients(self):
        """Return the fitted thresholds; only valid after ``fit`` has run."""
        return self.coef_['x']

In [None]:
# True grades of the validation rows, in the same order as the (unshuffled)
# validation loader produced its predictions.
targets = val_df['diagnosis']

In [None]:
from sklearn.metrics import cohen_kappa_score
def quadratic_kappa(y_hat, y):
    """Quadratic weighted Cohen's kappa between rounded predictions and labels.

    Args:
        y_hat: Predictions; rounded with ``np.round`` before scoring.
        y: Reference labels.
    """
    rounded = np.round(y_hat)
    return cohen_kappa_score(rounded, y, weights='quadratic')

In [None]:
# Fit the rounding thresholds on the validation predictions and report the
# resulting quadratic kappa.
# NOTE(review): thresholds are fitted and scored on the same validation data,
# so this kappa is likely optimistic.
optR = OptimizedRounder()
optR.fit(outputlist, targets)
coefficients = optR.coefficients()
print(coefficients)
valid_predictions = optR.predict(outputlist, coefficients)
# Bare expression in the middle of a cell: it is not displayed.
valid_predictions
quadratic_kappa(valid_predictions, targets)
# test_predictions = optR.predict(test_predictions, coefficients)