In [None]:
# Standard library
import json
import math
import os
import random

# fastai star imports are placed before the explicit third-party imports on
# purpose: they export many generic names (presumably including `Image` and
# `Callback` -- TODO confirm for the installed fastai version), so the explicit
# imports below must run afterwards in order to win.
from fastai import *
from fastai.vision import *

# Third-party
import cv2
from PIL import Image
import numpy as np
from keras import layers
from keras.applications import DenseNet121
from keras.callbacks import Callback, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.models import load_model
from keras.optimizers import Adam
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score
from sklearn.metrics import confusion_matrix
import scipy
import scipy.optimize
import seaborn as sns
import tensorflow as tf
from tqdm import tqdm

%matplotlib inline

In [None]:
# Seed NumPy's global random generator so NumPy-driven randomness repeats between runs.
np.random.seed(42)

In [None]:
# Seed TensorFlow's global random generator with the same value used for NumPy.
tf.random.set_seed(42)

In [None]:
# train.csv holds the labels for the training images.
# test.csv is the corresponding file for the test images.
# Look at the information in the train and test .csv files.
# There are 5 diagnosis categories:
#   No_DR
#   Mild
#   Moderate
#   Severe
#   Proliferate_DR

In [None]:
#classes : 0 - No DR, 1 - Mild, 2 - Moderate, 3 - Severe, 4 - Proliferative DR
def classes_to_int(label):
    """Translate a diagnosis name into its integer class index.

    Leading and trailing whitespace is removed before the lookup. The five
    known names map to 0-4; any other value prints a message and yields 5.
    """
    cleaned = label.strip()
    index_by_name = {
        "No DR": 0,
        "Mild": 1,
        "Moderate": 2,
        "Severe": 3,
        "Proliferative DR": 4,
    }
    if cleaned in index_by_name:
        return index_by_name[cleaned]
    print("Invalid Label", cleaned)
    return 5

def int_to_classes(i):
    """Translate an integer class index (0-4) into its diagnosis name.

    Any value that does not compare equal to one of 0-4 prints a message and
    yields the string "Invalid Class".
    """
    names = ("No DR", "Mild", "Moderate", "Severe", "Proliferative DR")
    # Compare with == (rather than indexing) so the accepted inputs stay the same.
    for index, name in enumerate(names):
        if i == index:
            return name
    print("Invalid class ", i)
    return "Invalid Class"

In [None]:
# Number of diagnosis classes (grades 0-4).
NUM_CLASSES = 5

In [None]:
# Directories holding the training and test images.
# NOTE(review): test_path contains a 'kellymclean' path component that no other
# path in the visible notebook has -- confirm which location is correct.
train_path = '/....../diabetes_eye/train/'
test_path = '/....../kellymclean/diabetes_eye/test/'

In [None]:
# Load the training table and print its first rows.
train_df = pd.read_csv('/....../diabetes_eye/train.csv')
print(train_df.head())

In [None]:
# Total number of elements in the training table.
np.size(train_df)

In [None]:
# (rows, columns) of the training table.
train_df.shape

In [None]:
# Pull out the image ids and the diagnosis column as separate objects.
x = train_df['id_code']
y = train_df['diagnosis']

In [None]:
# Histogram of the diagnosis values; the per-value counts are the cell's output.
train_df['diagnosis'].hist()
train_df['diagnosis'].value_counts()

In [None]:
def get_label(diagnosis):
    """Return every grade from 0 up to and including ``diagnosis``, comma-separated.

    For example, a diagnosis of 2 becomes ``'0,1,2'``.
    """
    grades = range(diagnosis + 1)
    return ','.join(map(str, grades))

In [None]:
# Add a 'label' column by applying get_label to each row's diagnosis.
train_df['label'] = train_df.diagnosis.apply(get_label)

In [None]:
# Preview the first 10 rows of the training table.
train_df.head(10)

In [None]:
# create image data bunch
# The frame loaded earlier in this notebook is `train_df`; the original cell
# passed `df_train`, which is never defined and raised NameError.
# Labels come from the comma-delimited 'label' column; 20% of the rows are held
# out for validation.
data = ImageDataBunch.from_df('/',
                              df=train_df,
                              valid_pct=0.2,
                              folder= '/....../diabetes_eye/train',
                              suffix= '.png',
                              ds_tfms=get_transforms(flip_vert=True, max_warp=0),
                              size=224,
                              bs=64,
                              num_workers=0,
                              label_col='label', label_delim=',').normalize(imagenet_stats)

In [None]:
# check classes
# Print the class list exposed by the data bunch.
print(f'Classes: \n {data.classes}')

In [None]:
def display_samples(df, columns=4, rows=3):
    """Plot the first ``columns * rows`` training images in a grid.

    Args:
        df: DataFrame with ``id_code`` (image file stem) and ``diagnosis``
            columns.
        columns: Number of grid columns.
        rows: Number of grid rows.

    Rows are taken by position, so the frame does not need a default 0..n-1
    index. If the frame has fewer rows than grid cells, only the available
    rows are drawn.

    Raises:
        FileNotFoundError: If an image file cannot be read.
    """
    fig = plt.figure(figsize=(5*columns, 4*rows))

    # Never index past the end of a short frame.
    n_images = min(columns * rows, len(df))
    for position in range(n_images):
        id_code = df['id_code'].iloc[position]
        diagnosis = df['diagnosis'].iloc[position]
        image_file = f'/....../diabetes_eye/train/{id_code}.png'

        img = cv2.imread(image_file)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(f'Could not read image: {image_file}')
        # OpenCV loads images as BGR; matplotlib expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        fig.add_subplot(rows, columns, position + 1)
        plt.title(diagnosis)
        plt.imshow(img)

    plt.tight_layout()

# Show the sample grid for the training table using the default 4x3 layout.
display_samples(train_df)

In [None]:
#resize images
def get_pad_width(im, new_shape, is_rgb=True):
    """Compute per-axis (before, after) padding that grows ``im`` to a square.

    Args:
        im: Object whose ``shape[0]`` and ``shape[1]`` give its height and width.
        new_shape: Target side length for both height and width.
        is_rgb: When true, a trailing ``(0, 0)`` entry is appended for a third
            axis that is left unpadded.

    Returns:
        A tuple of ``(before, after)`` pairs; when the difference is odd, the
        extra pixel goes to the "after" side.
    """
    def _halves(total):
        # Smaller half first, larger half second.
        return math.floor(total / 2), math.ceil(total / 2)

    height, width = im.shape[0], im.shape[1]
    vertical = _halves(new_shape - height)
    horizontal = _halves(new_shape - width)

    if not is_rgb:
        return (vertical, horizontal)
    return (vertical, horizontal, (0, 0))

def preprocess_image(image_path, desired_size=224):
    """Open an image file and resize it to a ``desired_size`` x ``desired_size`` square.

    Lanczos resampling is used for the resize. The resized image object is
    returned; the aspect ratio is not preserved.
    """
    target_size = (desired_size, desired_size)
    return Image.open(image_path).resize(target_size, resample=Image.LANCZOS)

In [None]:
# Load the test table and print its first rows.
test_df = pd.read_csv('/....../diabetes_eye/test.csv')
print(test_df.head())

In [None]:
# Total number of elements in the test table.
np.size(test_df)

In [None]:
test_df.shape

# Stratified 85/15 split of the image ids (x) and diagnoses (y), so both parts
# keep the same class proportions. random_state=None draws from NumPy's global
# random state.
train_x, valid_x, train_y, valid_y = train_test_split(x, y, test_size=0.15,
                                                      stratify=y, random_state=None)
print(train_x.shape, train_y.shape, valid_x.shape, valid_y.shape)
train_y.hist()
valid_y.hist()

# The original cell went on to overwrite valid_x / valid_y with slices of
# `images` and `labels` sized by `NUM_VAL_SAMPLES`. None of those names is
# defined in the visible notebook (NameError), and the assignment would have
# discarded the stratified split above, so it has been dropped.

In [None]:
# input image dimensions
# NOTE(review): other cells in the visible notebook hard-code 224 / 3 instead of
# reading these constants, so changing them here alone has no effect there.
IMG_SIZE = 224
img_rows = 224
img_cols = 224
img_channels = 3

In [None]:
# Load every training image into one pre-allocated uint8 array of 224x224x3 images.
N = len(train_df)
x_train = np.empty(shape=(N, 224, 224, 3), dtype=np.uint8)

for i, image_id in enumerate(tqdm(train_df['id_code'])):
    image_file = f'/....../diabetes_eye/train/{image_id}.png'
    # assumes preprocess_image yields a 3-channel 224x224 image -- TODO confirm
    x_train[i] = preprocess_image(image_file)

In [None]:
# Load every test image into one pre-allocated uint8 array of 224x224x3 images.
N = len(test_df)
x_test = np.empty(shape=(N, 224, 224, 3), dtype=np.uint8)

for i, image_id in enumerate(tqdm(test_df['id_code'])):
    image_file = f'/....../diabetes_eye/test/{image_id}.png'
    # assumes preprocess_image yields a 3-channel 224x224 image -- TODO confirm
    x_test[i] = preprocess_image(image_file)

In [None]:
# One-hot encode the diagnosis column: one indicator column per distinct value.
y_train = pd.get_dummies(train_df['diagnosis']).values

# Report the shapes of the image and label arrays.
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)

In [None]:
#Creating multilabels
# Cumulative encoding: column i is set when the one-hot label sits in column i
# or in any later column, so a grade-k row has columns 0..k set.
y_train_multi = np.empty(y_train.shape, dtype=y_train.dtype)
y_train_multi[:, 4] = y_train[:, 4]

# Walk from the second-highest grade down, OR-ing in the column to the right.
for i in (3, 2, 1, 0):
    y_train_multi[:, i] = np.logical_or(y_train[:, i], y_train_multi[:, i+1])

print("Original y_train:", y_train.sum(axis=0))
print("Multilabel version:", y_train_multi.sum(axis=0))

In [None]:
# Shape of the multilabel target array.
y_train_multi.shape

In [None]:
# Shape of the training image array.
x_train.shape

In [None]:
#Now we can split it into a training and validation set.
# The targets must be the cumulative multilabel encoding (y_train_multi): the
# model has five sigmoid outputs trained with binary cross-entropy, and every
# decoder in this notebook recovers the grade as `sum(axis=1) - 1`, which is
# always 0 for one-hot rows. The original cell split the one-hot `y_train`.
# NOTE: x_train / y_train are overwritten, so this cell must be run only once
# per kernel session.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train_multi,
    test_size=0.15,
    random_state= 2019
)


In [None]:
# Batch size passed to the training data generator.
BATCH_SIZE = 32

def create_datagen():
    """Build the image augmentation generator used for training.

    Augmentations: random zoom with range 0.15 and random horizontal and
    vertical flips. Points outside the input boundaries are filled with the
    constant value 0.
    """
    augmentation = dict(
        zoom_range=0.15,
        fill_mode='constant',
        cval=0.,
        horizontal_flip=True,
        vertical_flip=True,
    )
    return ImageDataGenerator(**augmentation)

# Using original generator
# Batch iterator over the training arrays with a fixed seed.
data_generator = create_datagen().flow(x_train, y_train, batch_size=BATCH_SIZE, seed=2019)

In [None]:
class Metrics(Callback):
    """Keras callback that tracks quadratic-weighted Cohen's kappa on validation data.

    After every epoch the validation inputs are predicted, outputs above 0.5 are
    counted per row and reduced by one to obtain a grade, and the kappa against
    the equally decoded targets is appended to ``val_kappas``. The model is saved
    to ``sept_model.h5`` whenever the latest kappa equals the best seen so far.

    Args:
        validation_data: Optional ``(x_val, y_val)`` pair. Needed on Keras
            versions that do not populate ``self.validation_data`` themselves;
            when omitted, whatever the framework (or the caller) stored in
            ``self.validation_data`` is used.
    """

    def __init__(self, validation_data=None):
        super().__init__()
        # Only override when given explicitly, so Keras versions that fill in
        # validation_data during fit keep working unchanged.
        if validation_data is not None:
            self.validation_data = validation_data

    def on_train_begin(self, logs=None):
        # Start a fresh kappa history for each training run.
        self.val_kappas = []

    def on_epoch_end(self, epoch, logs=None):
        validation_data = getattr(self, 'validation_data', None)
        if validation_data is None:
            raise ValueError(
                "Metrics has no validation data; pass validation_data=(x_val, y_val) "
                "when constructing the callback."
            )

        X_val, y_val = validation_data[:2]
        # Multilabel rows -> grade: number of set columns minus one.
        y_val = y_val.sum(axis=1) - 1

        y_pred = self.model.predict(X_val) > 0.5
        y_pred = y_pred.astype(int).sum(axis=1) - 1

        _val_kappa = cohen_kappa_score(
            y_val,
            y_pred,
            weights='quadratic'
        )

        self.val_kappas.append(_val_kappa)

        print(f"val_kappa: {_val_kappa:.4f}")

        # Equality with the running maximum means this epoch is (one of) the best.
        if _val_kappa == max(self.val_kappas):
            print("Validation Kappa has improved. Saving model.")
            self.model.save('sept_model.h5')

In [None]:
# DenseNet121 base without its top classification layers, initialised from a
# local weights file, for 224x224 3-channel inputs.
densenet = DenseNet121(
    weights= '/....../diabetes_eye/DenseNet-BC-121-32-no-top.h5',
    include_top=False,
    input_shape=(224,224,3)
)

In [None]:
def build_model(num_classes=5, learning_rate=0.00005, dropout_rate=0.5):
    """Assemble and compile the classifier on top of the module-level ``densenet`` base.

    Architecture: DenseNet base -> global average pooling -> dropout ->
    dense layer with one sigmoid unit per class, trained with binary
    cross-entropy (multilabel targets).

    Args:
        num_classes: Number of sigmoid output units.
        learning_rate: Adam learning rate.
        dropout_rate: Dropout rate applied before the output layer.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(densenet)
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dropout(dropout_rate))
    model.add(layers.Dense(num_classes, activation='sigmoid'))

    model.compile(
        loss='binary_crossentropy',
        # `learning_rate` replaces the deprecated `lr` keyword.
        optimizer=Adam(learning_rate=learning_rate),
        metrics=['accuracy']
    )

    return model

In [None]:
# Build the model and print its layer summary.
model = build_model()
model.summary()

In [None]:
kappa_metrics = Metrics()
# Attach the hold-out arrays explicitly: newer Keras versions no longer set
# `validation_data` on callbacks during fit, and the callback reads it at the
# end of every epoch. Older versions simply overwrite this during training.
kappa_metrics.validation_data = (x_val, y_val)

In [None]:
# First training run: 15 epochs of 32 generator batches each, validated on the
# hold-out arrays. The kappa callback is left disabled (commented out) here.
history = model.fit_generator(
    data_generator,
    steps_per_epoch=32, #x_train.shape[0] / BATCH_SIZE,
    epochs=15,
    validation_data=(x_val, y_val)#,
    #callbacks=[kappa_metrics]
)

In [None]:
# Save the model as trained so far.
model.save("model.h5")

#model with seed = 42 vs. 2020
# Continue training the same model object for another 15 epochs, this time with
# the kappa callback enabled.
# NOTE(review): `history` is overwritten here, and model.h5 was written before
# this second run started -- confirm both are intended.
history = model.fit_generator(
    data_generator,
    steps_per_epoch=32, #x_train.shape[0] / BATCH_SIZE,
    epochs=15,
    validation_data=(x_val, y_val),
    callbacks=[kappa_metrics]
)

In [None]:
# Display the training accuracy
# The evaluation runs on batches drawn from the augmenting training generator.

model.evaluate(data_generator)

In [None]:
import json

In [None]:
# Persist the training history of the most recent fit call.
with open('history.json', 'w') as f:
    # default=float converts NumPy scalar metric values, which the json module
    # cannot serialise on its own; plain Python floats are unaffected.
    json.dump(history.history, f, default=float)

history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot()

# The accuracy metric is recorded as 'acc' by some Keras versions and as
# 'accuracy' by others; plot whichever pair is present.
if 'acc' in history_df.columns:
    accuracy_columns = ['acc', 'val_acc']
else:
    accuracy_columns = ['accuracy', 'val_accuracy']
history_df[accuracy_columns].plot()

In [None]:
# Validation kappa per epoch, as recorded by the callback.
plt.plot(kappa_metrics.val_kappas)

model = load_model('model.h5')

# The original cell left y_test / y_actual as empty lists, so the confusion
# matrix was computed on no data. Fill them from the validation split using
# the same decoding as the rest of the notebook: count the outputs above 0.5
# (or the set target columns) per row and subtract one.
y_actual = np.array(y_val).sum(axis=1) - 1
y_test = (model.predict(x_val) > 0.5).astype(int).sum(axis=1) - 1

cm = confusion_matrix(y_actual, y_test)

#Plot the confusion matrix
# New figure so the heatmap is not drawn on top of the kappa curve above.
plt.figure()
sns.heatmap(cm, cmap = 'plasma', annot = True)
plt.show()

In [None]:
# Restore weights from model.h5 and predict the per-output scores for the
# validation images.
# NOTE(review): the kappa callback saves its checkpoints to 'sept_model.h5',
# not 'model.h5' -- confirm which file is meant to be loaded here.
model.load_weights('model.h5')
y_val_pred = model.predict(x_val)

def compute_score_inv(threshold):
    """Return ``1 - kappa`` for validation predictions cut at ``threshold``.

    Reads the module-level ``y_val_pred`` and ``y_val``. Both are decoded to
    grades by counting set columns per row and subtracting one; the kappa is
    quadratic-weighted. Lower return values mean better agreement.
    """
    predicted_grades = (y_val_pred > threshold).astype(int).sum(axis=1) - 1
    actual_grades = y_val.sum(axis=1) - 1
    kappa = cohen_kappa_score(predicted_grades, actual_grades, weights='quadratic')
    return 1 - kappa

# Search for the threshold that minimises (1 - kappa) with Nelder-Mead,
# starting from 0.5.
simplex = scipy.optimize.minimize(
    compute_score_inv, 0.5, method='nelder-mead'
)

# NOTE(review): best_threshold is not read by any later cell in the visible
# notebook; the test predictions use a fixed 0.5.
best_threshold = simplex['x'][0]

In [None]:
# Predict the test images, cut the outputs at 0.5, and decode each row to a
# grade by counting the outputs above the cut and subtracting one.
y_test = model.predict(x_test) > 0.5
y_test = y_test.astype(int).sum(axis=1) - 1

# Store the predicted grades in the test table and write it out without the index.
test_df['diagnosis'] = y_test
test_df.to_csv('sample_submission.csv',index=False)

In [None]:
# Load a submission file and print its first 10 rows.
# NOTE(review): this path differs from the relative 'sample_submission.csv'
# written earlier in the visible notebook -- confirm it is the same file.
df_submission = pd.read_csv('/....../diabetes_eye/sample_submission.csv')
print(df_submission.head(10))

In [None]:
# Re-bind x / y to the submission's ids and diagnoses (these names previously
# held the training columns).
x = df_submission['id_code']
y = df_submission['diagnosis']

In [None]:
# Histogram of the submitted diagnoses; the per-value counts are the cell's output.
df_submission['diagnosis'].hist()
df_submission['diagnosis'].value_counts()