In [None]:
import os
import sys
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Repository source: https://github.com/qubvel/efficientnet
sys.path.append(os.path.abspath('../input/efficientnet/efficientnet-master/efficientnet-master/'))
from efficientnet import EfficientNetB5

In [None]:
# Standard dependencies
import cv2
import time
import scipy as sp
import numpy as np
import random as rn
import pandas as pd
from tqdm import tqdm
from PIL import Image
from functools import partial
import matplotlib.pyplot as plt

# Machine Learning
import tensorflow as tf
import keras
from keras import initializers
from keras import regularizers
from keras import constraints
from keras import backend as K
from keras.activations import elu
from keras.optimizers import Adam
from keras.models import Sequential
from keras.engine import Layer, InputSpec
from keras.utils.generic_utils import get_custom_objects
from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau
from keras.layers import Dense, Conv2D, Flatten, GlobalAveragePooling2D, Dropout
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import cohen_kappa_score

# Locations of the competition data. Directory paths keep their trailing
# slash because later cells build file names by plain string concatenation.
KAGGLE_DIR = '../input/aptos2019-blindness-detection/'
TRAIN_DF_PATH = f"{KAGGLE_DIR}train.csv"
TRAIN_IMG_PATH = f"{KAGGLE_DIR}train_images/"

# Seed every random number generator used by the notebook for reproducibility
seed = 1234
os.environ['PYTHONHASHSEED'] = str(seed)
for seed_fn in (rn.seed, np.random.seed, tf.set_random_seed):
    seed_fn(seed)

## Preparation <a id="2"></a>

By examining the data we can readily see that we do not have much data (roughly 700 samples per class). It is probably a good idea to use data augmentation to increase the robustness of our model (see the Modeling section).

We could also try to use additional data from previous competitions to increase performance. Although I do not implement this in the kernel, feel free to experiment with adding data. Additional data can be found in [this Kaggle dataset](https://www.kaggle.com/benjaminwarner/resized-2015-2019-blindness-detection-images) (± 35000 additional images).

In [None]:
# Read the training labels; id_code receives a ".png" suffix so that it can be
# appended directly to the image directory when images are loaded.
print("Image IDs and Labels (TRAIN)")
train_df = pd.read_csv(TRAIN_DF_PATH).assign(
    id_code=lambda frame: frame['id_code'] + ".png"
)
print(f"Training images: {len(train_df)}")
display(train_df.head())

In [None]:
# Image dimensions fed to the network (456 is the alternative size noted by
# the author) and the number of colour channels.
IMG_WIDTH, IMG_HEIGHT, CHANNELS = 224, 224, 3

## EDA (Exploratory Data Analysis) <a id="4"></a>

For EDA on image datasets I think one should at least examine the label distribution, the images before preprocessing and the images after preprocessing. Through examining these three aspects we can get a good sense of the problem. Note that the distribution on the test set can still vary wildly from the training data.

In [None]:
# Bar chart of the number of training images per diagnosis label
label_counts = train_df['diagnosis'].value_counts().sort_index()
label_ax = label_counts.plot(kind="bar", figsize=(12,5), rot=0)
label_ax.set_title("Label Distribution (Training Set)",
                   weight='bold',
                   fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
label_ax.set_xlabel("Label", fontsize=17)
label_ax.set_ylabel("Frequency", fontsize=17);

We will visualize a random image from every label to get a general sense of the distinctive features that separate the classes. We will take this into account and try to enhance these features in our preprocessing. In these images there seem to be increasingly more spots and stains on the retina as diabetic retinopathy worsens.

In [None]:
# Show one randomly drawn, unprocessed image for every diagnosis label
fig, ax = plt.subplots(1, 5, figsize=(15, 6))
for i in range(5):
    sample = train_df[train_df['diagnosis'] == i].sample(1)
    image_name = sample['id_code'].item()
    # cv2.imread returns channels in BGR order while matplotlib expects RGB,
    # so convert before displaying; otherwise red and blue are swapped.
    X = cv2.cvtColor(cv2.imread(f"{TRAIN_IMG_PATH}{image_name}"), cv2.COLOR_BGR2RGB)
    ax[i].set_title(f"Image: {image_name}\n Label = {sample['diagnosis'].item()}", 
                    weight='bold', fontsize=10)
    ax[i].axis('off')
    ax[i].imshow(X);

## Preprocessing <a id="5"></a>

Here we will use the auto-cropping method with Ben's preprocessing as explained in [this kernel](https://www.kaggle.com/ratthachat/aptos-updatedv14-preprocessing-ben-s-cropping).

In [None]:
def crop_image_from_gray(img, tol=7):
    """
    Crops away the dark border surrounding an image.

    A pixel counts as foreground when its gray value is strictly greater
    than ``tol``. The image is cropped to the rows and columns that contain
    at least one foreground pixel. When no pixel exceeds ``tol`` (the image
    is too dark and everything would be cropped out) the input is returned
    unchanged, for grayscale and colour images alike.

    :param img: A 2-D (grayscale) or 3-D (rows x columns x channels, RGB
                order) NumPy array that will be cropped
    :param tol: The tolerance used for masking; gray values <= tol are
                treated as background

    :return: A NumPy array containing the cropped image, with the same
             number of dimensions as the input
    :raises ValueError: If ``img`` is neither 2-D nor 3-D
    """
    if img.ndim == 2:
        # Single-channel image: threshold it directly
        mask = img > tol
    elif img.ndim == 3:
        # Colour image: threshold its grayscale version
        mask = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) > tol
    else:
        raise ValueError(
            f"Expected a 2-D or 3-D image array, got {img.ndim} dimensions"
        )

    # Rows / columns that contain at least one foreground pixel
    keep_rows = mask.any(axis=1)
    keep_cols = mask.any(axis=0)

    if not keep_rows.any():
        # Image is too dark so that we would crop out everything
        return img

    # np.ix_ selects the row/column cross product; for colour images the
    # trailing channel axis is carried along untouched.
    return np.ascontiguousarray(img[np.ix_(keep_rows, keep_cols)])

def preprocess_image(image, sigmaX=10):
    """
    The whole preprocessing pipeline:
    1. Convert the image from BGR to RGB channel order
    2. Crop away the dark border (crop_image_from_gray)
    3. Resize the image to IMG_WIDTH x IMG_HEIGHT
    4. Blend the image with a Gaussian-blurred copy of itself
       (4 * image - 4 * blur + 128)
    
    :param image: A NumPy array holding a BGR image
    :param sigmaX: Standard deviation passed to cv2.GaussianBlur
    
    :return: A NumPy array containing the preprocessed image
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    cropped = crop_image_from_gray(rgb)
    resized = cv2.resize(cropped, (IMG_WIDTH, IMG_HEIGHT))
    # Kernel size (0, 0) lets OpenCV derive the kernel from sigmaX
    blurred = cv2.GaussianBlur(resized, (0, 0), sigmaX)
    return cv2.addWeighted(resized, 4, blurred, -4, 128)

After preprocessing we have managed to enhance the distinctive features in the images. This will increase performance when we train our EfficientNet model.

In [None]:
# One randomly drawn image per diagnosis label, shown after preprocessing
fig, ax = plt.subplots(1, 5, figsize=(15, 6))
for i, panel in enumerate(ax):
    sample = train_df.loc[train_df['diagnosis'] == i].sample(1)
    image_name = sample['id_code'].item()
    label = sample['diagnosis'].item()
    X = preprocess_image(cv2.imread(TRAIN_IMG_PATH + image_name))
    panel.imshow(X)
    panel.set_title(f"Image: {image_name}\n Label = {label}",
                    weight='bold', fontsize=10)
    panel.axis('off');

## Modeling <a id="6"></a>

In [None]:
# The generators below use class_mode="categorical", which works on string
# class labels, so store the diagnosis column as strings.
train_df = train_df.astype({'diagnosis': str})

In [None]:
# We use a small batch size so we can handle large images easily
BATCH_SIZE = 32


def _preprocess_rgb_for_generator(image):
    """
    Adapter that lets preprocess_image serve as a Keras preprocessing_function.

    The generators below load images with color_mode='rgb', whereas
    preprocess_image starts with a BGR -> RGB conversion (it is written for
    cv2.imread output). Swapping to BGR first keeps the channel order that
    reaches the network identical to the one shown in the preprocessing
    examples instead of silently exchanging red and blue.

    :param image: A rank-3 RGB NumPy array supplied by ImageDataGenerator

    :return: The preprocessed image as returned by preprocess_image
    """
    return preprocess_image(cv2.cvtColor(image, cv2.COLOR_RGB2BGR))


# Settings shared by the training and validation generators. Both must use the
# same validation_split so that they partition train_df identically.
_shared_datagen_args = dict(validation_split=0.15,
                            preprocessing_function=_preprocess_rgb_for_generator,
                            rescale=1 / 255.)

# Add Image augmentation to our (training) generator
datagen = ImageDataGenerator(zoom_range=0.15, rotation_range=120,
                             horizontal_flip=True,
                             vertical_flip=True,
                             shear_range=0.1,
                             fill_mode='nearest',
                             **_shared_datagen_args)

# Validation images are only preprocessed, never randomly augmented, so that
# validation metrics and predictions are deterministic.
val_datagen = ImageDataGenerator(**_shared_datagen_args)

# Arguments common to both flow_from_dataframe calls.
# target_size follows the Keras (height, width) convention.
_shared_flow_args = dict(x_col='id_code',
                         y_col='diagnosis',
                         directory=TRAIN_IMG_PATH,
                         target_size=(IMG_HEIGHT, IMG_WIDTH),
                         batch_size=BATCH_SIZE,
                         color_mode='rgb',
                         class_mode="categorical",
                         seed=42)

# Use the dataframe to define train and validation generators
train_generator = datagen.flow_from_dataframe(train_df,
                                              shuffle=True,
                                              subset='training',
                                              **_shared_flow_args)

# shuffle=False keeps predictions aligned with val_generator.classes
val_generator = val_datagen.flow_from_dataframe(train_df,
                                                shuffle=False,
                                                subset='validation',
                                                **_shared_flow_args)

In [None]:
# Preview the first eight images of one training batch
t_x, t_y = next(train_generator)

fig, m_axs = plt.subplots(2, 4, figsize = (16, 8))
for (c_x, c_y, c_ax) in zip(t_x, t_y, m_axs.flatten()):
    # The generator rescales pixels by 1/255, so multiply by 255 to get back
    # to the displayable 0-255 range.
    c_ax.imshow(np.clip(c_x*255, 0, 255).astype(np.uint8))
    # Labels are one-hot encoded (class_mode="categorical"); show the class
    # index rather than the raw vector.
    c_ax.set_title('Severity {}'.format(np.argmax(c_y)))
    c_ax.axis('off')

In [None]:
# Load in EfficientNetB0 (convolutional base only, no classification head)
from efficientnet import EfficientNetB0
from keras.applications import ResNet50

# model 1 
# Keras expects input_shape as (height, width, channels)
effnet = EfficientNetB0(weights=None,
                        include_top=False,
                        input_shape=(IMG_HEIGHT, IMG_WIDTH, CHANNELS))
# Weights are read from a local dataset file instead of being downloaded
# (presumably ImageNet-pretrained, judging by the file name).
effnet.load_weights('../input/efficientnet-keras-weights-b0b5/efficientnet-b0_imagenet_1000_notop.h5')

# model 2 (alternative backbone, currently unused)
# renet = ResNet50(weights = 'imagenet', include_top = False, input_shape=(IMG_HEIGHT, IMG_WIDTH, CHANNELS))

In [None]:
import keras 

def build_model():
    """
    Builds and compiles the classifier: the EfficientNet base (module-level
    ``effnet``), global average pooling and a 5-way softmax layer.

    The model is compiled with categorical cross-entropy, Adam (lr=0.001)
    and accuracy as metric. A layer summary is printed as a side effect.

    :return: The compiled Keras Sequential model
    """
    model = Sequential()
    model.add(effnet)
    model.add(GlobalAveragePooling2D())
    model.add(Dense(5, activation="softmax"))
    model.compile(loss='categorical_crossentropy', 
                  optimizer=keras.optimizers.Adam(lr=0.001), 
                  metrics=['acc'])
    # summary() prints by itself and returns None, so do not wrap it in print()
    model.summary()
    return model

# Initialize model
model = build_model()

In [None]:
# Monitor val loss to avoid overfitting and save the best model weights
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath='model.h5', 
    verbose=1, monitor='val_loss', 
    save_weights_only=True, save_best_only=True
)   
# Halve the learning rate when val_loss has not improved for 4 epochs.
# min_delta replaces the deprecated `epsilon` argument.
rlr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', 
                                        factor=0.5, patience=4, 
                                        verbose=1, mode='auto', 
                                        min_delta=0.0001)

# Begin training
# validation_steps covers every validation batch (including the last partial
# one) so that each epoch is evaluated on the same, complete validation set.
model.fit_generator(train_generator,
                    steps_per_epoch=train_generator.samples // BATCH_SIZE,
                    epochs=10,
                    validation_data=val_generator,
                    validation_steps=len(val_generator),
                    callbacks=[checkpoint, rlr])

In [None]:
# Load the best saved weights: 'model.h5' is written by the ModelCheckpoint
# callback, which keeps only the weights with the best val_loss.
model.load_weights('./model.h5')

In [None]:
# Per-epoch metrics recorded by Keras during training
history_df = pd.DataFrame(model.history.history)

# Training vs. validation loss
loss_ax = history_df[['loss', 'val_loss']].plot(figsize=(12,5))
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")

# Training vs. validation accuracy
acc_ax = history_df[['acc', 'val_acc']].plot(figsize=(12,5))
acc_ax.set_title("Accuracy", fontsize=16, weight='bold')
acc_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("% Accuracy");

## Evaluation <a id="7"></a>

- Confusion Matrix 
- Classification Report 
- Weighted Q. Kappa
- ROC AUC Curve 

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Confusion Matrix and Classification Report
# Restart the validation generator from its first batch so that predictions
# line up with val_generator.classes.
val_generator.reset()
# len(val_generator) is the integer number of batches (the last one may be
# partial); a fractional step count is avoided.
y_prob = model.predict_generator(val_generator,
                                 steps=len(val_generator),
                                 verbose = True)
# Predicted class index = position of the highest softmax probability
y_pred = np.argmax(y_prob, axis=1)

In [None]:
# Number of ground-truth validation labels next to the number of predictions
len(val_generator.classes), len(y_pred) # both should be the same

In [None]:
# First five true class indices next to the first five predicted ones
val_generator.classes[:5], y_pred[:5] # sanity check

## Confusion Matrix 

In [None]:
import seaborn as sns

# Counts of validation images per (true class, predicted class) pair
conf_mat = confusion_matrix(val_generator.classes, y_pred)

conf_fig, conf_ax = plt.subplots(figsize=(10,5))
sns.heatmap(conf_mat, annot=True, fmt="d", cbar = False,
            cmap = plt.cm.Blues, ax=conf_ax)
conf_ax.set_ylabel('True label')
conf_ax.set_xlabel('Predicted label')

## Classification Report

In [None]:
# Display names used for the five classes in the report
target = ['No DR', 'Mild', 'Moderate', 'Severe', 'Proliferative DR']
print('Classification Report')

# Overall validation accuracy as a percentage, then per-class metrics
val_accuracy_pct = 100*accuracy_score(val_generator.classes, y_pred)
print('Validation Acc: %2.2f%%' % val_accuracy_pct)
report_text = classification_report(val_generator.classes, y_pred,
                                    target_names = target)
print(report_text)

## Weighted Cohen's Kappa

In [None]:
from sklearn.metrics import cohen_kappa_score

# Agreement between true and predicted classes with quadratic weighting
quadratic_kappa = cohen_kappa_score(val_generator.classes, y_pred,
                                    weights='quadratic')
quadratic_kappa

## ROC AUC curve

In [None]:
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.preprocessing import LabelBinarizer

# Apply the seaborn style before the figure is created so that it takes effect
sns.set(style="darkgrid")
fig, c_ax = plt.subplots(1,1, figsize = (12, 10))
all_labels = np.unique(train_df['diagnosis'])

def multiclass_roc_auc_score(y_test, y_pred, average="macro"):
    """
    Plots one-vs-rest ROC curves on the module-level axes ``c_ax`` and
    returns the averaged ROC AUC.

    :param y_test: 1-D sequence of true class labels
    :param y_pred: Either a 2-D array of per-class scores/probabilities
                   (one column per class; columns are assumed to follow the
                   sorted order of the labels in ``y_test``, and every class
                   is assumed to occur in ``y_test``), or a 1-D sequence of
                   predicted labels, which is one-hot encoded and yields a
                   single-threshold curve
    :param average: Averaging mode forwarded to sklearn's roc_auc_score

    :return: The ROC AUC score averaged over the classes
    """
    lb = LabelBinarizer()
    lb.fit(y_test)
    y_test = lb.transform(y_test)
    y_pred = np.asarray(y_pred)
    if y_pred.ndim == 1:
        # Hard labels were passed: binarize them like the ground truth
        y_pred = lb.transform(y_pred)

    for (idx, c_label) in enumerate(all_labels):
        fpr, tpr, thresholds = roc_curve(y_test[:,idx].astype(int), y_pred[:,idx])
        c_ax.plot(fpr, tpr, label = '%s (AUC:%0.2f)'  % (c_label, auc(fpr, tpr)))
    # Chance-level diagonal, independent of the last computed curve
    c_ax.plot([0, 1], [0, 1], 'b-', label = 'Random Guessing')
    return roc_auc_score(y_test, y_pred, average=average)

# ROC analysis needs continuous scores, not argmax labels, so obtain the
# class probabilities for the validation set in generator order.
val_generator.reset()
val_scores = model.predict_generator(val_generator, steps=len(val_generator))

print('ROC AUC score:', multiclass_roc_auc_score(val_generator.classes, val_scores))

c_ax.legend()
c_ax.set_xlabel('False Positive Rate')
c_ax.set_ylabel('True Positive Rate')