In [None]:
import os, sys, math, json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import skimage.io

from skimage.transform import resize
from imgaug import augmenters as iaa
from tqdm import tqdm
import PIL
from PIL import Image, ImageOps
import cv2
from sklearn.utils import class_weight, shuffle

%matplotlib inline

import scipy
import tensorflow as tf

from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.applications.resnet50 import preprocess_input
import tensorflow.keras.backend as K
from tensorflow.keras.utils import Sequence
from tensorflow.keras.utils import to_categorical

from tensorflow.keras import layers
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.callbacks import Callback, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import f1_score, fbeta_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score


import warnings

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# --- Notebook-wide configuration -------------------------------------------
SEED = 42
WORKERS = 2
CHANNEL = 3
IMG_SIZE = 512
NUM_CLASSES = 5
TRAIN_NUM = 1000 # use 1000 when you just want to explore new idea, use -1 for full train

# Seed both NumPy's and TensorFlow's global random generators with the same value.
np.random.seed(SEED)
tf.random.set_seed(SEED)

#### Loading the data: resized diabetic-retinopathy training labels and the APTOS 2019 test set

In [None]:
# Previously the APTOS 2019 training labels were loaded instead:
# df_train = pd.read_csv('../input/aptos2019-blindness-detection/train.csv')
# df_test = pd.read_csv('../input/aptos2019-blindness-detection/test.csv')

# Training labels come from the resized diabetic-retinopathy set,
# test ids from the APTOS 2019 competition files.
df_train = pd.read_csv('../input/diabetic-retinopathy-resized/trainLabels.csv')
df_test = pd.read_csv('../input/aptos2019-blindness-detection/test.csv')

# Report (rows, columns) for both frames, then preview the training labels.
for frame in (df_train, df_test):
    print(frame.shape)
print(df_train.head())

In [None]:
# Number of rows per severity level in the loaded label table.
class_weights = df_train['level'].value_counts()

# Rebalance by sampling WITH replacement from each level:
# levels 0-2 contribute 2400 rows each, levels 3-4 contribute 1200 rows each.
# random_state pins the draw so re-running this cell yields the same rows
# instead of advancing NumPy's global random state.
dfs = [
    df_train[df_train['level'] == level].sample(
        1200 * (2 if level < 3 else 1),
        replace=True,
        random_state=SEED,
    )
    for level in range(5)
]
resampled = pd.concat(dfs, axis=0).reset_index(drop=True)
resampled

In [None]:
# Confirm the per-level row counts after resampling.
resampled['level'].value_counts()

In [None]:
# From here on df_train refers to the resampled frame; the originally loaded
# label table is no longer reachable under this name.
df_train=resampled

#### There are a number of ways to preprocess the images. Let's start by looking at them as they are.

In [None]:
def display_samples(df, columns=4, rows=3, gauss=False):
    """Show the first `columns * rows` training images of `df` in a grid.

    Args:
        df: frame with an 'image' column (file name without extension) and a
            'level' column (used as the subplot title).
        columns: number of grid columns.
        rows: number of grid rows.
        gauss: when True, each image is blended with its Gaussian-blurred copy
            (4 * img - 4 * blur + 128) before being shown.

    Raises:
        FileNotFoundError: if an image file cannot be read by OpenCV.
    """
    fig = plt.figure(figsize=(5*columns, 4*rows))

    # Never ask for more rows than the frame holds.
    n_shown = min(columns * rows, len(df))

    for i in range(n_shown):
        # Positional access: works regardless of what the frame's index looks like.
        record = df.iloc[i]
        image_name = record['image']
        level = record['level']
#         img = cv2.imread(f'../input/aptos2019-blindness-detection/train_images/{image_name}.png')
        image_file = f'../input/diabetic-retinopathy-resized/resized_train/resized_train/{image_name}.jpeg'
        img = cv2.imread(image_file)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f'Could not read image: {image_file}')

        # OpenCV loads BGR; matplotlib expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if gauss:
            blurred = cv2.GaussianBlur(img, (0, 0), IMG_SIZE/10)
            img = cv2.addWeighted(img, 4, blurred, -4, 128)

        ax = fig.add_subplot(rows, columns, i+1)
        ax.set_title(level)
        ax.imshow(img)

    plt.tight_layout()

display_samples(df_train)

#### A trick that worked in the previous competition was to apply a Gaussian blur

In [None]:
# Same preview grid, this time with the Gaussian-blur blending switched on.
display_samples(df_train, gauss=True)

We will resize the images to 224x224, then create a single numpy array to hold the data.

In [None]:
def preprocess_image(image_path, desired_size=224, gauss=False):
    """Read an image, resize it to a square and optionally enhance it.

    Args:
        image_path: path of the image file to load with OpenCV.
        desired_size: side length, in pixels, of the square output.
        gauss: when True, blend the resized image with its Gaussian-blurred
            copy (4 * im - 4 * blur + 128, sigma = desired_size / 10).

    Returns:
        Array of shape (desired_size, desired_size, 3) in RGB channel order.

    Raises:
        FileNotFoundError: if OpenCV cannot read `image_path`.
    """
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread returns None on a missing/unreadable file instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        raise FileNotFoundError(f'Could not read image: {image_path}')

    # OpenCV decodes to BGR. Convert to RGB so the arrays agree with
    # display_samples, plt.imshow and RGB-trained pretrained weights.
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    im = cv2.resize(im, (desired_size, desired_size), interpolation=cv2.INTER_AREA)
    if gauss:
        blurred = cv2.GaussianBlur(im, (0, 0), desired_size/10)
        im = cv2.addWeighted(im, 4, blurred, -4, 128)

    return im

#### Resizing and applying blur

In [None]:
# Pre-allocate one uint8 slot of 224x224x3 per training row, then fill it image by image.
N = df_train.shape[0]
x_train_array = np.empty((N, 224, 224, 3), dtype=np.uint8)

for row, name in enumerate(tqdm(df_train['image'])):
    # Alternative location tried earlier:
    # f'../input/diabetic-retinopathy-resized/resized_train{name}.png'
    source_file = f'../input/diabetic-retinopathy-resized/resized_train/resized_train/{name}.jpeg'
    x_train_array[row] = preprocess_image(source_file, gauss=True)
#     Image.fromarray(x_train_array[row, :, :, :]).save(f'/kaggle/working/2019_244_resized_gauss/test_images/{name}.jpeg')

Let's look at our new images!

In [None]:
# Preview the first 12 preprocessed training images on a 4-row by 3-column grid.
fig = plt.figure(figsize=(16, 9))
for slot in range(1, 13):
    ax = fig.add_subplot(4, 3, slot)
    ax.imshow(x_train_array[slot - 1])

In [None]:
# Same preprocessing for the test set: one uint8 224x224x3 slot per test id.
N = df_test.shape[0]
x_test = np.empty((N, 224, 224, 3), dtype=np.uint8)
for row, code in enumerate(tqdm(df_test['id_code'])):
    test_file = f'../input/aptos2019-blindness-detection/test_images/{code}.png'
    x_test[row] = preprocess_image(test_file, gauss=True)
#     Image.fromarray(x_test_array[row, :, :, :]).save(f'/kaggle/working/2019_244_resized_gauss/test_images/{code}.jpeg')

In [None]:
# One indicator column per distinct value of 'level'.
y_train = pd.get_dummies(df_train['level']).values

# Print the shapes of the training inputs, training targets and test inputs.
for arr in (x_train_array, y_train, x_test):
    print(arr.shape)

Instead of predicting a single label, we will change our target to be a multilabel problem; i.e., if the target is a certain class, then it encompasses all the classes before it. E.g. encoding a class 4 retinopathy would usually be [0, 0, 0, 0, 1], but in our case we will predict [1, 1, 1, 1, 1]. For more details, please check out Lex's kernel.

In [None]:
# Convert the one-hot targets into cumulative targets: after this cell,
# column k of y_train_multi is set when the sample's level is k or higher.
y_train_multi = np.empty(y_train.shape, dtype=y_train.dtype)
# The highest level has nothing above it, so its column is copied unchanged.
y_train_multi[:, 4] = y_train[:, 4]

# Walk from column 3 down to column 0, OR-ing each one-hot column with the
# already-computed column to its right.
# Note: the loop variable `i` remains bound in the notebook namespace
# afterwards (final value 0).
for i in range(3, -1, -1):
    y_train_multi[:, i] = np.logical_or(y_train[:, i], y_train_multi[:, i+1])

# Per-column totals before and after the conversion.
print("Original y_train:", y_train.sum(axis=0))
print("Multilabel version:", y_train_multi.sum(axis=0))

In [None]:
# Hold out 5% of the preprocessed images (with their cumulative targets) for validation.
# From here on `y_train` refers to the cumulative targets of the training split.
# NOTE(review): df_train was sampled with replacement before this split, so
# copies of the same image can presumably land in both train and validation;
# validation scores may be optimistic — confirm.
split_parts = train_test_split(
    x_train_array,
    y_train_multi,
    random_state=2019,
    test_size=0.05,
)
x_train, x_val, y_train, y_val = split_parts

print(*(part.shape for part in (x_train, y_train, x_val, y_val)))


#### Building the data generator. It lets us do some fancy things like randomly flipping and zooming the images during training.

In [None]:
BATCH_SIZE = 32

def create_datagen():
    """Return the ImageDataGenerator used to augment training batches.

    Configured for random zoom (zoom_range=0.15), random horizontal and
    vertical flips, and constant fill with value 0 for points outside the
    input boundaries.
    """
    augmentation = dict(
        zoom_range=0.15,
        fill_mode='constant',
        cval=0.,
        horizontal_flip=True,
        vertical_flip=True,
    )
    return ImageDataGenerator(**augmentation)

# Standard augmenting generator over the training split.
train_augmenter = create_datagen()
data_generator = train_augmenter.flow(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
    seed=2019,
)
# Mixup alternative (disabled):
# mixup_generator = MixupGenerator(x_train, y_train, batch_size=BATCH_SIZE, alpha=0.2, datagen=create_datagen())()

In [None]:
# DenseNet121 without its classification head, initialised from a local
# weights file and sized for the 224x224x3 arrays built above.
densenet = DenseNet121(
    include_top=False,
    input_shape=(224, 224, 3),
    weights='../input/densenet-keras/DenseNet-BC-121-32-no-top.h5',
)

In [None]:
def build_CNN_model():
    """Build and compile a small convolutional baseline.

    Four Conv2D layers (128, 128, 64, 32 filters) followed by global average
    pooling, dropout (0.5) and a 5-unit sigmoid output, one unit per
    cumulative severity target. The input shape is taken from the first
    sample of the global `x_train`.

    Returns:
        The compiled Sequential model (binary cross-entropy, Adam).
    """
    # Start from an empty model. The previous `Sequential(i)` passed whatever
    # value a leftover global loop variable happened to hold as `layers`.
    model = Sequential()
    model.add(layers.Conv2D(128, 5, strides=2, activation="relu", input_shape=x_train[0].shape))
    model.add(layers.Conv2D(128, 5, activation="relu"))
    model.add(layers.Conv2D(64, 3, activation="relu"))
    model.add(layers.Conv2D(32, 3, activation="relu"))
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(5, activation='sigmoid'))

    model.compile(
        loss='binary_crossentropy',
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        optimizer=Adam(learning_rate=0.00005),
        metrics=['accuracy']
    )

    return model

def build_densenet_model():
    """Build and compile the DenseNet-based classifier.

    Stacks the global `densenet` base, global average pooling, dropout (0.5)
    and a 5-unit sigmoid output, one unit per cumulative severity target.

    Returns:
        The compiled Sequential model (binary cross-entropy, Adam).
    """
    model = Sequential()
    model.add(densenet)
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(5, activation='sigmoid'))

    model.compile(
        loss='binary_crossentropy',
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        optimizer=Adam(learning_rate=0.00005),
    )

    return model

In [None]:
# Instantiate the small CNN baseline and print its layer summary.
model_cnn = build_CNN_model()
# NOTE(review): build() is called without an input shape — presumably fine
# because the first layer already declares input_shape; confirm.
model_cnn.build()
model_cnn.summary()

In [None]:
# history = model_cnn.fit_generator(
#     data_generator,
#     steps_per_epoch=x_train.shape[0] / BATCH_SIZE,
#     epochs=10,
#     validation_data=(x_val, y_val)
# )

In [None]:
# with open('history.json', 'w') as f:
#     json.dump(history.history, f)

# history_df = pd.DataFrame(history.history)
# history_df[['loss', 'val_loss']].plot()


In [None]:
# Instantiate the DenseNet-based classifier and print its layer summary.
model_densenet = build_densenet_model()
# NOTE(review): build() is called without an input shape — presumably fine
# because the DenseNet base was created with a fixed input_shape; confirm.
model_densenet.build()
model_densenet.summary()

In [None]:
# Train the DenseNet model on augmented batches for 30 epochs, validating on
# the held-out split after every epoch.
# - Model.fit accepts generators directly; fit_generator is deprecated.
# - steps_per_epoch must be a whole number; rounding up makes each epoch
#   include the final partial batch.
history = model_densenet.fit(
    data_generator,
    steps_per_epoch=math.ceil(x_train.shape[0] / BATCH_SIZE),
    epochs=30,
    validation_data=(x_val, y_val)
)

In [None]:
# Persist the per-epoch metrics. Values are cast to built-in floats first,
# because json cannot serialise NumPy float32 scalars, which some
# Keras/TensorFlow versions store in History.history.
history_serialisable = {
    metric: [float(value) for value in values]
    for metric, values in history.history.items()
}
with open('history.json', 'w') as f:
    json.dump(history_serialisable, f)

# Training vs. validation loss per epoch.
history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot()

In [None]:
# Decode the cumulative outputs: count how many of the 5 sigmoid outputs
# exceed 0.5 and subtract one to get the severity level.
y_test = model_densenet.predict(x_test) > 0.5
y_test = y_test.astype(int).sum(axis=1) - 1
# If no output exceeds the threshold the count is 0 and the decode gives -1,
# which is not a valid diagnosis; map that case to level 0.
y_test = np.maximum(y_test, 0)

df_test['diagnosis'] = y_test
df_test.to_csv('submission.csv',index=False)

In [None]:
# Plot every recorded metric column of the training history against the epoch index.
history_df.plot()

In [None]:
# Decode predictions on both splits the same way as for the submission:
# number of sigmoid outputs above 0.5, minus one. A sample with no output
# above the threshold would decode to -1 and appear as a spurious extra
# class in the metrics below, so it is mapped to level 0.
x_train_pred = model_densenet.predict(x_train) > 0.5
x_train_pred = np.maximum(x_train_pred.astype(int).sum(axis=1) - 1, 0)

x_val_pred = model_densenet.predict(x_val) > 0.5
x_val_pred = np.maximum(x_val_pred.astype(int).sum(axis=1) - 1, 0)

In [None]:
# Echo the decoded training-split predictions as the cell output.
x_train_pred

In [None]:
# Recover the severity level from the cumulative targets:
# number of set columns per row, minus one.
y_true_train = y_train.sum(axis=1) - 1
y_true_val = y_val.sum(axis=1) - 1

In [None]:
from sklearn.metrics import confusion_matrix, recall_score, precision_score, f1_score, accuracy_score

# Training-split report: confusion matrix, then macro-averaged recall,
# precision and F1 (in that order), then plain accuracy.
print(confusion_matrix(y_true_train, x_train_pred))
for macro_metric in (recall_score, precision_score, f1_score):
    print(macro_metric(y_true_train, x_train_pred, average='macro'))
print(accuracy_score(y_true_train, x_train_pred))

In [None]:
# Validation-split report: confusion matrix, then macro-averaged recall,
# precision and F1 (in that order), plain accuracy, and the number of
# validation samples.
print(confusion_matrix(y_true_val, x_val_pred))
for macro_metric in (recall_score, precision_score, f1_score):
    print(macro_metric(y_true_val, x_val_pred, average='macro'))
print(accuracy_score(y_true_val, x_val_pred))
print(y_true_val.shape)