In [None]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import sys
import random
import IPython

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras

from sklearn.utils import shuffle
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix

from matplotlib.pyplot import imread
from cv2 import resize
import cv2

from keras import layers
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D
from keras.models import Model, load_model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
import pydot
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from sklearn.preprocessing import OneHotEncoder

from keras.initializers import glorot_uniform
import scipy.misc
from matplotlib.pyplot import imshow

# Seed every RNG this notebook uses so that "Restart & Run All" is repeatable.
# The seeding functions have to be *called*: an assignment such as
# `random.seed = 2` seeds nothing and replaces the function with an integer.
SEED = 2
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Train Data Read

In [None]:
dataset_images_location = "../input/aptos2019-blindness-detection/train_images"
dataset_groundtruth_location = "../input/aptos2019-blindness-detection/train.csv"

# Load the ground-truth CSV and keep only the two columns used downstream,
# under the column names the rest of the notebook refers to.
df = pd.read_csv(dataset_groundtruth_location)
df_ = pd.DataFrame()
df_['Image name'] = df['id_code']
df_['Retinopathy grade'] = df['diagnosis']

# One-hot encode the grade column (the encoder expects a 2-D column vector).
# `dtype=int` replaces the `np.int` alias, which no longer exists in recent
# NumPy releases and raised AttributeError there.
# NOTE(review): newer scikit-learn releases renamed `sparse` to
# `sparse_output` -- confirm against the installed version when upgrading.
labels = np.array(df_['Retinopathy grade']).reshape(-1, 1)
enc = OneHotEncoder(categories='auto', drop=None, sparse=False, dtype=int, handle_unknown='error')
labels = enc.fit_transform(labels)

# Store one encoded row vector per image; `list(labels)` splits the 2-D array
# into its rows so each DataFrame cell holds a single 1-D array.
df_['Retinopathy grade encoded'] = list(labels)

df = df_
df.head()

In [None]:
# Histogram of the grade labels over the full table, to inspect class balance.
df['Retinopathy grade'].hist(figsize = (10, 5))

In [None]:
def balance_data(class_size, df, random_state=None):
    """Resample `df` so every 'Retinopathy grade' has exactly `class_size` rows.

    Rows are drawn with replacement inside each grade, so smaller classes are
    oversampled and larger ones may be subsampled. The result is then shuffled
    and given a fresh 0..n-1 index.

    Parameters
    ----------
    class_size : int
        Number of rows to draw for each grade.
    df : pandas.DataFrame
        Table containing a 'Retinopathy grade' column.
    random_state : optional
        Forwarded to both pandas sampling calls; pass an int to make the
        result reproducible. The default (None) keeps the unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        The balanced, shuffled table with all original columns.

    Side effects: prints the new/old sizes and draws a histogram of the
    balanced grades.
    """
    # GroupBy.sample keeps every column (including the grouping column), unlike
    # groupby.apply, whose handling of the grouping column is being changed in
    # recent pandas releases.
    balanced = df.groupby('Retinopathy grade').sample(
        n=class_size, replace=True, random_state=random_state
    )

    # Shuffle so the classes are interleaved, then discard the source index.
    train_df = balanced.sample(frac=1, random_state=random_state).reset_index(drop=True)

    print('New Data Size:', train_df.shape[0], 'Old Size:', df.shape[0])
    train_df['Retinopathy grade'].hist(figsize = (10, 5))
    return train_df

In [None]:
from sklearn.model_selection import train_test_split

# 90%/10% train/validation split. `stratify` is what actually keeps the grade
# distribution of the validation set similar to the full data; without it the
# split is purely random. `random_state` makes the split repeatable.
train_df, val_df = train_test_split(
    df,
    test_size=0.1,
    stratify=df['Retinopathy grade'],
    random_state=2,
)

In [None]:
# Grade histogram of the training portion, followed by the number of rows held
# out for validation (displayed as the cell's output).
train_df['Retinopathy grade'].hist(figsize = (10, 5))
len(val_df)

In [None]:
# Row count per grade in the training split: `aggfunc=len` is applied to each
# remaining column within each grade group.
train_df.pivot_table(index='Retinopathy grade', aggfunc=len)

In [None]:
# Oversample so that every grade has as many rows as the most frequent grade.
largest_class_size = train_df['Retinopathy grade'].value_counts().max()
train_df = balance_data(largest_class_size, train_df)

# Confirm the balanced distribution and report the new training-set size.
train_df['Retinopathy grade'].hist(figsize=(10, 5))
print(len(train_df))

# Data Generator

In [None]:
# Number of source images read per generator step. `data_gen` emits 4 views of
# each image, so one yielded batch holds bs * 4 samples. Also used to derive
# `steps_per_epoch` for training.
bs = 16

In [None]:
def data_gen(df, enc):
    """Endlessly yield augmented (images, labels) batches built from `df`.

    Each step reads `bs` consecutive rows of `df` (by position), loads the
    matching PNG from `dataset_images_location`, resizes it to 224x224 and
    emits four views of it: the image itself, a vertical flip, a horizontal
    flip and a 180-degree rotation. All four views share the row's label.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Image name' and 'Retinopathy grade encoded' columns.
    enc :
        Unused; kept so existing calls keep working.

    Yields
    ------
    (images, labels)
        `images` is a float array of shape (bs * 4, 224, 224, 3) divided by
        the batch maximum; `labels` has shape (bs * 4, 5).

    Raises
    ------
    ValueError
        If `df` has fewer than `bs` rows (no full batch can be formed).
    FileNotFoundError
        If an image cannot be read from disk.

    A trailing partial batch is never emitted: once fewer than `bs` rows
    remain, reading restarts from the first row.
    """
    im_size = 224
    views_per_image = 4
    batch_size = bs
    n_rows = len(df)

    # Without this guard the loop below would spin forever and never yield.
    if n_rows < batch_size:
        raise ValueError(
            'data_gen needs at least {} rows to build a batch, got {}'.format(batch_size, n_rows)
        )

    # Positional access: `df[col][k]` is label-based and fails (or picks the
    # wrong row) on frames whose index is not 0..n-1, e.g. a shuffled split.
    names = df['Image name'].to_numpy()
    encoded = df['Retinopathy grade encoded'].to_numpy()

    counter = 0
    while True:
        # Restart from the top when the remaining rows cannot fill a batch.
        if counter + batch_size > n_rows:
            counter = 0

        # Fresh buffers per batch so arrays already handed to the consumer are
        # never modified afterwards.
        images = np.zeros(shape = (batch_size * views_per_image, im_size, im_size, 3))
        labels = np.zeros(shape = (batch_size * views_per_image, 5))

        for i in range(batch_size):
            row = counter + i
            path = dataset_images_location + '/' + names[row] + ".png"

            # cv2.imread signals failure by returning None instead of raising.
            img = cv2.imread(path)
            if img is None:
                raise FileNotFoundError('Could not read image: ' + path)
            img = cv2.resize(img, (im_size, im_size))

            slot = i * views_per_image
            images[slot] = img
            images[slot + 1] = np.flip(img, axis = 0)
            images[slot + 2] = np.flip(img, axis = 1)
            images[slot + 3] = np.rot90(img, 2)

            # The same one-hot label applies to all four views.
            labels[slot:slot + views_per_image] = encoded[row]

        counter = counter + batch_size

        # Scale by the batch maximum; skip the division for an all-zero batch,
        # which would otherwise produce NaNs.
        peak = np.max(images)
        if peak > 0:
            images = images / peak

        yield images, labels


# Model (ResNet50)

In [None]:
def identity_block(X, f, filters, d = 0.05):
    """Three-convolution residual block whose shortcut is the input itself.

    The main path is 1x1 conv -> fxf conv -> 1x1 conv, each followed by batch
    normalisation over axis 3. The first two stages also apply ReLU and
    dropout. The untouched input is then added to the main path and the sum
    goes through ReLU and dropout.

    Arguments:
    X -- input tensor (batch normalisation over axis 3 suggests channels-last;
         the final Add presumably needs X to already have filters[2] channels)
    f -- kernel size of the middle convolution
    filters -- three filter counts, one per convolution of the main path
    d -- dropout rate used after every ReLU in this block

    Returns:
    Output tensor of the block.
    """
    n_first, n_middle, n_last = filters
    shortcut = X

    # First and second stages share the same conv -> BN -> ReLU -> dropout shape.
    stage_specs = (
        dict(filters = n_first, kernel_size = (1, 1), strides = (1, 1), padding = 'valid'),
        dict(filters = n_middle, kernel_size = (f, f), strides = (1, 1), padding = 'same'),
    )
    out = X
    for conv_kwargs in stage_specs:
        out = Conv2D(**conv_kwargs)(out)
        out = BatchNormalization(axis = 3)(out)
        out = Activation('relu')(out)
        out = layers.Dropout(d)(out)

    # Third stage: no activation before the residual addition.
    out = Conv2D(filters = n_last, kernel_size = (1, 1), strides = (1, 1), padding = 'valid')(out)
    out = BatchNormalization(axis = 3)(out)

    # Merge with the shortcut, then activate and regularise.
    out = Add()([out, shortcut])
    out = Activation('relu')(out)
    out = layers.Dropout(d)(out)

    return out

In [None]:
def convolutional_block(X, f, filters, s = 2, d = 0.05):
    """Three-convolution residual block with a projected (convolutional) shortcut.

    Main path: 1x1 conv with stride s -> fxf conv -> 1x1 conv, each followed by
    batch normalisation over axis 3. The first two stages also apply ReLU and
    dropout. The shortcut is a 1x1 conv with stride s and filters[2] filters
    plus batch normalisation, so it can be added to the main path.

    Arguments:
    X -- input tensor (batch normalisation over axis 3 suggests channels-last)
    f -- kernel size of the middle convolution
    filters -- three filter counts, one per convolution of the main path
    s -- stride of the first main-path convolution and of the shortcut convolution
    d -- dropout rate used after every ReLU in this block

    Returns:
    Output tensor of the block.
    """
    n_first, n_middle, n_last = filters
    shortcut = X

    # Main path, stages one and two: conv -> BN -> ReLU -> dropout.
    stage_specs = (
        dict(filters = n_first, kernel_size = (1, 1), strides = (s, s)),
        dict(filters = n_middle, kernel_size = (f, f), strides = (1, 1), padding = 'same'),
    )
    out = X
    for conv_kwargs in stage_specs:
        out = Conv2D(**conv_kwargs)(out)
        out = BatchNormalization(axis = 3)(out)
        out = Activation('relu')(out)
        out = layers.Dropout(d)(out)

    # Main path, stage three: no activation before the residual addition.
    out = Conv2D(filters = n_last, kernel_size = (1, 1), strides = (1, 1), padding = 'valid')(out)
    out = BatchNormalization(axis = 3)(out)

    # Shortcut path: project the input with the same stride and filter count.
    shortcut = Conv2D(filters = n_last, kernel_size = (1, 1), strides = (s, s), padding = 'valid')(shortcut)
    shortcut = BatchNormalization(axis = 3)(shortcut)

    # Merge both paths, then activate and regularise.
    out = Add()([out, shortcut])
    out = Activation('relu')(out)
    out = layers.Dropout(d)(out)

    return out

In [None]:
def Resnet50(input_shape = (224, 224, 3), classes = 5, d = 0.05):
    """Build the ResNet50-style classifier used in this notebook.

    Layout: zero padding -> 7x7 stride-2 convolution stem with max pooling ->
    four residual stages (one convolutional block followed by 2, 3, 5 and 2
    identity blocks) -> 2x2 average pooling -> flatten -> Dense(128, relu) ->
    Dense(classes, softmax).

    Arguments:
    input_shape -- shape of one input image, without the batch dimension
    classes -- number of output classes (size of the softmax layer)
    d -- dropout rate, applied in the stem, in every residual block and after
         the average pooling

    Returns:
    An uncompiled Keras `Model` named 'Resnet50'.
    """
    X_input = Input(input_shape)

    X = ZeroPadding2D((3, 3))(X_input)

    # Stage 1: stem
    X = Conv2D(64, (7, 7), strides = (2, 2))(X)
    X = BatchNormalization(axis = 3)(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides = (2, 2))(X)
    X = layers.Dropout(d)(X)

    # Stages 2-5: (filters, stride of the convolutional block, identity blocks)
    stage_layout = (
        ([64, 64, 256], 1, 2),
        ([128, 128, 512], 2, 3),
        ([256, 256, 1024], 2, 5),
        ([512, 512, 2048], 2, 2),
    )
    for stage_filters, stride, n_identity in stage_layout:
        # `d` is forwarded explicitly: otherwise the blocks silently fall back
        # to their own default rate and the `d` argument of this function only
        # affects the stem and the head.
        X = convolutional_block(X, f = 3, filters = stage_filters, s = stride, d = d)
        for _ in range(n_identity):
            X = identity_block(X, 3, stage_filters, d = d)

    X = AveragePooling2D((2, 2), name = "avg_pool")(X)
    X = layers.Dropout(d)(X)

    # Output layers
    X = Flatten()(X)
    X = Dense(128, activation = 'relu', kernel_initializer = glorot_uniform(seed = 0))(X)
    X = Dense(classes, activation = 'softmax', name = 'fc' + str(classes), kernel_initializer = glorot_uniform(seed = 0))(X)

    # Create model
    model = Model(inputs = X_input, outputs = X, name = 'Resnet50')

    return model

# Model Compile and Fit

In [None]:
# Seed TensorFlow right before the model is built. Python's and NumPy's seeding
# functions are deliberately not assigned to here: `random.seed = 2` and
# `np.random.seed = 2` seed nothing and overwrite those functions with an int.
tf.random.set_seed(2)

model = Resnet50(input_shape = (224, 224, 3), classes = 5)
opt = keras.optimizers.Adam(learning_rate = 0.00008, beta_1 = 0.9)
model.compile(optimizer = opt, loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Stop when training accuracy has not improved by at least 0.01 for 10 epochs.
# No validation data is passed below, so the training metric is monitored.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'accuracy', min_delta = 0.01, patience = 10)

# model.summary()

train_generator = data_gen(train_df, enc)
# val_generator = data_gen(val_df, enc)

# One epoch = one pass over train_df: each generator step consumes `bs` rows.
# `shuffle` is not passed because Keras ignores it for generator input.
hist = model.fit(train_generator,
                 steps_per_epoch = len(train_df) // bs,
                 epochs = 25,
#                  validation_data = val_generator,
#                  validation_steps = int(len(val_df) / bs),
                 callbacks = [early_stopping])

print('Done')

In [None]:
# Persist the trained model in two files that share one tag:
# the architecture as JSON and the weights as HDF5.
model_tag = 'gradingNew11'

# Architecture -> JSON
model_json = model.to_json()
with open("model_{}.json".format(model_tag), "w") as json_file:
    json_file.write(model_json)

# Weights -> HDF5
model.save_weights("model_{}.h5".format(model_tag))
print("Saved model to disk")

In [None]:
plt.plot(hist.history['loss'])