### Install software packages on Google Colab

In [None]:
# Install the PPA tooling, then google-drive-ocamlfuse and fuse from the PPA below.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
# NOTE: with `2>&1 > /dev/null` only stdout is discarded; stderr still reaches the cell output.
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# Authenticate the Colab user and fetch the application-default credentials,
# whose client id/secret are passed to google-drive-ocamlfuse below.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# First headless run (stdin closed): keep only the output line(s) containing "URL",
# presumably the authorisation link to open in a browser.
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Read the verification code without echoing it into the notebook output.
vcode = getpass.getpass()
# Second headless run: feed the verification code on stdin.
# NOTE(review): vcode is interpolated unquoted into a shell command line.
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

In [None]:
# Create the mount point (no error if it already exists) and mount Google Drive on it.
# Later cells read from and write to paths under drive/EyePACS.
!mkdir -p drive
!google-drive-ocamlfuse drive

### Copy and extract the images pre-processed with `preprocess.py`

In [None]:
!cp drive/EyePACS/train.zip .
print('Copied training data')
!cp drive/EyePACS/test.zip .
print('Copied test data')
!cp drive/EyePACS/IDRiD.zip .
print('Copied IDRiD data')
!unzip -qq train.zip
print('Extracted training data')
!unzip -qq test.zip
print('Extracted test data')
!unzip -qq IDRiD.zip
print('Extracted IDRiD data')

Copied training data
Copied test data
Copied IDRiD data
Extracted training data
Extracted test data
Extracted IDRiD data


### Load last checkpoint (change the file name to the one under the `model` folder)

- Train and test data are from EyePACS - https://www.kaggle.com/c/diabetic-retinopathy-detection
- IDRiD is from - https://idrid.grand-challenge.org/

In [None]:
!cp drive/EyePACS/weights-05-0.8792-0.355479.hdf5 .
!mv weights-05-0.8792-0.355479.hdf5 model.hdf5

In [None]:
!rm IDRiD.zip
!rm train.zip
!rm test.zip

### Code for training and testing/inference

In [None]:
import os

import numpy as np
from keras.callbacks import Callback
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, CSVLogger
from keras.optimizers import SGD, Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import Xception
from keras.models import load_model
from keras.models import Model
from keras.layers import Dense

from skimage.io import imread
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import roc_auc_score

Using TensorFlow backend.


In [None]:
def get_test_data(directory):
    """Load a labelled image set laid out as ``<directory>/<label>/<image>``.

    Only sub-directories whose name is a non-negative integer are treated as
    classes; anything else (stray files, ``__MACOSX`` folders left by unzip,
    hidden files inside a class folder) is skipped instead of crashing the
    load. Entries are visited in sorted order so the result is reproducible.

    Args:
        directory: Path of the folder containing one sub-folder per label.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays. Each image is converted
        to float32 and divided by 255; ``labels[i]`` is the integer name of
        the folder ``images[i]`` came from. Both arrays are empty when no
        image is found.
    """
    images = []
    labels = []

    for entry in sorted(os.listdir(directory)):
        class_dir = os.path.join(directory, entry)
        # The folder name is the label, so it must be a directory named by an integer.
        if not (os.path.isdir(class_dir) and entry.isdigit()):
            continue

        files = [f for f in sorted(os.listdir(class_dir)) if not f.startswith('.')]
        images += [np.float32(imread(os.path.join(class_dir, f))) / 255 for f in files]
        labels += [int(entry)] * len(files)

    return np.array(images), np.array(labels)


# Load the IDRiD evaluation set once; MetricsCallback reads these names as globals.
x_test, y_test = get_test_data('IDRiD')
# Binary target: label 0 stays 0, every non-zero label becomes 1.
y_test_bin = (y_test != 0).astype(y_test.dtype)

In [None]:
class MetricsCallback(Callback):
    """Report metrics on the IDRiD set at the end of every epoch.

    The callback reads the notebook-level globals ``x_test``, ``y_test`` and
    ``y_test_bin``. After each epoch it prints accuracy, quadratic-weighted
    kappa, binary sensitivity/specificity and ROC AUC, then prints the
    multi-class confusion matrix and saves it to
    ``drive/EyePACS/confusion_matrix_<epoch>.txt``.
    """

    def __init__(self, logs=None):
        # `logs` is accepted for backward compatibility only; it is not used.
        super(MetricsCallback, self).__init__()
        self._data = []

    def on_epoch_end(self, epoch, logs=None):
        """Evaluate ``self.model`` on the IDRiD set and persist the confusion matrix.

        Args:
            epoch: Zero-based epoch index supplied by Keras; used in the file name.
            logs: Training logs supplied by Keras (unused).
        """
        filename = 'drive/EyePACS/confusion_matrix_' + str(epoch) + '.txt'

        y_prob = self.model.predict(x_test)
        y_pred = np.argmax(y_prob, axis=1)
        # Binary prediction: class 0 vs. any other class, mirroring y_test_bin.
        y_pred_bin = (y_pred != 0).astype(y_pred.dtype)
        # Continuous score for the positive (non-zero) class. ROC AUC needs
        # scores, not hard labels, to be a real area under the curve.
        positive_score = 1.0 - y_prob[:, 0]

        print('Accuracy on IDRiD: ', accuracy_score(y_test, y_pred))
        print('Kappa score: ', cohen_kappa_score(y_test, y_pred, weights='quadratic'))

        # Fixing the labels keeps the matrix 2x2 even if one class is never
        # predicted, so the four-way unpacking cannot fail.
        tn, fp, fn, tp = confusion_matrix(y_test_bin, y_pred_bin, labels=[0, 1]).ravel()
        print('Sensitivity: ', tp / float(tp + fn))
        print('Specificity: ', tn / float(tn + fp))
        print('ROC AUC: ', roc_auc_score(y_test_bin, positive_score))

        # One row/column per model output so the saved matrix has a stable shape.
        cm = confusion_matrix(y_test, y_pred, labels=np.arange(y_prob.shape[1]))
        print(cm)
        np.savetxt(filename, cm)

        
class EarlyStopCallback(Callback):
    """Stop training and save the model once a monitored metric reaches a threshold.

    Args:
        monitor: Key to look up in the epoch logs (default ``'acc'``).
        value: Threshold; training stops when the monitored value is >= this.
        verbose: When > 0, print a message at the moment training is stopped.
    """

    def __init__(self, monitor='acc', value=0.9, verbose=0):
        # Name this class (not Callback) so that Callback.__init__ actually runs.
        super(EarlyStopCallback, self).__init__()
        self.monitor = monitor
        self.value = value
        self.verbose = verbose

    def on_epoch_end(self, epoch, logs=None):
        """Check the monitored metric; save the model and stop once it reaches ``value``."""
        import warnings

        current = (logs or {}).get(self.monitor)
        if current is None:
            # Without the metric there is nothing to compare; warn instead of
            # failing on `None >= value`.
            warnings.warn('EarlyStopCallback: metric `%s` is not available in the '
                          'epoch logs; skipping check.' % self.monitor, RuntimeWarning)
            return

        if current >= self.value:
            if self.verbose > 0:
                print('Epoch %d: %s reached %s (threshold %s), stopping training'
                      % (epoch, self.monitor, current, self.value))
            self.model.save('drive/EyePACS/resnet34_model1.h5py')
            self.model.stop_training = True

In [None]:
# Scale the learning rate by 0.8 (never below 0.00005) once the monitored
# quantity has gone one epoch without improving.
reduce_lr = ReduceLROnPlateau(factor=0.8, patience=1, min_lr=0.00005)

# Record the metrics of each epoch in a CSV file on Drive.
logger = CSVLogger(filename='drive/EyePACS/epoch_log.csv')

# Save the model after every epoch (period=1). Approximately 100mb per model.
checkpoint_filepath = "drive/EyePACS/weights-{epoch:02d}-{acc:.4f}-{loss:.6f}.hdf5"
checkpoint = ModelCheckpoint(filepath=checkpoint_filepath, period=1, verbose=1)

# Per-epoch evaluation on the IDRiD set.
metrics = MetricsCallback()

In [None]:
batch_size = 8
epochs = 5

# Training pipeline: multiply pixel values by 1/255 and augment on the fly with
# arbitrary rotations, shear, zoom and both flips; gaps are filled with a constant.
# NOTE(review): the unit of shear_range (radians vs. degrees) depends on the
# Keras version - confirm that np.pi / 12 means what is intended here.
train_gen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=360,
    shear_range=np.pi / 12,
    zoom_range=0.2,
    fill_mode='constant',
    horizontal_flip=True,
    vertical_flip=True,
)
train_generator = train_gen.flow_from_directory(
    directory='train',
    target_size=(512, 512),
    batch_size=batch_size,
)

# Validation pipeline: rescaling only, no augmentation.
test_gen = ImageDataGenerator(rescale=1.0 / 255)
test_generator = test_gen.flow_from_directory(
    directory='test',
    target_size=(512, 512),
    batch_size=batch_size,
)

Found 35126 images belonging to 5 classes.
Found 8441 images belonging to 5 classes.


In [None]:
# Resume from model.hdf5 when it exists; otherwise build a fresh 5-class
# classifier on top of an Xception backbone.
if not os.path.exists('model.hdf5'):
    print('Model file not found. Initializing new model')
    # Ask Keras for the headless network ending in global average pooling.
    # This replaces popping the last layer off `model.layers`, which relies on
    # mutating Keras internals and is a no-op on versions where `layers` is a
    # read-only property. `pooling` is only honoured when include_top=False.
    base_model = Xception(include_top=False, pooling='avg')

    im_input = base_model.input
    m = Dense(5, activation='softmax')(base_model.output)
    # `inputs=` is the Keras 2 keyword; `input=` is the legacy Keras 1 spelling.
    model = Model(inputs=im_input, outputs=m)

    optimizer = Adam(0.0003)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                  metrics=['accuracy'])
else:
    print('Model found')
    model = load_model('model.hdf5')

Model found


In [None]:
# Step counts are derived from the generators instead of hard-coded image
# counts: the previous validation count (8114) did not match the 8441 images
# the test generator reports, so part of the validation set was never used.
model.fit_generator(train_generator,
                    epochs=epochs,
                    steps_per_epoch=train_generator.samples // batch_size,
                    validation_data=test_generator,
                    validation_steps=test_generator.samples // batch_size,
                    callbacks=[checkpoint, reduce_lr, logger,
                               metrics])