# Mounting Google Drive

In [0]:
##### before running it, make sure you don't have lots of big files in your google drive
##### otherwise it's going to take too long to finish running it before giving the TIMEOUT error
##### also save the train_controls, train_patients, val_controls, val_patients to your drive and
##### create a "train" folder with train_controls, train_patients in it, and 
##### a "val" folder with val_controls, val_patients in it.
##### change the train_dir and val_dir in the next cell to the dir of your train and val folder
##### e.g. my train and val folder are saved in the "deep learning" dir

from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


# Loading Train, Val, and Test Data Using Keras ImageDataGenerator

In [0]:
train_dir = "/gdrive/My Drive/deep learning/Project/trainProbMaps/"
val_dir = "/gdrive/My Drive/deep learning/Project/valProbMaps/"
test_dir = "/gdrive/My Drive/deep learning/Project/testProbMaps/"

img_width = 600
img_height = 450
batch_size = 1
channels = 3
epochs = 50
nb_train_samples = 408
nb_valid_samples = 149
nb_test_samples = 192

from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255)             
valid_datagen = ImageDataGenerator(rescale=1./255)    
test_datagen = ImageDataGenerator(rescale=1./255) 

train_generator = train_datagen.flow_from_directory(
    train_dir, 
    target_size=(img_height, img_width),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True)   

valid_generator = valid_datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True) #weight toward one class or another

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False)

Using TensorFlow backend.


Found 408 images belonging to 2 classes.
Found 149 images belonging to 2 classes.
Found 192 images belonging to 2 classes.


# Building the Model Architecture & Training the Model

In [0]:
import numpy as np 
from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Model, Input
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Softmax, Flatten, Dense, BatchNormalization 
from keras.metrics import categorical_accuracy
from keras import backend as K
from keras import regularizers
import tensorflow as tf
from keras.models import Sequential

from keras.callbacks import TensorBoard

#from keras.layers import Input, Dense
from keras.models import Model

input_shape = (img_height, img_width, channels)

middle = Sequential(name="middle")

middle.add(Conv2D(32,(11, 11), input_shape=input_shape))
middle.add(Activation('relu'))
middle.add(MaxPooling2D(pool_size=(4, 4)))

middle.add(Conv2D(32,(7, 7), input_shape=input_shape))
middle.add(Activation('relu'))
middle.add(MaxPooling2D(pool_size=(4, 4)))

middle.add(Conv2D(64,(3, 3), input_shape=input_shape))
middle.add(Activation('relu'))
middle.add(MaxPooling2D(pool_size=(2, 2)))

model = Sequential()
model.add(middle)
model.add(Flatten())



model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.summary()

model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples / batch_size,
    epochs=epochs,
    validation_data=valid_generator,
    validation_steps=nb_valid_samples / batch_size,
    shuffle=True) # // batch_size)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
middle (Sequential)          (None, 12, 16, 64)        80352     
_________________________________________________________________
flatten_1 (Flatten)          (None, 12288)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                786496    
_________________________________________________________________
activation_4 (Activation)    (None, 64)                0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
_________________________________________________________________
activation_5 (Activation)    (None, 1)                 0         
Total para

KeyboardInterrupt: ignored

Saved model to disk


In [0]:
for input_batch, label_batch in train_generator:
    print(middle.predict(input_batch).shape)
    break

(5, 16, 12, 64)


In [0]:
def extract_features(dataset_type, sample_count):
    features = np.zeros(shape=(sample_count, 12, 16, 64))
    labels = np.zeros(shape=(sample_count))
    i = 0
    if dataset_type == "train":
        for inputs_batch, labels_batch in train_generator:
            features_batch = middle.predict(inputs_batch)
            features[i * batch_size : (i + 1) * batch_size] = features_batch
            labels[i * batch_size : (i + 1) * batch_size] = labels_batch
            i += 1
            if i * batch_size >= sample_count:
                break   
    elif dataset_type == "valid":
        for inputs_batch, labels_batch in valid_generator:
            features_batch = middle.predict(inputs_batch)
            features[i * batch_size : (i + 1) * batch_size] = features_batch
            labels[i * batch_size : (i + 1) * batch_size] = labels_batch
            i += 1
            if i * batch_size >= sample_count:
                break
    else:
        for inputs_batch, labels_batch in test_generator:
            features_batch = middle.predict(inputs_batch)
            features[i * batch_size : (i + 1) * batch_size] = features_batch
            labels[i * batch_size : (i + 1) * batch_size] = labels_batch
            i += 1
            if i * batch_size >= sample_count:
                break
    return features, labels

train_features, train_labels = extract_features("train", nb_train_samples)
valid_features, valid_labels = extract_features("valid", nb_valid_samples)
test_features, test_labels = extract_features("test", nb_test_samples)


print(train_features.shape, train_labels.shape)
print(valid_features.shape, valid_labels.shape)
print(test_features.shape, test_labels.shape)

(408, 12, 16, 64) (408,)
(149, 12, 16, 64) (149,)
(192, 12, 16, 64) (192,)


In [0]:
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

clf = RandomForestClassifier(n_estimators=100, max_depth=12, random_state=0, bootstrap=True)
clf.fit(np.reshape(train_features, (len(train_features), 16*12*64)), train_labels)
valid_prediction = clf.predict(np.reshape(valid_features, (len(valid_features), 16*12*64)))
valid_score = clf.score(np.reshape(valid_features, (len(valid_features), 16*12*64)), valid_labels)
fpr, tpr, thresholds = metrics.roc_curve(valid_labels, valid_prediction, pos_label=1)
valid_auc = metrics.auc(fpr, tpr)

print("score: {}".format(valid_score))
print("AUC: {}".format(valid_auc))

score: 0.9664429530201343
AUC: 0.9593877551020409


In [0]:
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

clf = RandomForestClassifier(n_estimators=100, max_depth=12, random_state=0, bootstrap=True)
clf.fit(np.reshape(train_features, (len(train_features), 16*12*64)), train_labels)
test_prediction = clf.predict(np.reshape(test_features, (len(test_features), 16*12*64)))
test_score = clf.score(np.reshape(test_features, (len(test_features), 16*12*64)), test_labels)
fpr, tpr, thresholds = metrics.roc_curve(test_labels, test_prediction, pos_label=1)
test_auc = metrics.auc(fpr, tpr)

print("score: {}".format(test_score))
print("AUC: {}".format(test_auc))

score: 0.96875
AUC: 0.9686956521739131


In [0]:
# divide to 10-folds

# produce 10-fold cross validation and train the model 10 times to get the average val_accuracy
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

kf = KFold(n_splits=10)
kf.get_n_splits(features)

fold_num = 0
count = 0

score_list = []
auc_list = []

for train_index, val_index in kf.split(features):
    clf = RandomForestClassifier(n_estimators=100, max_depth=12, random_state=0, bootstrap=True)
    clf.fit(np.reshape(features[train_index], (len(train_index), 388*475*3)), labels[train_index])
    valid_prediction = clf.predict(np.reshape(features[val_index], (len(val_index), 388*475*3)))
    valid_score = clf.score(np.reshape(features[val_index], (len(val_index), 388*475*3)), labels[val_index])
    fpr, tpr, thresholds = metrics.roc_curve(labels[val_index], valid_prediction, pos_label=1)
    valid_auc = metrics.auc(fpr, tpr)
    score_list.append(valid_score)
    auc_list.append(valid_auc)
    
print("mean score is: {}".format(np.mean(score_list)))
print("mean AUC is: {}".format(np.mean(auc_list)))


In [0]:
# !pip install keras-vis
!pip install git+https://github.com/raghakot/keras-vis.git

from keras.applications import ResNet50
from vis.utils import utils
from keras import activations

# Hide warnings on Jupyter Notebook
import warnings
warnings.filterwarnings('ignore')

Collecting git+https://github.com/raghakot/keras-vis.git
  Cloning https://github.com/raghakot/keras-vis.git to /tmp/pip-req-build-61rk4mng
Building wheels for collected packages: keras-vis
  Running setup.py bdist_wheel for keras-vis ... [?25l- \ done
[?25h  Stored in directory: /tmp/pip-ephem-wheel-cache-q11y7kbk/wheels/c5/ae/e7/b34d1cb48b1898f606a5cce08ebc9521fa0588f37f1e590d9f
Successfully built keras-vis
Installing collected packages: keras-vis
Successfully installed keras-vis-0.4.1


# Testing on Untouched Subset of Atlantis Patients and Controls

In [0]:
#TEST Accuracy on fraction from same group results in quite high accuracy but could get higher...
#print(train_generator.filenames)
#print(validation_generator.filenames)

nb_test_samples = 200

result = model.evaluate_generator(test_generator, steps=nb_test_samples / batch_size)
print(result)

predictions = model.predict_generator(test_generator, steps=nb_test_samples / batch_size)
print(np.round(predictions))
print(len(predictions))
img_path = "/gdrive/My Drive/deep learning/Project/testProbMaps/patients/24215.png"

from keras.preprocessing import image                             
import numpy as np

img = image.load_img(img_path, target_size=(img_width, img_height))
img_tensor = image.img_to_array(img)
img_tensor = np.expand_dims(img_tensor, axis=0)
img_tensor /= 255.                                               


print(img_tensor.shape)
#validation_generator.classes

# Testing on Early Glaucoma Dataset

In [0]:
#TEST Accuracy on Early Glaucoma Dataset (actually about 50% of these are classified as controls by experts 
#but were patients according to the training group criteria), so 50% accuracy make sense!!
test_dir = "/gdrive/My Drive/test/" 

img_width = 600
img_height = 450

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

nb_test_samples = 102

result = model.evaluate_generator(test_generator, steps=nb_test_samples / batch_size)
print(result)

predictions = model.predict_generator(test_generator, steps=nb_test_samples / batch_size)
print(np.round(predictions))
print(len(predictions))



In [0]:
print(test_generator.filenames)

# Visualizing Layer Activations

In [0]:
import matplotlib.pyplot as plt

#plt.imshow(img_tensor[0])
#plt.show()

from keras import models

layer_outputs = [layer.output for layer in model.layers[:8]]               
activation_model = models.Model(inputs=model.input, outputs=layer_outputs)
activations = activation_model.predict(img_tensor)
first_layer_activation = activations[0]
print(first_layer_activation.shape)

import matplotlib.pyplot as plt
plt.figure()
plt.matshow(first_layer_activation[0, :, :, 4], cmap='viridis')

model.summary