# Mounting Google Drive

In [0]:
##### before running it, make sure you don't have lots of big files in your google drive
##### otherwise it's going to take too long to finish running it before giving the TIMEOUT error
##### also save the train_controls, train_patients, val_controls, val_patients to your drive and
##### create a "train" folder with train_controls, train_patients in it, and 
##### a "val" folder with val_controls, val_patients in it.
##### change the train_dir and val_dir in the next cell to the dir of your train and val folder
##### e.g. my train and val folder are saved in the "deep learning" dir

from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


# Loading Train, Val, and Test Data Using Keras ImageDataGenerator

In [0]:
#Kavi's Dir
train_dir = "/gdrive/My Drive/trainProbMaps/"
val_dir = "/gdrive/My Drive/valProbMaps/"
test_dir = "/gdrive/My Drive/EGD/"

# Daniel's dir:
# train_dir = "/gdrive/My Drive/deep learning/Project/trainProbMaps/"
# val_dir = "/gdrive/My Drive/deep learning/Project/valProbMaps/"
# test_dir = "/gdrive/My Drive/deep learning/Project/testProbMaps/"

img_width = 224
img_height = 224
batch_size = 1
channels = 3
epochs = 50
nb_train_samples = 408
nb_valid_samples = 149
nb_test_samples = 192

from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255)             
valid_datagen = ImageDataGenerator(rescale=1./255)    
test_datagen = ImageDataGenerator(rescale=1./255) 

train_generator = train_datagen.flow_from_directory(
    train_dir, 
    target_size=(img_height, img_width),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True)   

valid_generator = valid_datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True) #weight toward one class or another

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False)

label_mapT = train_generator.class_indices
print(label_mapT)

label_mapV = valid_generator.class_indices
print(label_mapV)

label_mapTe = test_generator.class_indices
print(label_mapTe)

Found 408 images belonging to 2 classes.
Found 149 images belonging to 2 classes.
Found 192 images belonging to 2 classes.
{'controls': 0, 'patients': 1}
{'controls': 0, 'patients': 1}
{'controls': 0, 'patients': 1}


# Building the Model Architecture (Pre-trained Keras VGG16 Extracting Features from OCT Dataset) & Training the Model

In [0]:
import numpy as np 
from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Model, Input
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Softmax, Flatten, Dense, BatchNormalization 
from keras.metrics import categorical_accuracy
from keras import backend as K
from keras import regularizers
import tensorflow as tf
from keras.models import Sequential

from PIL import Image

Image.MAX_IMAGE_PIXELS = None

from keras import layers

from keras.callbacks import TensorBoard

#from keras.layers import Input, Dense
from keras import layers
from keras.applications import resnet50
from keras import optimizers
from keras.applications import VGG16

#pretrained VGG16 on imagenet, starting to replicate Hassan's work
conv_base = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(img_height, img_width, channels))

#conv_base.summary()

#Extracting features from OCT data using pretrained VGG
def extract_features(dataset_type, sample_count):
    features = np.zeros(shape=(sample_count, 7, 7, 512))
    labels = np.zeros(shape=(sample_count))
    i = 0
    if dataset_type == "train":
        for inputs_batch, labels_batch in train_generator:
            features_batch = conv_base.predict(inputs_batch)
            features[i * batch_size : (i + 1) * batch_size] = features_batch
            labels[i * batch_size : (i + 1) * batch_size] = labels_batch
            i += 1
            if i * batch_size >= sample_count:
                break   
    elif dataset_type == "valid":
        for inputs_batch, labels_batch in valid_generator:
            features_batch = conv_base.predict(inputs_batch)
            features[i * batch_size : (i + 1) * batch_size] = features_batch
            labels[i * batch_size : (i + 1) * batch_size] = labels_batch
            i += 1
            if i * batch_size >= sample_count:
                break
    else:
        for inputs_batch, labels_batch in test_generator:
            features_batch = conv_base.predict(inputs_batch)
            features[i * batch_size : (i + 1) * batch_size] = features_batch
            labels[i * batch_size : (i + 1) * batch_size] = labels_batch
            i += 1
            if i * batch_size >= sample_count:
                break
    return features, labels

train_features, train_labels = extract_features("train", nb_train_samples)
valid_features, valid_labels = extract_features("valid", nb_valid_samples)
test_features, test_labels = extract_features("test", nb_test_samples)


print(train_features.shape, train_labels.shape)
print(valid_features.shape, valid_labels.shape)
print(test_features.shape, test_labels.shape)

from google.colab import files
test_features = np.reshape(test_features, (192, 7*7*512))
print(test_features.shape)
np.savetxt('LabelsTest.txt', test_labels)
files.download('LabelsTest.txt')
np.savetxt('VGG_RF_FeaturesTest.txt', test_features)
files.download('VGG_RF_FeaturesTest.txt')

(408, 7, 7, 512) (408,)
(149, 7, 7, 512) (149,)
(192, 7, 7, 512) (192,)
(192, 25088)


In [0]:
import numpy as np 
from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Model, Input
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Softmax, Flatten, Dense, BatchNormalization 
from keras.metrics import categorical_accuracy
from keras import backend as K
from keras import regularizers
import tensorflow as tf
from keras.models import Sequential

from PIL import Image

Image.MAX_IMAGE_PIXELS = None

from keras import layers

from keras.callbacks import TensorBoard

#from keras.layers import Input, Dense
from keras import layers
from keras.applications import resnet50
from keras import optimizers
from keras.applications import VGG16

import keras

#img_width = 475
#img_height = 388

img_width = 224
img_height = 224

from PIL import Image

Image.MAX_IMAGE_PIXELS = None

#EGD data
egd_dir = "/gdrive/My Drive/EGD/"

egd_datagen = ImageDataGenerator(rescale=1./255)

egd_generator = egd_datagen.flow_from_directory(
    egd_dir,
    target_size=(img_height, img_width),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True)

conv_base = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(img_height, img_width, channels))

conv_base.summary()
# inp = keras.layers.Input(shape=(img_height, img_width, channels), name='image_input')

# vgg_model = VGG16(weights='imagenet',
#                   include_top=False)
# vgg_model.trainable = False

# x = keras.layers.Flatten(name='flatten')(vgg_model)
# x = keras.layers.Dense(4096, activation='relu', name='fc1')(x)
# x = keras.layers.Dense(4096, activation='relu', name='fc2')(x)
# x = keras.layers.Dense(1000, activation='softmax', name='predictions')(x)
# new_model = keras.models.Model(inputs=inp, outputs=x)
# new_model.compile(optimizer='adam', loss='categorical_crossentropy', 
#                   metrics=['accuracy'])

# new_model.fit_generator(
#     egd_generator,
#     steps_per_epoch=102 / batch_size,
#     epochs=epochs,
#      validation_data=validation_generator,
#      validation_steps=nb_validation_samples / batch_size,
#     shuffle=True) # // batch_size)



features = np.zeros(shape=(102, 7, 7, 512))
labels = np.zeros(shape=(102))
i = 0
for inputs_batch, labels_batch in egd_generator:
    features_batch = conv_base.predict(inputs_batch)
    features[i * batch_size : (i + 1) * batch_size] = features_batch
    labels[i * batch_size : (i + 1) * batch_size] = labels_batch
    i += 1
    if i * batch_size >= 102:
        break
        
print(features.shape, labels.shape)

# from google.colab import files
#features = np.reshape(features, (102, 7*7*512))
# print(features.shape)
# np.savetxt('Labels.txt', labels)
# files.download('Labels.txt')
# np.savetxt('VGG_RF_Features.txt', features)
# files.download('VGG_RF_Features.txt')

Found 102 images belonging to 2 classes.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None,

In [0]:
# divide to 10-folds

# produce 10-fold cross validation and train the model 10 times to get the average val_accuracy
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn import metrics


kf = KFold(n_splits=10)
kf.get_n_splits(features)

fold_num = 0
count = 0

score_list = []
auc_list = []

for train_index, val_index in kf.split(features):
    clf = RandomForestClassifier(n_estimators=100, max_depth=12, random_state=0, bootstrap=True)
    clf.fit(np.reshape(features[train_index], (len(train_index), 7*7*512)), labels[train_index])
    valid_prediction = clf.predict(np.reshape(features[val_index], (len(val_index), 7*7*512)))
    valid_score = clf.score(np.reshape(features[val_index], (len(val_index), 7*7*512)), labels[val_index])
    fpr, tpr, thresholds = metrics.roc_curve(labels[val_index], valid_prediction, pos_label=1)
    valid_auc = metrics.auc(fpr, tpr)
    score_list.append(valid_score)
    auc_list.append(valid_auc)
    print(labels[val_index])
    print(valid_prediction)
    
print("mean score is: {}".format(np.mean(score_list)))
print("mean AUC is: {}".format(np.mean(auc_list)))

#print(labels[val_index])
#print(valid_prediction)

[0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 1.]
[0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 1.]
[0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1.]
[1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0.]
[0. 0. 1. 0. 0. 0. 0. 1. 0. 1.]
[0. 0. 0. 0. 0. 1. 0. 1. 0. 1.]
[1. 1. 0. 1. 0. 1. 1. 0. 1. 0.]
[1. 1. 0. 1. 0. 1. 1. 0. 1. 0.]
[0. 1. 0. 0. 0. 0. 0. 1. 0. 1.]
[0. 1. 0. 0. 1. 1. 0. 1. 0. 1.]
[0. 0. 1. 1. 1. 0. 1. 1. 1. 1.]
[0. 1. 1. 1. 1. 0. 1. 1. 1. 1.]
[1. 1. 1. 1. 1. 0. 1. 1. 1. 0.]
[1. 0. 1. 1. 1. 0. 1. 1. 1. 1.]
[1. 1. 1. 0. 1. 1. 1. 0. 0. 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 0. 1.]
[0. 0. 1. 1. 1. 1. 0. 0. 0. 1.]
[0. 0. 1. 1. 1. 1. 0. 0. 0. 1.]
[1. 1. 1. 0. 0. 1. 0. 0. 1. 1.]
[1. 1. 1. 0. 0. 1. 0. 0. 1. 1.]
mean score is: 0.8918181818181818
mean AUC is: 0.8610119047619047


# Classifier Layer: Random Forest

In [0]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn import metrics

clf = RandomForestClassifier(n_estimators=100, max_depth=12, random_state=0, bootstrap=True)
clf.fit(np.reshape(train_features, (402, 7*7*512)), train_labels)

valid_prediction = clf.predict(np.reshape(valid_features, (102, 7*7*512)))
# print("validation accuracy:", sum([prediction[i] == valid_labels[i] for i in range(len(valid_labels))])/len(valid_labels))
#print(prediction)
#print(valid_labels)

test_prediction = clf.predict(np.reshape(test_features, (102, 7*7*512)))
# print("test accuracy:", sum([prediction[i] == test_labels[i] for i in range(len(test_labels))])/len(test_labels))
# print(test_prediction)
# print(test_labels)


ValueError: ignored

In [0]:
valid_score = clf.score(np.reshape(valid_features, (102, 7*7*512)), valid_labels)
test_score = clf.score(np.reshape(test_features, (102, 7*7*512)), test_labels)

print("valid accuracy:", valid_score)
print("test accuracy:", test_score)

from sklearn import metrics

fpr, tpr, thresholds = metrics.roc_curve(valid_labels, valid_prediction, pos_label=1)
print("valid AUC:", metrics.auc(fpr, tpr))
fpr, tpr, thresholds = metrics.roc_curve(test_labels, test_prediction, pos_label=1)
print("test AUC:", metrics.auc(fpr, tpr))

ValueError: ignored

In [0]:
print(test_generator.filenames)

In [0]:
control_list, patient_list = test_generator.filenames[:100], test_generator.filenames[100:]

print(test_generator.filenames[:100])
print(test_generator.filenames[100:])

control_idx_list = [9, 61, 65, 70]
patient_idx_list = [32, 43, 46, 48, 52, 86]

from google.colab import files

patient_name_list = [patient_list[i] for i in patient_idx_list]
control_name_list = [control_list[i] for i in control_idx_list]

for i in range(len(patient_name_list)):
    files.download(test_dir+patient_name_list[i])

for i in range(len(control_name_list)):
    files.download(test_dir+control_name_list[i])

# Testing

In [0]:
#print(train_generator.filenames)
#print(validation_generator.filenames)

nb_test_samples = 102

result = model.evaluate_generator(test_generator, steps=nb_test_samples / batch_size)
print(result)

predictions = model.predict_generator(test_generator, steps=nb_test_samples / batch_size)
print(predictions)
print(len(predictions))
img_path = "/gdrive/My Drive/validation/val_patients/patient13.png"

from keras.preprocessing import image                             
import numpy as np

img = image.load_img(img_path, target_size=(img_width, img_height))
img_tensor = image.img_to_array(img)
img_tensor = np.expand_dims(img_tensor, axis=0)
img_tensor /= 255.                                               


print(img_tensor.shape)
#validation_generator.classes

NameError: ignored

# Visualizing

In [0]:
import matplotlib.pyplot as plt

#plt.imshow(img_tensor[0])
#plt.show()

from keras import models

layer_outputs = [layer.output for layer in model.layers[:8]]               
activation_model = models.Model(inputs=model.input, outputs=layer_outputs)
activations = activation_model.predict(img_tensor)
first_layer_activation = activations[0]
print(first_layer_activation.shape)

import matplotlib.pyplot as plt
plt.figure()
plt.matshow(first_layer_activation[0, :, :, 4], cmap='viridis')

# Visualizing attention map

In [0]:
# !pip install keras-vis
!pip install git+https://github.com/raghakot/keras-vis.git

from keras.applications import ResNet50
from vis.utils import utils
from keras import activations

# Hide warnings on Jupyter Notebook
import warnings
warnings.filterwarnings('ignore')

In [0]:
from vis.utils import utils
from matplotlib import pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (18, 6)

# img1 = utils.load_img('/gdrive/My Drive/deep_learning/val/val_controls/_ERGO-0449_OS_2014_triton_wf_report.png', 
#                       target_size=(img_width / 5, img_height / 5))
# img2 = utils.load_img('/gdrive/My Drive/deep_learning/val/val_controls/_ERGO-0450_OD_2014_triton_wf_report.png', 
#                       target_size=(img_width / 5, img_height / 5))

img1 = utils.load_img('/gdrive/My Drive/deep_learning/new_dataset/test/controls/View2098.png', 
                      target_size=(img_width, img_height))
img2 = utils.load_img('/gdrive/My Drive/deep_learning/new_dataset/train/patients/194.png', 
                      target_size=(img_width, img_height))

f, ax = plt.subplots(1, 2)
ax[0].imshow(img1)
ax[1].imshow(img2)

In [0]:
from vis.visualization import visualize_saliency, overlay
from vis.utils import utils
from keras import activations
from vis.visualization import visualize_cam


# import numpy as np
import matplotlib.cm as cm
# from vis.visualization import visualize_cam


penultimate_layer = utils.find_layer_idx(model, 'conv2d_15')
layer_idx = utils.find_layer_idx(model, 'dense_9')

for modifier in [None, 'guided', 'relu']:
    plt.figure()
    f, ax = plt.subplots(1, 4)
    plt.suptitle("vanilla" if modifier is None else modifier)
    for i, img in enumerate([img1, img2]):    
        # 20 is the imagenet index corresponding to `ouzel`
        grads = visualize_cam(model, layer_idx, filter_indices=0, 
                              seed_input=img, penultimate_layer_idx=penultimate_layer,
                              backprop_modifier=modifier)        
        # Lets overlay the heatmap onto original image.    
        jet_heatmap = np.uint8(cm.jet(grads)[..., :3] * 255)
        ax[i].imshow(jet_heatmap)
        ax[i + 2].imshow(img)
#         ax[i].imshow(jet_heatmap)
#         print(jet_heatmap.shape)
#         print(img.shape)
#         ax[i].imshow(overlay(jet_heatmap, img))
