# Mounting Google Drive

In [2]:
##### before running it, make sure you don't have lots of big files in your google drive
##### otherwise it's going to take too long to finish running it before giving the TIMEOUT error
##### also save the train_controls, train_patients, val_controls, val_patients to your drive and
##### create a "train" folder with train_controls, train_patients in it, and 
##### a "val" folder with val_controls, val_patients in it.
##### change the train_dir and val_dir in the next cell to the dir of your train and val folder
##### e.g. my train and val folder are saved in the "deep learning" dir

from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


# Loading Train, Val, and Test Data Using Keras ImageDataGenerator

In [4]:
train_dir = "/gdrive/My Drive/new_dataset/trainProbMaps/"
val_dir = "/gdrive/My Drive/new_dataset/valProbMaps/"
test_dir = "/gdrive/My Drive/new_dataset/testProbMaps/"

img_width = 600
img_height = 450
batch_size = 5
channels = 3
epochs = 50
nb_train_samples = 410
nb_validation_samples = 150
nb_test_samples = 200

from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255)             
valid_datagen = ImageDataGenerator(rescale=1./255)    
test_datagen = ImageDataGenerator(rescale=1./255) 

train_generator = train_datagen.flow_from_directory(
        train_dir, 
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='binary')   

validation_generator = valid_datagen.flow_from_directory(
    val_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary') #weight toward one class or another

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

#Keras takes care of generating labels if the directory structure matches above!
label_mapT = train_generator.class_indices
print(label_mapT)

label_mapV = validation_generator.class_indices
print(label_mapV)

label_mapTe = test_generator.class_indices
print(label_mapTe)

for data_batch, labels_batch in train_generator:
    print ('data batch shape:', data_batch.shape)
    #print(data_batch)
    print('labels batch shape:', labels_batch.shape)
    #print(labels_batch)
    break

Found 410 images belonging to 2 classes.
Found 150 images belonging to 2 classes.
Found 200 images belonging to 2 classes.
{'controls': 0, 'patients': 1}
{'controls': 0, 'patients': 1}
{'controls': 0, 'patients': 1}
data batch shape: (5, 600, 450, 3)
labels batch shape: (5,)


# Building the Model Architecture (Pre-trained Kerass VGG16 Extracting Features from OCT Dataset) & Training the Model

In [11]:
import numpy as np 
from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Model, Input
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Softmax, Flatten, Dense, BatchNormalization 
from keras.metrics import categorical_accuracy
from keras import backend as K
from keras import regularizers
import tensorflow as tf
from keras.models import Sequential

from keras import layers

from keras.callbacks import TensorBoard

#from keras.layers import Input, Dense
from keras import layers
from keras.applications import resnet50
from keras import optimizers
from keras.applications import VGG16

#pretrained VGG16 on imagenet, starting to replicate Hassan's work
conv_base = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(img_width, img_height, channels))

#model.summary()

#Extracting features from OCT data using pretrained VGG
def extract_features(directory, sample_count):
    features = np.zeros(shape=(sample_count, 18, 14, 512))
    labels = np.zeros(shape=(sample_count))
    train_generator = train_datagen.flow_from_directory(
        directory,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='binary')
    i = 0
    for inputs_batch, labels_batch in train_generator:
        features_batch = conv_base.predict(inputs_batch)
        #print(shape(features_batch))
        features[i * batch_size : (i + 1) * batch_size] = features_batch
        labels[i * batch_size : (i + 1) * batch_size] = labels_batch
        i += 1
        if i * batch_size >= sample_count:
            break                                                           
    return features, labels

train_features, train_labels = extract_features(train_dir, nb_train_samples)
validation_features, validation_labels = extract_features(val_dir, nb_validation_samples)


Found 410 images belonging to 2 classes.
Found 150 images belonging to 2 classes.


# Classifier Layer: Random Forrest In Progress or Dense Layer

In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=nb_validation_samples, n_features=len(validation_features), 
                           n_informative=2, n_redundant=0, random_state=0, shuffle=False)
clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
clf.fit(X, y)
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=2, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
            oob_score=False, random_state=0, verbose=0, warm_start=False)
print(clf.feature_importances_)

#print(clf.predict([[validation_features]]))


# model = Sequential()
# model.add(layers.Dense(512, activation='relu', input_dim=18 * 14 * 512))
# model.add(layers.Dropout(0.5))
# model.add(layers.Dense(1, activation='sigmoid'))

# model.summary()  

# model.compile(optimizer=optimizers.RMSprop(lr=0.01), #2e-5
#               loss='binary_crossentropy',
#               metrics=['acc'])

# train_features = np.reshape(train_features, (nb_train_samples, 18 * 14 * 512))
# validation_features = np.reshape(validation_features, (nb_validation_samples, 18 * 14 * 512))

# history = model.fit(train_features, train_labels,
#                     epochs=50,
#                     batch_size=5,
#                     validation_data=(validation_features, validation_labels))

[0.00928052 0.17763431 0.00574641 0.         0.00198298 0.00835966
 0.0035934  0.         0.00535439 0.00534121 0.01146534 0.00151419
 0.         0.01361906 0.00337935 0.00367449 0.         0.01293903
 0.         0.00813975 0.00272854 0.00084522 0.00105284 0.00938653
 0.00425684 0.00431292 0.00483407 0.00435044 0.01312944 0.00209084
 0.00591801 0.         0.0191225  0.004576   0.         0.0012213
 0.00557711 0.00723148 0.00607344 0.00187041 0.00085115 0.00222705
 0.03070598 0.00490528 0.01042616 0.00545176 0.         0.01511682
 0.00480796 0.00450642 0.00988211 0.         0.         0.
 0.01981062 0.01116548 0.         0.00383973 0.00934391 0.00401216
 0.00349472 0.00746213 0.04569979 0.00387129 0.00598896 0.
 0.00480529 0.0083002  0.00150465 0.00337087 0.0026359  0.01401519
 0.00693629 0.01467007 0.         0.         0.00177829 0.0042894
 0.         0.00448167 0.         0.00535985 0.         0.
 0.00201246 0.03164177 0.         0.         0.00195475 0.00351754
 0.         0.       

# Testing

In [0]:
#print(train_generator.filenames)
#print(validation_generator.filenames)

nb_test_samples = 102

result = model.evaluate_generator(test_generator, steps=nb_test_samples / batch_size)
print(result)

predictions = model.predict_generator(test_generator, steps=nb_test_samples / batch_size)
print(predictions)
print(len(predictions))
img_path = "/gdrive/My Drive/validation/val_patients/patient13.png"

from keras.preprocessing import image                             
import numpy as np

img = image.load_img(img_path, target_size=(img_width, img_height))
img_tensor = image.img_to_array(img)
img_tensor = np.expand_dims(img_tensor, axis=0)
img_tensor /= 255.                                               


print(img_tensor.shape)
#validation_generator.classes

# Visualizing

In [0]:
import matplotlib.pyplot as plt

#plt.imshow(img_tensor[0])
#plt.show()

from keras import models

layer_outputs = [layer.output for layer in model.layers[:8]]               
activation_model = models.Model(inputs=model.input, outputs=layer_outputs)
activations = activation_model.predict(img_tensor)
first_layer_activation = activations[0]
print(first_layer_activation.shape)

import matplotlib.pyplot as plt
plt.figure()
plt.matshow(first_layer_activation[0, :, :, 4], cmap='viridis')