# Mounting Google Drive

In [0]:
#Importing data via Google Drive Mount
# The dataset directories used later in this notebook all live under
# "/gdrive/My Drive/", so this cell has to run before any data is loaded.

from google.colab import drive
# NOTE(review): force_remount=True presumably re-mounts even when /gdrive is
# already mounted, which would make this cell safe to re-run — confirm against
# the Colab docs.
drive.mount('/gdrive', force_remount=True)

Mounted at /gdrive


# Loading Train, Val, and Test Data Using Keras ImageDataGenerator

In [0]:
# Locations of the three data splits on the mounted Drive
train_dir = "/gdrive/My Drive/newCircleData/Train/"
val_dir = "/gdrive/My Drive/newCircleData/Val/"
test_dir = "/gdrive/My Drive/newCircleData/Test/"

# Image geometry and run configuration
img_width, img_height, channels = 600, 450, 3
batch_size = 1
epochs = 50

# Number of images in each split
nb_train_samples, nb_valid_samples, nb_test_samples = 395, 145, 197

from keras.preprocessing.image import ImageDataGenerator

# Pixel values are scaled by 1/255 for every split; no other augmentation is configured.
_pixel_scale = 1./255
train_datagen = ImageDataGenerator(rescale=_pixel_scale)
valid_datagen = ImageDataGenerator(rescale=_pixel_scale)
test_datagen = ImageDataGenerator(rescale=_pixel_scale)

# Options shared by all three directory iterators.
# target_size is given as (height, width).
_flow_options = dict(
    target_size=(img_height, img_width),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode='binary',
)

# Train and validation iterators are shuffled.
train_generator = train_datagen.flow_from_directory(train_dir, shuffle=True, **_flow_options)
valid_generator = valid_datagen.flow_from_directory(val_dir, shuffle=True, **_flow_options)

# The test iterator is NOT shuffled: later cells index test_generator.filenames
# with the same positions as the test labels/predictions.
test_generator = test_datagen.flow_from_directory(test_dir, shuffle=False, **_flow_options)

Using TensorFlow backend.


Found 395 images belonging to 2 classes.
Found 145 images belonging to 2 classes.
Found 197 images belonging to 2 classes.


# **Loading Keras Pre-trained CNN & Extracting OCT Image Features**

In [0]:
import numpy as np 
from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Model, Input
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Softmax, Flatten, Dense, BatchNormalization 
from keras.metrics import categorical_accuracy
from keras import backend as K
from keras import regularizers
import tensorflow as tf
from keras.models import Sequential
from keras.applications.inception_v3 import InceptionV3

from keras import layers

from keras.callbacks import TensorBoard

#from keras.layers import Input, Dense
from keras import layers
from keras import optimizers
from keras.applications import ResNet50#, VGG16

# Pretrained InceptionV3 convolutional base (ImageNet weights, no classifier head),
# used purely as a fixed feature extractor.
# `classes` is only meaningful together with include_top=True, so it is not passed.
conv_base = InceptionV3(weights='imagenet', include_top=False, input_shape=(img_height, img_width, channels))

# Alternative backbone:
# conv_base = ResNet50(include_top=False, weights='imagenet',input_shape=(img_height, img_width, channels), pooling=None)

# Feature-map dimensions (rows, cols, channels) produced by conv_base for one image.
# They are read from the model instead of being hard-coded, so they stay correct
# when the backbone or the input size is changed.
# Values previously hard-coded in this notebook, for reference:
#   VGG16       -> 14 x 18 x 512
#   ResNet50    -> 15 x 19 x 2048
#   ResNet18    -> 15 x 19 x 512
#   InceptionV3 -> 12 x 17 x 2048
model_1st, model_2nd, model_3rd = conv_base.output_shape[1:]

#Extracting features from OCT data using the pretrained conv_base
def extract_features(dataset_type, sample_count):
    """Run images from one data split through ``conv_base`` and collect the outputs.

    Parameters
    ----------
    dataset_type : str
        "train" reads from ``train_generator``, "valid" from ``valid_generator``;
        any other value reads from ``test_generator``.
    sample_count : int
        Number of samples to collect.

    Returns
    -------
    (features, labels)
        ``features`` has shape ``(sample_count, model_1st, model_2nd, model_3rd)``
        and ``labels`` has shape ``(sample_count,)``. Rows that were never filled
        (the generator stopped early) remain zero.
    """
    # Pick the source iterator; anything other than "train"/"valid" means test.
    if dataset_type == "train":
        generator = train_generator
    elif dataset_type == "valid":
        generator = valid_generator
    else:
        generator = test_generator

    features = np.zeros(shape=(sample_count, model_1st, model_2nd, model_3rd))
    labels = np.zeros(shape=(sample_count))
    if sample_count == 0:
        # Nothing requested: do not pull (and run the network on) a batch.
        return features, labels

    # Track the write position by the number of samples actually received
    # rather than by `i * batch_size`, so short batches cannot misalign rows.
    filled = 0
    for inputs_batch, labels_batch in generator:
        features_batch = conv_base.predict(inputs_batch)
        # Clip the last batch so it never overruns the output arrays when
        # sample_count is not a multiple of the batch size.
        take = min(len(features_batch), sample_count - filled)
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = np.asarray(labels_batch)[:take]
        filled += take
        if filled >= sample_count:
            # The iterators are consumed with an explicit stop condition.
            break
    return features, labels

train_features, train_labels = extract_features("train", nb_train_samples)
valid_features, valid_labels = extract_features("valid", nb_valid_samples)
test_features, test_labels = extract_features("test", nb_test_samples)


print(train_features.shape, train_labels.shape)
print(valid_features.shape, valid_labels.shape)
print(test_features.shape, test_labels.shape)

Instructions for updating:
Colocations handled automatically by placer.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
(395, 12, 17, 2048) (395,)
(145, 12, 17, 2048) (145,)
(197, 12, 17, 2048) (197,)


# Classifier Layer: Random Forest

In [0]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.metrics import roc_auc_score

# Each image's feature map is flattened into a single row for the Random Forest.
train_X = train_features.reshape(len(train_features), -1)
valid_X = valid_features.reshape(len(valid_features), -1)
test_X = test_features.reshape(len(test_features), -1)

# random_state is fixed so the forest is reproducible across runs.
clf = RandomForestClassifier(n_estimators=100, max_depth=12, random_state=0, bootstrap=True)
clf.fit(train_X, train_labels)

# Accuracy = fraction of predictions that equal the true label.
valid_prediction = clf.predict(valid_X)
print("validation accuracy:", np.mean(valid_prediction == valid_labels))

# `prediction` holds the hard test-set predictions; the FP/FN cell below reads it.
prediction = clf.predict(test_X)
print("test accuracy:", np.mean(prediction == test_labels))

print(test_labels)
print(prediction)

# ROC AUC must be computed from continuous scores, not hard 0/1 predictions
# (hard labels collapse the ROC curve to a single operating point).
# Column 1 of predict_proba is the probability of class 1 (clf.classes_ is sorted).
test_scores = clf.predict_proba(test_X)[:, 1]
print('ROC AUC is', roc_auc_score(test_labels, test_scores, average='weighted'))

# Indices of misclassified test samples
print(np.flatnonzero(prediction != test_labels).tolist())

validation accuracy: 0.9379310344827586
test accuracy: 0.934010152284264
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1.]
[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
 0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 

# Getting False Positives and False Negatives

In [0]:

# control_list, patient_list = test_generator.filenames[:141], test_generator.filenames[142:]

# print(test_generator.filenames[:141])
# print(test_generator.filenames[142:])

# control_idx_list = [2, 7, 12, 33, 34, 40]
# patient_idx_list = [0, 3, 7, 10, 16, 20, 35, 54]

# from google.colab import files

# patient_name_list = [patient_list[i] for i in patient_idx_list]
# control_name_list = [control_list[i] for i in control_idx_list]

# for i in range(len(patient_name_list)):
#     files.download(test_dir+patient_name_list[i])

# for i in range(len(control_name_list)):
#     files.download(test_dir+control_name_list[i])

# File names of the test images, in the order the (unshuffled) test generator
# yields them, so position k matches test_labels[k] and prediction[k].
filenames = test_generator.filenames
_sample_indices = range(len(test_labels))

# False positives: true label 0 but predicted 1
FP_list = [
    filenames[k]
    for k in _sample_indices
    if test_labels[k] == 0 and prediction[k] == 1
]

# False negatives: true label 1 but predicted 0
FN_list = [
    filenames[k]
    for k in _sample_indices
    if test_labels[k] == 1 and prediction[k] == 0
]

print(FP_list)
print(FN_list)

['G/19220_RNFL_probability.png', 'G/20163_RNFL_probability.png', 'G/24215_RNFL_probability.png', 'G/25208_RNFL_probability.png', 'G/26768_RNFL_probability.png', 'G/28015_RNFL_probability.png', 'G/30631_RNFL_probability.png']
['S/19088_RNFL_probability.png', 'S/19516_RNFL_probability.png', 'S/21395_RNFL_probability.png', 'S/27449_RNFL_probability.png', 'S/31801_RNFL_probability.png', 'S/View2168.png']
