In [0]:
# Mount Google Drive inside the Colab runtime so the dataset folders are readable
from google.colab import drive

DRIVE_MOUNT_POINT = '/gdrive'
drive.mount(DRIVE_MOUNT_POINT)


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [0]:
from keras.preprocessing.image import ImageDataGenerator

# Dataset locations on the mounted Drive, one folder per split
test_dir = "/gdrive/My Drive/newCircleData/Test/"
val_dir = "/gdrive/My Drive/newCircleData/Val/"
train_dir = "/gdrive/My Drive/newCircleData/Train/"

# Geometry of the images fed to the network (RGB, so three channels)
img_height, img_width = 450, 600
channels = 3

# Run settings
batch_size = 1
epochs = 50

# Number of images drawn from each split.
# NOTE(review): the trailing figures were left by the original author and
# look like earlier dataset sizes -- confirm before relying on them.
nb_train_samples = 395  # 408
nb_valid_samples = 145  # 149
nb_test_samples = 197  # 192

# Build one rescaling ImageDataGenerator per split and stream each split
# from its directory.
from keras.preprocessing.image import ImageDataGenerator


def _flow_from_split(datagen, directory, shuffle):
    """Return a directory iterator for one dataset split.

    Every split uses the same settings: images are resized to
    (img_height, img_width), read as RGB, batched by ``batch_size`` and
    labelled in 'binary' mode. Only the source directory and the
    shuffling flag differ between splits.
    """
    return datagen.flow_from_directory(
        directory,
        target_size=(img_height, img_width),
        color_mode="rgb",
        batch_size=batch_size,
        class_mode='binary',
        shuffle=shuffle)


# Each generator only rescales pixel values by 1/255; no augmentation is applied.
train_datagen = ImageDataGenerator(rescale=1./255)
valid_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = _flow_from_split(train_datagen, train_dir, shuffle=True)

# Original author's note on this shuffle flag: "weight toward one class or another"
valid_generator = _flow_from_split(valid_datagen, val_dir, shuffle=True)

# The test split is read in a fixed order (no shuffling)
test_generator = _flow_from_split(test_datagen, test_dir, shuffle=False)

# Show the class-name -> integer-label mapping of every split so that any
# disagreement between the splits is visible at a glance.
label_mapT, label_mapV, label_mapTe = (
    split_generator.class_indices
    for split_generator in (train_generator, valid_generator, test_generator)
)

for split_label_map in (label_mapT, label_mapV, label_mapTe):
    print(split_label_map)

Using TensorFlow backend.


Found 395 images belonging to 2 classes.
Found 145 images belonging to 2 classes.
Found 197 images belonging to 2 classes.
{'G': 0, 'S': 1}
{'G': 0, 'S': 1}
{'G': 0, 'S': 1}


In [0]:
import numpy as np 
from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Model, Input
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Softmax, Flatten, Dense, BatchNormalization 
from keras.metrics import categorical_accuracy
from keras import backend as K
from keras import regularizers
import tensorflow as tf
from keras.models import Sequential

from PIL import Image

Image.MAX_IMAGE_PIXELS = None

from keras import layers

from keras.callbacks import TensorBoard

from keras import layers
from keras.applications import resnet50
from keras import optimizers
from keras.applications import VGG16

# VGG16 with ImageNet weights and without its top (classifier) layers,
# sized for the images used in this notebook.
vgg_input_shape = (img_height, img_width, channels)
conv_base = VGG16(include_top=False,
                  weights='imagenet',
                  input_shape=vgg_input_shape)

# Print the layer-by-layer architecture
conv_base.summary()

#Extracting features from OCT data using pretrained VGG
def extract_features(dataset_type, sample_count):
    """Run images from one split through ``conv_base`` and collect the outputs.

    Parameters
    ----------
    dataset_type : str
        "train" reads from ``train_generator``, "valid" from
        ``valid_generator``; any other value reads from ``test_generator``.
    sample_count : int
        Number of samples to collect.

    Returns
    -------
    (features, labels) : tuple of numpy.ndarray
        ``features`` has shape ``(sample_count,) + conv_base.output_shape[1:]``
        and ``labels`` has shape ``(sample_count,)``. Rows that could not be
        filled because the generator ran out of data stay zero.
    """
    # Pick the generator for the requested split (unknown names fall back to
    # the test split, as before).
    if dataset_type == "train":
        generator = train_generator
    elif dataset_type == "valid":
        generator = valid_generator
    else:
        generator = test_generator

    # Take the per-sample feature shape from the model itself instead of
    # hard-coding it, so a different input size or base network still works.
    feature_shape = tuple(conv_base.output_shape[1:])
    features = np.zeros(shape=(sample_count,) + feature_shape)
    labels = np.zeros(shape=(sample_count))

    if sample_count <= 0:
        return features, labels

    # Track how many rows are filled rather than assuming every batch holds
    # exactly batch_size samples; a short batch would otherwise shift or
    # overflow the write window.
    filled = 0
    for inputs_batch, labels_batch in generator:
        features_batch = conv_base.predict(inputs_batch)
        take = min(len(features_batch), sample_count - filled)
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = labels_batch[:take]
        filled += take
        # The generator may loop forever, so stop once enough samples are in.
        if filled >= sample_count:
            break
    return features, labels

# Extract conv_base features once per split
train_features, train_labels = extract_features("train", nb_train_samples)
valid_features, valid_labels = extract_features("valid", nb_valid_samples)
test_features, test_labels = extract_features("test", nb_test_samples)

# Report the array shapes of every split (features first, then labels)
for split_features, split_labels in (
    (train_features, train_labels),
    (valid_features, valid_labels),
    (test_features, test_labels),
):
    print(split_features.shape, split_labels.shape)

Instructions for updating:
Colocations handled automatically by placer.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 450, 600, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 450, 600, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 450, 600, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 225, 300, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 225, 300, 128)     73856     
__________________________________________________________

In [0]:
#Random Forest Training, Testing on validation and test sets
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn import metrics
from sklearn.metrics import roc_auc_score

def _flatten_features(feature_maps):
    """Reshape (n_samples, ...) feature maps to a 2-D (n_samples, n_values) matrix.

    The sizes are taken from the array itself, so nothing has to be kept in
    sync with the sample counts or the network's output shape.
    """
    return np.reshape(feature_maps, (len(feature_maps), -1))


train_matrix = _flatten_features(train_features)
valid_matrix = _flatten_features(valid_features)
test_matrix = _flatten_features(test_features)

# random_state is fixed, so fitting is deterministic; one fit is enough.
clf = RandomForestClassifier(n_estimators=100, max_depth=12, random_state=0, bootstrap=True)
trainResult = clf.fit(train_matrix, train_labels)  # fit() returns the estimator itself

valid_prediction = clf.predict(valid_matrix)
print("validation accuracy:", np.mean(valid_prediction == valid_labels))

test_prediction = clf.predict(test_matrix)
print("test accuracy:", np.mean(test_prediction == test_labels))

# Per-class probabilities for the test set; columns follow clf.classes_
test_score = trainResult.predict_proba(test_matrix)
print(test_score)

# Write the test-set scores and labels to .mat files for drawing the ROC curve
import scipy.io as sio

for mat_path, mat_payload in (
    ('/gdrive/My Drive/newCircleData/vgg_test_score.mat', {'vgg_test_score': test_score[:, ]}),
    ('/gdrive/My Drive/newCircleData/vgg_test_label.mat', {'vgg_test_label': test_labels[:, ]}),
):
    sio.savemat(mat_path, mat_payload)

# ROC AUC measures how well the scores rank the samples, so it is computed from
# the predicted probabilities rather than from thresholded 0/1 predictions
# (which reduce the curve to a single operating point).
# Column 1 of test_score is the probability of the second class in
# clf.classes_ -- assumes the labels are 0/1 as produced by class_mode='binary'.
# The former positional 'weighted' argument is dropped: `average` is
# keyword-only in current scikit-learn.
print('ROC AUC is', roc_auc_score(test_labels, test_score[:, 1]))

# Indices of the misclassified test samples
print(np.flatnonzero(test_prediction != test_labels).tolist())

# Collect the file names of the misclassified test images.
# Entries of test_generator.filenames are matched to labels and predictions by index.
filenames = test_generator.filenames

# False positives: true label 0, predicted 1
FP_list = [
    filenames[idx]
    for idx, truth in enumerate(test_labels)
    if truth == 0 and test_prediction[idx] == 1
]

# False negatives: true label 1, predicted 0
FN_list = [
    filenames[idx]
    for idx, truth in enumerate(test_labels)
    if truth == 1 and test_prediction[idx] == 0
]

print(FP_list)
print(FN_list)

validation accuracy: 0.9448275862068966
test accuracy: 0.9390862944162437
[[0.28 0.72]
 [0.67 0.33]
 [0.7  0.3 ]
 [0.69 0.31]
 [0.78 0.22]
 [0.85 0.15]
 [0.84 0.16]
 [0.78 0.22]
 [0.73 0.27]
 [0.85 0.15]
 [0.15 0.85]
 [0.83 0.17]
 [0.64 0.36]
 [0.86 0.14]
 [0.72 0.28]
 [0.82 0.18]
 [0.45 0.55]
 [0.8  0.2 ]
 [0.59 0.41]
 [0.79 0.21]
 [0.4  0.6 ]
 [0.88 0.12]
 [0.83 0.17]
 [0.46 0.54]
 [0.79 0.21]
 [0.89 0.11]
 [0.65 0.35]
 [0.54 0.46]
 [0.79 0.21]
 [0.83 0.17]
 [0.87 0.13]
 [0.86 0.14]
 [0.8  0.2 ]
 [0.73 0.27]
 [0.8  0.2 ]
 [0.28 0.72]
 [0.51 0.49]
 [0.71 0.29]
 [0.79 0.21]
 [0.76 0.24]
 [0.79 0.21]
 [0.81 0.19]
 [0.71 0.29]
 [0.69 0.31]
 [0.74 0.26]
 [0.8  0.2 ]
 [0.8  0.2 ]
 [0.8  0.2 ]
 [0.85 0.15]
 [0.78 0.22]
 [0.81 0.19]
 [0.89 0.11]
 [0.82 0.18]
 [0.87 0.13]
 [0.66 0.34]
 [0.79 0.21]
 [0.06 0.94]
 [0.24 0.76]
 [0.35 0.65]
 [0.   1.  ]
 [0.14 0.86]
 [0.54 0.46]
 [0.15 0.85]
 [0.3  0.7 ]
 [0.27 0.73]
 [0.   1.  ]
 [0.15 0.85]
 [0.55 0.45]
 [0.28 0.72]
 [0.07 0.93]
 [0.   1.  ]
 [0