In [0]:
import os
import numpy as np
import keras
from keras.models import Sequential, save_model, load_model
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras.preprocessing.image import *
from keras.applications.vgg16 import VGG16, preprocess_input

Mount my Google Drive folder

In [13]:
from google.colab import drive
drive.mount('/content/gdrive/')

# setting some useful path variables
path_proj = 'gdrive/My Drive/Projects_2019/Chest X-Ray Images (Pneumonia)/'
path_model = path_proj + 'Model/'
path_data = path_proj + 'Data/'
path_data_trn = path_proj + 'Data/train/'
path_data_val = path_proj + 'Data/test/'

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


Load from my Google Drive folder

In [14]:
# train:val is set to be 8:2
imgen = ImageDataGenerator()
X_trn = imgen.flow_from_directory(path_data_trn, batch_size=64, target_size=(224,224), shuffle=True, class_mode='binary')
X_val = imgen.flow_from_directory(path_data_val, batch_size=64, target_size=(224,224), shuffle=True, class_mode='binary')

Found 5216 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [17]:
from keras.applications.resnet50 import ResNet50

RN50 = ResNet50(weights='imagenet', pooling=max, include_top = False, input_shape=(224,224,3)) 

for layer in RN50.layers:
    layer.trainable = False



In [18]:
# Just to make sure model is clean before adding layers to it
model = None
model = Sequential()
# "Borrow" VGG16 as a feature extractor
model.add(RN50)
# Concatenate the feature extracted from VGG16
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(8, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='softmax'))
model.add(Dropout(0.2))

# Checking the model architecture
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 7, 7, 2048)        23587712  
_________________________________________________________________
flatten_2 (Flatten)          (None, 100352)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               51380736  
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               65664     
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 32)                4128      
__________

Learning rate finder for Keras
Package downloaded via Pip is not updated, so I just copied the links here

https://github.com/surmenok/keras_lr_finder/blob/master/keras_lr_finder/lr_finder.py

https://towardsdatascience.com/estimating-optimal-learning-rate-for-a-deep-neural-network-ce32f2556ce0

In [0]:
# If there's no improvement for 5 epochs, then training will stop
ES = keras.callbacks.EarlyStopping(monitor="val_loss", patience=10, min_delta=0.01)
# Saving the best model during training
CP = keras.callbacks.ModelCheckpoint(path_model + 'RN50_512_128_32_8_checkpoint.hdf5', save_best_only=True)

In [0]:
# Specify the optimiser, initial learning rate, loss function and metrics
model.compile(optimizer=Adam(), loss="binary_crossentropy", metrics=['acc'])

In [0]:
# Train the model
history = model.fit_generator(X_trn, steps_per_epoch=5216/64, epochs=9999, validation_data=X_val, validation_steps=624/64, callbacks=[ES, CP])

# Save the model after training
model.save(path_model + 'RN50_512_128_32_8.hdf5')
print('checkpoint is saved as: ' + path_model + 'RN50_512_128_32_8.hdf5')

Epoch 1/9999
Epoch 2/9999
Epoch 3/9999
Epoch 4/9999


In [0]:
# Load the best model during training
model = load_model(path_model + 'RN50_512_128_32_8.hdf5')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.


In [0]:
# Output class probabilities using the best model
p_hat_trn = model.predict_generator(X_trn, steps=5216/64)
p_hat_val = model.predict_generator(X_val, steps=624/64)

# Output class predictions using the best model
y_hat_trn = np.squeeze([[np.argmax(p_hat_trn[i])] for i in range(len(p_hat_trn))])
y_hat_val = np.squeeze([[np.argmax(p_hat_val[i])] for i in range(len(p_hat_val))])

In [0]:
acc_trn = np.sum([[y_hat_trn[i]==X_trn.labels[i]] for i in range(len(y_hat_trn))])/len(y_hat_trn)
acc_val = np.sum([[y_hat_val[i]==X_val.labels[i]] for i in range(len(y_hat_val))])/len(y_hat_val)

print(acc_trn)
print(acc_val)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])