In [None]:
import os
import matplotlib.pyplot as plt
import keras.backend as K
from keras.models import Model
from keras.layers import Input, Dense, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, SeparableConv2D, MaxPool2D, LeakyReLU, Activation
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.optimizers import Adam, Nadam, SGD
from keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf

In [None]:
data_path = '/Users/pinarayaz/Jupyter/Mathematical Foundations of Data Science/chest_xray/'


def _sample_image_path(class_dir, index=1):
    """Return the path of the `index`-th visible file in `class_dir`, sorted by name.

    `os.listdir` returns entries in arbitrary order and includes hidden
    files (for example `.DS_Store`), so the listing is filtered and sorted
    to make the chosen sample deterministic and always a regular entry.

    Raises:
        IndexError: if `class_dir` holds fewer than `index + 1` visible entries.
    """
    visible = sorted(name for name in os.listdir(class_dir) if not name.startswith('.'))
    return os.path.join(class_dir, visible[index])


# 2 x 3 grid: one column per dataset split, top row NORMAL, bottom row PNEUMONIA.
fig, ax = plt.subplots(2, 3, figsize=(16, 8))
ax = ax.ravel()

for i, _set in enumerate(['train', 'val', 'test']):
    set_path = os.path.join(data_path, _set)
    ax[i].imshow(plt.imread(_sample_image_path(os.path.join(set_path, 'NORMAL'))), cmap='gray')
    ax[i].set_title('Dataset: {}, Class: Normal'.format(_set))
    # Offset of 3 moves to the second row of the flattened axes array.
    ax[i+3].imshow(plt.imread(_sample_image_path(os.path.join(set_path, 'PNEUMONIA'))), cmap='gray')
    ax[i+3].set_title('Dataset: {}, Class: Pneumonia'.format(_set))

In [None]:
# Network definition: five convolutional stages followed by a dense classifier head.
# Layers are instantiated in exactly the same order as a hand-unrolled version would be.
conv_kwargs = dict(activation='relu', padding='same')
kernel_sizes = ((4, 4), (3, 3))  # every stage stacks a 4x4 conv then a 3x3 conv

inputs = Input(shape=(150, 150, 1))

# Stage 1: plain convolutions, no batch normalisation
x = inputs
for kernel in kernel_sizes:
    x = Conv2D(filters=16, kernel_size=kernel, **conv_kwargs)(x)
x = MaxPool2D(pool_size=(2, 2))(x)

# Stages 2-5: separable convolutions + batch norm + pooling.
# The last two stages additionally apply dropout (rate given, otherwise None).
separable_stages = ((32, None), (64, None), (128, 0.25), (256, 0.25))
for n_filters, drop_rate in separable_stages:
    for kernel in kernel_sizes:
        x = SeparableConv2D(filters=n_filters, kernel_size=kernel, **conv_kwargs)(x)
    x = BatchNormalization()(x)
    x = MaxPool2D(pool_size=(2, 2))(x)
    if drop_rate is not None:
        x = Dropout(rate=drop_rate)(x)

# Classifier head: three Dense+Dropout pairs, then a 2-unit sigmoid output.
x = Flatten()(x)
for n_units, drop_rate in ((512, 0.75), (128, 0.5), (64, 0.25)):
    x = Dense(units=n_units, activation='relu')(x)
    x = Dropout(rate=drop_rate)(x)
output = Dense(units=2, activation='sigmoid')(x)

In [None]:
# Define precision, recall and f1 score metrics
def precision_metric(y_true, y_pred):
    """Precision of the rounded predictions over the tensors passed in.

    Both the element-wise product `y_true * y_pred` and `y_pred` are clipped
    to [0, 1] and rounded before counting, so a prediction counts as positive
    once it rounds to 1.

    Returns:
        true positives / (predicted positives + K.epsilon()); the epsilon
        keeps the ratio finite when nothing is predicted positive.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    flagged = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(flagged) + K.epsilon())

def recall_metric(y_true, y_pred):
    """Recall of the rounded predictions over the tensors passed in.

    The element-wise product `y_true * y_pred` and `y_true` are each clipped
    to [0, 1] and rounded before being summed.

    Returns:
        true positives / (actual positives + K.epsilon()); the epsilon
        keeps the ratio finite when there are no positive labels.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual) + K.epsilon())

def f1_score_metric(y_true, y_pred):
    """F1 score (harmonic mean of precision and recall) of the rounded predictions.

    Counts are obtained by clipping a tensor to [0, 1], rounding it and
    summing. K.epsilon() is added to every denominator so the result stays
    finite when a count is zero.
    """
    def _count(tensor):
        # Number of entries that round to 1 after clipping to [0, 1].
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    eps = K.epsilon()
    tp = _count(y_true * y_pred)
    precision = tp / (_count(y_pred) + eps)
    recall = tp / (_count(y_true) + eps)
    return 2 * ((precision * recall) / (precision + recall + eps))

In [None]:
# Create data generators
# One ImageDataGenerator (no augmentation configured) feeds all three splits.
datagen = ImageDataGenerator()

# Settings shared by every split: 150x150 single-channel images, 16 per batch.
flow_kwargs = dict(target_size=(150, 150),
                   color_mode="grayscale",
                   batch_size=16)

# `data_path` already ends with a separator, so build the split directories with
# os.path.join instead of mixing 'train' and '/val' style concatenation (which
# produced paths containing '//').
traingenerator = datagen.flow_from_directory(os.path.join(data_path, 'train'),
                                             shuffle=True,
                                             seed=1,
                                             **flow_kwargs)

valgenerator = datagen.flow_from_directory(os.path.join(data_path, 'val'),
                                           shuffle=True,
                                           seed=1,
                                           **flow_kwargs)

# The test split is not shuffled, so batches follow the directory listing order.
testgenerator = datagen.flow_from_directory(os.path.join(data_path, 'test'),
                                            shuffle=False,
                                            **flow_kwargs)

In [None]:
# Wrap the layer graph in a trainable Model. Optimizer, loss and metrics are
# chosen by running exactly one of the compile cells that follow.
model = Model(outputs=output, inputs=inputs)

In [None]:
# Model 1 (history1): Adam, selected by name so Keras uses its default settings
tracked_metrics = ['accuracy', precision_metric, recall_metric, f1_score_metric]
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=tracked_metrics)

In [None]:
# Model 2 (history2): Nadam (Adam with Nesterov momentum), selected by name
compile_args = dict(
    optimizer='Nadam',
    loss='binary_crossentropy',
    metrics=['accuracy', precision_metric, recall_metric, f1_score_metric],
)
model.compile(**compile_args)

In [None]:
# Model 3 (history3): plain SGD, selected by name
model.compile(
    metrics=['accuracy', precision_metric, recall_metric, f1_score_metric],
    loss='binary_crossentropy',
    optimizer='SGD',
)

In [None]:
# Model 4 (history4): SGD with classical momentum of 0.85
opt = SGD(momentum=0.85)
tracked_metrics = ['accuracy', precision_metric, recall_metric, f1_score_metric]
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=tracked_metrics)

In [None]:
# Model 5 (history5) = SGD + Nesterov
# Nesterov acceleration only changes the update through the momentum term.
# With the default momentum of 0 the step is identical to plain SGD (Model 3),
# so a non-zero momentum is required; 0.85 matches Model 4 so the two runs
# differ only in the Nesterov look-ahead.
opt = SGD(momentum=0.85, nesterov=True)
model.compile(optimizer=opt,
              loss='binary_crossentropy',
              metrics=['accuracy', precision_metric, recall_metric, f1_score_metric])

In [None]:
epochs = 10
# The generators decide the real batch size. Reading it from the generator keeps
# `batch_size` truthful; a separately hard-coded value of 64 made the step
# counts below four times too small for generators that yield 16 images.
batch_size = traingenerator.batch_size

# Keeps only the weights of the epoch with the best monitored validation value.
checkpoint = ModelCheckpoint(filepath='bw.hdf5', save_best_only=True, save_weights_only=True)
# NOTE(review): reduce_lr is created but not passed to `callbacks`, so the
# learning rate is never reduced. Left unused to keep the optimizer comparison
# unchanged; add it to the list below if LR scheduling is wanted.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2)

# len(generator) is the number of batches needed to cover every sample once, so
# each epoch sees the full training set and validation covers the full set too.
# This matters for validation in particular: testgenerator is unshuffled, so a
# truncated step count would only ever score the first files in listing order.
# NOTE(review): the test split doubles as validation data here, so the
# checkpoint is selected on the same data that is later reported as the test
# score; `valgenerator` is available if a separate validation split is preferred.
history = model.fit_generator(traingenerator,
                              steps_per_epoch=len(traingenerator),
                              epochs=epochs,
                              validation_data=testgenerator,
                              validation_steps=len(testgenerator),
                              callbacks=[checkpoint])

In [None]:
# Restore the weights written by the ModelCheckpoint callback during training
best_weights_file = 'bw.hdf5'
model.load_weights(best_weights_file)

In [None]:
# Score the model on the test generator, one pass over all of its batches
no_steps = len(testgenerator)
scores = model.evaluate_generator(testgenerator, steps=no_steps, verbose=True)

# Values come back in compile order: loss first, then the listed metrics.
loss, accuracy, precision, recall, f1_score = scores

report = (("Loss", loss),
          ("Accuracy", accuracy),
          ("Precision", precision),
          ("Recall", recall),
          ("F1 Score", f1_score))
for label, value in report:
    print(label + ": " + str(value))

In [None]:
# Training vs. validation curve for every tracked quantity, one panel each
metric_names = ['loss', 'acc', 'precision_metric', 'recall_metric', 'f1_score_metric']

fig, ax = plt.subplots(5, 1, figsize=(6, 30))
ax = ax.ravel()

for panel, met in zip(ax, metric_names):
    curves = history.history
    panel.plot(curves[met])
    # Keras stores the validation counterpart under the 'val_' prefix.
    panel.plot(curves['val_' + met])
    panel.set_title('Model {}'.format(met))
    panel.set_xlabel('epoch')
    panel.set_ylabel(met)
    panel.legend(['train', 'val'])

In [None]:
# Raw network outputs for every batch of the test generator
# (`no_steps` comes from the evaluation cell).
predict = model.predict_generator(testgenerator, steps=no_steps)
print(predict)

In [None]:
# to save and load history
import pickle
class ModelHistory(object):
    """Plain, picklable container for the pieces of a training history.

    Stores the three values it is given under attributes of the same name:
    `history`, `epoch` and `params`.
    """

    def __init__(self, history, epoch, params):
        self.history, self.epoch, self.params = history, epoch, params

In [None]:
# save
# Persist the current run's history so it can be compared with other runs later.
save_path = '/Users/pinarayaz/Jupyter/Mathematical Foundations of Data Science/history5_v2'
with open(save_path, 'wb') as out_file:
    model_history = ModelHistory(history=history.history,
                                 epoch=history.epoch,
                                 params=history.params)
    pickle.dump(model_history, out_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# load
def _read_history(path):
    """Unpickle and return the object stored at `path`.

    pickle can execute arbitrary code while loading, so only point this at
    history files written by this notebook.
    """
    with open(path, 'rb') as in_file:
        return pickle.load(in_file)


path1 = '/Users/pinarayaz/Jupyter/Mathematical Foundations of Data Science/history1_v2'
history1 = _read_history(path1)

path2 = '/Users/pinarayaz/Jupyter/Mathematical Foundations of Data Science/history2_v2'
history2 = _read_history(path2)

path3 = '/Users/pinarayaz/Jupyter/Mathematical Foundations of Data Science/history3_v2'
history3 = _read_history(path3)

path4 = '/Users/pinarayaz/Jupyter/Mathematical Foundations of Data Science/history4_v2'
history4 = _read_history(path4)

path5 = '/Users/pinarayaz/Jupyter/Mathematical Foundations of Data Science/history5_v2'
history5 = _read_history(path5)

In [None]:
# Comparisons for all models - TRAIN
# One panel per metric on a 3x2 grid; every panel overlays the five runs.
runs = [history1, history2, history3, history4, history5]
run_labels = ['Adam', 'NAdam', 'SGD', 'SGD + momentum', 'SGD + Nesterov']
metric_names = ['loss', 'acc', 'precision_metric', 'recall_metric', 'f1_score_metric']

fig, ax = plt.subplots(3, 2, figsize=(16, 20))
ax = ax.ravel()

# zip stops after the five metrics, leaving the sixth axes untouched.
for panel, met in zip(ax, metric_names):
    for run in runs:
        panel.plot(run.history[met])

    panel.set_title('Train {}'.format(met))
    panel.set_xlabel('epoch')
    panel.set_ylabel(met)
    panel.legend(run_labels)

# Remove the unused sixth panel of the grid.
fig.delaxes(ax[5])

In [None]:
# Comparisons for all models - VAL
# One stacked panel per metric; every panel overlays the five runs' validation curves.
runs = [history1, history2, history3, history4, history5]
run_labels = ['Adam', 'NAdam', 'SGD', 'SGD + momentum', 'SGD + Nesterov']
metric_names = ['loss', 'acc', 'precision_metric', 'recall_metric', 'f1_score_metric']

fig, ax = plt.subplots(5, 1, figsize=(6, 30))
ax = ax.ravel()

for panel, met in zip(ax, metric_names):
    val_key = 'val_' + met
    for run in runs:
        panel.plot(run.history[val_key])

    panel.set_title('Validation {}'.format(met))
    panel.set_xlabel('epoch')
    panel.set_ylabel(met)
    panel.legend(run_labels)