# Download Dataset

In [1]:
# Download the dataset archive (rscbjbr9sj-2.zip) into the working directory.
! wget https://md-datasets-cache-zipfiles-prod.s3.eu-west-1.amazonaws.com/rscbjbr9sj-2.zip

In [2]:
! unzip rscbjbr9sj-2.zip
! unzip ChestXRay2017.zip
! tar –xvzf OCT2017.tar.gz
! rm -rf *.zip
! rm -rf *.tar.gz
! rm -rf chest_xray/train/NORMAL/.DS_Store
! rm -rf chest_xray/train/PNEUMONIA/.DS_Store
! rm -rf chest_xray/test/NORMAL/.DS_Store
! rm -rf chest_xray/test/PNEUMONIA/.DS_Store

# Data Exploration

In [3]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import os
import random
import cv2
%matplotlib inline

# Seed Python's RNG so the random image picks in this section are repeatable,
# and use seaborn's "whitegrid" style for the figures.
random.seed(30)
sns.set_style("whitegrid")

In [4]:
# Count the entries in each class folder of the training split.
! ls chest_xray/train/NORMAL | wc -l
! ls chest_xray/train/PNEUMONIA | wc -l

In [5]:
# Count the entries in each class folder of the test split.
! ls chest_xray/test/NORMAL | wc -l
! ls chest_xray/test/PNEUMONIA | wc -l

In [6]:
# Load five distinct NORMAL training X-rays for the preview figure.
# random.sample draws without replacement, so the same file cannot appear
# twice in the preview (random.choices may repeat a file). Sorting the listing
# makes the seeded pick independent of the order os.listdir happens to return.
normal_dir = "chest_xray/train/NORMAL"
normal = [
  mpimg.imread(f"{normal_dir}/{choice}")
  for choice in random.sample(sorted(os.listdir(normal_dir)), k=5)
]

In [7]:
# Load five distinct PNEUMONIA training X-rays for the preview figure.
# random.sample draws without replacement, so the same file cannot appear
# twice in the preview (random.choices may repeat a file). Sorting the listing
# makes the seeded pick independent of the order os.listdir happens to return.
pneumonia_dir = "chest_xray/train/PNEUMONIA"
pneumonia = [
  mpimg.imread(f"{pneumonia_dir}/{choice}")
  for choice in random.sample(sorted(os.listdir(pneumonia_dir)), k=5)
]

In [8]:
# One row of five panels showing the sampled normal X-rays in grayscale.
fig, ax = plt.subplots(nrows=1, ncols=5, figsize=(25,5))
plt.tight_layout(pad=5.0)
title = 'Normal X Rays'
fig.suptitle(title, fontsize=20)

for panel_index in range(5):
  ax[panel_index].imshow(normal[panel_index], cmap="gray")


In [9]:
# One row of five panels showing the sampled pneumonia X-rays in grayscale.
fig, ax = plt.subplots(nrows=1, ncols=5, figsize=(25,5))
plt.tight_layout(pad=5.0)
title = 'Pneumonia X Rays'
fig.suptitle(title, fontsize=20)

for panel_index in range(5):
  ax[panel_index].imshow(pneumonia[panel_index], cmap="gray")


# Creating Validation Set and Data Augmentation

The training set is divided into two parts:


1.   Training Part (90%)
2.   Validation (10%)

Data augmentation is applied to the training and validation sets; the test set is only rescaled.

In [10]:
from keras.preprocessing.image import ImageDataGenerator

In [11]:
! mkdir chest_xray/val
! mkdir chest_xray/val/NORMAL
! mkdir chest_xray/val/PNEUMONIA

In [12]:
# Move a seeded random selection of NORMAL training images into the
# validation folder.
# random.choices draws WITH replacement, so the 135 draws can contain repeats.
# Repeats are dropped up front (first occurrence kept) so each selected file is
# moved exactly once instead of relying on a failing os.rename to skip it.
# NOTE(review): random.sample would move exactly 135 files, but that changes
# the training-set size, which later cells may depend on - verify before switching.
random.seed(2020)
drawn = random.choices(os.listdir("chest_xray/train/NORMAL"), k=135)
unique_choices = list(dict.fromkeys(drawn))
moved = 0
for choice in unique_choices:
  src = f"chest_xray/train/NORMAL/{choice}"
  dest = f"chest_xray/val/NORMAL/{choice}"
  try:
    os.rename(src, dest)
    moved += 1
  except OSError as err:
    # Genuine failures (missing folder, permissions, ...) are reported with
    # their reason; only OS-level errors are caught.
    print(f"{choice}: {err}")
print(f"NORMAL: moved {moved} files ({len(drawn) - len(unique_choices)} repeated draws skipped)")

In [13]:
# Move a seeded random selection of PNEUMONIA training images into the
# validation folder.
# random.choices draws WITH replacement, so the 388 draws can contain repeats.
# Repeats are dropped up front (first occurrence kept) so each selected file is
# moved exactly once instead of relying on a failing os.rename to skip it.
# NOTE(review): random.sample would move exactly 388 files, but that changes
# the training-set size, which later cells may depend on - verify before switching.
random.seed(2020)
drawn = random.choices(os.listdir("chest_xray/train/PNEUMONIA"), k=388)
unique_choices = list(dict.fromkeys(drawn))
moved = 0
for choice in unique_choices:
  src = f"chest_xray/train/PNEUMONIA/{choice}"
  dest = f"chest_xray/val/PNEUMONIA/{choice}"
  try:
    os.rename(src, dest)
    moved += 1
  except OSError as err:
    # Genuine failures (missing folder, permissions, ...) are reported with
    # their reason; only OS-level errors are caught.
    print(f"{choice}: {err}")
print(f"PNEUMONIA: moved {moved} files ({len(drawn) - len(unique_choices)} repeated draws skipped)")

In [14]:
# Generator with augmentation: pixel values scaled by 1/255 plus random
# rotation, width/height shifts, shear, zoom and horizontal flips.
augmentation_options = dict(
    rescale=1/255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
datagen = ImageDataGenerator(**augmentation_options)

# Generator without augmentation: it only rescales pixel values.
test_datagen = ImageDataGenerator(rescale=1./255)

In [15]:
# Image size fed to the network, generator batch size and number of epochs.
image_height = image_width = 200
batch_size = 10
no_of_epochs = 100

In [16]:
# Directory-backed generators for the three splits, with binary labels.
# Keras expects target_size as (height, width), so pass the sizes in that order.
target_size = (image_height, image_width)
train_set = datagen.flow_from_directory('chest_xray/train', target_size=target_size, batch_size=batch_size, class_mode='binary')
# NOTE(review): validation images go through the same random augmentation as
# the training images - confirm this is intended.
val_set = datagen.flow_from_directory('chest_xray/val', target_size=target_size, batch_size=batch_size, class_mode='binary')
# shuffle=False keeps the test batches in a fixed order, so labels read from
# test_set and predictions from model.predict(test_set) line up sample by sample.
test_set = test_datagen.flow_from_directory('chest_xray/test', target_size=target_size, batch_size=batch_size, class_mode='binary', shuffle=False)

# Creating the Model

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import SGD
from tensorflow.python.keras.callbacks import ReduceLROnPlateau

In [18]:
# CNN classifier: four Conv2D (3x3, ReLU) + 2x2 max-pooling stages with
# 16/32/64/128 filters, then flatten, dropout (0.5), two dense ReLU layers
# and a single sigmoid output unit.
model = Sequential()
for n_filters in (16, 32, 64, 128):
    model.add(Conv2D(filters=n_filters, kernel_size=3, activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(512, activation="relu"))
model.add(Dense(256, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

# Build from the configured image size (3 channels) so the model input stays
# in sync with the image_height / image_width settings.
model.build(input_shape=(None, image_height, image_width, 3))

In [19]:
# SGD with Nesterov momentum (lr 0.01, momentum 0.9); the model is compiled
# with binary cross-entropy loss and reports accuracy.
opt = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

In [20]:
# Print the layer-by-layer summary of the built model.
model.summary()

In [21]:
# Learning-rate schedule: multiply the rate by 0.5 when the monitored
# 'accuracy' metric has not improved for 2 epochs, wait 2 epochs after each
# reduction, and never go below 1e-9.
reduce_learning_rate = ReduceLROnPlateau(
    monitor='accuracy',
    factor=0.5,
    patience=2,
    cooldown=2,
    min_lr=1e-9,
    verbose=1,
)

callbacks = [reduce_learning_rate]

In [22]:
# Train on the training generator and validate on val_set after every epoch.
# - callbacks must be passed explicitly for the ReduceLROnPlateau schedule in
#   `callbacks` to take effect.
# - epochs comes from the no_of_epochs setting rather than a repeated literal.
# - batch_size is not passed: the generators already yield batches, and Keras
#   documents that batch_size must not be specified for generator input.
history = model.fit(train_set,
          validation_data=val_set,
          epochs=no_of_epochs,
          callbacks=callbacks)

In [23]:
# Save the trained model twice: to 'mycnn2.h5' and to the path 'mycnn2'.
for save_path in ('mycnn2.h5', 'mycnn2'):
    model.save(save_path)

# Model Evaluation

In [24]:
from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix
import numpy as np

In [25]:
# Evaluate the trained model on the training and test generators.
# NOTE(review): the model is compiled with metrics=['accuracy'], so evaluate()
# presumably returns [loss, accuracy]; despite their names, train_acc and
# test_acc would then hold both values - confirm.
print("Training Set:")
train_acc = model.evaluate(train_set)

print("Test Set:")
test_acc = model.evaluate(test_set)

## Training Set Confusion Matrix

In [26]:
# Collect the true labels batch by batch, in the order train_set yields them.
# len(train_set) is the number of batches, so the loop covers every batch
# whatever the number of training images is (no hard-coded sample count).
# NOTE(review): train_set is created with default shuffling - confirm these
# labels are in the same order as the output of model.predict(train_set).
train_orig = []
for batch_index in range(len(train_set)):
    train_orig.extend(train_set[batch_index][1])

train_orig = np.array(train_orig)

In [27]:
# Predict on the training generator and round the sigmoid outputs to 0 or 1.
train_pred = np.around(model.predict(train_set))

In [28]:
# Confusion matrix of true vs. predicted training labels, drawn with mlxtend
# and labelled on both axes.
class_names = ['Normal', 'Pneumonia']
tick_positions = range(2)

cm = confusion_matrix(train_orig, train_pred)
plt.figure()
plot_confusion_matrix(cm, figsize=(12,8), hide_ticks=True, cmap=plt.cm.Blues)
plt.xticks(tick_positions, class_names, fontsize=16)
plt.yticks(tick_positions, class_names, fontsize=16)
plt.show()

In [29]:
# Flatten the 2x2 confusion matrix into its four counts and derive
# recall (sensitivity), specificity and precision from them.
tn, fp, fn, tp = cm.ravel()

recall = tp / (tp + fn)
specificity = tn / (tn + fp)
precision = tp / (tp + fp)

for template, value in (
    ("Sensitivity (Recall): {:.4f}", recall),
    ("Specificity: {:.4f}", specificity),
    ("Precision: {:.4f}", precision),
):
    print(template.format(value))

In [30]:
# Display the true-negative count taken from the training confusion matrix.
tn

## Test Set Confusion Matrix

In [31]:
# Collect the true labels batch by batch, in the order test_set yields them.
# len(test_set) is the number of batches, so the loop covers every batch
# whatever the number of test images is (no hard-coded sample count).
# NOTE(review): confirm test_set yields batches in the same order here and in
# model.predict(test_set); the labels and predictions are compared by position.
test_orig = []
for batch_index in range(len(test_set)):
    test_orig.extend(test_set[batch_index][1])

test_orig = np.array(test_orig)

In [32]:
# Predict on the test generator and round the sigmoid outputs to 0 or 1.
test_pred = np.around(model.predict(test_set))

In [33]:
# Confusion matrix of true vs. predicted test labels, drawn with mlxtend
# and labelled on both axes.
class_names = ['Normal', 'Pneumonia']
tick_positions = range(2)

cm = confusion_matrix(test_orig, test_pred)
plt.figure()
plot_confusion_matrix(cm, figsize=(12,8), hide_ticks=True, cmap=plt.cm.Blues)
plt.xticks(tick_positions, class_names, fontsize=16)
plt.yticks(tick_positions, class_names, fontsize=16)
plt.show()

In [34]:
# Flatten the 2x2 confusion matrix into its four counts and derive
# recall (sensitivity), specificity and precision from them.
tn, fp, fn, tp = cm.ravel()

recall = tp / (tp + fn)
specificity = tn / (tn + fp)
precision = tp / (tp + fp)

for template, value in (
    ("Sensitivity (Recall): {:.4f}", recall),
    ("Specificity: {:.4f}", specificity),
    ("Precision: {:.4f}", precision),
):
    print(template.format(value))

In [35]:
# Display the true-negative count taken from the test confusion matrix.
tn

In [36]:
# Pack the saved 'mycnn2' model path into a gzip-compressed tar archive.
! tar -cvzf mycnn2.tar.gz mycnn2

In [37]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Apply seaborn's "whitegrid" style to the training-curve plots that follow.
sns.set_style('whitegrid')

In [40]:
# Training vs. validation accuracy recorded for each epoch.
accuracy_curves = (
    ('accuracy', "Training Accuracy"),
    ('val_accuracy', "Validation Accuracy"),
)
for metric_key, curve_label in accuracy_curves:
    plt.plot(history.history[metric_key], label=curve_label)
plt.legend()

In [41]:
# Training vs. validation loss recorded for each epoch.
# The legend labels name the loss curves, since these series are
# history.history['loss'] and ['val_loss'], not accuracy.
plt.plot(history.history['loss'], label="Training Loss")
plt.plot(history.history['val_loss'], label="Validation Loss")
plt.legend()

In [None]:
from tensorflow.keras.models import load_model

In [None]:
# Reload the model previously saved under the path 'mycnn2'.
loaded = load_model('mycnn2')

In [None]:
# Evaluate the reloaded model on the test generator.
loaded.evaluate(test_set)