# ITW 3 Python Project - Pneumonia detection using Chest X-Ray images by Convolutional Neural Network 

### Importing the libraries

In [1]:
import tensorflow as tf
import numpy as np

# Importing the image generator module
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

#Importing os module for saving model in h5 format
import os.path
from tensorflow.keras.models import load_model

# Importing for the confusion matrix and classification report
%matplotlib inline
from sklearn.metrics import classification_report, confusion_matrix
import itertools
import matplotlib.pyplot as plt

In [2]:
# Show the TensorFlow and NumPy versions this notebook is running against.
print(tf.__version__, np.__version__, sep='\n')

2.6.1
1.19.5


## Part 1 - Data Preprocessing

### Preprocessing the Training set

In [3]:
# Training-time generator: scales pixel values by 1/255 and applies random
# shear, zoom and horizontal flips as augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Stream 64x64 images from the training folder in batches of 32 with binary labels.
training_set = train_datagen.flow_from_directory(
    directory='pneumonia_image_datasets/training_set',
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary',
)

Found 5216 images belonging to 2 classes.


### Preprocessing the Test set

In [4]:
# The test generator only rescales pixel values by 1/255; no augmentation is applied.
test_datagen = ImageDataGenerator(rescale=1./255)

# Stream 64x64 images from the test folder in batches of 32 with binary labels.
test_set = test_datagen.flow_from_directory(
    directory='pneumonia_image_datasets/test_set',
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary',
)

Found 625 images belonging to 2 classes.


## Part 2 - Building the CNN

### Initialising the CNN

In [5]:
# Start from an empty Sequential model; the following cells append layers to it one by one.
cnn = tf.keras.models.Sequential()

### Step 1 - Convolution

In [6]:
# First convolution block: 32 filters with a 3x3 kernel and ReLU activation.
# input_shape fixes the model input to 64x64 images with 3 channels.
cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[64, 64, 3]))
# 2x2 max pooling with stride 2.
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

### Step 2 -  Adding a second convolutional layer and Pooling

In [7]:
# Second convolution block: 64 filters with a 3x3 kernel and ReLU activation.
cnn.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
# Pooling for the second block: 2x2 max pooling with stride 2.
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

### Step 3 - Adding a third convolutional layer and Pooling

In [8]:
# Third convolution block: 128 filters with a 3x3 kernel and ReLU activation.
cnn.add(tf.keras.layers.Conv2D(filters=128, kernel_size=3, activation='relu'))
# Pooling for the third block: 2x2 max pooling with stride 2.
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

### Step 3 - Flattening

In [9]:
# Flatten the pooled feature maps into a single vector for the dense layers.
cnn.add(tf.keras.layers.Flatten())

### Step 4 - Full connection

In [10]:
# Fully connected hidden layer with 120 units and ReLU activation.
cnn.add(tf.keras.layers.Dense(units=120, activation='relu'))


### Step 5 - Dropout Layer

In [11]:
# Dropout with rate 0.2 between the hidden dense layer and the output layer.
cnn.add(tf.keras.layers.Dropout(rate=0.2))

### Step 5 - Output Layer

In [12]:
# Single sigmoid unit: the model outputs one score in (0, 1) per image.
cnn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

### Step 6 - Summary

In [13]:
# Print the layer-by-layer architecture (output shapes and parameter counts).
cnn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 62, 62, 32)        896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 29, 29, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 128)         0         
_________________________________________________________________
flatten (Flatten)            (None, 4608)              0

## Part 3 - Training the CNN

### Compiling the CNN

In [14]:
# Adam optimizer with binary cross-entropy loss; accuracy is tracked as the metric.
cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

### Training the CNN on the Training set and evaluating it on the Test set

In [15]:
# Stop training once validation accuracy has failed to improve for 15 epochs;
# restore_best_weights=True asks Keras to restore the weights of the best epoch.
es = tf.keras.callbacks.EarlyStopping(patience=15, monitor='val_accuracy', restore_best_weights=True)

# `training_set` and `test_set` are generators that already yield batches of 32
# (set in flow_from_directory), so `batch_size` must not be passed to fit():
# Keras documents that it should not be specified for generator inputs.
# NOTE(review): the test set doubles as the validation set here, so early
# stopping selects the model on test data — consider a separate validation split.
history = cnn.fit(x = training_set,
                  validation_data = test_set,
                  epochs = 32,
                  callbacks = [es])

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32

KeyboardInterrupt: 

## Part 4.1 - Making a classification report and accuracy metrics

In [14]:
# Per-epoch metrics recorded by cnn.fit().
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Size the x-axis from the recorded history instead of the configured epoch
# count: early stopping can end training before all 32 epochs, and plotting
# a 32-long x against a shorter y makes plt.plot raise a shape error.
epochs_range = range(len(acc))

plt.figure(figsize=(32, 32))

# Left panel: accuracy curves.
plt.subplot(2, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss curves.
plt.subplot(2, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

NameError: name 'history' is not defined

## Part 4.2 - Confusion matrix

In [None]:
# Build an evaluation generator over the test folder with shuffling disabled,
# so the order of the predictions matches the order of `eval_set.classes`.
eval_set = test_datagen.flow_from_directory(
    'pneumonia_image_datasets/test_set',
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary',
    shuffle=False)

# The network ends in a single sigmoid unit, so predictions are probabilities
# of class index 1. They are thresholded at 0.5 (np.argmax over a single
# output column would always return 0).
predicted_probabilities = cnn.predict(eval_set)
rounded_predictions = (predicted_probabilities.ravel() > 0.5).astype(int)

cm = confusion_matrix(y_true=eval_set.classes, y_pred=rounded_predictions)

# Class names ordered by their integer index, as assigned by the generator.
class_names = sorted(eval_set.class_indices, key=eval_set.class_indices.get)
print(classification_report(eval_set.classes, rounded_predictions, target_names=class_names))

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize = False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix with true labels on the rows and predicted labels
        on the columns.
    classes : sequence of str
        Tick labels, ordered by class index.
    normalize : bool, default False
        When True, each row is divided by its sum so the cells show the
        fraction of each true class; rows that sum to zero are left as zeros.
    title : str
        Title drawn above the plot.
    cmap : matplotlib colormap
        Colormap used for the image.
    """
    cm = np.asarray(cm)

    # Normalize before drawing so the image, the printed matrix and the
    # cell annotations all show the same values.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print("Confusion matrix, without normalization")

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is; fractions are shown with two decimals.
    cell_format = '.2f' if np.issubdtype(cm.dtype, np.floating) else 'd'
    # Cells darker than half the maximum get white text so they stay readable.
    thresh = cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], cell_format),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called after the axis labels are set so they are included in the layout.
    plt.tight_layout()

In [None]:
# Tick labels in class-index order.
# NOTE(review): assumes class index 0 is the normal folder and index 1 the
# pneumonia folder — confirm against test_set.class_indices.
cm_plot_labels = ["normal_sample", "diseased_sample"]
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Confusion Matrix ')

## Part 5 - Saving and loading the model

In [15]:
MODEL_PATH = 'model.h5'

# Save the in-memory model only when no saved file exists yet, so an existing
# model.h5 is never overwritten and a fresh run does not fail on a missing file.
if not os.path.isfile(MODEL_PATH):
    cnn.save(MODEL_PATH)

# Load the saved model; from here on `cnn` refers to the model stored on disk.
cnn = load_model(MODEL_PATH)

In [16]:
# Print the architecture of the model that was just loaded from model.h5.
cnn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 62, 62, 32)        896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 29, 29, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 128)         0         
_________________________________________________________________
flatten (Flatten)            (None, 4608)              0

## Part 6 - Making a prediction

In [17]:
# Load one X-ray and resize it to the 64x64 input size the network was built with.
test_image = image.load_img('person1950_bacteria_4881.jpeg', target_size = (64, 64))
# Apply the same 1/255 rescaling the training and test generators use;
# without it the model receives 0-255 pixel values it was never trained on.
test_image = image.img_to_array(test_image) / 255.0
# Add the leading batch axis expected by predict().
test_image = np.expand_dims(test_image, axis = 0)
result = cnn.predict(test_image)

# The sigmoid output is the score for class index 1.
# NOTE(review): this assumes class index 1 is the pneumonia folder —
# confirm with training_set.class_indices.
if result[0][0] > 0.5:
    prediction = "It's a pneumonia sample"
else:
    prediction = "It's a normal sample"

print("\nResult : ")
print(prediction)


Result : 
It's a pneumonia sample
