<a href="https://colab.research.google.com/github/dexter7662/Pneumonia-identification-using-VGG19/blob/main/Pneumonia_Predictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing the libraries

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from keras_preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.layers import Dropout
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.regularizers import l2
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Flatten

Importing the dataset

In [None]:
os.environ['KAGGLE_USERNAME'] = "lorddexter"
os.environ['KAGGLE_KEY'] = "281706d18d96f899cdc0f5617516c67a"
!kaggle datasets download -d khoongweihao/covid19-xray-dataset-train-test-sets

In [None]:
!unzip covid19-xray-dataset-train-test-sets.zip

In [None]:
# Path prefix (with trailing slash) that every image path below is built from.
# Presumably the folder created by extracting the archive - verify after unzip.
main = "xray_dataset_covid19/"

In [None]:
# File names found in each split/class folder.
# os.listdir returns entries in arbitrary order, so each listing is sorted to
# keep the row order of the dataframes built from them the same on every run.
train_normal = sorted(os.listdir(main + 'train/NORMAL'))
train_pneumonia = sorted(os.listdir(main + 'train/PNEUMONIA'))

test_normal = sorted(os.listdir(main + 'test/NORMAL'))
test_pneumonia = sorted(os.listdir(main + 'test/PNEUMONIA'))

Creating dataframes from directory

In [None]:
def dataframe(filenames, classes):
  """Pair every file name with one and the same class label.

  Args:
    filenames: sized collection of file names; becomes the 'Filename' column.
    classes: single label value repeated for every row of the 'Label' column.

  Returns:
    A pandas DataFrame with the columns 'Filename' and 'Label'.
  """
  labels = [classes] * len(filenames)
  columns = {'Filename': filenames, 'Label': labels}
  return pd.DataFrame(columns)

# One labelled frame per split and class ...
train_normal_df, train_pneumonia_df = (
    dataframe(train_normal, 'Normal'),
    dataframe(train_pneumonia, 'Pneumonia'),
)
test_normal_df, test_pneumonia_df = (
    dataframe(test_normal, 'Normal'),
    dataframe(test_pneumonia, 'Pneumonia'),
)

# ... stacked row-wise into one frame per split. The original row indices are
# kept here, so index values repeat between the two classes.
train_df = pd.concat([train_normal_df, train_pneumonia_df])
test_df = pd.concat([test_normal_df, test_pneumonia_df])

In [None]:
def pathmaker(df_name, df, empty_list):
    """Append one image path per row of ``df_name`` to ``empty_list``.

    Args:
        df_name: DataFrame whose first column holds the file name and whose
            second column holds the label.
        df: name of the split folder placed after the global ``main`` prefix
            (the notebook passes 'train' or 'test').
        empty_list: list that is extended in place; nothing is returned.

    A row labelled 'Normal' is mapped to the NORMAL folder; every other label
    is mapped to the PNEUMONIA folder.
    """
    for row in df_name.values:
        folder = 'NORMAL' if row[1] == 'Normal' else 'PNEUMONIA'
        empty_list.append(str(main + df + '/' + folder + '/' + row[0]))


# This cell rewrites train_df / test_df. Running it a second time would feed
# pathmaker frames that no longer have the 'Filename' column and silently
# produce wrong paths, so fail loudly instead.
for _split, _frame in (('train_df', train_df), ('test_df', test_df)):
    if 'Filename' not in _frame.columns:
        raise RuntimeError(
            _split + " has no 'Filename' column; re-run the cell that creates "
            "the dataframes before running this cell again."
        )

# Lists that pathmaker fills in place
train_path = []
test_path = []

pathmaker(train_df, 'train', train_path)
pathmaker(test_df, 'test', test_path)

train_df['Path'] = train_path
test_df['Path'] = test_path

# Shuffle the rows with a fixed seed (the same value the generators use) so
# the row order is the same on every run, then keep only the columns needed
# downstream. Columns are selected by name rather than by position.
train_df = train_df.sample(frac=1, random_state=42).reset_index(drop=True)[['Label', 'Path']]
test_df = test_df.sample(frac=1, random_state=42).reset_index(drop=True)[['Label', 'Path']]

train_df.head()

Checking for any class imbalance

In [None]:
# Number of training images per label
sns.set_theme(style='darkgrid')
sns.countplot(data=train_df, x='Label')

In [None]:
# Number of test images per label
sns.set_theme(style='darkgrid')
sns.countplot(data=test_df, x='Label')

Creating the data input pipeline

In [None]:
# Generator shared by the training and validation iterators: multiplies pixel
# values by 1/255 and reserves a 0.2 fraction of the data for validation.
datagen = ImageDataGenerator(validation_split=0.2, rescale=1.0 / 255.0)

In [None]:
# Options that are identical for all three iterators.
_flow_options = dict(
    directory=None,
    x_col='Path',
    batch_size=32,
    seed=42,
    target_size=(224, 224),
    validate_filenames=False,
)

# Training and validation batches both come from train_df; `subset` picks the
# side of the validation split configured on `datagen`.
train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    y_col='Label',
    subset='training',
    shuffle=True,
    class_mode='categorical',
    **_flow_options
)

validation_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    y_col='Label',
    subset='validation',
    shuffle=True,
    class_mode='categorical',
    **_flow_options
)

# The test iterator yields images only (no labels) and is not shuffled, so
# predictions stay in the row order of test_df.
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    y_col=None,
    shuffle=False,
    class_mode=None,
    **_flow_options
)

Designing model architecture

In [None]:
# VGG16-style layout: five convolutional stages (2-2-3-3-3 conv layers, each
# stage closed by 2x2 max pooling) followed by two 4096-unit dense layers and
# a 2-unit classification layer.
#
# Changes compared with the previous version of this cell:
#   * every Conv2D now has a ReLU activation; without one the layers are
#     linear and the conv stack has no non-linearity apart from max pooling;
#   * the output layer uses softmax, which matches the one-hot labels and the
#     categorical cross-entropy loss the model is compiled with;
#   * the input tensor is no longer called `input`, which shadowed the builtin.

# (filters, number of conv layers) for each stage
vgg_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

conv_options = dict(
    kernel_size=(3, 3),
    strides=(1, 1),
    padding='same',
    activation='relu',
    kernel_initializer='random_normal',
    bias_initializer='zeros',
)

inputs = Input(shape=(224, 224, 3))

x = inputs
for n_filters, n_convs in vgg_stages:
    for _ in range(n_convs):
        x = Conv2D(filters=n_filters, **conv_options)(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=2)(x)

x = Flatten()(x)

# Two regularised fully connected layers, each followed by dropout
for _ in range(2):
    x = Dense(units=4096, activation='relu', kernel_regularizer=l2(0.0005),
              kernel_initializer='random_normal', bias_initializer='zeros')(x)
    x = Dropout(rate=0.5)(x)

outputs = Dense(units=2, activation='softmax', kernel_regularizer=l2(0.0005),
                kernel_initializer='random_normal', bias_initializer='zeros')(x)

A = Model(inputs=inputs, outputs=outputs)
A.summary()



In [None]:
# Adam optimiser with a 1e-4 learning rate; the remaining hyper-parameters are
# spelled out explicitly rather than left to the library defaults.
opt = tf.keras.optimizers.Adam(
    learning_rate=1e-4,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    amsgrad=False,
    name="Adam",
)
A.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [None]:
class myCallback(tf.keras.callbacks.Callback):
  """Stop training as soon as the training accuracy reaches a threshold.

  Args:
    threshold: accuracy (as a fraction) at which training is stopped.
      Defaults to 0.9, the value that was previously hard-coded.
  """

  def __init__(self, threshold=0.9):
    super().__init__()
    self.threshold = threshold

  def on_epoch_end(self, epoch, logs=None):
    """Request a stop when the epoch's 'accuracy' entry meets the threshold.

    Args:
      epoch: index of the epoch that just finished (unused).
      logs: metrics dict for the epoch; may be None or lack 'accuracy', in
        which case nothing happens.
    """
    # `logs=None` instead of a shared mutable `{}` default; a missing metric
    # must not crash training with a None >= float comparison.
    accuracy = (logs or {}).get('accuracy')
    if accuracy is not None and accuracy >= self.threshold:
      print('\nReached {:.0%} accuracy so cancelling training!'.format(self.threshold))
      self.model.stop_training = True

myCallback_object = myCallback()

In [None]:
# The generators already yield batches (their size is set when they are
# created), so `batch_size` must not be passed to fit() for this input type.
# `callbacks` is documented as a list of callback instances.
history = A.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=25,
    callbacks=[myCallback_object],
)

# fit() returns the training History, not a model; the old name is kept as an
# alias so existing references to `model` keep working.
model = history

Generating predictions using model

In [None]:
# Rewind the test iterator so predictions start from its first batch, then
# score every test image.
test_generator.reset()
y_pred = A.predict(x=test_generator)
print(y_pred)

Converting Predictions to Labels

In [None]:
# Index of the highest-scoring output unit for each row of y_pred
predicted_class_indices = np.argmax(y_pred, axis=1)

In [None]:
# class_indices maps label -> index; invert it to translate the predicted
# indices back into label names.
names = {index: label for label, index in train_generator.class_indices.items()}
predictions = [names[class_index] for class_index in predicted_class_indices]
print(predictions)

Calculating and Visualizing Confusion matrix

In [None]:
# confusion_matrix(y_true, y_pred) puts the true labels on the rows and the
# predicted labels on the columns. In the heatmap, rows are the y axis and
# columns the x axis, so "Actual" belongs on y and "Predicted" on x.
class_labels = sorted(set(test_df['Label']) | set(predictions))
cm = confusion_matrix(test_df['Label'], predictions, labels=class_labels)

sns.heatmap(
    cm,
    annot=True,
    fmt='d',  # counts are integers; avoids scientific notation in the cells
    cmap='inferno_r',
    square=True,
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.xlabel('Predicted Values')
plt.ylabel('Actual Values')
plt.title('Confusion Matrix')

In [None]:
# With one label per image, micro-averaged precision, recall and F1 are all
# equal to accuracy, so the previous four numbers were always identical.
# Precision, recall and F1 are now reported for the positive class, Pneumonia.
y_true = test_df['Label']

accuracy = accuracy_score(y_true, predictions)
print('Accuracy :', accuracy)

precision = precision_score(y_true, predictions, average='binary', pos_label='Pneumonia')
print('Precision:', precision)

recall = recall_score(y_true, predictions, average='binary', pos_label='Pneumonia')
print('Recall   :', recall)

f1 = f1_score(y_true, predictions, average='binary', pos_label='Pneumonia')
print('F1 score :', f1)