# Question 3 : An Efficient CNNModel for COVID-19 Disease Detection Based on X-Ray Image Classification


## Task :

<ul>
<li>Covid detection using CNN networks with low rate of falsy cases.</li>


</ul>


## Import Libraries

In [94]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
import keras.utils as image
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout
import os
import seaborn as sns
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

import shutil
from google.colab import drive
from google.colab import files


## Data collection and download

 get the kaggle data-set

In [2]:
uploaded = files.upload()

!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
!unzip -q chest-xray-pneumonia.zip

Saving kaggle.json to kaggle.json
Downloading chest-xray-pneumonia.zip to /content
100% 2.29G/2.29G [00:27<00:00, 114MB/s]
100% 2.29G/2.29G [00:27<00:00, 91.2MB/s]


 get the github data-set

In [3]:
!git clone https://github.com/ieee8023/covid-chestxray-dataset.git


Cloning into 'covid-chestxray-dataset'...
remote: Enumerating objects: 3641, done.[K
remote: Total 3641 (delta 0), reused 0 (delta 0), pack-reused 3641[K
Receiving objects: 100% (3641/3641), 632.96 MiB | 32.36 MiB/s, done.
Resolving deltas: 100% (1450/1450), done.
Updating files: 100% (1174/1174), done.


create data for positive samples (Github)

In [6]:
FILE_PATH = "/content/covid-chestxray-dataset/metadata.csv"
IMAGE_PATH = "/content/covid-chestxray-dataset/images/"


In [7]:
df = pd.read_csv(FILE_PATH)
print(df.shape)

(950, 30)


In [8]:
df.head()

Unnamed: 0,patientid,offset,sex,age,finding,RT_PCR_positive,survival,intubated,intubation_present,went_icu,...,date,location,folder,filename,doi,url,license,clinical_notes,other_notes,Unnamed: 29
0,2,0.0,M,65.0,Pneumonia/Viral/COVID-19,Y,Y,N,N,N,...,"January 22, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-a-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,
1,2,3.0,M,65.0,Pneumonia/Viral/COVID-19,Y,Y,N,N,N,...,"January 25, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-b-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,
2,2,5.0,M,65.0,Pneumonia/Viral/COVID-19,Y,Y,N,N,N,...,"January 27, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-c-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,
3,2,6.0,M,65.0,Pneumonia/Viral/COVID-19,Y,Y,N,N,N,...,"January 28, 2020","Cho Ray Hospital, Ho Chi Minh City, Vietnam",images,auntminnie-d-2020_01_28_23_51_6665_2020_01_28_...,10.1056/nejmc2001272,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,"On January 22, 2020, a 65-year-old man with a ...",,
4,4,0.0,F,52.0,Pneumonia/Viral/COVID-19,Y,,N,N,N,...,"January 25, 2020","Changhua Christian Hospital, Changhua City, Ta...",images,nejmc2001573_f1a.jpeg,10.1056/NEJMc2001573,https://www.nejm.org/doi/full/10.1056/NEJMc200...,,diffuse infiltrates in the bilateral lower lungs,,


In [86]:
TARGET_DIR_COVID = "/content/Dataset/Covid"
DIR_NORMAL = "/content/Dataset/Normal"
PARENT_DIR = "/content/Dataset"

In [87]:
if not os.path.exists(DIR_NORMAL):
    os.mkdir(DIR_NORMAL)
    print("Normal directory created")

if not os.path.exists(PARENT_DIR):
    os.mkdir(PARENT_DIR)
    print("Parent directory created")

if not os.path.exists(TARGET_DIR_COVID):
    os.mkdir(TARGET_DIR_COVID)
    print("Covid folder created")



Covid folder created


### Of 178 images, 136 X-ray images belonged to confirmed COVID-19 patients and other 42 images belonged to normal or people with other diseases

### Analysis of X-Ray Images by Medical Experts. A deep analysis was done on the X-ray images by medical specialists. Out of 135 X-ray images of confirmed COVID-19 patients,only a set of 90 X-ray images was selected as a perfect candidate to train the models.

In [77]:
# if os.path.exists(TARGET_DIR_COVID):
#     shutil.rmtree(TARGET_DIR_COVID)
#     print(f"{TARGET_DIR_COVID} folder cleared")

# if os.path.exists(DIR_NORMAL):
#     shutil.rmtree(DIR_NORMAL)
#     print(f"{DIR_NORMAL} folder cleared")

/content/Dataset/Covid folder cleared
/content/Dataset/Normal folder cleared


In [80]:
cnt_covid = 0

max_images_covid = 90

for i, row in df.iterrows():
    filename = row["filename"]
    image_path = os.path.join(IMAGE_PATH, filename)

    if row["finding"] == "Pneumonia/Viral/COVID-19" and row["view"] == "PA" and cnt_covid < max_images_covid:
        image_copy_path = os.path.join(TARGET_DIR_COVID, f"covid_{cnt_covid}.png")
        shutil.copy2(image_path, image_copy_path)
        cnt_covid += 1

        if cnt_covid >= max_images_covid:
            break


print(f"{cnt_covid} COVID-19 images (renamed) copied to {TARGET_DIR_COVID}")

90 COVID-19 images (renamed) copied to /content/Dataset/Covid


In [81]:
cnt_normal = 0
max_images_normal = 42
for i, row in df.iterrows():
    filename = row["filename"]
    image_path = os.path.join(IMAGE_PATH, filename)

    if row["finding"] != "Pneumonia/Viral/COVID-19" and cnt_normal < max_images_normal:
        image_copy_path = os.path.join(DIR_NORMAL, f"normal_{cnt_normal}.png")
        shutil.copy2(image_path, image_copy_path)
        cnt_normal += 1

    if  cnt_normal >= max_images_normal:
        break

print(f"{cnt_normal} normal images copied to {DIR_NORMAL}")

42 normal images copied to /content/Dataset/Normal


### Balancing Dataset Classes: COVID-19 cases, 136 normal chest X-ray images have been used. Extra X-ray images were downloaded from Kaggle

In [82]:
ADDITIONAL_NORMAL_DIR = "/content/chest_xray/train/NORMAL"

cnt_additional_normal = 0

max_additional_normal_images = 136

for filename in os.listdir(ADDITIONAL_NORMAL_DIR):
    if cnt_additional_normal >= max_additional_normal_images:
        break

    image_path = os.path.join(ADDITIONAL_NORMAL_DIR, filename)
    image_copy_path = os.path.join(DIR_NORMAL, f"normal_{cnt_additional_normal + max_images_normal}.png")

    shutil.copy2(image_path, image_copy_path)
    cnt_additional_normal += 1

print(f"{cnt_additional_normal} additional normal images copied to {DIR_NORMAL}")


136 additional normal images copied to /content/Dataset/Normal


## Data Augmentation

In [85]:
# if os.path.exists(TARGET_DIR_COVID):
#     shutil.rmtree(TARGET_DIR_COVID)
#     print(f"{TARGET_DIR_COVID} folder cleared")

/content/Dataset/Covid folder cleared


In [88]:
cnt_covid = 0

max_images_covid = 90

max_horizontal_flip = 90
max_rotation_90 = 90
max_rotation_180 = 90
max_rotation_270 = 90

datagen_horizontal_flip = ImageDataGenerator(horizontal_flip=True)
datagen_rotation_90 = ImageDataGenerator(rotation_range=90)
datagen_rotation_180 = ImageDataGenerator(rotation_range=180)
datagen_rotation_270 = ImageDataGenerator(rotation_range=270)

for i, row in df.iterrows():
    if row["finding"] == "Pneumonia/Viral/COVID-19" and row["view"] == "PA" and cnt_covid < max_images_covid:
        original_image_path = os.path.join(IMAGE_PATH, row["filename"])
        original_image_copy_path = os.path.join(TARGET_DIR_COVID, f"covid_{cnt_covid}.png")
        shutil.copy2(original_image_path, original_image_copy_path)
        cnt_covid += 1

        if cnt_covid >= max_images_covid:
            break

        img = image.load_img(original_image_path)
        x = image.img_to_array(img)
        x = x.reshape((1,) + x.shape)

        i = 0
        for batch in datagen_horizontal_flip.flow(x, batch_size=1, save_to_dir=TARGET_DIR_COVID, save_prefix=f"aug_hflip_{cnt_covid}", save_format="png"):
            i += 1
            if i >= max_horizontal_flip:
                break

        i = 0
        for batch in datagen_rotation_90.flow(x, batch_size=1, save_to_dir=TARGET_DIR_COVID, save_prefix=f"aug_rot90_{cnt_covid}", save_format="png"):
            i += 1
            if i >= max_rotation_90:
                break

        i = 0
        for batch in datagen_rotation_180.flow(x, batch_size=1, save_to_dir=TARGET_DIR_COVID, save_prefix=f"aug_rot180_{cnt_covid}", save_format="png"):
            i += 1
            if i >= max_rotation_180:
                break

        i = 0
        for batch in datagen_rotation_270.flow(x, batch_size=1, save_to_dir=TARGET_DIR_COVID, save_prefix=f"aug_rot270_{cnt_covid}", save_format="png"):
            i += 1
            if i >= max_rotation_270:
                break


KeyboardInterrupt: ignored

In [89]:
TRAIN_PATH = "/content/Dataset"
VALIDATION_PATH = "/content/chest_xray/val/"

### Model Architecture

In [91]:
model = Sequential()

model.add(Conv2D(64,kernel_size=(3,3),activation="relu",input_shape=(150,150,3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization(axis=-1))
model.add(Dropout(0.2))

model.add(Conv2D(64,(3,3),activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization(axis=-1))
model.add(Dropout(0.2))

model.add(Conv2D(128,(3,3),activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization(axis=-1))
model.add(Dropout(0.2))

model.add(Conv2D(128,(3,3),activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization(axis=-1))
model.add(Dropout(0.2))

# model.add(Conv2D(256,(3,3),activation="relu"))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(BatchNormalization(axis=-1))
# model.add(Dropout(0.2))


# model.add(Conv2D(256,(3,3),activation="relu"))
# model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(BatchNormalization(axis=-1))
# model.add(Dropout(0.2))

model.add(Flatten())
model.add(Dense(512,activation="relu"))
model.add(BatchNormalization(axis=-1))
model.add(Dense(256,activation="relu"))
model.add(BatchNormalization(axis=-1))
model.add(Dense(1,activation="sigmoid"))

model.compile(loss=keras.losses.binary_crossentropy,optimizer = "adam",metrics=["accuracy"])

In [None]:
model.summary()


#### train model

In [93]:
train_datagen = image.ImageDataGenerator(
    rescale = 1./255,
    shear_range = 0.2,
)
test_dataset = image.ImageDataGenerator(rescale = 1./255)

AttributeError: ignored

In [95]:
train_generator = train_datagen.flow_from_directory(
    '/content/Dataset/',
    target_size = (150,150),
    batch_size = 32,
    class_mode = 'binary'
)

Found 1152 images belonging to 2 classes.


In [96]:
train_generator.class_indices


{'Covid': 0, 'Normal': 1}

In [98]:
validation_generator = test_dataset.flow_from_directory(
    '/content/chest_xray/val/',
    target_size = (150,150),
    batch_size = 32,
    class_mode = 'binary'
)

Found 16 images belonging to 2 classes.


#### Fit The Model

In [99]:
hist = model.fit_generator(
    train_generator,
    steps_per_epoch = 8,
    epochs = 10,
    validation_data = validation_generator,
    validation_steps = 2
)

  hist = model.fit_generator(


Epoch 1/10


KeyboardInterrupt: ignored