# Covid Chest X-ray Classification with CNN

In [1]:
# Dataset: https://www.kaggle.com/datasets/sid321axn/covid-cxr-image-dataset-research

# Root directory of the dataset. Later cells glob the `covid/`, `normal/` and
# `virus/` sub-folders beneath it.
data_dir = '../input/covid-cxr-image-dataset-research/COVID_IEEE'

## Importing Required Libraries 

In [2]:
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dropout
import cv2
import os
import PIL
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pathlib
from sys import getsizeof
import numpy as np

## Loading Data from Local Directory 

In [3]:
# Turn the dataset root into a Path and list the files of each class folder.
data_dir = pathlib.Path(data_dir)
covid, normal, virus = (
    list(data_dir.glob(folder + '/*'))
    for folder in ('covid', 'normal', 'virus')
)

## Analysing Data

In [4]:
# Number of image files found for each class.
data = {'Covid': len(covid), 'Normal': len(normal), 'Virus': len(virus)}
Xray, Number = list(data), list(data.values())

# Bar chart of the class distribution, drawn through the explicit Axes API.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(Xray, Number, width=0.4)
ax.set_xlabel("Cases")
ax.set_ylabel("Number of x-rays")
ax.set_title("Number of x-rays of each case")
plt.show()

## Plotting Data

In [5]:
# Map each class folder name to the list of image paths it contains.
xray_dict = {
    label: list(data_dir.glob(label + '/*'))
    for label in ('covid', 'normal', 'virus')
}

In [6]:
# Read the first COVID x-ray from disk; cv2.imread returns it as a NumPy array.
sample_path = xray_dict['covid'][0]
img = cv2.imread(str(sample_path))
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None rather
    # than raising, so fail here with a clear message.
    raise FileNotFoundError('could not read image: ' + str(sample_path))

# OpenCV stores channels in BGR order while matplotlib expects RGB, so convert
# a copy for display only; `img` itself is left untouched.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.xlabel('Covid x-ray')
print('size of image',img.shape)

In [7]:
# Integer class id for every class folder name (covid=0, normal=1, virus=2).
xray_label_dict = {
    class_name: class_id
    for class_id, class_name in enumerate(('covid', 'normal', 'virus'))
}

In [8]:
# Load every image, resize it to a common size and record its integer label.
IMG_SIZE = (256, 256)  # target size passed to cv2.resize

x, y = [], []
unreadable = []  # paths cv2 could not decode
for type_n, images in xray_dict.items():
    for image in images:
        loaded = cv2.imread(str(image))
        if loaded is None:
            # cv2.imread returns None for corrupt / non-image files; passing
            # that to cv2.resize would abort the whole load, so skip the file.
            unreadable.append(image)
            continue
        img = loaded
        x.append(cv2.resize(img, IMG_SIZE))
        y.append(xray_label_dict[type_n])
x = np.array(x)
y = np.array(y)
if unreadable:
    print('skipped unreadable files:', len(unreadable))
print(x.shape)
print(y.shape)


## Sample Images from Dataset

In [9]:
# Human-readable title for each integer class id.
titles = {
    0 : "Covid",
    1 : "Normal",
    2 : "Virus"
}

import random

# Pick up to 10 distinct images. Sampling from the real dataset length avoids
# an IndexError on smaller datasets and lets every image be drawn, which a
# hard-coded upper bound does not.
sample_indices = random.sample(range(len(x)), k=min(10, len(x)))

plt.figure(figsize=(15,15))
for n, r in enumerate(sample_indices):
    plt.subplot(3,5,n+1)
    plt.imshow(x[r])
    plt.title(titles[y[r]])
    plt.axis('off')
plt.tight_layout()
plt.show()

## Data Augmentation

Data augmentation is a set of techniques that increase the amount of training data by adding slightly modified copies of existing samples or synthetic data derived from them. It acts as a regularizer and helps reduce overfitting when training a machine learning model.

In [10]:
# Display whichever image is currently bound to `img`, before augmentation.
plt.imshow(img)
plt.gca().set_title("Orignal Image")

In [11]:
# Mirror the image left-to-right by reversing its column axis, then show it.
img_aug = img[:, ::-1]
plt.imshow(img_aug)
plt.gca().set_title("Horizontaly Flipped Image")

In [12]:
# Read the third image of the 'virus' class and report its array shape.
virus_sample_path = str(xray_dict['virus'][2])
img = cv2.imread(virus_sample_path)
print(img.shape)

In [13]:
# Random augmentation pipeline, used as the first stage of the classifier.
# `input_shape` must be given on the FIRST layer of a Sequential model; on any
# later layer it is ignored and the pipeline stays unbuilt until first called.
data_augmentation = keras.Sequential([
    #keras.layers.experimental.preprocessing.RandomRotation(0.1),
    keras.layers.experimental.preprocessing.RandomZoom(0.1, input_shape=(256, 256, 3)),
    keras.layers.experimental.preprocessing.RandomFlip("horizontal"),
])

## Splitting of Data into Training and Testing Data

In [14]:
# Hold out a test set (scikit-learn's default test fraction). `stratify=y`
# keeps the class proportions the same in both splits, so the test metrics are
# not skewed by an unlucky draw from the unevenly sized classes.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=3, stratify=y
)

In [15]:
# Number of samples in the training split, then in the test split.
for split in (x_train, x_test):
    print(len(split))

## Normalization of data

In [16]:
# Scale pixel values by 1/255. The integer-dtype guard makes the cell safe to
# re-run: arrays that are already floating point are left untouched instead of
# being divided a second time. float32 is used because a plain `/ 255`
# promotes the arrays to float64, which doubles their memory footprint.
if np.issubdtype(x_train.dtype, np.integer):
    x_train = x_train.astype(np.float32) / 255.0
if np.issubdtype(x_test.dtype, np.integer):
    x_test = x_test.astype(np.float32) / 255.0

In [17]:
# Inspect the first training image after the 1/255 scaling; as the cell's
# last expression, the array is displayed by the notebook.
x_train[0]

## Building Model

In [18]:
# CNN classifier: augmentation -> 3 x (Conv -> MaxPool -> Dropout) -> dense head.
# The input shape is declared once with keras.Input at the top of the stack;
# an `input_shape` argument on a non-first layer is ignored by Sequential.
# Every layer, Dropout included, comes from `tensorflow.keras` so that no
# layers from a separately installed `keras` package are mixed in.
Model = keras.Sequential([
    keras.Input(shape=(256, 256, 3)),

    # Data augmentation
    data_augmentation,

    # Convolutional feature extractor
    keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Dropout(0.3),

    keras.layers.Conv2D(32, 4, padding='same', activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Dropout(0.4),

    keras.layers.Conv2D(64, 5, padding='same', activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Dropout(0.3),

    # Dense classification head; layer input sizes are inferred automatically.
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(256, activation='relu'),
    # One softmax output per class (covid / normal / virus).
    keras.layers.Dense(3, activation='softmax'),
])

# Labels are integer class ids, hence the sparse categorical cross-entropy loss.
Model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [19]:
# Training hyper-parameters.
EPOCHS = 100
BATCH_SIZE = 100

# Train on the training split.
Model.fit(x=x_train, y=y_train, batch_size=BATCH_SIZE, epochs=EPOCHS)

## Model Summary

In [20]:
# Print the layer-by-layer summary of the network.
Model.summary()

## Model Evaluation

In [21]:
# Score the trained network on the held-out test split.
Model.evaluate(x=x_test, y=y_test)

In [22]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Predicted scores for every test image; the largest score in each row gives
# the predicted class id.
y_pred = Model.predict(x_test)
y_pred_classes = np.argmax(y_pred, axis=1)

report = classification_report(y_test, y_pred_classes)
print("classification Report : \n", report)


## Saving the Model

In [23]:
# Save the trained model to the file "Covid.h5".
Model.save(filepath="Covid.h5")

# Reducing the size of Model by Post-Training Quantization

In [24]:
# Output file for the converted model.
TFLITE_PATH = 'model_tflite_quant.tflite'

# Convert the trained Keras model to TensorFlow Lite with the converter's
# default optimizations enabled.
converter = tf.lite.TFLiteConverter.from_keras_model(Model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Write the converted model to disk.
with open(TFLITE_PATH, mode='wb') as tflite_file:
    tflite_file.write(tflite_model)