# **Using Tensorflow for detecting Covid-19 Infected Lungs from Normal Lungs with Chest X-Ray**

Dataset - COVID-19 Radiography Database, with 3616 COVID chest X-ray images and over 10,000 normal lung images. We picked 3616 normal images to balance the classes and avoid bias.

Preprocessing:

Images were augmented with RandomRotation and normalized with Rescaling

The CNN architecture was as follows: preprocessing layer, conv layer, maxpooling, conv, maxpooling, flatten, dense, dropout, dense with sigmoid activation

Trained for up to 30 epochs with early stopping. After the 17th epoch, the model started to overfit

Accuracy on validation set was 81.13%

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [None]:
import tensorflow as tf

In [None]:
# Show which TensorFlow version this notebook is running against.
print(tf.__version__)

In [None]:
# Seed TensorFlow's global random generator with a fixed value (12).
tf.random.set_seed(12)

 ## Importing kaggle Data Train_Val Set

### Use /tmp to create temporary folder

In [None]:
# Spreadsheet that lists the NORMAL-class images of the dataset.
PATH_TO_METADATA = "../input/covid19-radiography-database/COVID-19_Radiography_Dataset/Normal.metadata.xlsx"

# Load the metadata into a DataFrame and preview its first five rows.
df = pd.read_excel(io=PATH_TO_METADATA)
df.head(n=5)

In [None]:
# List what is currently in /tmp, where the working copy of the data is staged.
!ls /tmp

In [None]:
!mkdir /tmp/Xray_train_data

In [None]:
# Recursively copy the COVID "images" folder from the input dataset into the staging folder.
!cp -R "../input/covid19-radiography-database/COVID-19_Radiography_Dataset/COVID/images" "/tmp/Xray_train_data/"

In [None]:
# Rename the copied "images" folder to "COVID".
# NOTE(review): presumably so the folder name can serve as the class label — confirm.
# NOTE(review): not re-runnable as written — if "COVID" already exists, mv nests "images" inside it.
!mv "/tmp/Xray_train_data/images" "/tmp/Xray_train_data/COVID"

In [None]:
# Count the entries in the staged COVID folder.
!ls -1 "/tmp/Xray_train_data/COVID"| wc -l 

In [None]:
 !mkdir "/tmp/Xray_train_data/NORMAL"

In [None]:
import os
import shutil
# Copy a subset of NORMAL images into the staging folder.
# The cap equals the number of COVID images (3616) so both classes are the same size.
NORMAL_SAMPLE_COUNT = 3616
NORMAL_SOURCE_DIR = "../input/covid19-radiography-database/COVID-19_Radiography_Dataset/Normal/images"
NORMAL_TARGET_DIR = "/tmp/Xray_train_data/NORMAL"

cnt = 0
# head() limits the loop to the first NORMAL_SAMPLE_COUNT metadata rows instead
# of walking every remaining row after the cap has been reached.
for _, row in df.head(NORMAL_SAMPLE_COUNT).iterrows():
    # Build "<Name>.<ext>": capitalize() upper-cases the first character and
    # lower-cases the rest; the extension is lower-cased.
    # NOTE(review): presumably this matches the on-disk file naming — confirm.
    filename = row["FILE NAME"].capitalize() + "." + row["FORMAT"].lower()
    shutil.copy2(os.path.join(NORMAL_SOURCE_DIR, filename),
                 os.path.join(NORMAL_TARGET_DIR, filename))
    cnt += 1

# Number of NORMAL images actually copied.
print(cnt)

In [None]:
# Show the contents of the staging folder.
!ls "/tmp/Xray_train_data"

In [None]:
# Count the staged COVID images.
!ls -1 "/tmp/Xray_train_data/COVID"| wc -l 

In [None]:
# Count the staged NORMAL images; this should equal the COVID count above.
!ls -1 "/tmp/Xray_train_data/NORMAL"| wc -l 

### To deal with the imbalanced dataset, I took the simple route: undersampling the NORMAL class to match the number of COVID images.

## **Starting Actual work**

In [None]:
# Root folder holding the staged COVID and NORMAL image sub-folders.
train_data_dir= "/tmp/Xray_train_data"

In [None]:
# Target (height, width) every image is resized to before entering the network.
IMAGE_SIZE = (256, 256)
# The same size with a channel dimension of 3 appended.
IMAGE_SHAPE = IMAGE_SIZE + (3,)

In [None]:
import keras
# Preprocessing pipeline that runs inside the model: a random rotation
# followed by rescaling of the pixel values.
# Built with tf.keras.Sequential so it comes from the same Keras implementation
# as the layers it contains and the model that embeds it.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.experimental.preprocessing.RandomRotation(
        # Bounds are fractions of a full turn (2*pi): from -0.2 to +0.3.
        factor=(-0.2, 0.3),
        fill_mode='reflect',
        interpolation='bilinear',
        seed=None
    ),
    tf.keras.layers.experimental.preprocessing.Rescaling(
        # 1/255 maps 8-bit pixel values from [0, 255] into [0, 1].
        # (Writing 1/.255 instead evaluates to ~3.92 and blows inputs up to ~1000.)
        scale=1./255,
        offset=0.0
    ),
])

In [None]:
# Small CNN for binary classification with a single sigmoid output:
# augmentation -> conv -> pool -> conv -> pool -> flatten -> dense -> dropout -> dense.
# Dropout changed from 0.1 to 0.2
model=tf.keras.Sequential([
    # Declare the input shape once, up front. An `input_shape` argument on a
    # layer that is not first in a Sequential model is ignored.
    tf.keras.Input(shape=IMAGE_SHAPE),
    data_augmentation,
    tf.keras.layers.Conv2D(8, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding="valid"),
    tf.keras.layers.Conv2D(16, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding="valid"),
    tf.keras.layers.Flatten(),
    # ReLU gives the hidden dense layer a non-linearity; with no activation it
    # is only a linear projection in front of the sigmoid output.
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(.2),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [None]:
# Defining optimizer
from tensorflow.keras.optimizers import Adam

# Binary cross-entropy pairs with the model's single sigmoid output unit.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics = ['accuracy'])

In [None]:
# Directing Images to train folder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
img_height, img_width= IMAGE_SIZE
batch_size=16
# Reserve 30% of the images for validation; both generators share this split.
train_datagen = ImageDataGenerator(validation_split=0.3) # set validation split

# Training batches: images resized to IMAGE_SIZE, one binary label per image.
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    subset='training') # set as training data
# Splitting images for validation set
# shuffle=False keeps the batches in file order, so the model's predictions
# line up with validation_generator.classes when the confusion matrix is built.
validation_generator = train_datagen.flow_from_directory(
    train_data_dir, # same directory as training data
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False,
    subset='validation') # set as validation data


In [None]:
# Training the model
# Stop once val_loss has not improved for 4 consecutive epochs, and roll the
# model back to the weights of its best epoch so later evaluation does not run
# on the weights of the final, overfit epochs.
es = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', mode = 'min', verbose = 2,
                                      patience = 4, restore_best_weights = True)

# Train for at most 30 epochs; trainer.history holds the per-epoch metrics.
trainer=model.fit(train_generator,validation_data=validation_generator,epochs=30, callbacks = [es])

In [None]:
# After 17 epochs the model started to overfit, so early stopping halted training

In [None]:
# Training loss against validation loss, one point per epoch
history = trainer.history
fig, ax = plt.subplots(figsize=(10, 5))
for key in ("loss", "val_loss"):
    ax.plot(history[key], label=key)
ax.legend()

In [None]:
# Training accuracy against validation accuracy, one point per epoch
history = trainer.history
fig, ax = plt.subplots(figsize=(10, 5))
for key in ("accuracy", "val_accuracy"):
    ax.plot(history[key], label=key)
ax.legend(loc='upper left')

In [None]:
# Predicted probability for every validation image.
# Model.predict accepts a generator directly; predict_generator is deprecated.
probabilities = model.predict(validation_generator)

In [None]:
# Display the raw sigmoid outputs.
probabilities

In [None]:
# Ground-truth class index of every validation image, as recorded by the generator.
# NOTE(review): these labels only line up with `probabilities` if the validation
# generator yields images in the same order (i.e. does not shuffle) — confirm.
y_true = validation_generator.classes

In [None]:
# Threshold the sigmoid outputs at 0.5 to get a boolean class prediction.
y_pred = np.greater(probabilities, 0.5)

In [None]:
# Display the thresholded predictions.
y_pred

In [None]:
from sklearn.metrics import confusion_matrix

# Cross-tabulate true labels against predicted labels and print the raw counts.
cf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred)
print(cf_matrix)

In [None]:
import seaborn as sns

# Express every cell as a share of all validation samples, then draw the matrix.
cell_share = cf_matrix / cf_matrix.sum()
sns.heatmap(cell_share, annot=True, fmt='.2%', cmap='Blues')
# In the recorded run this showed 16.85% false positives and 24.72% false negatives.
# NOTE(review): those two shares add up to ~41.6% errors, which does not match the
# 81.13% validation accuracy quoted at the top of the notebook — verify that the
# predictions and y_true are in the same order before trusting this matrix.

#### Version 3 does not have any significant update. Just fixed some folder issues.
#### Thanks to @sarques

## If this helped, do give an upvote. It means a lot.