# Covid-19 Detection

## Pablo Lázaro Herrasti and Rubén Barco Terrones

### Import

In [100]:
import pandas as pd
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from tensorflow.keras.preprocessing import image
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras import applications
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential, Model 
from tensorflow.keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D,Conv2D,MaxPooling2D,BatchNormalization
from tensorflow.keras import backend as k 
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

#### Directories

In [101]:
dir_covid = 'D:/Covid-19 Data/COVID-19 Radiography Database/'
dir_covid_images_train = dir_covid + 'COVID-19_TRAIN/'
#dir_covid_images_train = dir_covid + 'COVID-19_TRAIN_AUG/'
dir_covid_images_test = dir_covid + 'COVID-19_TEST/'
dir_covid_images_val = dir_covid + 'COVID-19_VAL/'
dir_normal_images = dir_covid + 'NORMAL/'
dir_pneumonia_images = dir_covid + 'Viral Pneumonia/'
all_dir_images = [dir_normal_images, dir_pneumonia_images]
dir_covid_metadata = dir_covid + 'COVID-19.metadata.xlsx'
dir_normal_metadata = dir_covid + 'NORMAL.metadata.xlsx'
dir_pneumonia_metadata = dir_covid + 'Viral Pneumonia.matadata.xlsx'

#### Reading and preprocessing data

In [102]:
# metadata_covid = pd.read_excel(dir_covid_metadata)
# metadata_covid['label'] = 0
metadata_normal = pd.read_excel(dir_normal_metadata)
metadata_normal['label'] = 1
metadata_pneumonia = pd.read_excel(dir_pneumonia_metadata)
metadata_pneumonia['label'] = 2
metadata_all = {dir_normal_images:metadata_normal, dir_pneumonia_images:metadata_pneumonia}

#### Data augmentation

In [98]:
import Augmentor
p = Augmentor.Pipeline(dir_covid_images_train)

Initialised with 153 image(s) found.
Output directory set to D:/Covid-19 Data/COVID-19 Radiography Database/COVID-19_TRAIN/output.

In [99]:
p.rotate(probability=0.7, max_left_rotation=15, max_right_rotation=15)
p.flip_left_right(probability=0.7)
p.sample(len(os.listdir(dir_covid_images_train))*4)
p.process()

Processing <PIL.Image.Image image mode=RGB size=1024x1024 at 0x1EE1203D0F0>: 100%|██████████| 616/616 [00:48<00:00, 12.78 Samples/s]                
Processing <PIL.Image.Image image mode=RGB size=1024x1024 at 0x1EE1200F3C8>: 100%|██████████| 153/153 [00:11<00:00, 12.80 Samples/s]                


In [103]:
#Reading Image data and converting it into pixels and separating class labels
Data=[]
Label=[]

for dir_images in all_dir_images:
    files = os.listdir(dir_images)
    for index, row in metadata_all[dir_images].iterrows():
        Label.append(row['label'])
        filename=os.path.join(dir_images, files[index])
        im=image.load_img(filename,target_size=(224, 224))
        im=np.reshape(im,(224,224,3))
        im=im.astype('float32') / 255
        Data.append(im)

In [104]:
#Train Test Split
X_train, X_1, y_train, y_1 = train_test_split(np.array(Data), np.array(Label), test_size=0.3, random_state=42,stratify=Label)

#Train Test Split
X_cv, X_test, y_cv, y_test = train_test_split(X_1, y_1, test_size=0.5, random_state=42,stratify=y_1)

In [106]:
# For data augmentation COVID with label 0

Data=[]
Label=[]

files = os.listdir(dir_covid_images_train)
for file in files:
    Label.append(0)
    filename=os.path.join(dir_covid_images_train, file)
    im=image.load_img(filename,target_size=(224, 224))
    im=np.reshape(im,(224,224,3))
    im=im.astype('float32') / 255
    Data.append(im)

y_train = np.array(list(y_train) + Label)
X_train = np.array(list(X_train) + Data)

Data=[]
Label=[]
files = os.listdir(dir_covid_images_val)
for file in files:
    Label.append(0)
    filename=os.path.join(dir_covid_images_val, file)
    im=image.load_img(filename,target_size=(224, 224))
    im=np.reshape(im,(224,224,3))
    im=im.astype('float32') / 255
    Data.append(im)

y_cv = np.array(list(y_cv) + Label)
X_cv = np.array(list(X_cv) + Data)

Data=[]
Label=[]
files = os.listdir(dir_covid_images_test)
for file in files:
    Label.append(0)
    filename=os.path.join(dir_covid_images_test, file)
    im=image.load_img(filename,target_size=(224, 224))
    im=np.reshape(im,(224,224,3))
    im=im.astype('float32') / 255
    Data.append(im)

y_test = np.array(list(y_test) + Label)
X_test = np.array(list(X_test) + Data)

In [107]:
img_width=224
img_height=224

from keras import backend as K

if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
    X_train=X_train.reshape(X_train.shape[0],3,img_width,img_height)
    X_cv=X_cv.reshape(X_cv.shape[0],3,img_width,img_height)
    X_test=X_test.reshape(X_test.shape[0],3,img_width,img_height)
    
else:
    input_shape = (img_width, img_height, 3)
    X_train=X_train.reshape(X_train.shape[0],img_width,img_height,3)
    X_cv=X_cv.reshape(X_cv.shape[0],img_width,img_height,3)
    X_test=X_test.reshape(X_test.shape[0],img_width,img_height,3)
    
del Data

In [110]:
len(X_cv)

436

#### Architecture for binary classification

In [87]:
model=Sequential()
model.add(Conv2D(32, 3, input_shape=input_shape, activation='relu', padding='same'))
model.add(MaxPooling2D(2))
model.add(Conv2D(64, 3, activation='relu', padding='same'))
model.add(MaxPooling2D(2))
model.add(Conv2D(128, 3, activation='relu', padding='same'))
model.add(MaxPooling2D(2))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(3, activation='softmax'))
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_15 (Conv2D)           (None, 224, 224, 32)      896       
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 112, 112, 32)      0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 112, 112, 64)      18496     
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 56, 56, 64)        0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 56, 56, 128)       73856     
_________________________________________________________________
max_pooling2d_17 (MaxPooling (None, 28, 28, 128)       0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 100352)            0         
__________

#### Training model

In [88]:
n_epochs = 3
batch_size = 32
model.fit(x=X_train, y=y_train, batch_size=batch_size, epochs=n_epochs, validation_data=(X_cv,y_cv), shuffle=True)

Train on 2533 samples, validate on 543 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3




<tensorflow.python.keras.callbacks.History at 0x1ee11c52fd0>

In [93]:
model.save('C:/Users/polaz/Documents/Covid 19 Detection/Models/model_covid_2.h5')

### General accuracy

##### Multiclass prediction

In [89]:
train_acc = accuracy_score(model.predict_classes(X_train), y_train)
valid_acc = accuracy_score(model.predict_classes(X_cv), y_cv)
test_acc = accuracy_score(model.predict_classes(X_test), y_test)
print("The final train accuracy is ",train_acc*100,"%")
print("The final validation accuracy is ",valid_acc*100,"%")
print("The final test accuracy is ",test_acc*100,"%")

The final train accuracy is  97.35491512041058 %
The final validation accuracy is  96.13259668508287 %
The final test accuracy is  96.35416666666666 %


##### Binary prediction

In [None]:
train_acc = accuracy_score((model.predict(X_train)>0.5).astype(int), y_train)
valid_acc = accuracy_score((model.predict(X_cv)>0.5).astype(int), y_cv)
test_acc = accuracy_score((model.predict(X_test)>0.5).astype(int), y_test)
print("The final train accuracy is ",train_acc*100,"%")
print("The final validation accuracy is ",valid_acc*100,"%")
print("The final test accuracy is ",test_acc*100,"%")

#### Covid accuracy

In [90]:
X_covid = X_test[y_test == 0]
y_covid = y_test[y_test == 0]
test_acc = accuracy_score(model.predict_classes(X_covid), y_covid)
print("The final test accuracy for COVID-19 is ",test_acc*100,"%")

The final test accuracy for COVID-19 is  95.37572254335261 %


In [95]:
len(y_test)

576

In [91]:
X_normal = X_test[y_test == 1]
y_normal = y_test[y_test == 1]
test_acc = accuracy_score(model.predict_classes(X_normal), y_normal)
print("The final test accuracy for NORMAL is ",test_acc*100,"%")

The final test accuracy for NORMAL is  99.00497512437812 %


In [92]:
X_pneumonia = X_test[y_test == 2]
y_pneumonia = y_test[y_test == 2]
test_acc = accuracy_score(model.predict_classes(X_pneumonia), y_pneumonia)
print("The final test accuracy for PNEUMONIA is ",test_acc*100,"%")

The final test accuracy for PNEUMONIA is  94.55445544554455 %


#### Reading other database to test

In [79]:
dir_data_extra = dir_covid + '/Extra COVID-19'

In [87]:
#Reading Image data and converting it into pixels and separating class labels
Data_ext=[]
Label_ext=[]
images = os.listdir(dir_data_extra)
for imag in images:
    Label_ext.append(0)
    filename=os.path.join(dir_data_extra, imag)
    im=image.load_img(filename,target_size=(224, 224))
    im=np.reshape(im,(224,224,3))
    im=im.astype('float32') / 255
    Data_ext.append(im)
    
Data_ext = np.array(Data_ext)
Label_ext = np.array(Label_ext)

In [88]:
img_width=224
img_height=224

from keras import backend as K

if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
    X_extra=Data_ext.reshape(Data_ext.shape[0],3,img_width,img_height)
    
else:
    input_shape = (img_width, img_height, 3)
    X_extra=Data_ext.reshape(Data_ext.shape[0],img_width,img_height,3)

y_extra = Label_ext
del Data_ext

In [89]:
test_acc = accuracy_score(model.predict_classes(X_extra), y_extra)
print("The final test accuracy for COVID-19 is ",test_acc*100,"%")

The final test accuracy for COVID-19 is  90.47619047619048 %
