# Covid-19 Detection

## Pablo Lázaro Herrasti and Rubén Barco Terrones

### Import

In [1]:
import pandas as pd
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from tensorflow.keras.preprocessing import image
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras import applications
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential, Model 
from tensorflow.keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D,Conv2D,MaxPooling2D,BatchNormalization
from tensorflow.keras import backend as k 
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

#### Directories

In [35]:
# Root folder of the COVID-19 Radiography Database (note the trailing slash).
dir_covid = 'D:/Covid-19 Data/COVID-19 Radiography Database/'

# Per-class image folders. Because the root ends with '/', os.path.join adds
# no extra separator and the resulting strings are plain concatenations.
dir_covid_images = os.path.join(dir_covid, 'COVID-19/')
dir_covidaug_images = os.path.join(dir_covid, 'COVID_AUG/')
dir_normal_images = os.path.join(dir_covid, 'NORMAL/')
dir_pneumonia_images = os.path.join(dir_covid, 'Viral Pneumonia/')

# Folders that are read together with their metadata (augmented folder excluded).
all_dir_images = [
    dir_normal_images,
    dir_covid_images,
    dir_pneumonia_images,
]

# Per-class metadata spreadsheets.
dir_covid_metadata = os.path.join(dir_covid, 'COVID-19.metadata.xlsx')
dir_normal_metadata = os.path.join(dir_covid, 'NORMAL.metadata.xlsx')
dir_pneumonia_metadata = os.path.join(dir_covid, 'Viral Pneumonia.matadata.xlsx')

#### Data augmentation

In [32]:
# Augmentation setup, left commented out: builds an Augmentor pipeline over
# the original COVID-19 image folder.
# import Augmentor
# p = Augmentor.Pipeline(dir_covid_images)

Initialised with 219 image(s) found.
Output directory set to D:/Covid-19 Data/COVID-19 Radiography Database/COVID-19/output.

In [33]:
# Augmentation steps for pipeline `p`, left commented out: rotations of up to
# 15 degrees left or right, applied with probability 0.7.
# NOTE(review): `metadata_covid` is only defined in a later cell, so these
# lines cannot run top-to-bottom on a fresh kernel as written.
# p.rotate(probability=0.7, max_left_rotation=15, max_right_rotation=15)
# p.sample(len(metadata_covid)*2)
# p.process()

Processing <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1024x1024 at 0x1689540BEF0>: 100%|██████████| 438/438 [00:30<00:00, 14.29 Samples/s]
Processing <PIL.Image.Image image mode=RGB size=1024x1024 at 0x168955163C8>: 100%|██████████| 219/219 [00:15<00:00, 14.45 Samples/s]                


#### Reading and preprocessing data

In [36]:
# Load each class's metadata spreadsheet and tag every row with the integer
# class id used as the training target: 0 = COVID-19, 1 = normal,
# 2 = viral pneumonia.
metadata_covid = pd.read_excel(dir_covid_metadata).assign(label=0)
metadata_normal = pd.read_excel(dir_normal_metadata).assign(label=1)
metadata_pneumonia = pd.read_excel(dir_pneumonia_metadata).assign(label=2)

# Look-up from image folder to the metadata table describing its images.
metadata_all = {
    dir_covid_images: metadata_covid,
    dir_normal_images: metadata_normal,
    dir_pneumonia_images: metadata_pneumonia,
}

In [39]:
# Read the images listed in the metadata, convert each one to a scaled pixel
# array and record its class label.
Data = []
Label = []
# NOTE(review): `directory` is not read anywhere in this notebook; it is kept
# only so the notebook namespace stays unchanged.
directory = 'D:/Data_Master/Deep Learning/ChestXRay2017/All'

for dir_images in all_dir_images:
    # os.listdir returns entries in arbitrary order, so the sample order (and
    # with it the seeded train/validation/test split) could differ between
    # machines. Sorting pins the order. Sub-directories are skipped because
    # they cannot be loaded as images.
    files = sorted(
        entry
        for entry in os.listdir(dir_images)
        if os.path.isfile(os.path.join(dir_images, entry))
    )
    # One sample per metadata row; the row index picks the file.
    # Assumes the metadata index runs 0..n-1 -- TODO confirm.
    for index, row in metadata_all[dir_images].iterrows():
        Label.append(row['label'])
        filename = os.path.join(dir_images, files[index])
        im = image.load_img(filename, target_size=(224, 224))
        # Convert the loaded image to a (224, 224, 3) array ...
        im = np.reshape(im, (224, 224, 3))
        # ... and scale the pixel values from 0..255 to 0..1.
        im = im.astype('float32') / 255
        Data.append(im)

In [40]:
# Keep 70% of the samples for training and hold out 30%, stratified by class.
X_train, X_1, y_train, y_1 = train_test_split(
    np.array(Data),
    np.array(Label),
    test_size=0.3,
    random_state=42,
    stratify=Label,
)

# Split the held-out 30% in half: validation (X_cv) and test (X_test),
# again stratified by class.
X_cv, X_test, y_cv, y_test = train_test_split(
    X_1,
    y_1,
    test_size=0.5,
    random_state=42,
    stratify=y_1,
)

In [49]:
# Load the augmented COVID-19 images (class label 0) and append them to the
# training split only.
# NOTE(review): this cell extends X_train / y_train in place, so running it
# twice appends the augmented samples twice.
# NOTE(review): the commented-out Augmentor pipeline pointed at the full
# COVID-19 folder; if COVID_AUG was generated that way it may contain rotated
# copies of validation/test images -- confirm.
Data = []
Label = []
files = os.listdir(dir_covidaug_images)
for aug_name in files:
    Label.append(0)
    aug_path = os.path.join(dir_covidaug_images, aug_name)
    aug_img = image.load_img(aug_path, target_size=(224, 224))
    # Convert to a (224, 224, 3) float32 array scaled to 0..1.
    aug_pixels = np.reshape(aug_img, (224, 224, 3)).astype('float32') / 255
    Data.append(aug_pixels)

X_train = np.array(list(X_train) + Data)
y_train = np.array(list(y_train) + Label)

In [51]:
# Put the image tensors into the layout the Keras backend expects and record
# the matching per-sample input shape for the model.
img_width = 224
img_height = 224

# Use the backend that ships with tensorflow.keras (the package the model is
# built with) instead of the standalone `keras` package, which may not be
# installed and is a separate code path from the one the model uses.
K = tf.keras.backend

if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
    # The arrays were built as (samples, rows, cols, channels). Moving the
    # channel axis to the front needs a transpose; a reshape would keep the
    # memory order and scramble the pixel values.
    X_train = np.transpose(X_train, (0, 3, 1, 2))
    X_cv = np.transpose(X_cv, (0, 3, 1, 2))
    X_test = np.transpose(X_test, (0, 3, 1, 2))

else:
    input_shape = (img_width, img_height, 3)
    # Already channels-last; the reshape only asserts the expected shape.
    X_train = X_train.reshape(X_train.shape[0], img_width, img_height, 3)
    X_cv = X_cv.reshape(X_cv.shape[0], img_width, img_height, 3)
    X_test = X_test.reshape(X_test.shape[0], img_width, img_height, 3)

# Drop the name of the Python list of augmented images.
del Data

#### Architecture for multiclass classification (COVID-19 / normal / viral pneumonia)

In [None]:
# Small CNN: three conv + max-pool stages (32, 64, 128 filters), then a dense
# head with dropout and a 3-way softmax, one output per class label.
model = Sequential([
    Conv2D(32, 3, input_shape=input_shape, activation='relu', padding='same'),
    MaxPooling2D(2),
    Conv2D(64, 3, activation='relu', padding='same'),
    MaxPooling2D(2),
    Conv2D(128, 3, activation='relu', padding='same'),
    MaxPooling2D(2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(3, activation='softmax'),
])

# Integer class labels are used as targets, hence the sparse loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)
model.summary()

#### Training model

In [63]:
# Training hyper-parameters.
n_epochs = 3
batch_size = 32

# Fit on the (augmented) training split and monitor the validation split.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    validation_data=(X_cv, y_cv),
    shuffle=True,
)

Train on 2690 samples, validate on 436 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3




<tensorflow.python.keras.callbacks.History at 0x1689433e278>

In [57]:
# Save the trained model to disk (the .h5 suffix presumably selects Keras'
# HDF5 format -- confirm for the installed version).
# NOTE(review): absolute, user-specific path; adjust before running elsewhere.
model.save('C:/Users/polaz/Documents/Covid 19 Detection/Models/model_covid_2.h5')

### General accuracy

##### Multiclass prediction

In [65]:
# Overall accuracy on each split. The predicted class is the index of the
# largest softmax output. np.argmax over model.predict() replaces
# Sequential.predict_classes, which was removed in TensorFlow 2.6, and gives
# the same result for a multi-unit softmax output.
# accuracy_score takes (y_true, y_pred).
train_acc = accuracy_score(y_train, np.argmax(model.predict(X_train), axis=-1))
valid_acc = accuracy_score(y_cv, np.argmax(model.predict(X_cv), axis=-1))
test_acc = accuracy_score(y_test, np.argmax(model.predict(X_test), axis=-1))
print("The final train accuracy is ",train_acc*100,"%")
print("The final validation accuracy is ",valid_acc*100,"%")
print("The final test accuracy is ",test_acc*100,"%")

KeyboardInterrupt: 

##### Binary prediction

In [None]:
# Accuracy after thresholding the model outputs at 0.5.
# NOTE(review): this looks intended for a model with a single sigmoid output.
# The model defined in this notebook ends in Dense(3, softmax), so predict()
# returns three columns per sample while the targets are 1-D class ids;
# confirm which model this cell is meant for before running it.
train_acc = accuracy_score((model.predict(X_train)>0.5).astype(int), y_train)
valid_acc = accuracy_score((model.predict(X_cv)>0.5).astype(int), y_cv)
test_acc = accuracy_score((model.predict(X_test)>0.5).astype(int), y_test)
print("The final train accuracy is ",train_acc*100,"%")
print("The final validation accuracy is ",valid_acc*100,"%")
print("The final test accuracy is ",test_acc*100,"%")

#### Per-class accuracy (COVID-19, normal, viral pneumonia)

In [59]:
# Accuracy restricted to test samples whose true label is 0 (COVID-19), i.e.
# the share of COVID-19 cases the model identifies (recall for this class).
X_covid = X_test[y_test == 0]
y_covid = y_test[y_test == 0]
# np.argmax over model.predict() replaces Sequential.predict_classes, which
# was removed in TensorFlow 2.6.
test_acc = accuracy_score(y_covid, np.argmax(model.predict(X_covid), axis=-1))
print("The final test accuracy for COVID-19 is ",test_acc*100,"%")

The final test accuracy for COVID-19 is  93.93939393939394 %


In [60]:
# Accuracy restricted to test samples whose true label is 1 (normal), i.e.
# the share of normal cases the model identifies (recall for this class).
X_normal = X_test[y_test == 1]
y_normal = y_test[y_test == 1]
# np.argmax over model.predict() replaces Sequential.predict_classes, which
# was removed in TensorFlow 2.6.
test_acc = accuracy_score(y_normal, np.argmax(model.predict(X_normal), axis=-1))
print("The final test accuracy for NORMAL is ",test_acc*100,"%")

The final test accuracy for NORMAL is  95.52238805970148 %


In [61]:
# Accuracy restricted to test samples whose true label is 2 (viral
# pneumonia), i.e. the share of pneumonia cases the model identifies (recall
# for this class).
X_pneumonia = X_test[y_test == 2]
y_pneumonia = y_test[y_test == 2]
# np.argmax over model.predict() replaces Sequential.predict_classes, which
# was removed in TensorFlow 2.6.
test_acc = accuracy_score(y_pneumonia, np.argmax(model.predict(X_pneumonia), axis=-1))
print("The final test accuracy for PNEUMONIA is ",test_acc*100,"%")

The final test accuracy for PNEUMONIA is  95.54455445544554 %


#### Reading another database for testing

In [79]:
# Folder with additional COVID-19 images used as an external test set.
# dir_covid already ends with '/', so no leading slash is added here; the
# original produced a doubled '//' in the path.
dir_data_extra = dir_covid + 'Extra COVID-19'

In [87]:
# Load every image of the extra COVID-19 set as a (224, 224, 3) float32 array
# scaled to 0..1; all of these samples carry class label 0.
images = os.listdir(dir_data_extra)

Data_ext = np.array([
    np.reshape(
        image.load_img(os.path.join(dir_data_extra, extra_name), target_size=(224, 224)),
        (224, 224, 3),
    ).astype('float32') / 255
    for extra_name in images
])
Label_ext = np.array([0] * len(images))

In [88]:
# Put the extra test images into the layout the Keras backend expects.
img_width = 224
img_height = 224

# Use the backend that ships with tensorflow.keras (the package the model is
# built with) instead of the standalone `keras` package, which may not be
# installed and is a separate code path from the one the model uses.
K = tf.keras.backend

if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
    # Data_ext is (samples, rows, cols, channels). Moving the channel axis to
    # the front needs a transpose; a reshape would scramble the pixel values.
    X_extra = np.transpose(Data_ext, (0, 3, 1, 2))

else:
    input_shape = (img_width, img_height, 3)
    # Already channels-last; the reshape only asserts the expected shape.
    X_extra = Data_ext.reshape(Data_ext.shape[0], img_width, img_height, 3)

y_extra = Label_ext
del Data_ext

In [89]:
# Accuracy on the extra COVID-19 set (every true label is 0).
# np.argmax over model.predict() replaces Sequential.predict_classes, which
# was removed in TensorFlow 2.6.
test_acc = accuracy_score(y_extra, np.argmax(model.predict(X_extra), axis=-1))
print("The final test accuracy for COVID-19 is ",test_acc*100,"%")

The final test accuracy for COVID-19 is  90.47619047619048 %
