# Acknowledgements

- Dataset from https://github.com/ieee8023/covid-chestxray-dataset
- COVID-19 Detector code from https://github.com/JordanMicahBennett/SMART-CT-SCAN_BASED-COVID19_VIRUS_DETECTOR/

# Imports

In [105]:
import os
import random
#to suppress un-necessary warnings
import warnings  
warnings.filterwarnings('ignore')

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, accuracy_score, r2_score, classification_report
from sklearn.metrics import confusion_matrix
import keras
import tensorflow as tf
import tensorflow.keras.optimizers as optimizers
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dropout, Input, BatchNormalization, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array


In [106]:
# Root directory of the chest-xray-pneumonia images; later cells join
# 'train', 'val' and 'test' onto this path.
img_dir = '../input/chest-xray-pneumonia/chest_xray/chest_xray/'
# for root, dirs, files in os.walk(input_path_b):
#     print(root, dirs, files)

In [107]:
# Show the entries directly under the dataset root.
os.listdir(img_dir)

In [108]:
# Show the entries of the 'test' split (one sub-folder per class is expected).
os.listdir(os.path.join(img_dir, 'test'))

In [109]:
# Show the image files stored for the NORMAL class of the 'val' split.
os.listdir(os.path.join(img_dir, 'val/NORMAL'))

### Directory structure
- img_dir
    - train
        - PNEUMONIA
        - NORMAL
    - val
        - PNEUMONIA
        - NORMAL
    - test
        - PNEUMONIA
        - NORMAL

## Prep data

In [110]:
# Setting seeds for reproducibility.
# Python's built-in RNG is seeded as well because a later cell picks the
# displayed test images with the `random` module.
seed = 232
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

# Hyperparameters
img_size = 150   # images are resized to img_size x img_size
batch_size = 8   # batch size handed to the data generators
epochs = 100     # upper bound on the number of epochs passed to fit()

In [111]:
def get_data_generators(img_dir, batch_size, img_size):
    """
    Build the data sources for the dataset rooted at ``img_dir``.

    Returns the 4-tuple ``(train_generator, val_generator, test_imgs, test_targets)``:

    - ``train_generator`` / ``val_generator`` are ``flow_from_directory``
      iterators in binary class mode, or ``None`` when the 'train' / 'val'
      sub-directory does not exist.
    - ``test_imgs`` is an array of every image under 'test/NORMAL' and
      'test/PNEUMONIA', resized to ``img_size`` x ``img_size`` and divided by 255.
    - ``test_targets`` holds 0 for NORMAL images and 1 for PNEUMONIA images.

    For the generators we could use
    tf.keras.preprocessing.image_dataset_from_directory,
    but this does not provide augmentation on the fly.
    """
    target_shape = (img_size, img_size)

    # Only the training stream is augmented; val/test images are just rescaled.
    train_gen = ImageDataGenerator(rescale=1./255, vertical_flip=True, zoom_range=0.3)
    val_test_gen = ImageDataGenerator(rescale=1./255)

    def flow_if_present(image_gen, split):
        # A missing split directory yields None instead of an error.
        split_dir = os.path.join(img_dir, split)
        if not os.path.isdir(split_dir):
            return None
        return image_gen.flow_from_directory(
            split_dir,
            target_size=target_shape,
            batch_size=batch_size,
            class_mode='binary')

    train_generator = flow_if_present(train_gen, 'train')
    val_generator = flow_if_present(val_test_gen, 'val')

    # The test split is loaded eagerly into memory, class folder by class folder.
    test_imgs, test_targets = [], []
    for label, class_name in enumerate(('NORMAL', 'PNEUMONIA')):
        class_dir = os.path.join(img_dir, 'test', class_name)
        for file_name in os.listdir(class_dir):
            loaded = load_img(os.path.join(class_dir, file_name), target_size=target_shape)
            pixels = img_to_array(loaded)
            test_imgs.append(pixels.astype('float')/255)
            test_targets.append(label)

    return train_generator, val_generator, np.array(test_imgs), np.array(test_targets)
        
        

In [112]:
# Build the train/val generators and load the whole test split into memory
# for the pneumonia dataset.
train_generator, val_generator, test_imgs, test_targets = get_data_generators(img_dir, batch_size, img_size)

In [113]:
# Sanity check on one loaded test image; expected to be
# (img_size, img_size, channels) given the target_size used when loading.
test_imgs[0].shape

# Model

In [114]:
def build_model(img_size):
    """
    Build the (uncompiled) binary-classification CNN used in this notebook.

    Three Conv2D -> BatchNormalization -> MaxPool2D stages (32, 32 and 64
    filters) feed a Flatten -> Dense(64) -> Dropout(0.5) -> Dense(10) ->
    Dense(1, sigmoid) head.

    Parameters
    ----------
    img_size : int
        Height and width of the square 3-channel input, i.e. the input
        shape is (img_size, img_size, 3).

    Returns
    -------
    Sequential
        The assembled model; compiling it is left to the caller.
    """
    # The kernel size is passed as a single tuple on purpose. In the tf.keras
    # signature Conv2D(filters, kernel_size, strides, ...), the Keras-1 style
    # call Conv2D(32, 3, 3) means kernel_size=3 AND strides=3, which shrinks
    # every feature map by an extra factor of three per stage.
    kernel = (3, 3)

    model = Sequential()
    model.add(Conv2D(32, kernel, activation='relu', padding='same', input_shape=(img_size, img_size, 3)))
    model.add(BatchNormalization())
    model.add(MaxPool2D((2,2)))

    model.add(Conv2D(32, kernel, activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPool2D((2,2)))

    model.add(Conv2D(64, kernel, activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPool2D((2,2)))

    # Classification head: one sigmoid unit for the binary NORMAL/PNEUMONIA label.
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    return model

## Lung Pneumonia Detection

In [115]:
optim = Adam()
# Keep the model with the lowest validation loss on disk as model.h5.
checkpoint = ModelCheckpoint(filepath='model.h5', monitor='val_loss', save_best_only=True, save_weights_only=False)
# In 'min' mode, the learning rate is reduced when the monitored quantity has
# stopped decreasing. Its patience must be shorter than the early-stopping
# patience below; with equal values the reduction would only fire in the very
# epoch training stops and never take effect.
lr_reduce = ReduceLROnPlateau(monitor='val_loss', patience=2, mode='min')
# Stops training when the monitored quantity has stopped decreasing.
early_stop = EarlyStopping(monitor='val_loss', patience=5, mode='min')

model = build_model(img_size)
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()
model.compile(optimizer=optim, loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(
    train_generator,
    steps_per_epoch = train_generator.samples//batch_size,
    epochs = epochs,
    validation_data=val_generator,
    validation_steps=val_generator.samples // batch_size, callbacks=[checkpoint, lr_reduce, early_stop], verbose=0)


In [116]:
# Training curves for the pneumonia model: one panel per metric,
# each showing the train and validation series.
fig, ax = plt.subplots(1, 2, figsize=(10, 3))
ax = ax.ravel()
for panel, met in zip(ax, ('accuracy', 'loss')):
    train_series = history.history[met]
    val_series = history.history['val_' + met]
    panel.plot(train_series)
    panel.plot(val_series)
    panel.set_title('Model {}'.format(met))
    panel.set_xlabel('epochs')
    panel.set_ylabel(met)
    panel.legend(['train', 'val'])

In [117]:
# Predict the whole test set in one call (instead of one predict() call per
# image) and turn the sigmoid outputs into 0/1 labels by rounding.
preds = np.round(model.predict(test_imgs)).ravel().astype(int)
# scikit-learn's metrics take (y_true, y_pred) in that order; passing them the
# other way round transposes the confusion matrix and swaps precision/recall.
print("Confusion matrix is:\n", confusion_matrix(test_targets, preds))
print(classification_report(test_targets, preds))

## COVID-19 Lung Pneumonia Detection

In [118]:
# Root directory of the COVID-19 X-ray dataset; split folders are resolved relative to it.
img_dir_covid = '../input/covid19-xray-dataset-train-test-sets/xray_dataset_covid19/'

In [119]:
# Same loader as for the pneumonia data. A generator is None when the
# corresponding 'train' / 'val' folder does not exist under img_dir_covid.
covid_train_generator, covid_val_generator, covid_test_imgs, covid_test_targets = get_data_generators(img_dir_covid, batch_size, img_size)

In [120]:
# Use os.path.join like the other cells, so the lookup does not depend on
# img_dir_covid ending with a path separator.
os.listdir(os.path.join(img_dir_covid, 'test'))

In [121]:
# Show five randomly chosen test images per dataset, one dataset per row.
test_sets = [test_imgs, covid_test_imgs]
fig, axs = plt.subplots(2, 5, figsize=(18,4))
fig.suptitle("First row: chest-xray-pneumonia\n Second row: covid19-xray-dataset")
for row, images in enumerate(test_sets):
    for col in range(5):
        # randrange excludes the upper bound. random.randint(0, len(images))
        # can return len(images) itself, which indexes one past the end.
        ind = random.randrange(len(images))
        axs[row][col].imshow(images[ind])


### First: evaluating the model trained on the pneumonia data on this new data, without any retraining

In [122]:
# Rebuild the architecture and load the best checkpoint saved during the
# pneumonia training run.
covidmodel = build_model(img_size)
# A fresh optimizer instance is used here: `optim` has already been used to
# train `model` and should not be shared between two models.
covidmodel.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
covidmodel.load_weights('model.h5')

# Evaluate `covidmodel` (the checkpointed weights), not `model`, predicting
# the whole COVID test set in a single call.
covid_preds = np.round(covidmodel.predict(covid_test_imgs)).ravel().astype(int)
# scikit-learn's metrics take (y_true, y_pred) in that order.
print(confusion_matrix(covid_test_targets, covid_preds))
print(classification_report(covid_test_targets, covid_preds))

### Second: fine-tuning the last layers of the model on this data

In [123]:
# get_data_generators returns None for the validation generator when the
# dataset has no 'val' folder. Callbacks that watch 'val_loss' would then never
# see their metric (no checkpoint written, no early stop), so fall back to the
# training loss in that case.
has_val = covid_val_generator is not None
monitor = 'val_loss' if has_val else 'loss'

checkpoint = ModelCheckpoint(filepath='covid_model.h5', monitor=monitor, save_best_only=True, save_weights_only=False)
# Stops training when the monitored quantity has stopped decreasing.
early_stop = EarlyStopping(monitor=monitor, patience=5, mode='min')
# ReduceLROnPlateau is deliberately not used in this run: it works by
# overwriting the optimizer's learning rate, which is not possible when the
# learning rate is driven by a LearningRateSchedule as below.

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01, # 1e-2
    decay_steps=10000,
    decay_rate=0.9)
optim = SGD(learning_rate=lr_schedule)

# Freeze everything except the last few layers so only those are fine-tuned.
last_few_layers = 5
for layer in covidmodel.layers[:-last_few_layers]:
    layer.trainable = False
# for l in covidmodel.layers:
#     print(l.name, l.trainable)
# Re-compile so the changed `trainable` flags and the new optimizer take effect.
covidmodel.compile(optimizer=optim, loss='binary_crossentropy', metrics=['accuracy'])

# Validation data is only passed when a validation generator exists.
fit_kwargs = {}
if has_val:
    fit_kwargs['validation_data'] = covid_val_generator
    fit_kwargs['validation_steps'] = max(1, covid_val_generator.samples // batch_size)

history_covid = covidmodel.fit(
    covid_train_generator,
    steps_per_epoch = covid_train_generator.samples//batch_size,
    epochs = epochs,
    callbacks=[checkpoint, early_stop],
    **fit_kwargs)

In [124]:
# Fine-tuning curves: one panel per metric, training series only.
# The validation lines stay disabled; history_covid only contains 'val_*'
# entries when fit() was given validation data.
fig, ax = plt.subplots(1, 2, figsize=(10, 3))
ax = ax.ravel()
for panel, met in zip(ax, ('accuracy', 'loss')):
    train_series = history_covid.history[met]
    panel.plot(train_series)
    #panel.plot(history_covid.history['val_' + met])
    panel.set_title('Model {}'.format(met))
    panel.set_xlabel('epochs')
    panel.set_ylabel(met)
    #panel.legend(['train', 'val'])

In [125]:
# Evaluate the fine-tuned model on the COVID test set: predict all images in
# one call and round the sigmoid outputs to 0/1 labels.
covid_preds = np.round(covidmodel.predict(covid_test_imgs)).ravel().astype(int)
# scikit-learn's metrics take (y_true, y_pred) in that order; the reverse
# order transposes the confusion matrix and swaps precision/recall.
print(confusion_matrix(covid_test_targets, covid_preds))
print(classification_report(covid_test_targets, covid_preds))

In [126]:

# lr_schedule = keras.optimizers.schedules.ExponentialDecay(
#     initial_learning_rate=1e-2,
#     decay_steps=10000,
#     decay_rate=0.9)
# optimizer = keras.optimizers.SGD(learning_rate=lr_schedule)

# tf.keras.optimizers.schedules.CosineDecay(
#     initial_learning_rate, decay_steps, alpha=0.0, name=None
# )

# References

[1] Joseph Paul Cohen and Paul Morrison and Lan Dao. COVID-19 image data collection, arXiv, 2020. https://github.com/ieee8023/covid-chestxray-dataset

[2] https://github.com/JordanMicahBennett/SMART-CT-SCAN_BASED-COVID19_VIRUS_DETECTOR/