## Import required packages

In [None]:
!pip install imutils

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, metrics
from tensorflow.keras.models import Model, load_model
from tensorflow.keras import callbacks
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from imutils import paths
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
import shutil
import cv2
import os

In [None]:
# Report the Keras and TensorFlow versions in use (Keras first), so results
# can be tied to a specific library release.
from tensorflow import keras

for _lib in (keras, tf):
    print(_lib.__version__)

In [None]:
# Root folder of the locally assembled image tree; the train/ and test/
# sub-folders (each with covid/ and normal/) live underneath it.
dataset_path = './dataset'

## Build Dataset

In [None]:
%%bash
# Start from a clean slate, then create one folder per split and class.
rm -rf dataset
for split in train test; do
    for label in covid normal; do
        mkdir -p "dataset/${split}/${label}"
    done
done

### Covid xray dataset

In [None]:
# Location of the COVID chest X-ray input dataset; metadata.csv and the
# images/ folder are read relative to this path.
covid_dataset_path = '../input/covid-chest-xray'

In [None]:
# Read the metadata table that lists the images of the COVID dataset.
csvPath = os.path.sep.join([covid_dataset_path, "metadata.csv"])
df = pd.read_csv(csvPath)

# NOTE(review): the filters on 'finding' == 'COVID-19' and 'view' == 'PA'
# are disabled, so every metadata row is kept regardless of diagnosis or
# view -- confirm this is intended, since all of these rows end up in the
# "covid" class.

# Hold out 20% of the rows for testing. Passing the frame once produces the
# same partition as splitting it against itself and discarding the copies.
df_train, df_test = train_test_split(df, test_size=0.20, random_state=42)


In [None]:
# How many COVID-19 rows exist per X-ray view (values of the 'view' column).
is_covid = df['finding'] == 'COVID-19'
df.loc[is_covid, 'view'].value_counts()

In [None]:
# Row counts, in order: training split, test split, full metadata table.
for frame in (df_train, df_test, df):
    print(len(frame))

In [None]:
def _copy_metadata_images(frame, split):
    """Copy the images listed in ``frame`` into ``<dataset_path>/<split>/covid``.

    Args:
        frame: metadata rows; each row's "filename" is resolved relative to
            the ``images`` folder of ``covid_dataset_path``.
        split: name of the destination split folder ("train" or "test").

    Returns:
        The number of image files that were actually copied.
    """
    copied = 0
    for _, row in frame.iterrows():
        # build the path to the input image file
        imagePath = os.path.sep.join([covid_dataset_path, "images", row["filename"]])

        # the metadata file lists some images that are not present on disk;
        # skip those rows instead of failing
        if not os.path.exists(imagePath):
            continue

        # keep only the bare file name so every image lands directly in the
        # destination folder
        filename = os.path.basename(row["filename"])
        outputPath = os.path.join(dataset_path, split, "covid", filename)

        # copy the image
        shutil.copy2(imagePath, outputPath)
        copied += 1
    return copied


# NOTE(review): df_train / df_test hold every metadata row (no filter on
# 'finding' is applied upstream), yet everything is stored under "covid" --
# confirm that non-COVID findings are meant to be part of this class.
# Total number of images copied across the train and test splits.
covid_count = (
    _copy_metadata_images(df_train, "train")
    + _copy_metadata_images(df_test, "test")
)

In [None]:
# Number of images copied into the covid folders (train and test combined).
covid_count

### Build normal xray dataset

In [None]:
# Root of the chest X-ray pneumonia input dataset; the "normal" class is
# built from images found under this path.
pneumonia_dataset_path ='../input/chest-xray-pneumonia/chest_xray'

In [None]:
from sklearn.model_selection import train_test_split

basePath = os.path.sep.join([pneumonia_dataset_path, "train", "NORMAL"])
# Directory listing order depends on the filesystem; sorting first makes the
# fixed random_state below yield the same train/test partition on every run.
imagePaths = sorted(paths.list_images(basePath))

# Hold out 20% of the NORMAL images for testing.
image_paths_train, image_paths_test = train_test_split(
    imagePaths, test_size=0.20, random_state=42)

# NOTE(review): nothing in this cell draws from Python's `random` module;
# the seed is kept only so that later code relying on it stays deterministic.
random.seed(42)


def _copy_normal_images(image_paths, split):
    """Copy each path in ``image_paths`` into ``<dataset_path>/<split>/normal``."""
    for imagePath in image_paths:
        # keep only the bare file name so the image lands directly in the
        # destination folder
        filename = os.path.basename(imagePath)
        outputPath = os.path.join(dataset_path, split, "normal", filename)

        # copy the image
        shutil.copy2(imagePath, outputPath)


_copy_normal_images(image_paths_train, "train")
_copy_normal_images(image_paths_test, "test")

In [None]:
# Total number of NORMAL images found (before the train/test split).
len(imagePaths)

## Data Generators

In [None]:
train_data_dir = f"{dataset_path}/train"
test_data_dir = f"{dataset_path}/test"
img_height = 64
img_width = 64
batch_size = 16

# Settings shared by all three directory iterators: images are resized to
# img_height x img_width, labels are binary, and the explicit class list
# fixes the label order (normal first, covid second).
flow_options = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    classes=['normal', 'covid'],
)

# The training folder feeds two generators: validation_split reserves a
# fraction of its images for validation.
train_datagen = ImageDataGenerator(validation_split=0.2)
test_datagen = ImageDataGenerator()

# Training portion of the train folder.
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    subset='training',
    **flow_options)

# Validation portion, drawn from the same train folder.
validation_generator = train_datagen.flow_from_directory(
    train_data_dir,
    subset='validation',
    **flow_options)

# Held-out test folder; shuffling is disabled so predictions keep the same
# order as test_generator.classes.
test_generator = test_datagen.flow_from_directory(
    test_data_dir,
    shuffle=False,
    **flow_options)



In [None]:
# Show the class-name -> label mapping of the validation generator
# (expected to follow the explicit classes list: normal, then covid).
validation_generator.class_indices

## Create Model

In [None]:
def get_model(res, verbose=0):
    """Build and compile a DenseNet121-based binary classifier.

    The ImageNet-initialised DenseNet121 backbone (no top, global average
    pooling) is followed by a 1024-unit ReLU layer and a single sigmoid
    output. The model is compiled with Adam (learning rate 1e-4), binary
    cross-entropy loss and an AUC metric named "auc".

    Args:
        res: side length of the square RGB input, i.e. inputs are
            (res, res, 3).
        verbose: when truthy, print the model summary.

    Returns:
        The compiled Keras model.
    """
    base_model = DenseNet121(
        input_shape=(res, res, 3),
        include_top=False,
        weights='imagenet',
        pooling='avg',
    )
    x = layers.Dense(1024, activation="relu")(base_model.output)
    output = layers.Dense(1, activation="sigmoid")(x)
    model = Model(inputs=base_model.input, outputs=output)
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=[metrics.AUC(name='auc')],
    )
    if verbose:
        # summary() prints by itself and returns None; wrapping it in
        # print() would emit a stray "None" line after the summary.
        model.summary()
    return model

## Compute Class Weights

In [None]:
from collections import Counter


def get_class_weights(train_gen):
    """Derive per-class weights that compensate for class imbalance.

    Each class is weighted by (size of the largest class) / (its own size),
    so the most frequent class gets 1.0 and rarer classes get more.

    Args:
        train_gen: object whose ``classes`` attribute holds one class id per
            training image.

    Returns:
        Dict mapping class id to its weight (also printed).
    """
    images_per_class = Counter(train_gen.classes)
    largest = float(max(images_per_class.values()))
    class_weights = {}
    for class_id, num_images in images_per_class.items():
        class_weights[class_id] = largest / num_images
    print(f'class_weights {class_weights}')
    return class_weights

## Train Model

In [None]:
def train_model(train_gen, val_gen, model, class_weights, epochs):
    """Fit ``model`` with early stopping and learning-rate reduction.

    Args:
        train_gen: training data passed to ``model.fit``.
        val_gen: validation data evaluated after each epoch.
        model: compiled Keras model; trained in place.
        class_weights: dict of class id -> loss weight.
        epochs: maximum number of epochs to run.

    Returns:
        The history object produced by ``model.fit`` (previously discarded),
        so callers can inspect or plot the training curves.
    """
    # Stop once val_loss has failed to improve by at least 0.001 for 2 epochs.
    es_cb = callbacks.EarlyStopping(monitor='val_loss', patience=2, min_delta=0.001)
    # Lower the learning rate after a single epoch without a 0.05 improvement.
    lr_cb = callbacks.ReduceLROnPlateau(patience=1, min_delta=.05)

    history = model.fit(
        train_gen,
        epochs=epochs,
        validation_data=val_gen,
        callbacks=[es_cb, lr_cb],
        class_weight=class_weights,
    )
    return history

## Test Model

In [None]:
import numpy as np
from sklearn.metrics import roc_auc_score


def test_model(model, test_gen):
    """Score ``model`` on ``test_gen`` and report the ROC AUC.

    Args:
        model: trained model exposing ``predict``.
        test_gen: test data whose ``classes`` attribute holds the true
            labels. It must not shuffle, otherwise the predictions would not
            line up with ``classes``.

    Returns:
        The ROC AUC score (also printed). Previously the value was only
        printed and could not be reused by the caller.
    """
    y_pred_prob = model.predict(test_gen)
    y_true = test_gen.classes
    auc_score = roc_auc_score(y_true, y_pred_prob)
    print(f'auc_score {auc_score}')
    return auc_score

## Execute

In [None]:
# Input resolution of the network (square images of res x res pixels).
# Keep it equal to the target size the data generators resize images to.
res = 64
model = get_model(res, verbose=0)

In [None]:
# Weight classes by their frequency in the training generator.
class_weights = get_class_weights(train_generator)

In [None]:
# Train for a single epoch, validating on the validation generator.
train_model(train_generator, validation_generator, model, class_weights, epochs=1)

In [None]:
# Report the ROC AUC on the held-out test generator.
test_model(model, test_generator)

## ROC Curve

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score
import matplotlib.pyplot as plt


def plot_roc(model, test_gen):
    """Draw the ROC curve of ``model`` on ``test_gen`` on the current axes.

    The curve is plotted together with the diagonal "chance" line, on a
    square-aspect grid with the legend in the lower-right corner.

    Args:
        model: trained model exposing ``predict``.
        test_gen: test data whose ``classes`` attribute holds the true labels.
    """
    scores = model.predict(test_gen)
    false_pos_rate, true_pos_rate, _ = roc_curve(test_gen.classes, scores)

    axes = plt.gca()
    axes.plot(false_pos_rate, true_pos_rate, label='ROC', linewidth=3)
    axes.set_xlabel('False Positive Rate')
    axes.set_ylabel('True Positive Rate')
    # Reference diagonal: the curve of a classifier that guesses at random.
    axes.plot(
        [0, 1], [0, 1],
        linestyle='--',
        linewidth=2,
        color='r',
        label='Chance',
        alpha=.8,
    )
    axes.grid(True)
    axes.set_aspect('equal')
    axes.legend(loc="lower right")
    

# Draw the ROC curve for the held-out test generator.
plot_roc(model, test_generator)

In [None]:
# Score the test set and keep the ROC points for later use:
# fp = false-positive rates, tp = true-positive rates, tr = thresholds.
y_pred_prob = model.predict(test_generator)
y_true = test_generator.classes
fp, tp, tr = roc_curve(y_true, y_pred_prob)

## Optimal Threshold

In [None]:
# Pick the threshold at which the true-positive rate exceeds the
# false-positive rate by the largest margin (Youden's J statistic).
# NOTE(review): the threshold is tuned on the same test set it is later
# evaluated on, which makes the reported metrics optimistic.
diff = np.subtract(tp, fp)
thr = tr[diff.argmax()]

In [None]:
# Display the selected decision threshold.
thr

## Confusion Matrix and Classification Report

In [None]:
from sklearn.metrics import confusion_matrix

# Tick labels follow the label order of the generators: 0 = normal, 1 = covid.
tick_labels = ['NORMAL', 'COVID']
predicted_vals = model.predict(test_generator)
# roc_curve thresholds are inclusive: a sample counts as positive when its
# score is >= the threshold. A strict ">" would flip the samples sitting
# exactly on the chosen threshold and no longer match the selected ROC point.
cm = confusion_matrix(test_generator.labels, predicted_vals >= thr)
ax = sns.heatmap(cm, annot=True, fmt="d")
plt.ylabel('Actual')
plt.xlabel('Predicted')
ax.set_xticklabels(tick_labels)
ax.set_yticklabels(tick_labels)

In [None]:
from sklearn.metrics import classification_report

# Binarise the scores with the selected threshold. The comparison is
# inclusive because roc_curve thresholds treat score >= threshold as a
# positive prediction; a strict ">" would misclassify samples sitting
# exactly on the threshold.
y_pred = predicted_vals >= thr
labels = ['NORMAL', 'COVID']
print(classification_report(y_true, y_pred, target_names=labels))

## Save Model

In [None]:
# Persist the trained model under the dataset folder.
# NOTE(review): that folder is removed by the `rm -rf dataset` setup cell on
# a re-run, and newer Keras releases may require a ".keras" or ".h5" suffix
# on the path -- confirm against the installed version.
model.save(dataset_path + "/COVID_Detection")