In [None]:
!pip install imutils

In [None]:
# import the necessary packages
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from IPython.display import FileLink
from imutils import paths
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

np.random.seed(0)

import random
import shutil
import cv2
import os


In [None]:
# Root of the working dataset; later cells copy images into its covid/ and normal/ subfolders.
dataset_path = './dataset'

## Build Dataset

In [None]:
%%bash
# Start from an empty dataset folder, then create one subfolder per class.
rm -rf dataset
mkdir -p dataset/covid dataset/normal

### Covid xray dataset

In [None]:
# Number of NORMAL x-rays kept after shuffling when the normal class is built.
samples = 25

In [None]:
# Root of the COVID chest x-ray dataset; metadata.csv and the images/ folder are read from here.
covid_dataset_path = '../input/covid-chest-xray'

In [None]:
# load the metadata table that describes the images of the COVID dataset
csvPath = os.path.sep.join([covid_dataset_path, "metadata.csv"])
df = pd.read_csv(csvPath)

# keep only the rows that are both a COVID-19 finding and a 'PA' view
is_covid_pa = (df["finding"] == "COVID-19") & (df["view"] == "PA")

for source_name in df.loc[is_covid_pa, "filename"]:
    # locate the image inside the dataset's images/ folder
    imagePath = os.path.sep.join([covid_dataset_path, "images", source_name])

    # the metadata file references some images that are not present on disk,
    # so only the files that actually exist are copied
    if os.path.exists(imagePath):
        # the destination keeps only the last path component of the file name
        filename = source_name.rsplit(os.path.sep, 1)[-1]
        outputPath = os.path.sep.join([f"{dataset_path}/covid", filename])
        shutil.copy2(imagePath, outputPath)

### Build normal xray dataset

In [None]:
# Root of the chest x-ray pneumonia dataset; its train/NORMAL folder supplies the normal class.
pneumonia_dataset_path ='../input/chest-xray-pneumonia/chest_xray'

In [None]:
# Gather every image in the NORMAL training folder. The listing is sorted
# because directory traversal order depends on the filesystem; without a
# stable starting order the seeded shuffle below would not select the same
# files on every machine.
basePath = os.path.sep.join([pneumonia_dataset_path, "train", "NORMAL"])
imagePaths = sorted(paths.list_images(basePath))

# randomly sample `samples` image paths (seeded so the draw is repeatable)
random.seed(42)
random.shuffle(imagePaths)
imagePaths = imagePaths[:samples]

# copy each sampled image into the normal class folder, keeping its file name
for imagePath in imagePaths:
    filename = imagePath.split(os.path.sep)[-1]
    outputPath = os.path.sep.join([f"{dataset_path}/normal", filename])
    shutil.copy2(imagePath, outputPath)

## Plot x-rays

Helper function to plot the images in a grid

In [None]:
def ceildiv(a, b):
    """Return a divided by b, rounded up instead of down."""
    # Floor-dividing the negated numerator and negating the result turns
    # Python's floor division into a ceiling division.
    negated_floor = (-a) // b
    return -negated_floor

def plots_from_files(imspaths, figsize=(10,5), rows=1, titles=None, maintitle=None):
    """Plot the images found at the given paths in a grid.

    Args:
        imspaths: sequence of image file paths to display.
        figsize: (width, height) of the figure, passed to plt.figure.
        rows: number of grid rows; the column count is derived from it.
        titles: optional per-image titles, indexed in step with imspaths.
        maintitle: optional title for the whole figure.
    """
    f = plt.figure(figsize=figsize)
    if maintitle is not None:
        plt.suptitle(maintitle, fontsize=10)
    # the grid shape is the same for every image, so compute it once
    cols = ceildiv(len(imspaths), rows)
    for i, path in enumerate(imspaths):
        sp = f.add_subplot(rows, cols, i + 1)
        sp.axis('Off')
        if titles is not None:
            sp.set_title(titles[i], fontsize=16)
        img = plt.imread(path)
        # Draw on this subplot explicitly rather than through pyplot's
        # "current axes". cmap="gray" only affects single-channel images (it
        # is ignored for RGB/RGBA data), so grayscale x-rays are not rendered
        # in the default false-colour map.
        sp.imshow(img, cmap="gray")

In [None]:
# Collect the image files that were copied into each class folder.
normal_dir = f"{dataset_path}/normal"
covid_dir = f"{dataset_path}/covid"
normal_images = [image_file for image_file in paths.list_images(normal_dir)]
covid_images = [image_file for image_file in paths.list_images(covid_dir)]

In [None]:
# Display the paths of the images in the normal class folder.
normal_images

In [None]:
# Display the paths of the images in the covid class folder.
covid_images

In [None]:
# Show the normal-class images in a grid with 5 rows.
plots_from_files(normal_images, rows=5, maintitle="Normal X-ray images")

In [None]:
# Show the covid-class images in a grid with 5 rows.
plots_from_files(covid_images, rows=5, maintitle="Covid-19 X-ray images")

## Data preprocessing

In [None]:
# Walk the dataset directory and build two parallel lists: one flattened
# pixel vector per image (`data`) and the class name of that image (`labels`).

print("[INFO] loading images...")
imagePaths = list(paths.list_images(dataset_path))

data = []
labels = []

for imagePath in imagePaths:
    # the class label is the name of the folder that contains the image
    label = imagePath.split(os.path.sep)[-2]

    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip such files rather than failing inside cvtColor
        print(f"[WARN] could not read {imagePath}, skipping")
        continue

    # NOTE(review): cv2.IMREAD_GRAYSCALE is an imread flag, not a colour
    # conversion code. Its value (0) is the same as cv2.COLOR_BGR2BGRA, so this
    # call appends an alpha channel instead of producing a grayscale image.
    # cv2.COLOR_BGR2GRAY is presumably what was intended, but switching changes
    # the feature-vector length, so the single-image test cells at the end of
    # the notebook (which repeat this call) must be changed at the same time.
    image = cv2.cvtColor(image, cv2.IMREAD_GRAYSCALE)
    # resize to a fixed 224x224 pixels, ignoring aspect ratio
    image = cv2.resize(image, (224, 224))

    # store the image as a flat feature vector alongside its label
    image_np = image.flatten()
    data.append(image_np)
    labels.append(label)

# NumPy versions of the two lists, with pixel intensities scaled to [0, 1]
data_train = np.array(data) / 255
labels_train = np.array(labels)

In [None]:
# Inspect the list of flattened, unscaled image vectors.
data

In [None]:
# Number of image vectors collected.
len(data)

In [None]:
# Class name recorded for each collected image.
labels

In [None]:
# Number of labels; one is appended per image vector.
len(labels)

In [None]:
# The image vectors as a NumPy array, divided by 255.
data_train

In [None]:
# The class names as a NumPy array.
labels_train

In [None]:
# Encode the class names: LabelBinarizer turns them into numeric indicators
# and to_categorical expands those indicators into one-hot rows.
lb = LabelBinarizer()
labels_train_y = lb.fit_transform(labels_train)
labels_train_cat = to_categorical(labels_train_y)

# Hold out 20% of the samples for testing, stratified by class.
# Note that the split is taken over the raw `data` / `labels` lists, i.e. the
# unscaled pixel vectors and the string class names, not over data_train or
# labels_train_cat.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.20,
    stratify=labels,
    random_state=42,
)



In [None]:
# Feature vectors assigned to the training split.
X_train

In [None]:
# Class labels assigned to the training split.
y_train

## Comparison of Calibration of Classifiers

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.calibration import calibration_curve



# Instantiate the four classifiers that are compared below.
# The two iterative linear models share the same iteration cap.
max_iterations = 10000
lr = LogisticRegression(max_iter=max_iterations)
svc = LinearSVC(C=1.0, max_iter=max_iterations)
gnb = GaussianNB()
rfc = RandomForestClassifier()





# FIT MODELS

> /opt/conda/lib/python3.7/site-packages/sklearn/svm/_base.py:977: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
>   "the number of iterations.", ConvergenceWarning)

In [None]:
# Train every classifier on the training split, in the order lr, gnb, svc, rfc.
# fit() hands back the estimator it was called on, so each *_Model name refers
# to the same (now fitted) object as lr / gnb / svc / rfc.
LR_Model, GNB_Model, SVC_Model, RFC_Model = (
    estimator.fit(X_train, y_train) for estimator in (lr, gnb, svc, rfc)
)

# Now save the models

In [None]:
import pickle

# Serialise each fitted model to its own pickle file in the working directory.
# The `with` block closes the file on exit, so no explicit close() is needed.
for pickle_name, fitted_model in (
    ('LR_model.pkl', LR_Model),
    ('GNB_model.pkl', GNB_Model),
    ('SVM_model.pkl', SVC_Model),
    ('RF_model.pkl', RFC_Model),
):
    with open(pickle_name, 'wb') as model_file:
        pickle.dump(fitted_model, model_file)


    


In [None]:
from IPython.display import FileLink

# Render a link to the pickled logistic regression model.
FileLink(r'LR_model.pkl')


In [None]:
# Render a link to the pickled Gaussian naive Bayes model.
FileLink(r'GNB_model.pkl')


In [None]:
# Render a link to the pickled linear SVC model.
FileLink(r'SVM_model.pkl')


In [None]:
# Render a link to the pickled random forest model.
FileLink(r'RF_model.pkl')

# Metrics

In [None]:
from sklearn.metrics import accuracy_score


# Predict the held-out test split with each fitted model.
LR_y_pred, GNB_y_pred, SVC_y_pred, RFC_y_pred = (
    fitted.predict(X_test)
    for fitted in (LR_Model, GNB_Model, SVC_Model, RFC_Model)
)


# accuracy_score(y_true, y_pred)


In [None]:
# Accuracy of the logistic regression predictions on the test split.
LR_acc_sc = accuracy_score(y_test, LR_y_pred)
LR_acc_sc

In [None]:
# Accuracy of the Gaussian naive Bayes predictions on the test split.
GNB_acc_sc = accuracy_score(y_test, GNB_y_pred)
GNB_acc_sc

In [None]:
# Accuracy of the linear SVC predictions on the test split.
SVC_acc_sc = accuracy_score(y_test, SVC_y_pred)
SVC_acc_sc

In [None]:
# Accuracy of the random forest predictions on the test split.
RFC_acc_sc = accuracy_score(y_test, RFC_y_pred)
RFC_acc_sc

In [None]:
# Report each model's score() on the test split, one line per model.
for template, fitted in (
    ('LogisticRegression score: %f', LR_Model),
    ('Naive Bayes score: %f', GNB_Model),
    ('SVM score: %f', SVC_Model),
    ('Random Forest score: %f', RFC_Model),
):
    print(template % fitted.score(X_test, y_test))

In [None]:
# #############################################################################
# Plot calibration plots
#
# Top panel: reliability curve of each classifier on the test split.
# Bottom panel: histogram of each classifier's predicted scores.

plt.figure(figsize=(10, 10))
ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
ax2 = plt.subplot2grid((3, 1), (2, 0))

ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
for clf, name in [(lr, 'Logistic'),
                  (gnb, 'Naive Bayes'),
                  (svc, 'Support Vector Classification'),
                  (rfc, 'Random Forest')]:
    # The classifiers were already fitted in the "FIT MODELS" cell and are
    # deliberately not refitted here: a refit would replace the models that
    # were pickled and scored above (for the unseeded random forest, with a
    # different model).
    if hasattr(clf, "predict_proba"):
        prob_pos = clf.predict_proba(X_test)[:, 1]
    else:  # use decision function
        # min-max scale the decision values into [0, 1] so they can share the
        # probability axis with the other classifiers
        prob_pos = clf.decision_function(X_test)
        prob_pos = \
            (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

    # Both the probability column and the decision function refer to
    # clf.classes_[1], so that class is marked as the positive one. Passing
    # 0/1 targets avoids depending on how calibration_curve treats string
    # labels.
    y_test_binary = (np.asarray(y_test) == clf.classes_[1]).astype(int)
    fraction_of_positives, mean_predicted_value = \
        calibration_curve(y_test_binary, prob_pos, n_bins=10)

    ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
             label="%s" % (name, ))

    ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,
             histtype="step", lw=2)

ax1.set_ylabel("Fraction of positives")
ax1.set_ylim([-0.05, 1.05])
ax1.legend(loc="lower right")
ax1.set_title('Calibration plots  (reliability curve)')

ax2.set_xlabel("Mean predicted value")
ax2.set_ylabel("Count")
ax2.legend(loc="upper center", ncol=2)

plt.tight_layout()
plt.show()


Test on covid image:

In [None]:


# Load one image from the COVID dataset and apply the same steps used when the
# training vectors were built: read, cvtColor, resize to 224x224, flatten.
# NOTE(review): cv2.IMREAD_GRAYSCALE is an imread flag rather than a cvtColor
# code (its value 0 equals cv2.COLOR_BGR2BGRA). The call is kept as-is because
# it has to match the data preprocessing cell, otherwise the feature length
# would differ from what the models were trained on.
covid_sample_path = '../input/covid-chest-xray/images/1-s2.0-S1684118220300608-main.pdf-002.jpg'
test_image = cv2.imread(covid_sample_path)
test_image = cv2.cvtColor(test_image, cv2.IMREAD_GRAYSCALE)
test_image = cv2.resize(test_image, (224, 224))
# predict() expects a 2-D array, so the single sample becomes one row
test_img = test_image.flatten().reshape(1, -1)

# run the sample through every fitted model, then print the predictions in
# the order LR, GNB, SVC, RFC
LR_pred, GNB_pred, SVC_pred, RFC_pred = (
    fitted.predict(test_img)
    for fitted in (LR_Model, GNB_Model, SVC_Model, RFC_Model)
)
for prediction in (LR_pred, GNB_pred, SVC_pred, RFC_pred):
    print(prediction)



Test on normal image:

In [None]:


# Load one image from the NORMAL test folder and apply the same steps used
# when the training vectors were built: read, cvtColor, resize, flatten.
# NOTE(review): cv2.IMREAD_GRAYSCALE is an imread flag rather than a cvtColor
# code (its value 0 equals cv2.COLOR_BGR2BGRA). The call is kept as-is because
# it has to match the data preprocessing cell, otherwise the feature length
# would differ from what the models were trained on.
normal_sample_path = '../input/chest-xray-pneumonia/chest_xray/test/NORMAL/IM-0003-0001.jpeg'
test_image = cv2.imread(normal_sample_path)
test_image = cv2.cvtColor(test_image, cv2.IMREAD_GRAYSCALE)
test_image = cv2.resize(test_image, (224, 224))
# predict() expects a 2-D array, so the single sample becomes one row
test_img = test_image.flatten().reshape(1, -1)

# run the sample through every fitted model, then print the predictions in
# the order LR, GNB, SVC, RFC
LR_pred, GNB_pred, SVC_pred, RFC_pred = (
    fitted.predict(test_img)
    for fitted in (LR_Model, GNB_Model, SVC_Model, RFC_Model)
)
for prediction in (LR_pred, GNB_pred, SVC_pred, RFC_pred):
    print(prediction)

