In [None]:
pip install --upgrade pip

In [None]:
pip install imutils

In [None]:
import os
import cv2
import shutil
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense,Conv2D,MaxPool2D,Flatten,Dropout,Input,AveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from imutils import paths

In [None]:
# Root of the working dataset; the cells below fill its covid/ and normal/ sub-folders.
dataset_path = "./dataset"

In [None]:
%%bash
# Start from an empty working tree: remove any previous copy, then recreate
# one folder per class.
rm -rf dataset
mkdir -p dataset/covid dataset/normal

In [None]:
# Copy every image whose metadata row has finding == "COVID-19" into
# <dataset_path>/covid. Rows whose image file is missing on disk are skipped.
covid_dataset_path = '../input/covid-chest-xray'
csvPath = os.path.join(covid_dataset_path, "metadata.csv")
df = pd.read_csv(csvPath)

# Select the filenames of the positive cases up front instead of testing each row in the loop
covid_filenames = df.loc[df["finding"] == "COVID-19", "filename"]

for relative_name in covid_filenames:
    imagePath = os.path.sep.join([covid_dataset_path, "images", relative_name])

    if os.path.exists(imagePath):
        # keep only the last path component so the copy lands directly in the covid folder
        filename = relative_name.split(os.path.sep)[-1]
        oPath = os.path.sep.join([f"{dataset_path}/covid", filename])
        shutil.copy2(imagePath, oPath)

In [None]:
# Copy the NORMAL training images of the pneumonia dataset into
# <dataset_path>/normal, keeping their original file names.
pneumonia_dataset_path ='../input/chest-xray-pneumonia/chest_xray'

basePath = os.path.join(pneumonia_dataset_path, "train", "NORMAL")
imagePaths = list(paths.list_images(basePath))

for imagePath in imagePaths:
    filename = os.path.basename(imagePath)
    outputPath = os.path.join(f"{dataset_path}/normal", filename)
    shutil.copy2(imagePath, outputPath)

In [None]:
# File lists for the two classes, in the order: normal, covid
normal_images, covid_images = (
    list(paths.list_images(f"{dataset_path}/{class_dir}"))
    for class_dir in ("normal", "covid")
)

In [None]:
# Number of images per class: normal first, then covid
print(*map(len, (normal_images, covid_images)))

In [None]:
# Preview one normal X-ray: the entry at index 19 of the normal file list
sample_path = normal_images[19]
img = plt.imread(sample_path)
plt.imshow(img)

In [None]:
# Preview one COVID X-ray: the entry at index 19 of the covid file list
sample_path = covid_images[19]
img = plt.imread(sample_path)
plt.imshow(img)

In [None]:
# Load every image under dataset_path as a 150x150 RGB array and record its
# class label, taken from the name of the folder that contains the file.
# Paths are sorted so the sample order does not depend on directory listing order.
imagePaths = sorted(paths.list_images(dataset_path))
data = []
labels = []
for imagePath in imagePaths:
    label = imagePath.split(os.path.sep)[-2]
    # cv2.imread returns None instead of raising when a file cannot be decoded;
    # skip such files rather than crashing in cvtColor below
    image = cv2.imread(imagePath)
    if image is None:
        print(f"Skipping unreadable image: {imagePath}")
        continue
    # OpenCV loads channels as BGR; convert to the RGB order used downstream
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # 150x150 matches the input_shape given to the network later in the notebook
    image = cv2.resize(image, (150, 150))
    data.append(image)
    labels.append(label)

In [None]:
# Scale pixel values by 1/255 and turn the label list into an array.
# float32 is requested explicitly: the default float64 result would take twice
# the memory for the same information.
# NOTE: this cell rebinds `data`, so running it twice without re-running the
# loading cell divides by 255 a second time.
data = np.asarray(data, dtype="float32") / 255.0
labels = np.array(labels)

In [None]:
from sklearn.preprocessing import LabelBinarizer

# Encode the string labels as integers, then as two-column one-hot vectors.
# Inspect lb.classes_ to see which class name maps to which column.
# NOTE: this cell rebinds `labels`; re-run the loading/scaling cells before re-running it.
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
labels = to_categorical(labels)

# Target proportions of the full data set
train_ratio = 0.75
validation_ratio = 0.15
test_ratio = 0.10

# Fixed seed so both splits are reproducible across kernel restarts
SPLIT_SEED = 42

# First split: 75% train, 25% held out, keeping class proportions
x_train, x_test, y_train, y_test = train_test_split(
    data, labels,
    test_size=1 - train_ratio,
    stratify=labels,
    random_state=SPLIT_SEED,
)

# Second split: divide the held-out 25% into validation (15% of all data) and
# test (10% of all data). It is stratified as well so that neither small
# subset ends up without one of the classes.
x_val, x_test, y_val, y_test = train_test_split(
    x_test, y_test,
    test_size=test_ratio / (test_ratio + validation_ratio),
    stratify=y_test,
    random_state=SPLIT_SEED,
)

# On-the-fly augmentation applied to training batches only
trainAug = ImageDataGenerator(rotation_range=15, horizontal_flip = True ,vertical_flip = True ,fill_mode="nearest")

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.layers import AveragePooling2D, Dense, Dropout, Flatten
from tensorflow.keras.models import Model

# Backbone: InceptionV3 with ImageNet weights, without its own classifier,
# taking 150x150 RGB inputs.
base = InceptionV3(weights = 'imagenet', include_top = False, input_shape = (150, 150, 3))

# The backbone's output is a feature map, not a class prediction, so a small
# classification head ending in a 2-way softmax is stacked on top of it.
head_layers = [
    AveragePooling2D(pool_size=(3, 3)),
    Flatten(name="flatten"),
    Dense(64, activation="relu"),
    Dropout(0.5),
    Dense(2, activation="softmax"),
]
hModel = base.output
for head_layer in head_layers:
    hModel = head_layer(hModel)

model = Model(inputs=base.input, outputs=hModel)

# Freeze the pretrained backbone: only the new head's weights are trained
for layer in base.layers:
    layer.trainable = False

In [None]:
from tensorflow.keras.optimizers import Adam

# Two-class softmax output with one-hot targets -> categorical cross-entropy.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
model.compile(optimizer=Adam(learning_rate=0.001),loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train the head on augmented batches and validate on the untouched validation arrays.
EPOCHS = 3
BS = 1

# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from newer Keras releases.
progess = model.fit(
    trainAug.flow(x_train, y_train, batch_size=BS),
    # one pass over the training set per epoch, for any batch size
    steps_per_epoch=int(np.ceil(len(x_train) / BS)),
    # validation data are plain arrays, so no validation_steps is given
    validation_data=(x_val, y_val),
    epochs=EPOCHS,
)

In [None]:
# Accuracy per epoch. The second curve comes from the validation split passed
# to fit(), so it is labelled "validation" rather than "test".
fig, ax = plt.subplots()
ax.plot(progess.history['accuracy'], label='train')
ax.plot(progess.history['val_accuracy'], label='validation')
ax.set(title='Accuracy per epoch', xlabel='epoch', ylabel='accuracy')
ax.legend()
plt.show()

In [None]:
# Loss per epoch. The second curve comes from the validation split passed to
# fit(), so it is labelled "validation" rather than "test".
fig, ax = plt.subplots()
ax.plot(progess.history['loss'], label='train')
ax.plot(progess.history['val_loss'], label='validation')
ax.set(title='Loss per epoch', xlabel='epoch', ylabel='loss')
ax.legend()
plt.show()

In [None]:
# Confusion matrix and overall accuracy on the held-out test split.
predIdxs = model.predict(x_test, batch_size=BS)
# turn the per-class probabilities into predicted class indices
predIdxs = np.argmax(predIdxs, axis=1)
# labels=[0, 1] pins the matrix to 2x2 even if one class is absent from both
# the test labels and the predictions; otherwise cm[1, 1] would raise IndexError
cm = confusion_matrix(y_test.argmax(axis=1), predIdxs, labels=[0, 1])
total = cm.sum()
# correct predictions sit on the diagonal
acc = np.trace(cm) / total
print(cm)
print("acc: {:.4f}".format(acc))

In [None]:
# Predicted class index for the first test sample (slice keeps the batch dimension)
np.argmax(model.predict(x_test[:1]), axis=1)

In [None]:
from sklearn.metrics import accuracy_score

# Overall accuracy on the test split. Arguments follow the documented
# (y_true, y_pred) order of accuracy_score.
y_pred = model.predict(x_test)
accuracy_score(y_test.argmax(axis=1), y_pred.argmax(axis=1))
