In [None]:
# Load the nb_black IPython extension for this kernel session.
# NOTE(review): presumably this auto-formats every executed cell with black — confirm.
%load_ext nb_black

# Importing libs and setting paths

In [None]:
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import (
    Activation,
    Dropout,
    BatchNormalization,
    Flatten,
    Dense,
    AvgPool2D,
    MaxPool2D,
)
from keras.models import Sequential, Model
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.optimizers import Adam, SGD, RMSprop

import tensorflow as tf

import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Root folder (relative to the notebook) that holds the CSV files and the
# image sub-folders read and written by the cells below.
DATASET_DIR = "data/"

In [None]:
# Show what is present in the dataset root before touching anything.
os.listdir(DATASET_DIR)

In [None]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import math

def plot_dists(df, labels):
    """Draw one bar chart of value counts per column, laid out on a 2-column grid.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains every column named in ``labels``.
    labels : sequence of str
        Column names to plot; each one becomes a subplot titled with its name.

    Raises
    ------
    ValueError
        If ``labels`` is empty (there would be no subplot to draw).

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    labels = list(labels)
    if not labels:
        raise ValueError("plot_dists needs at least one column name in `labels`")

    n_cols = 2
    n_rows = math.ceil(len(labels) / n_cols)
    fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=labels)

    for position, label in enumerate(labels):
        counts = df[label].value_counts()
        categories = list(counts.index)
        heights = list(counts)
        # Fill the grid row by row; plotly rows/cols are 1-based.
        grid_row, grid_col = divmod(position, n_cols)
        fig.add_trace(
            go.Bar(x=categories, y=heights, text=heights, textposition="auto"),
            row=grid_row + 1,
            col=grid_col + 1,
        )

    fig.update_layout(
        autosize=False,
        width=800,
        # Height grows with the number of grid rows.
        height=200 * n_rows,
        margin=dict(l=50, r=50, b=100, t=100, pad=4),
    )

    fig.show()


# The Data

## Summary
This table summarises the labels used in the dataset and their counts (metadata).

In [None]:
# Read the summary table, discard the index column that was saved into the
# CSV, and replace missing values with a readable placeholder.
summary = pd.read_csv(DATASET_DIR + "Chest_xray_Corona_dataset_Summary.csv")
summary = summary.drop(columns="Unnamed: 0").fillna("Not Applicable")
summary

## Information about the dataset

In [None]:
# Read the per-image metadata, discard the index column that was saved into
# the CSV, and replace missing values with a readable placeholder.
dataset = pd.read_csv(DATASET_DIR + "Chest_xray_Corona_Metadata.csv").drop(
    columns="Unnamed: 0"
)
dataset = dataset.fillna("Not Applicable")

# Only the last expression of a cell is rendered, so the per-split counts
# have to be printed explicitly to be visible.
print(dataset.Dataset_type.value_counts())
dataset.head()

In [None]:
# Distribution of every categorical column of the metadata table.
plot_dists(
    df=dataset,
    labels=[
        "Label",
        "Dataset_type",
        "Label_2_Virus_category",
        "Label_1_Virus_category",
    ],
)

# Moving the images into per-class directories
## Normal

In [None]:
import shutil
import os


## Normal
# Gather every "Normal" image of both splits into a single class folder.
# One loop over (split, source folder) replaces the two copy-pasted stanzas.
target_path = DATASET_DIR + "dataset/normal/"
os.makedirs(target_path, exist_ok=True)

for dataset_type, origin_path in (
    ("TRAIN", DATASET_DIR + "train/"),
    ("TEST", DATASET_DIR + "test/"),
):
    fill = (dataset.Label == "Normal") & (dataset.Dataset_type == dataset_type)
    for x in dataset[fill].X_ray_image_name.values:
        source_file = origin_path + x
        target_file = target_path + x
        # Already moved by an earlier run of this cell: skip it so the cell
        # can be re-executed. A file missing from BOTH places still raises.
        if not os.path.exists(source_file) and os.path.exists(target_file):
            continue
        shutil.move(source_file, target_file)

## Pneumonia (spelled "Pnemonia" in the dataset labels)

In [None]:
# Gather every "Pnemonia" image (the dataset's own spelling) of both splits
# into a single class folder. One loop over (split, source folder) replaces
# the two copy-pasted stanzas.
target_path = DATASET_DIR + "dataset/pnemonia/"
os.makedirs(target_path, exist_ok=True)

for dataset_type, origin_path in (
    ("TRAIN", DATASET_DIR + "train/"),
    ("TEST", DATASET_DIR + "test/"),
):
    fill = (dataset.Label == "Pnemonia") & (dataset.Dataset_type == dataset_type)
    for x in dataset[fill].X_ray_image_name.values:
        source_file = origin_path + x
        target_file = target_path + x
        # Already moved by an earlier run of this cell: skip it so the cell
        # can be re-executed. A file missing from BOTH places still raises.
        if not os.path.exists(source_file) and os.path.exists(target_file):
            continue
        shutil.move(source_file, target_file)

## Example

In [None]:
# Display one training image in grayscale with its label as the title.
# NOTE(review): `images` is not defined anywhere in the visible notebook, so
# this cell raises NameError on a fresh kernel. It presumably maps a split
# name to {"X": image arrays, "Y": labels} — confirm and restore the cell
# that builds it.
sample = 0
plt.title(images["TRAIN"]['Y'][sample])
_ = plt.imshow(images["TRAIN"]['X'][sample], cmap="gray")

In [None]:
# Display another training image in grayscale with its label as the title.
# NOTE(review): `images` is not defined anywhere in the visible notebook, so
# this cell raises NameError on a fresh kernel. It presumably maps a split
# name to {"X": image arrays, "Y": labels} — confirm and restore the cell
# that builds it.
sample = 1344
plt.title(images["TRAIN"]["Y"][sample])
_ = plt.imshow(images["TRAIN"]["X"][sample], cmap="gray")

In [None]:
# Geometry of the images fed to the network.
IMG_W = 150
IMG_H = 150
CHANNELS = 3

# Channels-last Keras inputs are (height, width, channels); this ordering
# also matches target_size=(IMG_H, IMG_W) used by the data generators, so the
# two stay consistent if width and height ever differ.
INPUT_SHAPE = (IMG_H, IMG_W, CHANNELS)
NB_CLASSES = 2
EPOCHS = 48
BATCH_SIZE = 6

In [None]:
# Convolutional binary classifier, declared as a single ordered layer list.
# NOTE(review): Dense(32) has no activation, so it is a purely linear layer
# before the dropout — confirm this is intended.
model = Sequential(
    [
        # Two conv + max-pool stages on the raw image.
        Conv2D(32, (3, 3), input_shape=INPUT_SHAPE),
        Activation("relu"),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(32, (3, 3)),
        Activation("relu"),
        MaxPooling2D(pool_size=(2, 2)),
        # Two stacked convolutions without pooling in between.
        Conv2D(64, (3, 3)),
        Activation("relu"),
        Conv2D(250, (3, 3)),
        Activation("relu"),
        # Two conv + average-pool stages.
        Conv2D(128, (3, 3)),
        Activation("relu"),
        AvgPool2D(2, 2),
        Conv2D(64, (3, 3)),
        Activation("relu"),
        AvgPool2D(2, 2),
        # Last convolution uses a 2x2 kernel, followed by max pooling.
        Conv2D(256, (2, 2)),
        Activation("relu"),
        MaxPool2D(2, 2),
        # Classification head ending in a single sigmoid unit.
        Flatten(),
        Dense(32),
        Dropout(0.25),
        Dense(1),
        Activation("sigmoid"),
    ]
)

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit of the model.
model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Print the layer-by-layer description of the compiled model.
model.summary()

In [None]:
# The class folders created earlier are DATASET_DIR/dataset/normal and
# DATASET_DIR/dataset/pnemonia. flow_from_directory derives one class per
# immediate sub-folder, so it must be pointed at "dataset/" — pointing it at
# DATASET_DIR would make "dataset", "train" and "test" the classes.
image_root = DATASET_DIR + "dataset/"

# Augmentation is applied to training images only.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.3,
)

# Validation images are only rescaled; the identical validation_split keeps
# the train/validation partition of the files the same for both generators.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255, validation_split=0.3)

train_generator = train_datagen.flow_from_directory(
    image_root,
    target_size=(IMG_H, IMG_W),
    batch_size=BATCH_SIZE,
    class_mode="binary",
    subset="training",
)

# shuffle=False keeps predictions aligned with validation_generator.classes.
validation_generator = validation_datagen.flow_from_directory(
    image_root,
    target_size=(IMG_H, IMG_W),
    batch_size=BATCH_SIZE,
    class_mode="binary",
    shuffle=False,
    subset="validation",
)

In [None]:
# Train the network. Model.fit accepts generators directly; fit_generator is
# deprecated (this notebook already passes a generator to Model.predict).
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // BATCH_SIZE,
    epochs=EPOCHS,
)

In [None]:
# One figure per tracked metric, showing the training and validation curves.
for metric in ("accuracy", "loss"):
    plt.plot(history.history[metric])
    plt.plot(history.history["val_" + metric])
    plt.title("model " + metric)
    plt.ylabel(metric)
    plt.xlabel("epoch")
    # The second curve comes from validation_data, not from a test set.
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()

In [None]:
# Report the accuracy reached at the last epoch on each subset.
for caption, history_key in (
    ("training_accuracy", "accuracy"),
    ("validation_accuracy", "val_accuracy"),
):
    print(caption, history.history[history_key][-1])

In [None]:
# Class labels of the validation images as exposed by the generator; the
# cells below use them as the ground truth ("Actual") for the predictions.
label = validation_generator.classes

In [None]:
# Restart the generator so predictions line up with validation_generator.classes.
validation_generator.reset()
pred = model.predict(validation_generator)

# The model ends in ONE sigmoid unit, so `pred` has a single column holding
# the probability of class index 1. argmax over that axis would always give
# 0; the class is obtained by thresholding the probability instead.
predicted_class_indices = (pred.ravel() > 0.5).astype(int)

# Map class indices back to the folder names used as class labels.
labels = validation_generator.class_indices
labels2 = {index: name for name, index in labels.items()}
predictions = [labels2[k] for k in predicted_class_indices]


In [None]:
from sklearn.metrics import confusion_matrix

# confusion_matrix takes (y_true, y_pred): rows are the actual classes and
# columns the predicted ones, which is what the "Actual"/"Predicted" axis
# labels of the plot further down assume.
cf = confusion_matrix(label, predicted_class_indices)
cf

In [None]:
# Same comparison as a labelled table, with row/column totals (margins).
exp_series = pd.Series(label)
pred_series = pd.Series(predicted_class_indices)
pd.crosstab(
    index=exp_series,
    columns=pred_series,
    rownames=["Actual"],
    colnames=["Predicted"],
    margins=True,
)

In [None]:
# Render the confusion matrix as a colour-coded grid.
plt.matshow(cf)
plt.title("Confusion Matrix Plot")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.colorbar()
plt.show()

In [None]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# NOTE(review): this is synthetic data from make_classification, unrelated to
# the X-ray images used above.
# Fixed seed so the generated data and both splits are reproducible.
ROC_DEMO_SEED = 42

X, y = make_classification(n_samples=80000, random_state=ROC_DEMO_SEED)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=ROC_DEMO_SEED
)

# Second split: only the first half of the training data is used below;
# X_train_lr / y_train_lr are not referenced in the visible notebook.
X_train, X_train_lr, y_train, y_train_lr = train_test_split(
    X_train, y_train, test_size=0.5, random_state=ROC_DEMO_SEED
)
from keras.models import Sequential
from keras.layers import Dense

def build_model(input_dim=20):
    """Build and compile a small fully connected binary classifier.

    Parameters
    ----------
    input_dim : int, default 20
        Number of input features. The default matches the width the first
        layer was previously hard-coded to.

    Returns
    -------
    A compiled ``Sequential`` model (Dense 20 -> Dense 40 -> sigmoid unit)
    using binary cross-entropy, the Adam optimizer and the accuracy metric.
    """
    model = Sequential(
        [
            Dense(20, input_dim=input_dim, activation="relu"),
            Dense(40, activation="relu"),
            Dense(1, activation="sigmoid"),
        ]
    )
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model
import matplotlib.pyplot as plt
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import auc, roc_curve

# Fit the dense network on the training half of the synthetic data.
keras_model = build_model()
keras_model.fit(X_train, y_train, epochs=5, batch_size=100, verbose=1)

# Score the held-out half and derive the ROC curve and its area.
y_pred_keras = keras_model.predict(X_test).ravel()
fpr_keras, tpr_keras, thresholds_keras = roc_curve(
    y_true=y_test, y_score=y_pred_keras
)
auc_keras = auc(x=fpr_keras, y=tpr_keras)
from sklearn.ensemble import RandomForestClassifier
# Baseline: a small random forest classifier fitted on the same training data.
# A fixed random_state makes its ROC curve reproducible between runs.
rf = RandomForestClassifier(max_depth=3, n_estimators=10, random_state=42)
rf.fit(X_train, y_train)

# Column 1 of predict_proba is the probability of the positive class.
y_pred_rf = rf.predict_proba(X_test)[:, 1]
fpr_rf, tpr_rf, thresholds_rf = roc_curve(y_test, y_pred_rf)
auc_rf = auc(fpr_rf, tpr_rf)
# ROC curves of the two models fitted on the synthetic data.
# The curves compare two MODELS (dense network vs. random forest), not the
# "Covid" and "Normal" classes, so the legend names the models.
plt.figure(1)
plt.plot([0, 1], [0, 1], "k--")  # chance-level diagonal
plt.plot(fpr_keras, tpr_keras, label="Keras MLP (area = {:.3f})".format(auc_keras))
plt.plot(fpr_rf, tpr_rf, label="Random forest (area = {:.3f})".format(auc_rf))
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.title("ROC curve")
plt.legend(loc="best")
plt.show()
