In [None]:
from tensorflow import keras
from keras import layers
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing import image_dataset_from_directory
import tensorflow_datasets as tfds

import os, shutil, pathlib, glob
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np
import pandas as pd
import random
# Single seed shared by every source of randomness in this notebook.
SEED = 4747
# Seed Python's `random` module (used for shuffling the image paths).
random.seed(SEED)
# Also seed NumPy and TensorFlow so that weight initialisation, dropout and
# the random augmentation layers are repeatable between runs. This call
# re-seeds Python's `random` with the same value, so the shuffles are unaffected.
keras.utils.set_random_seed(SEED)

In [None]:
from google.colab import files

# Keep the value returned by the upload widget in a variable instead of leaving
# it as the cell's last expression: otherwise the notebook echoes the returned
# mapping (including the raw file contents, which may be credentials) into the
# saved cell output.
uploaded_files = files.upload()
print("Uploaded files:", list(uploaded_files))

In [None]:
def get_image_path_list(patient_dirs=None, dataset_root="/content/dataset"):
    """Collect the image paths of class 0 and class 1 and shuffle them.

    The dataset is expected to be laid out as
    ``<dataset_root>/<patient_dir>/<0|1>/*.png``.

    Args:
        patient_dirs: Iterable of sub-directory names (one per patient) to
            read. When omitted, a notebook-level ``dirs`` variable is used if
            one exists (the behaviour of earlier versions of this function);
            otherwise every sub-directory of ``dataset_root`` is used.
        dataset_root: Directory that contains the per-patient folders.

    Returns:
        A ``(class_0, class_1)`` tuple of lists of image paths, each shuffled
        with Python's global ``random`` generator.

    Raises:
        FileNotFoundError: If the patient folders have to be discovered and
            ``dataset_root`` does not exist.
    """
    if patient_dirs is None:
        # Backward compatibility with the previous implicit global input.
        patient_dirs = globals().get("dirs")
    if patient_dirs is None:
        patient_dirs = sorted(
            entry
            for entry in os.listdir(dataset_root)
            if os.path.isdir(os.path.join(dataset_root, entry))
        )

    class_0 = []
    class_1 = []
    for patient_dir in patient_dirs:
        # glob returns files in filesystem order, which differs between
        # machines; sorting first makes the seeded shuffle reproducible.
        class_0.extend(sorted(glob.glob(f"{dataset_root}/{patient_dir}/0/*.png")))
        class_1.extend(sorted(glob.glob(f"{dataset_root}/{patient_dir}/1/*.png")))

    # One shuffle per list is a uniform permutation; repeating it adds nothing.
    random.shuffle(class_0)
    random.shuffle(class_1)

    return class_0, class_1

In [None]:
def create_dataset(train_split=0.85, output_root="/content/new_dataset"):
    """Copy the images into a train/test directory tree, split per class.

    The paths come from ``get_image_path_list()`` (already shuffled). For each
    class the first ``int(n * train_split)`` images are copied to
    ``<output_root>/train/<class>`` and the rest to
    ``<output_root>/test/<class>``.

    Any existing ``train/<class>`` or ``test/<class>`` folder under
    ``output_root`` is deleted first. This lets the cell be re-run, and it
    prevents files from an earlier (different) random split from ending up in
    both the train and the test set.

    Note: images are copied into flat directories, so two source files with
    the same file name would overwrite each other.

    Args:
        train_split: Fraction of each class that goes to the train set,
            between 0 and 1 inclusive.
        output_root: Directory under which ``train/`` and ``test/`` are created.

    Raises:
        ValueError: If ``train_split`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_split <= 1.0:
        raise ValueError(f"train_split must be within [0, 1], got {train_split!r}")

    # Two separate, shuffled lists of all the images of class 0 and class 1
    class0, class1 = get_image_path_list()

    for label, paths in (("0", class0), ("1", class1)):
        train_dir = os.path.join(output_root, "train", label)
        test_dir = os.path.join(output_root, "test", label)

        # Start from empty target folders (see docstring for why).
        for target_dir in (train_dir, test_dir):
            if os.path.isdir(target_dir):
                shutil.rmtree(target_dir)
            os.makedirs(target_dir)

        # Images whose index is below the threshold go to train, the others
        # go to test.
        train_thresh = int(len(paths) * train_split)
        for index, path in enumerate(paths):
            destination = train_dir if index < train_thresh else test_dir
            shutil.copy(src=path, dst=destination)

In [None]:
# Build the on-disk train/test folders, passing 0.80 as the train fraction
# (overrides the function's default of 0.85).
create_dataset(train_split=0.80)

In [None]:
# Sanity check: compare the number of images expected in the train split
# with the number of files actually present in the train folder.
class0, class1 = get_image_path_list()

expected_train0 = int(len(class0) * 0.80)
expected_train1 = int(len(class1) * 0.80)

print("Size of 80% images of class 0 =", expected_train0)
print("Size of 80% images of class 1 =", expected_train1)
print("Size of 80% images of both classes =", expected_train0 + expected_train1)

# Without recursive=True, "**" behaves like "*", i.e. it matches the class
# sub-folders directly under train/.
train_data_size = glob.glob("/content/new_dataset/train/**/*.png")
print("Size of training dataset =", len(train_data_size))

In [None]:
image_size = (50, 50)
batch_size = 32

# Arguments shared by the training and validation subsets. Both calls must use
# the same directory, validation_split and seed so that the two subsets are
# carved out of the same shuffled file list.
_train_dir_options = dict(
    directory="/content/new_dataset/train",
    labels="inferred",
    label_mode="int",
    validation_split=0.2,
    seed=SEED,
    image_size=image_size,
    batch_size=batch_size,
)

# "tensorflow.data.Dataset" object for the training data
train_dataset = image_dataset_from_directory(subset="training", **_train_dir_options)

# "tensorflow.data.Dataset" object for the validation data
validation_dataset = image_dataset_from_directory(subset="validation", **_train_dir_options)

In [None]:
#Get "tensorflow.data.Dataset" object for the test data
test_dataset = image_dataset_from_directory(
    "/content/new_dataset/test",
    labels="inferred",
    label_mode="int",
    # Evaluation does not benefit from shuffling. A fixed order makes every
    # pass over the test set identical, so predictions stay comparable
    # between runs.
    shuffle=False,
    image_size=image_size,
    batch_size=batch_size,
)

Model Build

In [None]:
# Random augmentations applied to the input images: horizontal flip, rotation
# and zoom (the last two with a factor of 0.3).
_augmentation_steps = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.3),
    layers.RandomZoom(0.3),
]
data_augmentation = keras.Sequential(_augmentation_steps)

In [None]:
def _conv_bn_relu(tensor, filters=256):
    """Apply a bias-free 3x3 convolution, then batch normalisation, then ReLU."""
    tensor = layers.Conv2D(filters=filters, kernel_size=3, use_bias=False)(tensor)
    tensor = layers.BatchNormalization()(tensor)
    return layers.Activation("relu")(tensor)


inputs = keras.Input(shape=(50, 50, 3))
x = data_augmentation(inputs)
x = layers.Rescaling(1./255)(x)

# The images are very small (50x50), so every convolution uses a large number
# of filters (256) from the start instead of the usual "start small and grow"
# scheme: the feature maps shrink quickly and little spatial information is
# left in the deeper layers. With unpadded 3x3 convolutions and 2x2 pooling
# the spatial size goes 50 -> 48 -> 46 -> 23 -> 21 -> 10 -> 8 -> 4.
x = _conv_bn_relu(x)
for _stage in range(3):
    x = _conv_bn_relu(x)
    x = layers.MaxPooling2D(pool_size=2)(x)

# Classification head: dropout followed by three shrinking dense layers and a
# single sigmoid unit.
x = layers.Flatten()(x)
x = layers.Dropout(0.5)(x)
for units in (512, 128, 32):
    x = layers.Dense(units, activation="relu")(x)
outputs = layers.Dense(1, activation="sigmoid")(x)

model = keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Print the summary of the model built above.
model.summary()

In [None]:
# Save the model summary to a text file.
# The encoding is set explicitly because the summary table may contain
# non-ASCII characters, which the platform's default encoding cannot always
# represent.
with open('model_summary.txt', 'w', encoding='utf-8') as f:
    model.summary(print_fn=lambda line: f.write(line + '\n'))

In [None]:
# Binary classification set-up: the single sigmoid output is trained with
# binary cross-entropy, and accuracy is tracked during training.
model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Keep on disk only the model from the epoch with the lowest validation loss.
_best_model_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath="CanDetect.keras",
    monitor="val_loss",
    save_best_only=True,
)

# keras.callbacks.EarlyStopping(monitor='val_loss', patience=5) is currently
# not part of the list; append it here to stop once val_loss stops improving.
callbacks = [_best_model_checkpoint]

In [None]:
# Train for 20 epochs, validating after each one. The checkpoint callback
# saves the best model seen so far.
history = model.fit(
    x=train_dataset,
    validation_data=validation_dataset,
    epochs=20,
    callbacks=callbacks,
)

In [None]:
# Per-epoch metrics recorded by model.fit
accuracy = history.history["accuracy"]
val_accuracy = history.history["val_accuracy"]
loss = history.history["loss"]
val_loss = history.history["val_loss"]
epochs = range(1, len(accuracy) + 1)

fig, ax = plt.subplots(1, 2, figsize=(12, 8))

# One panel per metric: dots for training values, a line for validation values.
_panels = (
    (ax[0], accuracy, val_accuracy, "accuracy"),
    (ax[1], loss, val_loss, "loss"),
)
for axis, train_values, val_values, metric_name in _panels:
    axis.plot(epochs, train_values, "bo", label=f"Training {metric_name}")
    axis.plot(epochs, val_values, "b", label=f"Validation {metric_name}")
    axis.set_title(f"Training and validation {metric_name}")
    axis.set_xlabel("Epoch")
    axis.set_ylabel(metric_name.capitalize())
    axis.legend()

fig.tight_layout()
# Explicit show() so the cell does not echo the repr of the last call.
plt.show()

In [None]:
# Reload the best checkpoint from the same (relative) path that the
# ModelCheckpoint callback writes to, so loading works whatever the current
# working directory is.
test_model = keras.models.load_model("CanDetect.keras")
test_loss, test_acc = test_model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")

In [None]:
def test_the_model():
  class_true = []
  class_pred = []

  for batch_data, batch_labels in test_dataset:

      #Make predictions
      pred_labels = test_model.predict(batch_data)

      #Convert the NumPy ndarray object to simple list
      pred_labels_np = [float(i) for i in pred_labels]

      # Convert "tf.data.Dataset" to NumPy array
      batch_labels_np = tfds.as_numpy(batch_labels)

      #Append the true and predicted labels to their respective list
      class_true.extend(batch_labels_np)
      class_pred.extend(pred_labels_np)
  return class_true, class_pred

In [None]:
# Gather the true labels and the model's predictions returned by test_the_model().
class_true, class_pred = test_the_model()

In [None]:
# Turn each predicted score into a hard 0/1 label by rounding to the nearest integer.
class_pred_int = list(map(round, class_pred))

In [None]:
# labels=[0, 1] keeps the matrix 2x2 even if one class is missing from the
# labels or the predictions.
confusion = confusion_matrix(class_true, class_pred_int, labels=[0, 1])

fig, ax = plt.subplots(figsize=(8, 5))
# The cells hold integer counts, so annotate them as integers ("d") rather
# than as floats with one decimal.
sns.heatmap(confusion, annot=True, fmt="d", linewidths=0.1,
            cmap="BuGn", linecolor="green", ax=ax)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix")
plt.show()

In [None]:
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve

In [None]:
# ROC analysis needs the continuous predicted scores, not the thresholded 0/1
# labels: with hard labels the "curve" has a single operating point and the
# AUC collapses to the balanced accuracy at that one threshold.
ROC_AUC = roc_auc_score(class_true, class_pred)
# summarize scores
print('ROC AUC score =%.3f' % (ROC_AUC))

# Calculate the ROC curve over all score thresholds
fpr, tpr, thresholds = roc_curve(class_true, class_pred)

# Plot the ROC curve together with the chance-level diagonal for reference
plt.plot(fpr, tpr, label=f"AUC={ROC_AUC:.3f}")
plt.plot([0, 1], [0, 1], linestyle="--", color="grey", label="Chance")

# axis labels
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
# Without legend() the label passed to plot() is never displayed.
plt.legend()
plt.show()