In [51]:
from os import listdir # to get the files and directories
from os.path import join # to joint base path with sub path
import numpy as np # numpy library will help us to work with the numbers and arrays
import matplotlib.pyplot as plt # to visualize/plot the images
import cv2 # handle the images
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score, f1_score
import pandas as pd
from imblearn.under_sampling import RandomUnderSampler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Input,
    Dense,
    Conv2D,
    MaxPooling2D,
    Flatten,
    Dropout,
)
from scikeras.wrappers import KerasClassifier

In [3]:
base_path = "./The IQ-OTHNCCD lung cancer dataset/"
categories = listdir(base_path)

In [4]:
images = {}
for category in categories:
    images[category] = []
    category_path = join(base_path, category)
    for img_name in listdir(category_path):
        img_path = join(category_path, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        images[category].append(img)

In [5]:
def resize_images(target_size):
    for category in categories:
        for index, img in enumerate(images[category]):
            if img.shape != target_size:
                images[category][index] = cv2.resize(img, target_size)


resize_images((128, 128))

In [6]:
for category in categories:
    images[category] = np.array(images[category])

In [7]:
X = np.concatenate(list(images.values()))
y = np.concatenate([[category]*len(images[category]) for category in categories])

In [8]:
one_class = ["Malignant cases", "Non-Malignant cases"]
y_one_class = np.where(y == one_class[0], one_class[0], one_class[1])

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y_one_class, test_size=0.3, stratify=y_one_class, random_state=42)

In [10]:
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

In [11]:
X_train = X_train.reshape(-1,*X_train[0].shape, 1)

In [12]:
X_train = X_train / 255.0
X_test = X_test / 255.0

In [14]:
def create_model():
    model = Sequential()
    # Input Layer
    model.add(Input(shape=X_train[0].shape))
    # Hidden Layers
    model.add(Conv2D(filters=64, kernel_size=(3, 3), name="conv2d_1", activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), name="max2d_1"))

    model.add(Conv2D(filters=32, kernel_size=(3, 3), name="conv2d_2", activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), name="max2d_2"))

    model.add(Conv2D(filters=8, kernel_size=(3, 3), name="conv2d_3", activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), name="max2d_3"))

    model.add(Flatten(name="flatten"))

    # Output Layer
    model.add(Dense(1, activation="sigmoid", name="out"))
    return model

## Hyperparameter tuning

### Optimizer (Adam, RMSprop)

- Adam Optimizer

In [15]:
model_adam = create_model()
model_adam.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
history_adam = model_adam.fit(
    X_train,
    y_train_encoded,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test_encoded),
)

Epoch 1/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 466ms/step - accuracy: 0.5823 - loss: 0.6943 - val_accuracy: 0.6970 - val_loss: 0.6291
Epoch 2/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 435ms/step - accuracy: 0.6916 - loss: 0.5863 - val_accuracy: 0.7667 - val_loss: 0.4940
Epoch 3/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 424ms/step - accuracy: 0.7861 - loss: 0.4648 - val_accuracy: 0.8152 - val_loss: 0.4244
Epoch 4/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 421ms/step - accuracy: 0.8127 - loss: 0.3908 - val_accuracy: 0.8697 - val_loss: 0.3527
Epoch 5/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 416ms/step - accuracy: 0.8517 - loss: 0.3573 - val_accuracy: 0.9121 - val_loss: 0.2868
Epoch 6/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 435ms/step - accuracy: 0.9031 - loss: 0.2612 - val_accuracy: 0.9212 - val_loss: 0.2310
Epoch 7/10
[1m24/24[

In [24]:
history_adam.history["val_accuracy"][-1]

0.9757575988769531

- RMSprop Optimizer

In [25]:
model_rms = create_model()
model_rms.compile(optimizer="RMSprop", loss="binary_crossentropy", metrics=["accuracy"])
history_rms = model_rms.fit(
    X_train,
    y_train_encoded,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test_encoded),
)

Epoch 1/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 461ms/step - accuracy: 0.5271 - loss: 0.6892 - val_accuracy: 0.7364 - val_loss: 0.6220
Epoch 2/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 468ms/step - accuracy: 0.6776 - loss: 0.6189 - val_accuracy: 0.7424 - val_loss: 0.5148
Epoch 3/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 460ms/step - accuracy: 0.7090 - loss: 0.5327 - val_accuracy: 0.7879 - val_loss: 0.4652
Epoch 4/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 418ms/step - accuracy: 0.7633 - loss: 0.4667 - val_accuracy: 0.7970 - val_loss: 0.4450
Epoch 5/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 423ms/step - accuracy: 0.8154 - loss: 0.4031 - val_accuracy: 0.8667 - val_loss: 0.3400
Epoch 6/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 430ms/step - accuracy: 0.8493 - loss: 0.3538 - val_accuracy: 0.8545 - val_loss: 0.3440
Epoch 7/10
[1m24/24[

In [26]:
history_rms.history["val_accuracy"][-1]

0.8939393758773804

From the result we find that adam optimizer is better than RMSprop optimizer.

In [81]:
model = create_model()
model = KerasClassifier(model, loss="binary_crossentropy", metrics=["accuracy"])

## Grid Search

In [100]:
print(3 * 3 * 6 * 2, "Cases")
print((15 + 20 + 30) * 3 * 6 * 2, "Epochs")
print((2340 * 45 * 30 / 1000) / 60)

108 Cases
2340 Epochs
52.65


In [82]:
# define the grid search parameters
batch_size = [32, 64, 128]
epochs = [15, 20, 30]
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3, 0.05]
optimizer = ["RMSprop", "adam"]
param_grid = dict(
    batch_size=batch_size,
    epochs=epochs,
    optimizer__learning_rate=learn_rate,
    optimizer=optimizer,
)
grid = GridSearchCV(estimator=model, param_grid=param_grid)

In [77]:
grid_result = grid.fit(X_train, y_train_encoded)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.5000 - loss: 0.6933
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.3750 - loss: 0.6940
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.4375 - loss: 0.6999
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.5625 - loss: 0.6859
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.4375 - loss: 0.7023
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.8125 - loss: 0.6903
[1m1/1[0m 

In [83]:
import time

In [87]:
start = time.time()
time.sleep(1)
end = time.time()
print(end - start)

1.001068353652954


In [104]:
predict = 0.2
print((predict.max() if predict.max() > 0.5 else 1 - predict.max()) * 100)

AttributeError: 'float' object has no attribute 'max'