In [None]:
from collections import Counter
from os import listdir # to get the files and directories
from os.path import isdir # to keep only the class sub-directories of the dataset
from os.path import join # to joint base path with sub path

import cv2 # handle the images
from imblearn.under_sampling import RandomUnderSampler
import matplotlib.pyplot as plt # to visualize/plot the images
import numpy as np # numpy library will help us to work with the numbers and arrays
import pandas as pd
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import (
    Input,
    Dense,
    Conv2D,
    MaxPooling2D,
    Flatten,
    Dropout,
)
from tensorflow.keras.models import Sequential

In [None]:
base_path = "./The IQ-OTHNCCD lung cancer dataset/"
# "." is the current working directory, so the dataset folder must sit next to the notebook.
# Keep only sub-directories (a stray file in the dataset root must not become a "class")
# and sort them: listdir() returns entries in arbitrary order, and that order decides
# how the images and labels are laid out further down.
categories = sorted(
    entry for entry in listdir(base_path) if isdir(join(base_path, entry))
)
print(categories)
# Expected: 3 classes (Benign, Malignant, Normal)

In [None]:
images = {}  # maps each class name to the list of its grayscale images
for category in categories:
    images[category] = []  # a class has many images, all collected in one list
    category_path = join(base_path, category)
    # Sorted so the image order (and therefore the later train/test split) is reproducible.
    for img_name in sorted(listdir(category_path)):
        img_path = join(category_path, img_name)
        # Read as a single-channel (grayscale) image.
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread does not raise on unreadable / non-image files, it returns None;
            # storing that None would crash later when `img.shape` is accessed.
            print(f"Skipping unreadable file: {img_path}")
            continue
        images[category].append(img)

In [None]:
def resize_images(target_size):
    """Resize every image held in the global ``images`` dict, in place.

    Parameters
    ----------
    target_size : sequence of two ints
        Desired size as ``(width, height)``, the order ``cv2.resize`` expects
        for its ``dsize`` argument.

    Images that already have the requested size are left untouched.
    """
    width, height = target_size
    # ndarray.shape is (rows, cols) = (height, width), i.e. the reverse of dsize.
    # Comparing against target_size directly is only correct for square targets.
    expected_shape = (height, width)
    for category in categories:
        # enumerate gives the index needed to overwrite the entry in the original list
        for index, img in enumerate(images[category]):
            if img.shape != expected_shape:
                images[category][index] = cv2.resize(img, (width, height))

# Most source images are (512, 512); everything is shrunk to (128, 128) because
# training the model on full-size (512, 512) images takes a long time.
resize_images(target_size=(128, 128))

In [None]:
# Turn each per-class list of images into one numpy array for efficient numerical work.
# The dict is updated in place, so every existing reference to `images` sees the arrays.
images.update({category: np.array(images[category]) for category in categories})

In [None]:
# Stack the per-class image arrays into one array, class after class
X = np.concatenate([images[category] for category in categories])
# One label per image, in the same class order as X: the class name once for each of its images
y = np.array([category for category in categories for _ in images[category]])

In [None]:
# Collapse the labels into a binary problem: malignant vs. everything else
one_class = ["Malignant cases", "Non-Malignant cases"]
is_malignant = y == one_class[0]  # boolean mask, True where the folder label is the malignant class
y_one_class = np.where(is_malignant, one_class[0], one_class[1])

In [None]:
# 70/30 train/test split. `stratify` keeps the malignant / non-malignant ratio the same
# in both parts, and the fixed `random_state` makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_one_class,
    test_size=0.3,
    stratify=y_one_class,
    random_state=42,
)

In [None]:
# Learn the string -> integer mapping from the training labels only,
# then apply that same mapping to both splits.
# LabelEncoder numbers the classes in sorted order, so "Malignant cases" becomes 0
# and "Non-Malignant cases" becomes 1.
le = LabelEncoder().fit(y_train)
y_train_encoded = le.transform(y_train)
y_test_encoded = le.transform(y_test)

In [None]:
# Conv2D works on (samples, height, width, channels); the grayscale images have no
# channel axis yet, so append one of size 1.
# Both splits get the axis so train and test data always have the same rank, and the
# ndim guard keeps the cell safe to re-run (no second trailing axis is added).
if X_train.ndim == 3:
    X_train = X_train[..., np.newaxis]
if X_test.ndim == 3:
    X_test = X_test[..., np.newaxis]

In [None]:
# Scale pixel values from [0, 255] down to [0, 1].
# Only integer (raw pixel) arrays are scaled: after the first run the arrays are float,
# so re-running this cell does not divide by 255 a second time.
if X_train.dtype.kind in "ui":
    X_train = X_train / 255.0
if X_test.dtype.kind in "ui":
    X_test = X_test / 255.0

In [None]:
# Small CNN: three convolution + max-pooling stages with a shrinking number of filters,
# followed by a single sigmoid unit for the binary decision.
model = Sequential(
    [
        # Input layer: shape of one training sample
        Input(shape=X_train[0].shape),
        # Hidden layers
        Conv2D(filters=64, kernel_size=(3, 3), activation="relu", name="conv2d_1"),
        MaxPooling2D(pool_size=(2, 2), name="max2d_1"),
        Conv2D(filters=32, kernel_size=(3, 3), activation="relu", name="conv2d_2"),
        MaxPooling2D(pool_size=(2, 2), name="max2d_2"),
        Conv2D(filters=8, kernel_size=(3, 3), activation="relu", name="conv2d_3"),
        MaxPooling2D(pool_size=(2, 2), name="max2d_3"),
        # Flatten the feature maps into one vector for the dense output
        Flatten(name="flatten"),
        # Output layer
        Dense(1, activation="sigmoid", name="out"),
    ]
)

model.summary()

In [None]:
# Adam optimizer with binary cross-entropy (the loss that pairs with the single
# sigmoid output); accuracy is reported during training.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Train for 10 epochs in mini-batches of 32.
# NOTE(review): the test split is also used as validation data here, so the
# validation curve is not an independent estimate if it is used to tune the model.
history = model.fit(
    x=X_train,
    y=y_train_encoded,
    validation_data=(X_test, y_test_encoded),
    batch_size=32,
    epochs=10,
)

In [None]:
# Per-epoch accuracy recorded by model.fit for the training and validation data
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

# Number the epochs from 1 so the x-axis matches the "Epoch 1/10" numbering Keras logs
epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
# Axis labels so the figure can be read on its own
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc=0)
plt.show()