In [1]:
import os
import math
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# Dataset layout: each organ has its own image-set folder, and each class
# lives in a sub-directory of that folder. Paths are relative to the
# notebook's working directory.
_COLON_SETS = 'lung_colon_image_set/colon_image_sets'
_LUNG_SETS = 'lung_colon_image_set/lung_image_sets'

# Colon classes
colon_aca = _COLON_SETS + '/colon_aca'
colon_benign = _COLON_SETS + '/colon_n'

# Lung classes
lung_aca = _LUNG_SETS + '/lung_aca'
lung_benign = _LUNG_SETS + '/lung_n'
lung_scc = _LUNG_SETS + '/lung_scc'

In [3]:

def resize_images(folder, size=(224, 224)):
    """Resize every image in ``folder`` to ``size``, overwriting the file in place.

    Each directory entry is opened with ``Image.open``. Entries that raise
    ``OSError`` while being opened, resized or saved are reported with a
    printed message and skipped. Images that already have the target size are
    left untouched, so re-running the cell does not re-encode files that were
    resized by an earlier run.

    Args:
        folder: Path of the directory whose entries are processed.
        size: Target ``(width, height)`` in pixels. Defaults to ``(224, 224)``.

    Returns:
        None. The files on disk are modified as a side effect.
    """
    target_size = tuple(size)
    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        try:
            # The context manager releases the source file handle before the
            # file is written back to the same path.
            with Image.open(file_path) as img:
                if img.size == target_size:
                    continue
                resized = img.resize(target_size)
            resized.save(file_path)
        except OSError as e:  # IOError is an alias of OSError in Python 3
            print(f"Skipping file: {file_path} due to error: {e}")
     
# Resize images in each directory
# (the colon folders -- colon_aca, colon_benign -- are currently disabled)
for class_dir in (lung_aca, lung_benign, lung_scc):
    resize_images(class_dir)



In [4]:

def load_images(folder):
    """Load every file in ``folder`` as a NumPy array.

    Entries are read in sorted filename order so the result (and any seeded
    split derived from it) is the same on every run; ``os.listdir`` alone
    returns entries in arbitrary order. Paths are built with ``os.path.join``
    so the function works on any platform, not only with ``\\`` separators.

    Args:
        folder: Path of the directory to read. Every entry is passed to
            ``Image.open``; an unreadable entry raises.

    Returns:
        ``np.array`` built from the list of per-image arrays, in sorted
        filename order. An empty folder yields an empty array.
    """
    images = []
    for filename in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, filename)
        # Convert inside the ``with`` block so pixel data is read before the
        # file handle is released.
        with Image.open(file_path) as img:
            images.append(np.array(img))
    return np.array(images)

In [5]:
# Load images
# One array per lung class, read in the order aca -> benign -> scc.
lung_aca_images, lung_benign_images, lung_scc_images = [
    load_images(class_dir)
    for class_dir in (lung_aca, lung_benign, lung_scc)
]


In [6]:
# augment data
def dataAugmentation(images, **generator_options):
    """Create an ``ImageDataGenerator`` configured for random augmentation.

    The defaults are rotation up to 30 degrees, horizontal/vertical shifts of
    up to 20 %, and random horizontal flips. Extra keyword arguments are
    forwarded to ``ImageDataGenerator`` and override these defaults.

    ``ImageDataGenerator.fit`` only computes dataset statistics for
    ``featurewise_center``, ``featurewise_std_normalization`` and
    ``zca_whitening``. None of them is enabled by default, so the generator is
    fitted on ``images`` only when one of those options is requested. This
    avoids a full floating-point copy of the dataset that yields no statistics.

    Args:
        images: Sample images used to fit the generator when a statistics-based
            option is enabled; otherwise unused.
        **generator_options: Optional ``ImageDataGenerator`` keyword arguments.

    Returns:
        The configured ``ImageDataGenerator`` instance.
    """
    options = {
        'rotation_range': 30,
        'width_shift_range': 0.2,
        'height_shift_range': 0.2,
        'horizontal_flip': True,
    }
    options.update(generator_options)
    datagen = ImageDataGenerator(**options)

    needs_statistics = any(
        options.get(name)
        for name in ('featurewise_center', 'featurewise_std_normalization', 'zca_whitening')
    )
    if needs_statistics:
        datagen.fit(images)
    return datagen

In [7]:
# Split data into train and test
from sklearn.model_selection import train_test_split
def splitData(images, test_size=0.2, random_state=42):
    """Split ``images`` into a training part and a test part.

    Thin wrapper around ``train_test_split``. The defaults reproduce the
    notebook's original behaviour (20 % held out, seed 42).

    Args:
        images: The samples to split.
        test_size: Passed to ``train_test_split`` as ``test_size``.
        random_state: Passed to ``train_test_split`` as ``random_state``.

    Returns:
        A ``(x_train, x_test)`` tuple.
    """
    x_train, x_test = train_test_split(
        images, test_size=test_size, random_state=random_state
    )
    return x_train, x_test


In [20]:
# One augmentation generator per lung class (aca, benign, scc).
lung_aca_datagen, lung_benign_datagen, lung_scc_datagen = [
    dataAugmentation(class_images)
    for class_images in (lung_aca_images, lung_benign_images, lung_scc_images)
]

# Each class is split into train/test on its own, so every class contributes
# the same proportion of samples to both sets.
(
    (lung_aca_train, lung_aca_test),
    (lung_benign_train, lung_benign_test),
    (lung_scc_train, lung_scc_test),
) = [
    splitData(class_images)
    for class_images in (lung_aca_images, lung_benign_images, lung_scc_images)
]

In [21]:
# use cnn to classify images
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam

# Data Augmentation - rotate, shift, flip, zoom to prevent overfitting
# NOTE(review): this generator (including its 1/255 rescale) is defined here
# but is not referenced by any other cell visible in this notebook.
datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# `Input` is not part of the layer imports at the top of this cell, so it is
# imported here to keep the cell runnable on its own.
from tensorflow.keras.layers import Input

# Improved Model Architecture: more layers, more filters, more neurons, diff activation functions
# Four Conv -> BatchNorm -> MaxPool stages followed by a dense classifier head
# with a 3-way softmax output (one unit per lung class).
# The input shape is declared with an explicit `Input` layer rather than the
# `input_shape=` argument on the first Conv2D, which newer Keras versions warn
# about for Sequential models.
model = Sequential([
    Input(shape=(224, 224, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(3, activation='softmax')
])

# One-hot labels + softmax output -> categorical cross-entropy.
model.compile(optimizer=Adam(),
            loss='categorical_crossentropy', 
            metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Lung Cancer

In [None]:
# lung cancer model
# Self-contained pipeline: per-class split -> combine -> validation hold-out
# -> build -> train -> evaluate on the untouched test set.

# NOTE(review): these generators are created but not used by the training
# call below, which is fed the image arrays directly.
lung_aca_datagen = dataAugmentation(lung_aca_images)
lung_benign_datagen = dataAugmentation(lung_benign_images)
lung_scc_datagen = dataAugmentation(lung_scc_images)

lung_aca_train, lung_aca_test = splitData(lung_aca_images)
lung_benign_train, lung_benign_test = splitData(lung_benign_images)
lung_scc_train, lung_scc_test = splitData(lung_scc_images)

# combine datasets
all_train_images = np.concatenate((lung_aca_train, lung_benign_train, lung_scc_train))
test_images = np.concatenate((lung_aca_test, lung_benign_test, lung_scc_test))

# Integer class ids: 0 = lung_aca, 1 = lung_benign, 2 = lung_scc
all_train_class_ids = np.concatenate((np.zeros(len(lung_aca_train)), np.ones(len(lung_benign_train)), np.full(len(lung_scc_train), 2)))
test_class_ids = np.concatenate((np.zeros(len(lung_aca_test)), np.ones(len(lung_benign_test)), np.full(len(lung_scc_test), 2)))

# Hold out a validation set from the TRAINING data only. Early stopping
# selects the best epoch on this set, so the test set must not be used for it;
# otherwise the reported test accuracy is optimistically biased.
# The combined arrays are ordered by class, hence the shuffled, stratified split.
train_images, val_images, train_class_ids, val_class_ids = train_test_split(
    all_train_images,
    all_train_class_ids,
    test_size=0.1,
    random_state=42,
    stratify=all_train_class_ids,
)

# one hot encoding
train_labels = to_categorical(train_class_ids, 3)
val_labels = to_categorical(val_class_ids, 3)
test_labels = to_categorical(test_class_ids, 3)

print("Training images shape:", train_images.shape)
print("Training labels shape:", train_labels.shape)
print("Validation images shape:", val_images.shape)
print("Validation labels shape:", val_labels.shape)
print("Testing images shape:", test_images.shape)
print("Testing labels shape:", test_labels.shape)
print("Training images sample data:", train_images[:1])
print("Training labels sample data:", train_labels[:1])

# `Input` is not part of the notebook's existing layer imports, so it is
# imported here to keep the cell runnable on its own.
from tensorflow.keras.layers import Input

# NOTE(review): the arrays are passed to the network exactly as loaded; the
# 1/255 rescale configured on the `datagen` defined in an earlier cell is not
# applied here. Confirm whether inputs should be scaled before training.
model = Sequential([
    # Explicit Input layer instead of `input_shape=` on the first Conv2D,
    # which newer Keras versions warn about for Sequential models.
    Input(shape=(224, 224, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(3, activation='softmax')
])

# `run_eagerly=True` was dropped: it is a debugging switch that disables graph
# compilation and slows training without changing the training objective.
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Setup EarlyStopping
early_stopping_monitor = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True  # Optionally restore model weights from the epoch with the best value of the monitored quantity.
)

model.fit(
    train_images,
    train_labels,
    epochs=10,
    batch_size=32,
    validation_data=(val_images, val_labels),
    callbacks=[early_stopping_monitor]  # Include EarlyStopping in the training phase
)

#test model
# The test set is used only from this point on.
model.evaluate(test_images, test_labels)

# predict
predictions = model.predict(test_images)
# get accuracy
# Fraction of test samples whose arg-max prediction matches the one-hot label.
accuracy = np.mean(np.argmax(predictions, axis=1) == np.argmax(test_labels, axis=1))

print("Accuracy percent: ", accuracy * 100)


In [None]:
# save model
# Write the trained model to disk in the notebook's working directory.
model_path = 'lung_cancer_model.h5'
model.save(model_path)


