In [1]:
# Import libraries
import numpy as np
import os
import cv2
from sklearn.model_selection import train_test_split
from keras.applications import MobileNetV2
from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D, Dense
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from matplotlib import pyplot

2025-05-11 14:21:27.219729: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746973287.424914      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746973287.492059      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Set your correct dataset directory
dataset_dir = '/kaggle/input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set'


In [3]:
# Define the image dimensions
img_height, img_width = 224, 224

In [4]:

# Load and preprocess the data
def load_data(dataset_dir):
    images = []
    labels = []

    # List the main folders
    main_folders = ['colon_image_sets', 'lung_image_sets']

    for main_folder in main_folders:
        main_path = os.path.join(dataset_dir, main_folder)
        for folder in os.listdir(main_path):
            label = folder
            folder_path = os.path.join(main_path, folder)
            for filename in os.listdir(folder_path):
                img = cv2.imread(os.path.join(folder_path, filename))
                img = cv2.resize(img, (img_height, img_width))
                images.append(img)
                labels.append(label)

    images = np.array(images)
    labels = np.array(labels)
    return images, labels
    
images, labels = load_data(dataset_dir)

In [5]:
# Encode labels (Map class names to numbers)
label_dict = {
    'colon_aca': 0, 
    'colon_n': 1, 
    'lung_aca': 2, 
    'lung_n': 3, 
    'lung_scc': 4
}
labels = np.array([label_dict[label] for label in labels])
labels = to_categorical(labels)

In [6]:
# Split the data into train and test sets
X_train, X_temp, y_train, y_temp = train_test_split(
    images, labels, test_size=0.3, random_state=42)

X_test, X_val, y_test, y_val = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42)

In [7]:
# Load base model
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))

I0000 00:00:1746973624.373237      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [8]:
# Add custom top layers for classification
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dense(5, activation='softmax')  # 5 classes
])

In [9]:
# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

In [10]:
# Define Early Stopping
early_stopping = EarlyStopping(
    monitor='val_loss', # Monitor validation loss
    verbose=1,
    patience=3,         # Stop if no improvement for 3 epochs
    restore_best_weights=True  # Restore best model weights
)
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2)

In [11]:
# Train the model with callbacks
history = model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=200,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, lr_reduce]  
)

Epoch 1/200


I0000 00:00:1746973670.285971      58 service.cc:148] XLA service 0x7ed8000c7540 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1746973670.286630      58 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1746973673.725059      58 cuda_dnn.cc:529] Loaded cuDNN version 90300
E0000 00:00:1746973679.402520      58 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1746973679.599289      58 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m  1/547[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8:51:18[0m 58s/step - accuracy: 0.4375 - loss: 1.6626

I0000 00:00:1746973691.524873      58 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m546/547[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 73ms/step - accuracy: 0.9207 - loss: 0.2505

E0000 00:00:1746973739.525827      59 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1746973739.724584      59 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 128ms/step - accuracy: 0.9208 - loss: 0.2501 - val_accuracy: 0.3312 - val_loss: 41.3568 - learning_rate: 0.0010
Epoch 2/200
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 77ms/step - accuracy: 0.9644 - loss: 0.1071 - val_accuracy: 0.2003 - val_loss: 11.3770 - learning_rate: 0.0010
Epoch 3/200
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 77ms/step - accuracy: 0.9792 - loss: 0.0607 - val_accuracy: 0.5691 - val_loss: 8.4891 - learning_rate: 0.0010
Epoch 4/200
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 77ms/step - accuracy: 0.9844 - loss: 0.0465 - val_accuracy: 0.3853 - val_loss: 7.5680 - learning_rate: 0.0010
Epoch 5/200
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 77ms/step - accuracy: 0.9913 - loss: 0.0278 - val_accuracy: 0.6048 - val_loss: 6.4595 - learning_rate: 0.0010
Epoch 6/200
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [12]:
# After training the model, evaluate on all datasets
train_loss, train_acc = model.evaluate(X_train, y_train, verbose=0)
val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)

In [13]:
# Print the accuracies
print(f"\nTraining Accuracy: {train_acc:.4f}")
print(f"Validation Accuracy: {val_acc:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")


Training Accuracy: 1.0000
Validation Accuracy: 0.9995
Test Accuracy: 0.9995


In [14]:
# Calculate overall training accuracy (which is just train_acc in this case)
print(f"\nOverall Training Accuracy: {train_acc:.4f}")


Overall Training Accuracy: 1.0000
