In [None]:
# Attach Google Drive to the Colab filesystem; later cells save and load the model under this mount.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Model

In [None]:
# Install gdown (if not already installed)
!pip install --upgrade --no-cache-dir gdown

# Download the file using its Google Drive file ID
!gdown --id 1O4w4RT3sLnufJgO9pgCEbnCtLobiJ8_2

In [None]:
!unzip /content/Lung_and_Colon_Cancer.zip -d /content/Lung_dataset_1

In [None]:
# Import necessary modules
import os
import random

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils import resample
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization # Import BatchNormalization
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import plot_model

In [None]:
# --- Image geometry and batching ---
img_width = 224
img_height = 224
batch_size = 32

# Number of output classes; should match the number of subdirectories in Formatted_Colon_Data/Train
num_classes = 2

# --- Dataset locations (reorganized paths) ---
# NOTE(review): doubled_colon_train_path / doubled_colon_test_path are not assigned
# in any cell visible here; they are expected to come from an earlier "Step 1" cell.
# Confirm that cell exists and runs before this one on a fresh kernel.
train_data_path = doubled_colon_train_path
test_data_path = doubled_colon_test_path

print(f"Using Training data path: {train_data_path}")
print(f"Using Test data path: {test_data_path}")

In [None]:
# Preprocessing pipelines.
# Random augmentations applied to images drawn through train_datagen.
augmentation_options = dict(
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Training pipeline: pixel values multiplied by 1/255, augmented, and 20% of the
# directory held back so it can be requested as the 'validation' subset.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.2,
    **augmentation_options,
)

# Test pipeline: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Load datasets
# Options shared by all three directory iterators.
flow_options = dict(
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
)

# Training subset: left at the Keras default (shuffled each epoch).
train_generator = train_datagen.flow_from_directory(
    train_data_path,
    subset='training',
    **flow_options,
)

# Validation subset: shuffle=False keeps the batch order identical to
# validation_generator.classes / .filenames, so predictions made from this
# generator can be compared index-by-index with its labels. Shuffling gives no
# benefit for validation metrics.
# NOTE(review): this subset is still drawn through train_datagen, so validation
# images receive the same random shear/zoom/flip as training images — confirm
# whether that is intended.
validation_generator = train_datagen.flow_from_directory(
    train_data_path,
    subset='validation',
    shuffle=False,
    **flow_options,
)

# Test set: fixed order for the same reason as the validation subset.
test_generator = test_datagen.flow_from_directory(
    test_data_path,
    shuffle=False,
    **flow_options,
)

In [None]:
# Report how many images each iterator found on disk
n_train_samples = train_generator.samples
n_validation_samples = validation_generator.samples
n_test_samples = test_generator.samples

print(f"Number of training samples: {n_train_samples}")
print(f"Number of validation samples: {n_validation_samples}")
print(f"Number of test samples: {n_test_samples}")

In [None]:
# Base number of images to show (doubled below)
num_images = 6

# Draw one batch of augmented images with their one-hot labels
augmented_images, labels = next(train_generator)

# Stack the batch on top of itself so twice as many tiles are available
duplicated_images = np.concatenate((augmented_images, augmented_images), axis=0)
duplicated_labels = np.concatenate((labels, labels), axis=0)

# Show double the base count, capped at the number of tiles available
num_images = min(2 * num_images, len(duplicated_images))

# Invert the generator's name -> index mapping into index -> name
class_indices = dict((index, name) for name, index in train_generator.class_indices.items())

# Two-row grid; the column count is computed once and reused for tile placement
n_cols = num_images // 2
fig, axes = plt.subplots(2, n_cols, figsize=(15, 10))

for i in range(num_images):
    row, col = divmod(i, n_cols)
    ax = axes[row, col]
    # Clip to [0, 1] so imshow receives values in its expected float range
    ax.imshow(np.clip(duplicated_images[i], 0, 1))
    ax.axis("off")
    # The title is the class name of the one-hot label's hot position
    ax.set_title(class_indices[np.argmax(duplicated_labels[i])])

plt.tight_layout()
plt.show()

In [None]:
# Build the model
def _conv_block(filters, **conv_kwargs):
    """Return one Conv2D(3x3, relu) -> BatchNormalization -> MaxPooling2D(2x2) stage as a list of layers."""
    return [
        Conv2D(filters, (3, 3), activation='relu', **conv_kwargs),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
    ]


# Three convolutional stages with 32, 64 and 128 filters; only the first declares the input shape.
feature_layers = (
    _conv_block(32, input_shape=(img_width, img_height, 3))
    + _conv_block(64)
    + _conv_block(128)
)

# Classifier head: flatten, one hidden dense layer, dropout, softmax over the classes.
head_layers = [
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),  # Regularization to prevent overfitting
    Dense(num_classes, activation='softmax'),  # Output layer for classification
]

model = Sequential(feature_layers + head_layers)

In [None]:
# Compile the model: Adam optimizer, categorical cross-entropy on the one-hot
# labels, and accuracy tracked as the reported metric.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Summarize the model: print an overview of the layers built above as a sanity
# check before training.
model.summary()

In [None]:
# Train the model.
# steps_per_epoch / validation_steps are intentionally not passed. Both
# generators report their own length, so Keras runs every batch of each split
# once per epoch. The previous `samples // batch_size` values dropped the final
# partial batch (those validation images were never scored), evaluated to 0 for
# a split smaller than one batch, and on recent Keras versions can trigger
# "input ran out of data" interruptions on later epochs.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,  # Adjust based on your needs
)

In [None]:
# Save the model
# Single source of truth for the location, used by both the save and the reload.
model_save_path = '/content/drive/MyDrive/API/Colon_cancer_detector.keras'

# Create the target folder first so a missing directory on Drive cannot abort
# the save after training has already finished.
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
model.save(model_save_path)
print("Model saved to Colon_cancer_detector.keras")

# Load the model (for inference); this also confirms the saved file is readable
loaded_model = load_model(model_save_path)
print("Model loaded from Colon_cancer_detector.keras")

In [None]:
# Evaluate the loaded model
# Uses loaded_model (the copy read back from disk) rather than the in-memory
# `model`, so the reported accuracy is that of the artifact that was saved.
test_loss, test_accuracy = loaded_model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

In [None]:
# Predictions for confusion matrix and classification report.
# True labels and predictions are collected from the *same* batches, so they are
# aligned even if the generator shuffles its samples. Reading
# validation_generator.classes (directory order) next to predictions from a
# shuffled generator would pair each prediction with an unrelated label.
class_names = sorted(validation_generator.class_indices,
                     key=validation_generator.class_indices.get)  # names ordered by class id
class_ids = list(range(len(class_names)))

true_batches = []
prob_batches = []
for batch_index in range(len(validation_generator)):
    batch_images, batch_labels = validation_generator[batch_index]
    true_batches.append(np.argmax(batch_labels, axis=1))  # one-hot -> class id
    prob_batches.append(model.predict_on_batch(batch_images))

y_true = np.concatenate(true_batches)
y_pred_probs = np.concatenate(prob_batches)
y_pred = np.argmax(y_pred_probs, axis=1)

# Confusion Matrix (labels pinned so its shape always matches the tick labels)
conf_matrix = confusion_matrix(y_true, y_pred, labels=class_ids)

# Plot Confusion Matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names,
            yticklabels=class_names)
plt.title('Confusion Matrix')
plt.ylabel('Actual Class')
plt.xlabel('Predicted Class')
plt.show()

# Classification Report
class_report = classification_report(y_true, y_pred, labels=class_ids, target_names=class_names)
print("Classification Report:\n", class_report)

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model

# Load the trained model from Drive, failing early with a clear error if the file is absent
model_path = '/content/drive/MyDrive/API/Colon_cancer_detector.keras'
if os.path.exists(model_path):
    loaded_model = load_model(model_path)
else:
    raise FileNotFoundError(f"Model file not found at {model_path}")

# Display names indexed by predicted class id: index 0 is Malignant, index 1 is Benign.
# NOTE(review): this order is hard-coded and must agree with the class_indices
# mapping the training generator produced — confirm against that mapping.
class_names_for_prediction = [
    'Malignant (Adenocarcinoma)',
    'Benign (Benign Tissue)',
]


def predict_image(img_path):
    """
    Classify one image file with the loaded model and show the outcome.

    Prints the image path, the predicted class name and the confidence as a
    percentage, then displays the original image titled with the prediction.
    If no file exists at ``img_path``, a message is printed and the function
    returns without predicting. Nothing is returned in either case.
    """
    # Guard clause: bail out quietly when the file is missing
    if not os.path.exists(img_path):
        print(f"Image file not found at {img_path}. Please upload the image or check the path.")
        return

    # Read the image at 224x224, scale pixel values by 1/255 and prepend a batch axis
    resized = image.load_img(img_path, target_size=(224, 224))
    pixels = image.img_to_array(resized) / 255.0
    batch = np.expand_dims(pixels, axis=0)

    # Run the model; the arg-max position is the class, the max value the confidence
    prediction = loaded_model.predict(batch)
    predicted_class_index = np.argmax(prediction, axis=1)[0]
    confidence = np.max(prediction) * 100

    # Text report
    print(f"🖼️ Image: {img_path}")
    print(f"🔍 Prediction: {class_names_for_prediction[predicted_class_index]}")
    print(f"⚡ Confidence: {confidence:.2f}%")

    # Visual report: the image at its original size, titled with the prediction
    original = image.load_img(img_path)
    plt.imshow(original)
    plt.axis('off')
    plt.title(f"Predicted: {class_names_for_prediction[predicted_class_index]} ({confidence:.2f}%)")
    plt.show()

# Example: Predict a colon cancer image
# NOTE(review): this is a hard-coded absolute path under /content. If no file
# exists there, predict_image prints a "not found" message and returns without
# making a prediction.
img_path_to_predict = "/content/WhatsApp Image 2025-04-08 at 12.26.53_92940954.jpg"
predict_image(img_path_to_predict)