<a href="https://colab.research.google.com/github/seanpaz478/USDAAI521Team1FinalProject/blob/main/AAI_521FinalProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing necessary libraries
import os
import warnings

import kagglehub
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Fetch the histopathology image set through kagglehub; `path` is whatever
# location dataset_download() reports for the downloaded copy.
DATASET_HANDLE = "andrewmvd/lung-and-colon-cancer-histopathological-images"
path = kagglehub.dataset_download(DATASET_HANDLE)

In [None]:
# Loading and preprocessing the dataset
def find_class_root(root):
    """Descend through single-folder wrapper directories below ``root``.

    flow_from_directory() turns every immediate subdirectory into one class.
    Pointing it at the raw download directory produced
    "Found ... images belonging to 1 classes", i.e. a lone wrapper folder was
    taken as the only class and every label was 0. This helper walks down
    while a directory contains exactly one subdirectory and returns the first
    directory that has zero or several subdirectories.

    Args:
        root: Directory to start from.

    Returns:
        Path of the directory that should be handed to flow_from_directory().
    """
    current = root
    while True:
        subdirs = sorted(
            entry.name for entry in os.scandir(current) if entry.is_dir()
        )
        if len(subdirs) != 1:
            return current
        current = os.path.join(current, subdirs[0])


data_dir = find_class_root(path)

# Augmentation (random horizontal flips) is applied to the training subset only.
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    horizontal_flip=True
)

# Same rescaling and same split fraction, but no random augmentation, so the
# validation metrics are computed on unmodified images.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

train_data = datagen.flow_from_directory(
    data_dir,
    target_size=(150, 150),
    batch_size=64,
    subset='training',
    class_mode='binary'
)

val_data = val_datagen.flow_from_directory(
    data_dir,
    target_size=(150, 150),
    batch_size=64,
    subset='validation',
    class_mode='binary',
    shuffle=False  # keep batch order aligned with val_data.classes
)

# class_mode='binary' (and the single sigmoid unit of the model) only make
# sense with exactly two classes; fail loudly instead of training on a
# degenerate label set.
# NOTE(review): the labels are whatever the two folders under `data_dir`
# represent - confirm this is the intended classification target.
if train_data.num_classes != 2:
    raise ValueError(
        f"Expected exactly 2 class folders under {data_dir!r}, "
        f"found {train_data.num_classes}: {train_data.class_indices}"
    )
print("Class indices:", train_data.class_indices)

Found 20000 images belonging to 1 classes.
Found 5000 images belonging to 1 classes.


In [None]:
# Using MobileNetV2 pre-trained on ImageNet as the (frozen) base
base_model = tf.keras.applications.MobileNetV2(input_shape=(150, 150, 3),
                                               include_top=False,
                                               weights='imagenet'
)

# Freeze the pre-trained weights so that only the new head is trained
base_model.trainable = False

# Building CNN off of pre-trained model
model = Sequential([
    # The data generators rescale pixels to [0, 1], whereas the ImageNet
    # MobileNetV2 weights were trained on inputs scaled to [-1, 1]
    # (what mobilenet_v2.preprocess_input produces). Map [0, 1] -> [-1, 1].
    tf.keras.layers.Rescaling(scale=2.0, offset=-1.0),
    base_model,
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # Single sigmoid unit: probability of the class with index 1
    Dense(1, activation='sigmoid')
])

  base_model = tf.keras.applications.MobileNetV2(input_shape=(150, 150, 3),


In [None]:
# Configure the optimisation objective for the binary classifier
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Early stopping: end training once validation accuracy has gone 3 epochs
# without improving, and restore the best weights seen
early_stopping = EarlyStopping(
    restore_best_weights=True,
    patience=3,
    monitor='val_accuracy',
)

# Train for at most 50 epochs, validating on val_data after every epoch
history = model.fit(
    train_data,
    validation_data=val_data,
    callbacks=[early_stopping],
    epochs=50,
)

Epoch 1/50


  self._warn_if_super_not_called()


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m559s[0m 2s/step - accuracy: 0.9906 - loss: 0.0228 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 2/50
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m552s[0m 2s/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 3/50
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m542s[0m 2s/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 4/50
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m540s[0m 2s/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00


In [None]:
# Evaluate on the validation subset.
# Labels are collected from the same batches as the predictions, so the two
# stay aligned even if the generator shuffles (val_data.classes is in file
# order, which does not match the order of a shuffled generator).
val_true_batches, val_pred_batches = [], []
for batch_index in range(len(val_data)):
    batch_images, batch_labels = val_data[batch_index]
    val_true_batches.append(batch_labels)
    val_pred_batches.append(model.predict_on_batch(batch_images))

val_true = np.concatenate(val_true_batches).astype(int)
val_preds = np.concatenate(val_pred_batches)  # sigmoid outputs, one column

# The model has a single sigmoid output, so np.argmax(..., axis=1) would
# always return 0; threshold the probability at 0.5 instead.
val_pred_labels = (val_preds[:, 0] >= 0.5).astype(int)
print(classification_report(val_true, val_pred_labels))

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 1s/step
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5000

    accuracy                           1.00      5000
   macro avg       1.00      1.00      1.00      5000
weighted avg       1.00      1.00      1.00      5000



In [None]:
# Display Keras' summary of the assembled network
model.summary()

In [None]:
# Run one forward pass so the model has been called before it is inspected.
# train_data[0] is an (images, labels) batch; only the images are model inputs.
warmup_images, _warmup_labels = train_data[0]
_ = model.predict(warmup_images)

# Using Grad-CAM to visualize model behavior
def grad_cam(model, img_array, layer_name='dense_3'):
    """Compute a Grad-CAM heatmap for one image.

    The forward pass is run layer by layer over ``model.layers`` inside a
    GradientTape instead of building a sub-model from ``model.inputs`` /
    ``model.output``: those symbolic attributes are not defined for this
    Sequential model and raise "The layer ... has never been called".

    Args:
        model: Keras model whose top-level ``layers`` can be applied in
            sequence (e.g. a ``Sequential``) and whose final output has the
            score of interest in column 0.
        img_array: One image, either unbatched ``(H, W, C)`` or already
            batched ``(1, H, W, C)``, preprocessed the same way as the
            training data.
        layer_name: Name of the top-level layer whose 4-D output is used as
            the feature map. If no top-level layer with that name produces a
            4-D output (a Dense layer such as the default cannot, and
            auto-generated names change between sessions), the last top-level
            layer with a 4-D output is used and a warning is emitted.

    Returns:
        A 2-D tensor ``(h, w)`` with the non-negative class-activation map at
        the resolution of the chosen feature map.

    Raises:
        ValueError: If ``img_array`` is neither 3-D nor 4-D, or if no
            top-level layer produces a 4-D output.
    """
    images = tf.cast(tf.convert_to_tensor(img_array), tf.float32)
    if images.shape.rank == 3:
        # Add the batch axis ourselves so callers may pass a single image.
        images = images[tf.newaxis, ...]
    elif images.shape.rank != 4:
        raise ValueError(
            "img_array must have shape (H, W, C) or (1, H, W, C), "
            f"got {tuple(images.shape)}"
        )

    with tf.GradientTape() as tape:
        # Watching the input makes the tape record every downstream op, even
        # through frozen layers that have no trainable variables.
        tape.watch(images)
        activations = images
        feature_maps = {}
        for layer in model.layers:
            activations = layer(activations, training=False)
            if activations.shape.rank == 4:
                feature_maps[layer.name] = activations
        loss = activations[:, 0]

    if not feature_maps:
        raise ValueError("model has no top-level layer with a 4-D output")

    if layer_name in feature_maps:
        conv_outputs = feature_maps[layer_name]
    else:
        fallback_name = list(feature_maps)[-1]
        warnings.warn(
            f"Layer {layer_name!r} is not a top-level layer with a 4-D output; "
            f"using {fallback_name!r} for Grad-CAM instead."
        )
        conv_outputs = feature_maps[fallback_name]

    grads = tape.gradient(loss, conv_outputs)
    # Channel importance = gradient averaged over the spatial axes (H, W).
    weights = tf.reduce_mean(grads, axis=(1, 2), keepdims=True)

    cam = tf.reduce_sum(weights * conv_outputs, axis=-1)
    cam = tf.nn.relu(cam)
    return cam[0]

def overlay_cam_on_image(image, cam, alpha=0.4):
    """Blend a class-activation map onto an image as a jet-coloured heatmap.

    Args:
        image: RGB image array ``(H, W, 3)`` with values in the 0-255 range.
        cam: Activation map, 2-D ``(h, w)`` or 3-D ``(h, w, 1)``; tensor or
            array. It is resized to the image's height and width.
        alpha: Weight of the heatmap in the blend; the image gets ``1 - alpha``.

    Returns:
        ``uint8`` array ``(H, W, 3)`` containing the blended overlay.
    """
    image = np.asarray(image)
    cam = np.asarray(cam, dtype=np.float32)
    if cam.ndim == 2:
        # tf.image.resize needs a channel axis (3-D or 4-D input).
        cam = cam[..., np.newaxis]
    cam_resized = tf.image.resize(cam, image.shape[:2]).numpy()[..., 0]

    # Min-max normalise to [0, 1]; a flat map would otherwise divide by zero.
    value_range = cam_resized.max() - cam_resized.min()
    if value_range > 0:
        cam_resized = (cam_resized - cam_resized.min()) / value_range
    else:
        cam_resized = np.zeros_like(cam_resized)

    # The colormap returns RGBA floats in [0, 1]; drop alpha and bring the
    # colours to the same 0-255 scale as the image before blending.
    heatmap = plt.cm.jet(cam_resized)[..., :3] * 255.0
    overlay = image.astype(np.float32) * (1 - alpha) + heatmap * alpha
    # Cast once, after blending, so the heatmap is not truncated to zero.
    return np.clip(overlay, 0, 255).astype(np.uint8)



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 924ms/step


In [None]:
# Show Grad-CAM overlays for the first image of five training batches
iterator = iter(train_data)
for i in range(5):
    # Getting the image and label
    batch_images, batch_labels = next(iterator)
    img = batch_images[0]
    label = batch_labels[0]

    # Generating Grad-CAM heatmap.
    # The single (H, W, C) image is passed as-is: grad_cam adds the batch axis
    # itself, so expanding it here as well would batch the image twice.
    # The target is the output of the pre-trained base model rather than an
    # auto-numbered Dense layer name, which has no spatial dimensions and
    # changes with the number of layers created in the session.
    cam = grad_cam(model, img, layer_name=base_model.name)

    # Overlaying Grad-CAM on original image (generator pixels are in [0, 1])
    overlay = overlay_cam_on_image((img * 255).astype(np.uint8), cam)

    # Plotting results
    fig, (ax_image, ax_overlay) = plt.subplots(1, 2, figsize=(10, 5))
    ax_image.set_title(f"Original Image (Label: {label})")
    ax_image.imshow(img)
    ax_image.axis("off")

    ax_overlay.set_title("Grad-CAM Overlay")
    ax_overlay.imshow(overlay)
    ax_overlay.axis("off")

    fig.tight_layout()
    plt.show()

ValueError: The layer sequential_1 has never been called and thus has no defined output.