In [3]:
import kagglehub
import os
import random
import shutil
from imutils import paths
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib
matplotlib.use('Agg')

In [4]:
# Download the dataset
print("Downloading dataset...")
path = kagglehub.dataset_download("paultimothymooney/breast-histopathology-images")
print("Path to dataset files:", path)

Downloading dataset...
Path to dataset files: /kaggle/input/breast-histopathology-images


In [5]:
# Define paths
input_dataset = path  # Use the downloaded dataset path directly
base_path = "datasets/idc"
train_path = os.path.sep.join([base_path, "training"])
val_path = os.path.sep.join([base_path, "validation"])
test_path = os.path.sep.join([base_path, "testing"])
train_split = 0.8
val_split = 0.1

In [6]:
# List and shuffle images
print("Listing images...")
originalPaths = list(paths.list_images(input_dataset))
random.seed(7)
random.shuffle(originalPaths)
print(f"Found {len(originalPaths)} images")

Listing images...
Found 555048 images


In [7]:
# Split into train, val, test
index = int(len(originalPaths) * train_split)
trainPaths = originalPaths[:index]
testPaths = originalPaths[index:]
index = int(len(trainPaths) * val_split)
valPaths = trainPaths[:index]
trainPaths = trainPaths[index:]
print(f"Train: {len(trainPaths)}, Validation: {len(valPaths)}, Test: {len(testPaths)}")

Train: 399635, Validation: 44403, Test: 111010


In [8]:
# Define datasets
datasets = [
    ("training", trainPaths, train_path),
    ("validation", valPaths, val_path),
    ("testing", testPaths, test_path)
]

In [None]:
# Organize files into directories
for (setType, originalPaths, basepath) in datasets:
    print(f'Building {setType} set')
    if not os.path.exists(basepath):
        print(f'Building directory {basepath}')
        os.makedirs(basepath)
    for path in originalPaths:
        file = path.split(os.path.sep)[-1]
        label = "0" if "class0" in file else "1"  # Extract label from filename
        labelPath = os.path.sep.join([basepath, label])
        if not os.path.exists(labelPath):
            print(f'Building directory {labelPath}')
            os.makedirs(labelPath)
        newPath = os.path.sep.join([labelPath, file])
        shutil.copy2(path, newPath)

Building training set
Building directory datasets/idc/training
Building directory datasets/idc/training/0
Building directory datasets/idc/training/1


In [None]:
# Define the CancerNet model
class CancerNet:
    @staticmethod
    def build(width, height, depth, classes):
        model = tf.keras.models.Sequential()
        inputShape = (height, width, depth)
        channelDim = -1
        if tf.keras.backend.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            channelDim = 1

        model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=inputShape))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))
        model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
        model.add(tf.keras.layers.Dropout(0.25))

        model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))
        model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))
        model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
        model.add(tf.keras.layers.Dropout(0.25))

        model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))
        model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))
        model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))
        model.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
        model.add(tf.keras.layers.Dropout(0.25))

        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dense(units=256, activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(axis=channelDim))
        model.add(tf.keras.layers.Dropout(0.5))

        model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))  # Changed to 'sigmoid'
        return model

# Build the model
print("Building model...")
model = CancerNet.build(width=64, height=64, depth=3, classes=2)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Set up data generator for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

print("Loading training data...")
training_set = train_datagen.flow_from_directory(
    train_path,
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary'
)

# Verify data loading
print("Class indices:", training_set.class_indices)

# Optional: Train the model (uncomment to run)
# print("Training model...")
# model.fit(training_set, epochs=10)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

print("Loading validation data...")
# Create an ImageDataGenerator instance for validation data with the same configuration as training data.
val_datagen = ImageDataGenerator(rescale=1./255) # Only rescale is needed for validation.
validation_set = val_datagen.flow_from_directory(
    val_path,
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary'
)


# Load test data
print("Loading test data...")
# For consistency, use the same ImageDataGenerator instance for the test data as well.
test_datagen = ImageDataGenerator(rescale=1./255) # Only rescale is needed for test.
test_set = test_datagen.flow_from_directory(
    test_path,
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary',
    shuffle=False  # Keep order for evaluation
)

# Verify data loading
print("Training class indices:", training_set.class_indices)

# Train the model
print("Training model...")
history = model.fit(
    training_set,
    epochs=5,
    validation_data=validation_set
)

# Evaluate the model on test set
print("Evaluating model on test set...")
test_loss, test_accuracy = model.evaluate(test_set)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

# Optional: Save the model
model.save("cancernet_model.h5")
print("Model saved as 'cancernet_model.h5'")

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np
import matplotlib.pyplot as plt
import os
from imutils import paths

# Load the trained model
model = tf.keras.models.load_model("cancernet_model.h5")
print("Model loaded successfully.")

# Function to preprocess a single image
def preprocess_image(image_path, target_size=(64, 64)):
    img = load_img(image_path, target_size=target_size)
    img_array = img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    return img_array

# Function to predict and display result
def predict_breast_cancer(image_path, save_output=False, output_path="prediction_output.png"):
    # Preprocess the image
    processed_img = preprocess_image(image_path)

    # Make prediction
    prediction = model.predict(processed_img)[0][0]
    label = "Cancerous (Class 1)" if prediction >= 0.5 else "Non-Cancerous (Class 0)"
    confidence = prediction if prediction >= 0.5 else 1 - prediction

    # Print result
    print(f"Image: {image_path}")
    print(f"Prediction: {label}")
    print(f"Confidence: {confidence:.4f}")

    # Load and display the image
    img = load_img(image_path)
    plt.figure(figsize=(6, 6))  # Set figure size for better visibility
    plt.imshow(img)
    plt.title(f"{label} (Confidence: {confidence:.4f})", fontsize=12)
    plt.axis("off")
    plt.show()  # This displays the image

    # Optionally save the output
    if save_output:
        plt.figure(figsize=(6, 6))  # Recreate figure for saving
        plt.imshow(img)
        plt.title(f"{label} (Confidence: {confidence:.4f})", fontsize=12)
        plt.axis("off")
        plt.savefig(output_path, bbox_inches='tight', dpi=100)
        print(f"Prediction saved to {output_path}")
        plt.close()  # Close the figure to free memory

    return label, confidence

# Find an image from the testing set
test_images = list(paths.list_images("datasets/idc/testing/"))
if not test_images:
    print("No images found in datasets/idc/testing/. Check your dataset structure.")
    exit()

# Use the first available image
image_path = test_images[0]  # You can change this index (e.g., test_images[1]) to try different images
print(f"Selected image: {image_path}")

# Predict and display the image
if os.path.exists(image_path):
    label, confidence = predict_breast_cancer(image_path, save_output=True, output_path="prediction_result.png")
else:
    print(f"Image not found at {image_path}. Check the path or dataset.")



Model loaded successfully.
Selected image: datasets/idc/testing/1/9075_idx5_x1301_y351_class1.png
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 338ms/step
Image: datasets/idc/testing/1/9075_idx5_x1301_y351_class1.png
Prediction: Cancerous (Class 1)
Confidence: 0.9079
Prediction saved to prediction_result.png


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np
import matplotlib.pyplot as plt
import os
from imutils import paths

# Load the trained model
model = tf.keras.models.load_model("cancernet_model.h5")
print("Model loaded successfully.")

# Function to preprocess a single image
def preprocess_image(image_path, target_size=(64, 64)):
    img = load_img(image_path, target_size=target_size)
    img_array = img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    return img_array

# Function to predict and display result
def predict_breast_cancer(image_path, save_output=False, output_path="prediction_output.png"):
    # Preprocess the image
    processed_img = preprocess_image(image_path)

    # Make prediction
    prediction = model.predict(processed_img)[0][0]
    label = "Cancerous (Class 1)" if prediction >= 0.5 else "Non-Cancerous (Class 0)"
    confidence = prediction if prediction >= 0.5 else 1 - prediction

    # Print result
    print(f"Image: {image_path}")
    print(f"Prediction: {label}")
    print(f"Confidence: {confidence:.4f}")

    # Load and display the image
    img = load_img(image_path)
    plt.figure(figsize=(6, 6))  # Set figure size for better visibility
    plt.imshow(img)
    plt.title(f"{label} (Confidence: {confidence:.4f})", fontsize=12)
    plt.axis("off")
    plt.show()  # This displays the image

    # Optionally save the output
    if save_output:
        plt.figure(figsize=(6, 6))  # Recreate figure for saving
        plt.imshow(img)
        plt.title(f"{label} (Confidence: {confidence:.4f})", fontsize=12)
        plt.axis("off")
        plt.savefig(output_path, bbox_inches='tight', dpi=100)
        print(f"Prediction saved to {output_path}")
        plt.close()  # Close the figure to free memory

    return label, confidence

# Find an image from the testing set
test_images = list(paths.list_images("datasets/idc/testing/"))
if not test_images:
    print("No images found in datasets/idc/testing/. Check your dataset structure.")
    exit()

# Use the first available image
image_path = test_images[0]  # You can change this index (e.g., test_images[1]) to try different images
print(f"Selected image: {image_path}")

# Predict and display the image
if os.path.exists(image_path):
    label, confidence = predict_breast_cancer(image_path, save_output=True, output_path="prediction_result.png")
else:
    print(f"Image not found at {image_path}. Check the path or dataset.")

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
from imutils import paths
import os

# ... (rest of the code before loading the model) ...

# Load the trained model
model = load_model("cancernet_model.h5")
# Re-compile the model after loading to associate the optimizer with the loaded variables
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
print("Model loaded and recompiled successfully.")

# ... (rest of the code) ...

# Data generators
train_datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
train_path = "datasets/idc/training"
training_set = train_datagen.flow_from_directory(
    train_path,
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary'
)

val_datagen = ImageDataGenerator(rescale=1./255)
val_path = "datasets/idc/validation"
validation_set = val_datagen.flow_from_directory(
    val_path,
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary'
)

test_datagen = ImageDataGenerator(rescale=1./255)
test_path = "datasets/idc/testing"
test_set = test_datagen.flow_from_directory(
    test_path,
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary',
    shuffle=False  # Important for aligning predictions with true labels
)

# Train the model (to get history)
print("Training model...")
history = model.fit(
    training_set,
    steps_per_epoch=training_set.samples // training_set.batch_size,
    epochs=5,  # Adjust epochs as needed
    validation_data=validation_set,
    validation_steps=validation_set.samples // validation_set.batch_size,
    verbose=1  # Add verbose for progress output
)

# Plot training history
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss', color='blue')
plt.plot(history.history['val_loss'], label='Validation Loss', color='orange')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy', color='blue')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy', color='orange')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.savefig('training_history.png')  # Save the plot
plt.show()

# Evaluate the model on test set
print("Evaluating the model...")
# Use model.predict with the generator
predictions = model.predict(test_set, steps=np.ceil(test_set.samples / test_set.batch_size), verbose=1)
predicted_labels = (predictions > 0.5).astype(int).flatten()
true_labels = test_set.classes[:len(predicted_labels)]  # Match lengths

# Compute confusion matrix and classification report
conf_matrix = confusion_matrix(true_labels, predicted_labels)
class_report = classification_report(true_labels, predicted_labels, target_names=['Non-Cancerous', 'Cancerous'])
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)

# Custom metrics: Sensitivity, Specificity, Accuracy
tn, fp, fn, tp = conf_matrix.ravel()
accuracy = (tp + tn) / (tp + tn + fp + fn)
sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0  # Recall for Class 1
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

print("\nAdditional Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")

# Print sensitivity value (as in your example)
print(f"Sensitivity: {sensitivity}")



Model loaded and recompiled successfully.
Found 255813 images belonging to 2 classes.
Found 42610 images belonging to 2 classes.
Found 99955 images belonging to 2 classes.
Training model...
Epoch 1/5


  self._warn_if_super_not_called()


[1m7994/7994[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4824s[0m 603ms/step - accuracy: 0.8725 - loss: 0.3054 - val_accuracy: 0.8747 - val_loss: 0.3186
Epoch 2/5
[1m   1/7994[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:30:31[0m 679ms/step - accuracy: 0.8750 - loss: 0.2755



[1m7994/7994[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 21ms/step - accuracy: 0.8750 - loss: 0.2755 - val_accuracy: 0.8751 - val_loss: 0.3182
Epoch 3/5
[1m 687/7994[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1:10:40[0m 580ms/step - accuracy: 0.8780 - loss: 0.2924