In [4]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("paultimothymooney/breast-histopathology-images")

print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/paultimothymooney/breast-histopathology-images/versions/1


In [7]:
# Import necessary libraries
import kagglehub
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.metrics import classification_report

# Display plots inline
%matplotlib inline


In [8]:
import os
from tensorflow.keras.preprocessing.image import load_img

# Display dataset structure
for root, dirs, files in os.walk(path):
    print(f"Directory: {root}, Number of Files: {len(files)}")

# Update directory path for classes
class_counts = {}
for subdir in os.listdir(path):
    subdir_path = os.path.join(path, subdir)
    if os.path.isdir(subdir_path):
        class_counts[subdir] = len(os.listdir(subdir_path))

# Print class distribution
print("\nClass Distribution:")
for class_name, count in class_counts.items():
    print(f"{class_name}: {count}")

# Visualize sample images
sample_images = []
for class_name in list(class_counts.keys())[:2]:  # Pick two classes for visualization
    class_dir = os.path.join(path, class_name)
    if os.path.isdir(class_dir):
        for img_file in os.listdir(class_dir)[:3]:  # Pick three images per class
            img_path = os.path.join(class_dir, img_file)
            try:
                img = load_img(img_path, target_size=(150, 150))
                sample_images.append((img, class_name))
            except Exception as e:
                print(f"Error loading image: {img_path}, Error: {e}")

# Display the images
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))
for i, (img, class_name) in enumerate(sample_images):
    plt.subplot(2, 3, i + 1)
    plt.imshow(img)
    plt.title(class_name)
    plt.axis('off')
plt.show()


Directory: /root/.cache/kagglehub/datasets/paultimothymooney/breast-histopathology-images/versions/1, Number of Files: 0
Directory: /root/.cache/kagglehub/datasets/paultimothymooney/breast-histopathology-images/versions/1/12884, Number of Files: 0
Directory: /root/.cache/kagglehub/datasets/paultimothymooney/breast-histopathology-images/versions/1/12884/0, Number of Files: 533
Directory: /root/.cache/kagglehub/datasets/paultimothymooney/breast-histopathology-images/versions/1/12884/1, Number of Files: 236
Directory: /root/.cache/kagglehub/datasets/paultimothymooney/breast-histopathology-images/versions/1/9345, Number of Files: 0
Directory: /root/.cache/kagglehub/datasets/paultimothymooney/breast-histopathology-images/versions/1/9345/0, Number of Files: 554
Directory: /root/.cache/kagglehub/datasets/paultimothymooney/breast-histopathology-images/versions/1/9345/1, Number of Files: 631
Directory: /root/.cache/kagglehub/datasets/paultimothymooney/breast-histopathology-images/versions/1/140

<Figure size 1200x600 with 0 Axes>

In [9]:
# Define augmentation strategies
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2  # Splitting dataset into training and validation
)

# Loading and augmenting dataset
train_generator = datagen.flow_from_directory(
    path,
    target_size=(224, 224),  # Resize images
    batch_size=64,
    class_mode='binary',
    subset='training'
)

validation_generator = datagen.flow_from_directory(
    path,
    target_size=(224, 224),
    batch_size=64,
    class_mode='binary',
    subset='validation'
)


Found 444147 images belonging to 280 classes.
Found 110901 images belonging to 280 classes.


In [None]:
# Build CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model
history = model.fit(train_generator, validation_data=validation_generator, epochs=4)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/4


  self._warn_if_super_not_called()


[1m6940/6940[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 787ms/step - accuracy: 0.0015 - loss: -2747373068945981440.0000

In [None]:
# Evaluate model on validation set
val_images, val_labels = next(iter(validation_generator))
predictions = model.predict(val_images) > 0.5

# Classification report
print("\nClassification Report:")
print(classification_report(val_labels, predictions))

# Visualize predictions
plt.figure(figsize=(12, 6))
for i in range(5):
    plt.subplot(1, 5, i + 1)
    plt.imshow(val_images[i].astype('uint8'))
    plt.title(f"True: {val_labels[i]}, Pred: {int(predictions[i])}")
    plt.axis('off')
plt.show()


In [None]:
# Grad-CAM Implementation
def grad_cam(model, img_path, layer_name='conv2d_1'):
    img = load_img(img_path, target_size=(224, 224))
    img_array = img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    grad_model = Model([model.inputs], [model.get_layer(layer_name).output, model.output])
    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        loss = predictions[:, 0]

    grads = tape.gradient(loss, conv_outputs)[0]
    weights = tf.reduce_mean(grads, axis=(0, 1))
    cam = np.dot(conv_outputs[0], weights)

    cam = tf.maximum(cam, 0) / tf.math.reduce_max(cam)
    cam = tf.image.resize(cam[..., np.newaxis], (224, 224)).numpy()

    # Overlay the heatmap
    plt.imshow(img)
    plt.imshow(cam[..., 0], cmap='jet', alpha=0.5)
    plt.axis('off')
    plt.show()

# Test on a sample image
grad_cam(model, 'path_to_test_image.jpg', layer_name='conv2d_1')


In [None]:
# Transfer Learning with VGG16
from tensorflow.keras.applications import VGG16

# Load pre-trained VGG16 model
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze layers to prevent retraining
for layer in base_model.layers:
    layer.trainable = False

# Add custom layers
x = Flatten()(base_model.output)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=output)

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model
history = model.fit(train_generator, validation_data=validation_generator, epochs=4)
