In [None]:
import kagglehub

# Fetch the most recent published version of the dataset through kagglehub
# and remember the directory it reports back.
path = kagglehub.dataset_download(
    "paultimothymooney/breast-histopathology-images"
)

# Show where the dataset files ended up.
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/paultimothymooney/breast-histopathology-images?dataset_version_number=1...


100%|██████████| 3.10G/3.10G [00:24<00:00, 137MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/paultimothymooney/breast-histopathology-images/versions/1


### **Preprocessing the data**

In [None]:
from pathlib import Path

import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

IMAGE_SIZE = (224, 224)      # Patches are resized to this (height, width)
BATCH_SIZE = 64
VALIDATION_FRACTION = 0.2    # Share of patches held out for validation
SEED = 42                    # Fixes the train/validation split and shuffling

# Pointing flow_from_directory at `path` makes Keras treat every top-level
# folder as a class, which is incompatible with class_mode='binary' (labels must
# be 0/1 for binary cross-entropy). Instead, list the patches explicitly and
# take the label from each patch's immediate parent folder.
# NOTE(review): assumes the archive is laid out as <patient_id>/<0|1>/<patch>.png
# with "1" marking the positive class -- confirm against the dataset description.
patch_files = sorted(
    candidate
    for candidate in Path(path).rglob("*.png")
    if candidate.parent.name in ("0", "1")
)
if not patch_files:
    raise FileNotFoundError(f"No <0|1>/*.png patches found under {path}")

patches = (
    pd.DataFrame({
        "filename": [str(patch) for patch in patch_files],
        "patch_name": [patch.name for patch in patch_files],
        "label": [patch.parent.name for patch in patch_files],
    })
    # NOTE(review): the download appears to contain a nested second copy of the
    # data; keeping one row per patch file name avoids counting patches twice.
    # Assumes patch file names are globally unique -- confirm.
    .drop_duplicates(subset="patch_name")
    # Shuffle once with a fixed seed so the split below is random but repeatable.
    .sample(frac=1.0, random_state=SEED)
    .reset_index(drop=True)
)

# NOTE(review): this is a patch-level split, so patches from one patient can land
# in both subsets; switch to a patient-level split if leakage is a concern.
validation_count = int(len(patches) * VALIDATION_FRACTION)
validation_patches = patches.iloc[:validation_count]
train_patches = patches.iloc[validation_count:]

# Define augmentation strategies (training data only)
datagen = ImageDataGenerator(
    rescale=1.0 / 255,  # Scale pixels to [0, 1]
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation images are only rescaled so the metrics reflect unaugmented data.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Options shared by both generators; `classes` pins folder "0" -> label 0 and
# folder "1" -> label 1.
flow_options = dict(
    x_col="filename",
    y_col="label",
    classes=["0", "1"],
    target_size=IMAGE_SIZE,  # Resize images
    batch_size=BATCH_SIZE,
    class_mode='binary',
)

# Loading and augmenting dataset
train_generator = datagen.flow_from_dataframe(
    train_patches,
    shuffle=True,
    seed=SEED,
    **flow_options,
)

validation_generator = validation_datagen.flow_from_dataframe(
    validation_patches,
    shuffle=False,  # Keep a stable order for evaluation
    **flow_options,
)

Found 444147 images belonging to 280 classes.
Found 110901 images belonging to 280 classes.


### **Building the model**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D

# Build CNN model: two conv/pool stages followed by a small dense classifier.
# The input shape is declared with an explicit Input layer rather than an
# `input_shape=` argument on the first Conv2D, which Keras warns against for
# Sequential models.
model = Sequential([
    Input(shape=(224, 224, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### **Model Training**

In [None]:
# Fit the network for four epochs on the training generator, scoring it on the
# validation generator at the end of every epoch.
model.fit(
    x=train_generator,
    epochs=4,
    validation_data=validation_generator,
)

Epoch 1/4
[1m2302/6940[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m59:49[0m 774ms/step - accuracy: 0.0013 - loss: -71086888479358976.0000

### **Highlighting the cancerous regions**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image

# Grad-CAM implementation
def _last_conv_layer_name(model):
    """Return the name of the last Conv2D layer in `model.layers`."""
    for layer in reversed(model.layers):
        if isinstance(layer, tf.keras.layers.Conv2D):
            return layer.name
    raise ValueError("model has no Conv2D layer to compute Grad-CAM on")


def grad_cam(model, img_path, layer_name='block5_conv3'):
    """Plot a Grad-CAM heatmap for one image and return the heatmap.

    Args:
        model: Keras model whose first output column is used as the score.
        img_path: Path of the image to explain; it is resized to 224x224.
        layer_name: Name of the convolutional layer to visualise. Pass None to
            use the last Conv2D layer found in `model.layers`.

    Returns:
        A (224, 224) float array with values in [0, 1]; higher values mark
        regions that contributed more to the score.

    Raises:
        ValueError: If `layer_name` is None and the model has no Conv2D layer,
            or if `layer_name` does not name a layer of `model`.
        RuntimeError: If no gradient reaches the chosen layer's output.
    """
    if layer_name is None:
        layer_name = _last_conv_layer_name(model)

    img = image.load_img(img_path, target_size=(224, 224))
    # NOTE(review): pixels are scaled to [0, 1] here; keep this in sync with the
    # preprocessing the model was trained with.
    img_array = image.img_to_array(img) / 255.0
    img_tensor = tf.convert_to_tensor(np.expand_dims(img_array, axis=0), dtype=tf.float32)

    grad_model = Model(model.inputs, [model.get_layer(layer_name).output, model.output])
    with tf.GradientTape() as tape:
        # Watch the input so the forward pass is recorded even when the
        # convolutional layers are frozen (non-trainable variables are not
        # watched automatically).
        tape.watch(img_tensor)
        conv_outputs, predictions = grad_model(img_tensor)
        loss = predictions[:, 0]

    grads = tape.gradient(loss, conv_outputs)
    if grads is None:
        raise RuntimeError(f"No gradient flows from the model output to layer {layer_name!r}")

    # Channel importance = gradient averaged over the spatial dimensions.
    weights = tf.reduce_mean(grads[0], axis=(0, 1))
    cam = tf.reduce_sum(conv_outputs[0] * weights, axis=-1)

    # Keep positive evidence only, then normalise by the post-ReLU maximum; the
    # epsilon avoids a division by zero when the whole map is non-positive.
    cam = tf.maximum(cam, 0)
    cam = cam / (tf.reduce_max(cam) + tf.keras.backend.epsilon())
    cam = tf.image.resize(cam[..., tf.newaxis], (224, 224)).numpy()
    heatmap = cam[..., 0]

    # Overlay the heatmap
    plt.imshow(img)
    plt.imshow(heatmap, cmap='jet', alpha=0.5)
    plt.axis('off')
    plt.show()

    return heatmap

# Test on a sample image: use the first validation file and let grad_cam pick
# the model's last convolutional layer.
sample_image_path = validation_generator.filepaths[0]
heatmap = grad_cam(model, sample_image_path, layer_name=None)


### **Model Evaluation**

In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Number of validation batches to score; None scores the whole validation set.
MAX_EVAL_BATCHES = None

# Evaluate model on the validation set. Labels and predictions are collected
# batch by batch so they stay aligned even if the generator shuffles.
total_batches = len(validation_generator)
if MAX_EVAL_BATCHES is not None:
    total_batches = min(total_batches, MAX_EVAL_BATCHES)

true_label_batches = []
predicted_label_batches = []
for batch_index in range(total_batches):
    batch_images, batch_labels = validation_generator[batch_index]
    batch_probabilities = model.predict_on_batch(batch_images)
    true_label_batches.append(np.asarray(batch_labels).astype(int).ravel())
    # Flatten the (batch, 1) sigmoid outputs and threshold at 0.5 so predictions
    # are 1-D integers like the labels.
    predicted_label_batches.append(
        (np.asarray(batch_probabilities).ravel() > 0.5).astype(int)
    )

val_labels = np.concatenate(true_label_batches)
predictions = np.concatenate(predicted_label_batches)

# zero_division=0 reports 0 instead of warning when a class is never predicted.
print(classification_report(val_labels, predictions, zero_division=0))


### **Using Transfer Learning**

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model

# Start from the ImageNet-trained VGG16 convolutional stack, without its
# original classifier head.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Mark every pre-trained layer as non-trainable.
for pretrained_layer in base_model.layers:
    pretrained_layer.trainable = False

# New head: flatten -> 128-unit ReLU -> dropout -> single sigmoid unit.
flattened = Flatten()(base_model.output)
hidden = Dense(128, activation='relu')(flattened)
dropped = Dropout(0.5)(hidden)
output = Dense(1, activation='sigmoid')(dropped)

model = Model(inputs=base_model.input, outputs=output)

# Same optimiser, loss and metric as the CNN above.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit for four epochs, validating after each one.
model.fit(
    x=train_generator,
    epochs=4,
    validation_data=validation_generator,
)
