In [19]:
!pip install torch torchvision fastai matplotlib
!pip install opencv-python
!pip install --upgrade fastai torchvision


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from fastai.vision.all import untar_data, URLs
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os

In [21]:
# Resolve the local locations of the two fastai datasets via untar_data.
imagenette_path = untar_data(URLs.IMAGENETTE)
imagewoof_path = untar_data(URLs.IMAGEWOOF)

# Report both locations, one per line.
print(
    f"Imagenette path: {imagenette_path}",
    f"Imagewoof path: {imagewoof_path}",
    sep="\n",
)

Imagenette path: /Users/sammizhu/.fastai/data/imagenette2
Imagewoof path: /Users/sammizhu/.fastai/data/imagewoof2


In [22]:
# Per-channel mean / std handed to Normalize; both pipelines use the same values.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Training pipeline: random 224-pixel crop and random horizontal flip,
# followed by tensor conversion and normalization.
train_transforms = transforms.Compose(
    [
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ]
)

# Validation pipeline: deterministic resize to 256 and center crop to 224,
# followed by the same tensor conversion and normalization.
val_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ]
)

# Build one ImageFolder per (dataset, split); the train split gets the
# augmenting pipeline and the val split the deterministic one.
split_transforms = (("train", train_transforms), ("val", val_transforms))

imagenette_train_dataset, imagenette_val_dataset = [
    datasets.ImageFolder(imagenette_path / split, transform=pipeline)
    for split, pipeline in split_transforms
]

imagewoof_train_dataset, imagewoof_val_dataset = [
    datasets.ImageFolder(imagewoof_path / split, transform=pipeline)
    for split, pipeline in split_transforms
]

# Wrap every dataset in a DataLoader: training loaders shuffle, validation
# loaders keep the dataset order.
batch_size = 32
train_loader_kwargs = dict(batch_size=batch_size, shuffle=True)
val_loader_kwargs = dict(batch_size=batch_size, shuffle=False)

imagenette_train_loader = DataLoader(imagenette_train_dataset, **train_loader_kwargs)
imagenette_val_loader = DataLoader(imagenette_val_dataset, **val_loader_kwargs)

imagewoof_train_loader = DataLoader(imagewoof_train_dataset, **train_loader_kwargs)
imagewoof_val_loader = DataLoader(imagewoof_val_dataset, **val_loader_kwargs)

In [23]:
# Define the model (ResNet18 for example)
def create_model(num_classes):
    """Build a ResNet-18 with pretrained weights and a new classification head.

    Args:
        num_classes: Number of output units of the replacement ``fc`` layer.

    Returns:
        The torchvision ResNet-18 whose final fully connected layer has been
        swapped for a freshly created ``nn.Linear`` with ``num_classes`` outputs.
    """
    # The boolean `pretrained=True` argument is deprecated in torchvision;
    # the weights enum selects the same ImageNet checkpoint explicitly.
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
    # Replace the final fully connected layer with the correct number of classes
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

# Both datasets are 10-class problems, so each one gets its own independently
# constructed model with a 10-way head.
imagenette_model, imagewoof_model = [create_model(num_classes=10) for _ in range(2)]

In [24]:
# Loss function used for both training runs.
criterion = nn.CrossEntropyLoss()

# One Adam optimizer per model, both with the same learning rate.
learning_rate = 0.001
imagenette_optimizer = optim.Adam(imagenette_model.parameters(), lr=learning_rate)
imagewoof_optimizer = optim.Adam(imagewoof_model.parameters(), lr=learning_rate)

# Run on CUDA when it is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10,
                save_path='best_model.pth'):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Uses the module-level ``device`` to place the model and batches, and the
    module-level ``evaluate_model`` to score each epoch.

    Args:
        model: Network to train; it is moved to ``device``.
        train_loader: Iterable yielding ``(inputs, labels)`` training batches.
        val_loader: Iterable yielding ``(inputs, labels)`` validation batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the model's parameters.
        num_epochs: Number of passes over ``train_loader``.
        save_path: Where the best ``state_dict`` is written. Pass a distinct
            path per run so that training several models does not overwrite
            one shared checkpoint file.

    Returns:
        The best validation accuracy observed (0.0 if no epoch exceeded 0).
    """
    model = model.to(device)
    best_acc = 0.0

    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        running_loss = 0.0
        samples_seen = 0

        # Training loop
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Weight the batch loss by its size so the epoch figure is a per-sample mean.
            n_in_batch = inputs.size(0)
            running_loss += loss.item() * n_in_batch
            samples_seen += n_in_batch

        # Average over the samples actually processed; this also avoids a
        # division by zero when the loader yields nothing.
        epoch_loss = running_loss / samples_seen if samples_seen else 0.0

        # Validate the model
        val_acc = evaluate_model(model, val_loader)

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Validation Accuracy: {val_acc:.4f}")

        # Save the best model
        if val_acc > best_acc:
            best_acc = val_acc
            # torch.save does not create missing parent directories.
            checkpoint_dir = os.path.dirname(save_path)
            if checkpoint_dir:
                os.makedirs(checkpoint_dir, exist_ok=True)
            torch.save(model.state_dict(), save_path)

    print(f"Training complete. Best validation accuracy: {best_acc:.4f}")
    return best_acc

# Function to evaluate the model
def evaluate_model(model, val_loader):
    """Return the top-1 accuracy of ``model`` over ``val_loader``.

    The model is switched to evaluation mode and left in that mode. Batches are
    moved to the device the model's parameters live on, so the function does
    not depend on any notebook-level ``device`` variable.

    Args:
        model: Network to evaluate.
        val_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Fraction of correctly classified samples, or 0.0 when ``val_loader``
        yields no samples.
    """
    model.eval()  # Set model to evaluation mode

    # Follow the model's own placement; fall back to CPU for parameter-free models.
    try:
        model_device = next(model.parameters()).device
    except StopIteration:
        model_device = torch.device("cpu")

    correct = 0
    total = 0

    with torch.no_grad():  # Disable gradient computation
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(model_device), labels.to(model_device)

            # Predicted class = index of the largest logit per sample
            preds = model(inputs).argmax(dim=1)

            # Update correct predictions
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    # An empty loader would otherwise raise ZeroDivisionError.
    if total == 0:
        return 0.0
    return correct / total


In [25]:
# Select the compute device: CUDA when available, otherwise CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Imagenette run. The train_model call is commented out, so only the banner prints.
print("Training on Imagenette dataset...")
# train_model(imagenette_model, imagenette_train_loader, imagenette_val_loader, criterion, imagenette_optimizer, num_epochs=1)

# Imagewoof run. The train_model call is commented out here as well.
print("Training on Imagewoof dataset...")
# train_model(imagewoof_model, imagewoof_train_loader, imagewoof_val_loader, criterion, imagewoof_optimizer, num_epochs=1)


Training on Imagenette dataset...
Training on Imagewoof dataset...


In [26]:
# Save the best model
# torch.save(imagenette_model.state_dict(), 'imagenette_best_model.pth')
# torch.save(imagewoof_model.state_dict(), 'imagewoof_best_model.pth')
# NOTE(review): the commented-out saves above write to the working directory,
# while the loads below read from 'models/'. Confirm where the checkpoints live.

# Load the models for inference.
# map_location="cpu" lets a checkpoint saved on a GPU machine load on a CPU-only
# one; load_state_dict then copies the values into the model's existing parameters.
# weights_only=True restricts unpickling to tensor data instead of arbitrary objects.
imagenette_model.load_state_dict(
    torch.load('models/imagenette_best_model.pth', map_location="cpu", weights_only=True)
)
imagewoof_model.load_state_dict(
    torch.load('models/imagewoof_best_model.pth', map_location="cpu", weights_only=True)
)


  imagenette_model.load_state_dict(torch.load('models/imagenette_best_model.pth'))


FileNotFoundError: [Errno 2] No such file or directory: 'models/imagenette_best_model.pth'

In [27]:
# Build the ResNet-18 used for the Grad-CAM experiments (e.g., the Imagewoof model).
# The boolean `pretrained=True` argument is deprecated in torchvision; the
# weights enum selects the same ImageNet checkpoint explicitly.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
num_classes = 10
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)

# NOTE(review): with the load below commented out, the new `fc` head is
# untrained, so the predicted class driving Grad-CAM is not meaningful.
# model.load_state_dict(torch.load('imagewoof_best_model.pth'))

model.eval()


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [28]:
# Maps a layer name to the most recent output captured from that layer.
activations = {}


def get_activation(name):
    """Create a forward hook that records a layer's output under ``name``.

    The returned callable has the ``(module, inputs, output)`` signature used
    by forward hooks and stores ``output`` in the module-level ``activations``
    dictionary, replacing any previous entry for the same name.
    """

    def hook(module, inputs, output):
        activations.update({name: output})

    return hook

# Attach a recording hook to every Conv2d submodule of the model; other
# module types are skipped.
conv_modules = [
    (module_name, module)
    for module_name, module in model.named_modules()
    if isinstance(module, torch.nn.Conv2d)
]
for module_name, module in conv_modules:
    module.register_forward_hook(get_activation(module_name))


In [29]:
# Inference-time preprocessing: resize to 256, center-crop to 224, convert to
# a tensor, then normalize each channel with the mean / std below.
preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
preprocess = transforms.Compose(preprocess_steps)

# Load and preprocess the dog image
def load_image(image_path):
    """Read an image from disk and prepare it as a single-item model batch.

    Args:
        image_path: Path of the image file to open.

    Returns:
        A tuple ``(input_tensor, image)``: ``input_tensor`` is the output of the
        module-level ``preprocess`` pipeline with a leading batch dimension
        added, and ``image`` is the RGB PIL image it was computed from.
    """
    # Image.open keeps the underlying file open; the context manager closes it
    # once convert() has produced an independent in-memory RGB copy.
    with Image.open(image_path) as source:
        image = source.convert('RGB')
    input_tensor = preprocess(image).unsqueeze(0)
    return input_tensor, image

# Load the image of your dog.
# The path is relative, so it is resolved against the notebook's current
# working directory; the file must exist there or load_image raises
# FileNotFoundError.
input_tensor, image = load_image('test_img/dog.jpeg')


FileNotFoundError: [Errno 2] No such file or directory: '/Users/sammizhu/cs2822r-project/to_use/test_img/dog.jpeg'

In [91]:
# Choose the compute device (CUDA when available, else CPU) and place the model on it.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

# The input batch has to live on the same device as the model.
input_tensor = input_tensor.to(device)

# Run the network once to obtain the class scores.
output = model(input_tensor)

# Index of the highest score, taken as the class to explain.
target_class = output.argmax().item()

# Clear gradients left over from any earlier backward pass.
model.zero_grad()

# Back-propagate the target-class score; the graph is retained so that
# further gradient computations can reuse it.
class_score = output[:, target_class]
class_score.backward(retain_graph=True)


In [None]:
# Function to compute the Grad-CAM heatmap
def grad_cam(activation, gradients):
    """Compute a normalized Grad-CAM heatmap from a layer's output and its gradients.

    Neither input tensor is modified, so the function can be called repeatedly
    on the same captured activation and returns the same result each time.

    Args:
        activation: 4-D tensor indexed as (batch, channel, height, width).
        gradients: Gradients of the target score with respect to ``activation``;
            same layout.

    Returns:
        NumPy array holding the heatmap with singleton dimensions squeezed out.
        Values are scaled to [0, 1]; an all-zero heatmap is returned as zeros
        rather than NaN.
    """
    # Work on detached tensors: the heatmap is a visualization and must not
    # write into, or extend, the autograd graph the activation belongs to.
    activation = activation.detach()
    gradients = gradients.detach()

    # Global average pooling over the gradients (one weight per feature map)
    pooled_gradients = gradients.mean(dim=(0, 2, 3))

    # Weight every channel by its pooled gradient via broadcasting (out of place)
    weighted = activation * pooled_gradients.view(1, -1, 1, 1)

    # Average the weighted maps over channels, then keep positive evidence only
    heatmap = torch.relu(weighted.mean(dim=1).squeeze())

    # Normalize to [0, 1]; skip the division when the map is constant zero
    heatmap = heatmap - heatmap.min()
    peak = heatmap.max()
    if peak > 0:
        heatmap = heatmap / peak
    return heatmap.cpu().numpy()

# Function to save the heatmap without overlaying on the original image
def save_heatmap_only(heatmap, layer_name, i, save_folder="all_layers", colormap='jet', size=(1229, 1229)):
    """Color-map a heatmap and write it to ``<save_folder>/cnn_layer_<i>.png``.

    Args:
        heatmap: 2-D NumPy array; values are expected in [0, 1] and are clipped
            to that range (NaN becomes 0) before conversion to 8-bit.
        layer_name: Layer name, used only in the printed status message.
        i: Index used in the output file name.
        save_folder: Directory for the image; created if missing.
        colormap: Name of an OpenCV colormap (``'jet'`` -> ``cv2.COLORMAP_JET``)
            or an OpenCV colormap constant. Unknown names fall back to ``'jet'``.
        size: ``(width, height)`` of the saved image in pixels.
    """
    # Ensure save folder exists
    os.makedirs(save_folder, exist_ok=True)

    # Resolve the requested colormap; previously this argument was ignored.
    if isinstance(colormap, str):
        colormap_id = getattr(cv2, f"COLORMAP_{colormap.upper()}", None)
        if colormap_id is None:
            print(f"Unknown colormap '{colormap}'; falling back to 'jet'.")
            colormap_id = cv2.COLORMAP_JET
    else:
        colormap_id = colormap

    # Sanitize before the 8-bit cast: NaN or out-of-range values would
    # otherwise wrap around into arbitrary intensities.
    heatmap = np.clip(np.nan_to_num(heatmap), 0.0, 1.0)
    heatmap = np.uint8(255 * heatmap)
    heatmap_resized = cv2.resize(heatmap, tuple(size), interpolation=cv2.INTER_LINEAR)

    # Apply colormap
    heatmap_colored = cv2.applyColorMap(heatmap_resized, colormap_id)

    # Save the heatmap as an image file; imwrite signals failure by returning False
    save_path = os.path.join(save_folder, f"cnn_layer_{i}.png")
    if cv2.imwrite(save_path, heatmap_colored):
        print(f"Saved Grad-CAM heatmap for layer '{layer_name}' to: {save_path}")
    else:
        print(f"Failed to write Grad-CAM heatmap for layer '{layer_name}' to: {save_path}")

# Main function to generate Grad-CAM for all layers and save each heatmap
def generate_gradcam_for_all_layers(activations, target_class, output, i=1):
    """Produce and save one Grad-CAM heatmap per captured layer output.

    For each ``(layer_name, tensor)`` entry of ``activations`` the gradient of
    ``output[:, target_class]`` with respect to that tensor is computed, turned
    into a heatmap by ``grad_cam`` and written by ``save_heatmap_only``.
    Layers without a gradient are reported and skipped; a ``RuntimeError``
    raised while handling a layer is printed and processing continues.

    Args:
        activations: Mapping from layer name to the tensor captured for it.
        target_class: Column of ``output`` whose score is differentiated.
        output: Model output tensor still attached to the autograd graph.
        i: Index given to the first saved image; incremented after each save.
    """
    for layer_name, layer_output in activations.items():
        try:
            # allow_unused=True yields None instead of raising when the score
            # does not depend on this tensor.
            (layer_grads,) = torch.autograd.grad(
                output[:, target_class],
                layer_output,
                retain_graph=True,
                allow_unused=True,
            )

            if layer_grads is not None:
                heatmap = grad_cam(layer_output, layer_grads)
                print(f"Generating and saving Grad-CAM heatmap for layer: {layer_name}")

                # Written without overlaying on the source image; the counter
                # only advances for images that were actually produced.
                save_heatmap_only(heatmap, layer_name, i)
                i += 1
            else:
                print(f"No gradients found for layer: {layer_name}")

        except RuntimeError as err:
            print(f"Error processing layer {layer_name}: {str(err)}")
# Example usage
# Requires `activations` (filled by the forward hooks), plus `target_class`
# and `output` from the forward-pass cell, to be defined in the kernel.
# Images are numbered starting from 1.
generate_gradcam_for_all_layers(activations, target_class, output, i=1)

In [12]:
import os
import numpy as np
import cv2

def save_top_k_regions_on_gradcam(grad_cam, output_filename, top_k=1, save_folder="top_k_regions",
                                  threshold=100, prefix="rose_"):
    """
    Saves an image showing only the top-K most intense distinct regions from a Grad-CAM heatmap using contour-based detection.

    The input array is not modified; all work happens on a private copy.

    Parameters:
    grad_cam (numpy array): The Grad-CAM heatmap (2D array); it is rescaled to [0, 1] internally.
    output_filename (str): Base filename for the saved image.
    top_k (int): The number of top distinct regions to highlight based on intensity.
    save_folder (str): Folder to save the output images.
    threshold (int): 8-bit cut-off (0-255) separating regions from background.
    prefix (str): Text placed in front of ``output_filename`` in the saved file name.
    """
    # Step 1: Normalize a copy of the heatmap to [0, 1] so the caller's array stays untouched
    heat = np.array(grad_cam, copy=True)
    if not np.issubdtype(heat.dtype, np.floating):
        # In-place true division is not defined for integer arrays
        heat = heat.astype(np.float64)
    heat -= heat.min()
    if heat.max() > 0:
        heat /= heat.max()

    # Step 2: Convert to 8-bit for contour detection
    heat_8bit = np.uint8(255 * heat)

    # Step 3: Apply thresholding to better separate regions
    _, binary_map = cv2.threshold(heat_8bit, threshold, 255, cv2.THRESH_BINARY)

    # Step 4: Find contours representing distinct regions
    contours, _ = cv2.findContours(binary_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f"Number of contours (regions) found: {len(contours)}")

    # Step 5: Score each region by the summed heatmap value inside its filled contour
    contour_intensities = []
    for contour in contours:
        mask = np.zeros(heat.shape, dtype=np.uint8)
        cv2.drawContours(mask, [contour], -1, 1, thickness=-1)  # Fill contour area with 1s
        contour_intensities.append((contour, np.sum(heat * mask)))

    # Step 6: Sort contours by intensity and select the top-k contours
    top_k_contours = sorted(contour_intensities, key=lambda x: x[1], reverse=True)[:top_k]

    # Step 7: Create a mask to display only the top-k contours
    top_k_mask = np.zeros(heat.shape, dtype=np.uint8)
    for contour, intensity in top_k_contours:
        print(f"Including contour with intensity: {intensity}")
        cv2.drawContours(top_k_mask, [contour], -1, 1, thickness=-1)

    # Step 8: Apply the mask to isolate the top-k regions, then color-map the result
    top_k_heat = heat * top_k_mask
    top_k_colored = cv2.applyColorMap(np.uint8(255 * top_k_heat), cv2.COLORMAP_JET)

    # Ensure the save folder exists
    os.makedirs(save_folder, exist_ok=True)

    # Define the save path with the original image name and k value
    save_path = os.path.join(save_folder, f"{prefix}{output_filename}_top{top_k}.png")
    # imwrite signals failure by returning False rather than raising
    if cv2.imwrite(save_path, top_k_colored):
        print(f"Saved image with Top-{top_k} distinct Grad-CAM regions for '{output_filename}' to: {save_path}")
    else:
        print(f"Failed to write Top-{top_k} Grad-CAM regions for '{output_filename}' to: {save_path}")

def process_folder_of_gradcams(gradcam_folder, top_k=1, save_folder="top_k_regions"):
    """
    Processes a folder of Grad-CAM heatmaps and saves only the top-K distinct regions for each image.

    Entries that OpenCV cannot decode (non-image files, sub-directories) are
    reported and skipped. Files are visited in sorted name order.

    Parameters:
    gradcam_folder (str): Path to the folder containing the Grad-CAM heatmap images.
    top_k (int): The number of top distinct regions to highlight based on intensity.
    save_folder (str): Folder to save the output images.
    """
    for filename in sorted(os.listdir(gradcam_folder)):
        gradcam_path = os.path.join(gradcam_folder, filename)

        # Load the heatmap as a single-channel 8-bit image
        heatmap_8bit = cv2.imread(gradcam_path, cv2.IMREAD_GRAYSCALE)

        # imread returns None on failure, so check BEFORE doing arithmetic:
        # dividing None by 255.0 would raise TypeError and abort the whole run.
        if heatmap_8bit is None:
            print(f"Error loading {filename}. Skipping.")
            continue

        # Scale pixel values from [0, 255] to [0, 1]
        heatmap = heatmap_8bit / 255.0

        # Save the Top-K distinct regions from the Grad-CAM heatmap
        output_filename = os.path.splitext(filename)[0]  # Use filename without extension
        save_top_k_regions_on_gradcam(heatmap, output_filename, top_k, save_folder)

# Example Usage:
# NOTE(review): this is an absolute path on one user's machine; the notebook
# will not run elsewhere until it is replaced with a configurable or relative path.
gradcam_folder = "/Users/sammizhu/cs2822r-project/rose"  # Folder containing Grad-CAM heatmap variations
# Keep the 18 highest-scoring regions of every heatmap in the folder.
process_folder_of_gradcams(gradcam_folder, top_k=18)

Number of contours (regions) found: 46
Including contour with intensity: 131553.10426540283
Including contour with intensity: 42222.44549763037
Including contour with intensity: 22034.12796208531
Including contour with intensity: 21112.587677725118
Including contour with intensity: 20697.606635071097
Including contour with intensity: 17722.061611374407
Including contour with intensity: 15256.668246445499
Including contour with intensity: 15154.175355450237
Including contour with intensity: 12437.691943127962
Including contour with intensity: 7948.857819905213
Including contour with intensity: 6995.127962085309
Including contour with intensity: 5126.009478672986
Including contour with intensity: 4751.582938388626
Including contour with intensity: 2953.4786729857824
Including contour with intensity: 2837.805687203792
Including contour with intensity: 2281.303317535545
Including contour with intensity: 2104.7962085308063
Including contour with intensity: 1916.0616113744074
Saved image wit