In [64]:
import torch
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import os

In [65]:
# ============================================
# Section 1: Base Classes
# ============================================

class BaseCAM:
    """Base CAM class - Equation (2) in paper.

    On construction a forward hook and a full backward hook are registered on
    one layer of the model.  After a forward pass ``self.activations['value']``
    holds that layer's output; after a backward pass ``self.gradients['value']``
    holds a detached copy of the gradient with respect to that output.
    Subclasses implement :meth:`forward`.

    The hooks live on the model, not on this object, so they keep firing for
    as long as the model exists.  Call :meth:`remove_hooks` when the CAM object
    is no longer needed, especially when several CAM objects share one model.

    Args:
        model_arch: Model exposing an indexable ``features`` container
            (the original comment targets VGG16).
        layer_name: ``'features_<N>'`` selects ``model_arch.features[N]``;
            a name without an underscore selects the whole ``features`` module.

    Raises:
        ValueError: If the text after the first underscore is not an integer.
    """

    def __init__(self, model_arch, layer_name):
        self.model_arch = model_arch
        self.gradients = {}
        self.activations = {}

        def backward_hook(module, grad_input, grad_output):
            # Store an independent copy of the gradient w.r.t. the layer output.
            self.gradients['value'] = grad_output[0].detach().clone()

        def forward_hook(module, input, output):
            # Stored as-is (still attached to the autograd graph).
            self.activations['value'] = output

        # Find target layer in VGG16
        hierarchy = layer_name.split('_')
        if len(hierarchy) >= 2:
            layer_num = int(hierarchy[1])
            self.target_layer = self.model_arch.features[layer_num]
        else:
            self.target_layer = self.model_arch.features

        # Register hooks and keep the handles so they can be removed later;
        # without them every new CAM object adds hooks that never go away.
        self._hook_handles = [
            self.target_layer.register_forward_hook(forward_hook),
            self.target_layer.register_full_backward_hook(backward_hook),
        ]

    def remove_hooks(self):
        """Remove this object's hooks from the model (safe to call repeatedly)."""
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles = []

    def forward(self, input, class_idx=None):
        """Placeholder; the base class computes nothing and returns ``None``."""
        return None

    def __call__(self, input, class_idx=None):
        """Delegate to :meth:`forward` so instances can be called directly."""
        return self.forward(input, class_idx)


class LayerCAM(BaseCAM):
    """
    LayerCAM implementation - Equations (6), (7), (8) from paper

    Paper equations:
    (6) w^c_{k,ij} = ReLU(g^c_{k,ij})
    (7) Â^k_{ij} = w^c_{k,ij} · A^k_{ij}
    (8) M^c = ReLU(Σ_k Â^k)
    """

    def forward(self, input, class_idx=None):
        """Compute the LayerCAM map of the hooked layer for every batch sample.

        Args:
            input: 4-D image batch ``(N, C, H, W)`` on the model's device.
            class_idx: Target class.  ``None`` uses each sample's top-scoring
                class; an int is applied to every sample; a sequence/tensor of
                ``N`` ints gives one target per sample.

        Returns:
            Tensor of shape ``(N, 1, H, W)``: the map upsampled to the input
            resolution and min-max normalised per sample.

        Raises:
            ValueError: If ``class_idx`` holds neither 1 nor ``N`` entries.
            KeyError: If the hooks did not capture activations/gradients.
        """
        _, _, height, width = input.shape

        # Drop captures from an earlier call so a hook that fails to fire
        # surfaces as a KeyError instead of silently reusing stale tensors.
        self.activations.clear()
        self.gradients.clear()

        # Forward pass
        logit = self.model_arch(input)
        batch_size = logit.size(0)

        # Select target class (one index per sample)
        if class_idx is None:
            _, target = logit.max(1)
        else:
            target = torch.as_tensor(
                class_idx, dtype=torch.long, device=logit.device
            ).reshape(-1)
            if target.numel() == 1:
                target = target.expand(batch_size)
            elif target.numel() != batch_size:
                raise ValueError(
                    f"class_idx has {target.numel()} entries but the batch "
                    f"has {batch_size} samples"
                )

        # One-hot gradient matching logit's shape, dtype and device, so any
        # batch size works (the previous version was fixed to one row).
        one_hot = torch.zeros_like(logit)
        one_hot.scatter_(1, target.unsqueeze(1), 1.0)

        # Backward pass. The graph is not needed afterwards, so it is not retained.
        self.model_arch.zero_grad()
        logit.backward(gradient=one_hot)

        # Get activations and gradients captured by the hooks
        activations = self.activations['value']
        gradients = self.gradients['value']

        # LayerCAM computation - Equations (6), (7), (8)
        with torch.no_grad():
            # (6) & (7): w_{k,ij} = ReLU(g_{k,ij}), Â = w * A
            activation_maps = activations * F.relu(gradients)

            # (8): M = ReLU(Σ Â)
            cam = F.relu(torch.sum(activation_maps, dim=1, keepdim=True))

            # Resize to input size
            cam = F.interpolate(
                cam, size=(height, width), mode='bilinear', align_corners=False
            )

            # Min-max normalise each sample independently; the epsilon keeps
            # an all-constant map from dividing by zero.
            cam_min = cam.amin(dim=(1, 2, 3), keepdim=True)
            cam_max = cam.amax(dim=(1, 2, 3), keepdim=True)
            cam = (cam - cam_min) / (cam_max - cam_min + 1e-8)

        return cam


In [66]:
# ============================================
# Section 2: Fusion
# ============================================

def scale_cam(cam, gamma=2):
    """
    Apply the tanh rescaling of Equation (9) from the paper:
    M̂^c = tanh(γ * M^c / max(M^c))

    Args:
        cam: Class activation map (tensor); its global maximum is the divisor
        gamma: Scale parameter γ (paper uses γ=2)

    Returns:
        Tensor shaped like ``cam`` holding the rescaled map.
    """
    # The small epsilon protects against an all-zero map.
    peak = cam.max() + 1e-8
    return torch.tanh(gamma * (cam / peak))


def fuse_cams(cam_list, gamma=2, scale_mask=None):
    """
    Fuse CAMs using element-wise maximum
    As described in Section III-A

    Args:
        cam_list: List of CAMs from different layers (same shape).
        gamma: Scale parameter for Equation (9) (default: 2)
        scale_mask: Optional sequence of booleans, one per entry of
            ``cam_list``; ``True`` means the CAM is passed through
            ``scale_cam`` before fusion.  When omitted the legacy rule is
            used: a single CAM is returned untouched, otherwise only the
            CAMs at list positions 0, 1 and 2 are scaled.

    Returns:
        The fused CAM tensor.

    Raises:
        ValueError: If ``cam_list`` is empty or ``scale_mask`` has a
            different length than ``cam_list``.

    NOTE(review): the legacy rule depends purely on list position, not on
    which network stage a CAM came from.  A caller that passes the deepest
    stage first gets the deep stages scaled and the shallow ones left as-is.
    Confirm that this matches the intended reading of the paper, and pass an
    explicit ``scale_mask`` if it does not.
    """
    if len(cam_list) == 0:
        raise ValueError("fuse_cams() needs at least one CAM")

    if scale_mask is None:
        if len(cam_list) == 1:
            return cam_list[0]
        # Legacy positional rule: scale only the first three list entries.
        scale_mask = [position < 3 for position in range(len(cam_list))]
    elif len(scale_mask) != len(cam_list):
        raise ValueError(
            f"scale_mask has {len(scale_mask)} entries but cam_list has "
            f"{len(cam_list)}"
        )

    # Scale the selected CAMs - Equation (9)
    prepared = [
        scale_cam(cam, gamma) if should_scale else cam
        for cam, should_scale in zip(cam_list, scale_mask)
    ]

    # Element-wise maximum
    fused = prepared[0]
    for cam in prepared[1:]:
        fused = torch.maximum(fused, cam)

    return fused


In [67]:
# ============================================
# Section 3: Utilities
# ============================================

def load_image(image_path):
    """Load an image file and return it converted to RGB.

    Args:
        image_path: Path (or file-like object) accepted by ``PIL.Image.open``.

    Returns:
        A new ``PIL.Image.Image`` in mode ``'RGB'``.
    """
    # convert() returns a separate image, so the source can be closed on exit
    # from the ``with`` block (also when convert() raises) instead of leaving
    # the close to garbage collection.
    with Image.open(image_path) as source:
        return source.convert('RGB')


def apply_transforms(image, size=(224, 224)):
    """Apply the standard ImageNet preprocessing and add a leading batch axis.

    Args:
        image: Image accepted by the torchvision transforms (the notebook
            passes the PIL image returned by ``load_image``).
        size: Target size handed to ``transforms.Resize``.

    Returns:
        The transformed tensor with one extra dimension inserted at position 0.
    """
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]

    steps = [
        transforms.Resize(size),
        transforms.ToTensor(),
        transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
    ]
    pipeline = transforms.Compose(steps)

    batch = pipeline(image).unsqueeze(0)
    return batch


def denormalize(tensor):
    """Undo the ImageNet mean/std normalisation for visualization.

    Args:
        tensor: Normalised image tensor whose channel axis (size 3) is third
            from the end, e.g. ``(3, H, W)`` or ``(N, 3, H, W)``.

    Returns:
        ``tensor * std + mean`` on the same device as ``tensor``.
    """
    # Build the constants on the input's device so CUDA tensors do not fail
    # with a device mismatch.
    mean = torch.tensor([0.485, 0.456, 0.406], device=tensor.device).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225], device=tensor.device).view(3, 1, 1)
    return tensor * std + mean


def prepare_visualization_data(input_tensor, cam_map):
    """Prepare image and CAM for visualization.

    Args:
        input_tensor: Normalised image tensor; after ``squeeze()`` it is
            permuted as ``(3, H, W)`` (assumes a single image, TODO confirm
            with callers).  May live on any device.
        cam_map: CAM tensor that squeezes to ``(H, W)`` with values expected
            in ``[0, 1]``.  May live on any device.

    Returns:
        Tuple ``(img, cam, overlayed)``:
        ``img`` is the denormalised image as an ``(H, W, 3)`` array clipped to
        ``[0, 1]``; ``cam`` is the CAM as a ``uint8`` array in ``[0, 255]``;
        ``overlayed`` is a 50/50 blend of ``img`` and the jet-coloured CAM,
        clipped to ``[0, 1]``.
    """
    from matplotlib import cm

    # Move to CPU before denormalising so this works for CUDA tensors
    # regardless of whether denormalize() supports non-CPU input.
    image_chw = input_tensor.detach().squeeze().cpu()
    img = denormalize(image_chw).permute(1, 2, 0).numpy()
    img = np.clip(img, 0, 1)

    cam = cam_map.detach().squeeze().cpu().numpy()
    # Clip first: values outside [0, 1] would otherwise wrap in the uint8 cast.
    cam = (np.clip(cam, 0, 1) * 255).astype(np.uint8)

    # Create heatmap (drop the alpha channel returned by the colormap)
    heatmap = cm.jet(cam / 255.0)[:, :, :3]

    # Overlay
    overlayed = 0.5 * img + 0.5 * heatmap
    overlayed = np.clip(overlayed, 0, 1)

    return img, cam, overlayed


def visualize_combined_stages(input_tensor, cam_maps_dict, save_path='stages_combined.png'):
    """Combine all individual stage CAMs into one large image and save it.

    The figure has one row per stage ('Stage1' .. 'Stage5') and three columns:
    original image, heatmap, overlay.

    Args:
        input_tensor: Image tensor forwarded to ``prepare_visualization_data``.
        cam_maps_dict: Mapping that must contain a CAM for each of the five
            stage names.
        save_path: Output file path for the figure.
    """
    stages = ['Stage1', 'Stage2', 'Stage3', 'Stage4', 'Stage5']
    column_labels = ('Original', 'Heatmap', 'Overlay')

    # One row per stage, one column per view
    fig, axes = plt.subplots(len(stages), 3, figsize=(15, 5 * len(stages)))

    for row_axes, stage_name in zip(axes, stages):
        img, cam, overlayed = prepare_visualization_data(
            input_tensor, cam_maps_dict[stage_name]
        )

        # Only the heatmap panel is drawn with an explicit colormap.
        panels = ((img, {}), (cam, {'cmap': 'jet'}), (overlayed, {}))
        for ax, (panel, imshow_kwargs), label in zip(row_axes, panels, column_labels):
            ax.imshow(panel, **imshow_kwargs)
            ax.set_title(f'{stage_name} - {label}', fontsize=12, fontweight='bold')
            ax.axis('off')

    plt.tight_layout()
    plt.savefig(save_path, bbox_inches='tight', dpi=150)
    plt.close()
    print(f"✓ Saved combined stages: {save_path}")


def visualize_combined_fusions(input_tensor, fusion_results, save_path='fusions_combined.png'):
    """Combine all fused CAMs into one large image and save it.

    The figure has one row per fusion configuration and three columns:
    original image, heatmap, overlay.

    Args:
        input_tensor: Image tensor forwarded to ``prepare_visualization_data``.
        fusion_results: Sequence of ``(config_name, fused_cam)`` pairs.
        save_path: Output file path for the figure.
    """
    row_count = len(fusion_results)
    column_labels = ('Original', 'Heatmap', 'Overlay')

    # squeeze=False always yields a 2-D grid of axes, so the single-row case
    # needs no special handling.
    fig, axes = plt.subplots(row_count, 3, figsize=(15, 5 * row_count), squeeze=False)

    for row_axes, (config_name, fused_cam) in zip(axes, fusion_results):
        img, cam, overlayed = prepare_visualization_data(input_tensor, fused_cam)

        # Only the heatmap panel is drawn with an explicit colormap.
        panels = ((img, {}), (cam, {'cmap': 'jet'}), (overlayed, {}))
        for ax, (panel, imshow_kwargs), label in zip(row_axes, panels, column_labels):
            ax.imshow(panel, **imshow_kwargs)
            ax.set_title(f'{config_name} - {label}', fontsize=12, fontweight='bold')
            ax.axis('off')

    plt.tight_layout()
    plt.savefig(save_path, bbox_inches='tight', dpi=150)
    plt.close()
    print(f"✓ Saved combined fusions: {save_path}")


In [68]:
# ============================================
# Section 4: Main Experiment
# ============================================

def run_layercam_experiment(image_path, save_dir='./layercam_results'):
    """
    Run the LayerCAM experiment on one image and save two summary figures.

    Layers hooked for the per-stage CAMs (labelled after Table I):
    - Stage1: conv1_2 (features_2)
    - Stage2: conv2_2 (features_7)
    - Stage3: conv3_3 (features_14)
    - Stage4: conv4_3 (features_21)
    - Stage5: conv5_3 (features_28)

    Fusion configurations (labelled after Table II):
    - S5
    - S5+S4
    - S5+S4+S3
    - S5+S4+S3+S2
    - S5+S4+S3+S2+S1

    Args:
        image_path: Path of the image to explain.
        save_dir: Directory for the output figures (created if missing).

    Returns:
        Dict mapping stage name to that stage's CAM tensor.  The fused CAMs
        are only written to the figure and are not returned.

    NOTE(review): the stage lists are handed to ``fuse_cams`` deepest stage
    first.  Confirm that any position-dependent scaling inside ``fuse_cams``
    is applied to the stages the paper intends.
    """
    rule = "=" * 60
    os.makedirs(save_dir, exist_ok=True)

    print(rule)
    print("LayerCAM: Combined Visualization Output")
    print(rule)

    # Step 1: load the image and move it to the compute device
    print("\n[1/4] Loading image...")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_tensor = apply_transforms(load_image(image_path)).to(device)
    print(f"✓ Image shape: {input_tensor.shape} on {device}")

    # Step 2: load VGG16 in eval mode
    print("\n[2/4] Loading VGG16...")
    vgg = models.vgg16(pretrained=True).eval().to(device)

    # Turn off in-place ReLU in the feature extractor before hooks are attached.
    for module in vgg.features.children():
        if isinstance(module, torch.nn.ReLU):
            module.inplace = False

    print("✓ Model loaded")

    # Step 3: report the top prediction
    print("\n[3/4] Predicting...")
    with torch.no_grad():
        logit = vgg(input_tensor)
        _, top_index = logit.max(1)
        predicted_class = top_index.item()
        confidence = F.softmax(logit, dim=1).max().item()
    print(f"✓ Predicted class: {predicted_class} (confidence: {confidence:.2%})")

    # Step 4: one LayerCAM per stage
    print("\n[4/4] Generating LayerCAMs (Table I)...")

    stage_layers = (
        ('Stage1', 'features_2', 'conv1_2'),
        ('Stage2', 'features_7', 'conv2_2'),
        ('Stage3', 'features_14', 'conv3_3'),
        ('Stage4', 'features_21', 'conv4_3'),
        ('Stage5', 'features_28', 'conv5_3'),
    )

    cam_maps = {}
    for stage_name, layer_name, conv_name in stage_layers:
        print(f"  Generating {stage_name} ({conv_name})...", end=' ')
        # No class index is passed, so LayerCAM picks the top class itself.
        cam_maps[stage_name] = LayerCAM(vgg, layer_name)(input_tensor)
        print("✓")

    # Combine all stages into a single image
    print("\n" + rule)
    print("Creating combined visualization for stages...")
    print(rule)

    stages_output = os.path.join(save_dir, 'all_stages_combined.png')
    visualize_combined_stages(input_tensor.cpu(), cam_maps, stages_output)

    # Fusion experiments (Table II)
    print("\n" + rule)
    print("Generating fused CAMs (Table II)...")
    print(rule)

    gamma = 2  # As per Table III

    fusion_plan = (
        ('S5', ['Stage5']),
        ('S5+S4', ['Stage5', 'Stage4']),
        ('S5+S4+S3', ['Stage5', 'Stage4', 'Stage3']),
        ('S5+S4+S3+S2', ['Stage5', 'Stage4', 'Stage3', 'Stage2']),
        ('S5+S4+S3+S2+S1', ['Stage5', 'Stage4', 'Stage3', 'Stage2', 'Stage1']),
    )

    fusion_results = []
    for config_name, stage_names in fusion_plan:
        print(f"  Generating {config_name}...", end=' ')
        selected_cams = [cam_maps[name] for name in stage_names]
        fusion_results.append((config_name, fuse_cams(selected_cams, gamma=gamma)))
        print("✓")

    # Combine all fusions into a single image
    print("\n" + rule)
    print("Creating combined visualization for fusions...")
    print(rule)

    fusions_output = os.path.join(save_dir, 'all_fusions_combined.png')
    visualize_combined_fusions(input_tensor.cpu(), fusion_results, fusions_output)

    print("\n" + rule)
    print(f"✓ Results saved to: {save_dir}")
    print(f"  - all_stages_combined.png (5 stages × 3 views)")
    print(f"  - all_fusions_combined.png (5 fusions × 3 views)")
    print(rule)

    return cam_maps



In [70]:
# ============================================
# Section 5: Execution
# ============================================

if __name__ == '__main__':
    # Colab-only entry point: ask for an upload, run the experiment on the
    # first uploaded file, then zip the result directory and download it.
    print("Please upload an image file...")
    from google.colab import files
    uploaded = files.upload()

    if not uploaded:
        print("No file uploaded!")
    else:
        # Only the first uploaded file is processed.
        image_path = next(iter(uploaded))
        print(f"\nProcessing: {image_path}\n")

        # Run experiment
        results_dir = './layercam_results'
        cam_maps = run_layercam_experiment(
            image_path=image_path,
            save_dir=results_dir,
        )

        # Download results
        print("\nCompressing results...")
        import shutil
        shutil.make_archive('layercam_results', 'zip', results_dir)
        files.download('layercam_results.zip')
        print("✓ Download complete!")

Please upload an image file...


Saving dog1.jpg to dog1 (8).jpg

Processing: dog1 (8).jpg

LayerCAM: Combined Visualization Output

[1/4] Loading image...
✓ Image shape: torch.Size([1, 3, 224, 224]) on cuda

[2/4] Loading VGG16...
✓ Model loaded

[3/4] Predicting...
✓ Predicted class: 161 (confidence: 88.27%)

[4/4] Generating LayerCAMs (Table I)...
  Generating Stage1 (conv1_2)... ✓
  Generating Stage2 (conv2_2)... ✓
  Generating Stage3 (conv3_3)... ✓
  Generating Stage4 (conv4_3)... ✓
  Generating Stage5 (conv5_3)... ✓

Creating combined visualization for stages...
✓ Saved combined stages: ./layercam_results/all_stages_combined.png

Generating fused CAMs (Table II)...
  Generating S5... ✓
  Generating S5+S4... ✓
  Generating S5+S4+S3... ✓
  Generating S5+S4+S3+S2... ✓
  Generating S5+S4+S3+S2+S1... ✓

Creating combined visualization for fusions...
✓ Saved combined fusions: ./layercam_results/all_fusions_combined.png

✓ Results saved to: ./layercam_results
  - all_stages_combined.png (5 stages × 3 views)
  - all_fusions_combined.png (5 fusions × 3 views)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✓ Download complete!
