# Prepare

In [1]:
import warnings
warnings.filterwarnings('ignore')
import cv2
import sys
import os
sys.path.append(os.path.abspath('..'))
from typing import Union, List
from enum import Enum
import json
import os
import random
import torch
import shutil
from PIL import Image
import numpy as np

import torchvision.models as models

import matplotlib.pyplot as plt

import pytorch_grad_cam as cam
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget, ClassifierOutputSoftmaxTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.image import show_cam_on_image, \
    deprocess_image, \
    preprocess_image
class CAMType(Enum):
    """Selectable CAM variants.

    Each member's value is the matching class taken from the
    ``pytorch_grad_cam`` package (imported in this file as ``cam``), so
    ``member.value`` can be called to build an explainer, as ``get_cam`` does.
    """
    GRAD_CAM = cam.GradCAM
    HI_RES_CAM = cam.HiResCAM
    GRAD_CAM_ELEMENT_WISE = cam.GradCAMElementWise
    ABLATION_CAM = cam.AblationCAM
    X_GRAD_CAM = cam.XGradCAM
    GRAD_CAM_PLUS_PLUS = cam.GradCAMPlusPlus
    SCORE_CAM = cam.ScoreCAM
    LAYER_CAM = cam.LayerCAM
    EIGEN_CAM = cam.EigenCAM
    EIGEN_GRAD_CAM = cam.EigenGradCAM
    KPCA_CAM = cam.KPCA_CAM
    RANDOM_CAM = cam.RandomCAM
    FULL_GRAD = cam.FullGrad

from typing import List, Callable

def get_cam(cam_type: CAMType, model: torch.nn.Module, target_layers: List[torch.nn.Module]):
    """Instantiate the CAM explainer selected by ``cam_type``.

    Args:
        cam_type: Member of ``CAMType``; its value is the explainer class.
        model: Network to explain.
        target_layers: List of layers, forwarded unchanged as the
            ``target_layers`` keyword of the explainer class.

    Returns:
        The object created by calling the explainer class with ``model`` and
        ``target_layers``.
    """
    explainer_cls = cam_type.value
    return explainer_cls(model=model, target_layers=target_layers)

def visualize(grayscale: torch.Tensor, rgb_img: np.ndarray):
    """Blend a CAM heat map onto ``rgb_img`` and display the result without axes."""
    overlay = show_cam_on_image(rgb_img, grayscale, use_rgb=True)
    plt.imshow(overlay)
    plt.axis('off')
    plt.show()

def display_images_in_rows(rgb_img: np.ndarray, grayscale_list: List[torch.Tensor], labels: List[str]):
    """Show the source image followed by one CAM overlay per heat map, in a single row.

    Args:
        rgb_img: Base image; displayed first and used as the background of
            every overlay.
        grayscale_list: Heat maps, each blended onto ``rgb_img`` with
            ``show_cam_on_image``.
        labels: Titles for the overlays, in the same order as
            ``grayscale_list``. The list is left untouched; the title
            "Image" for the source image is prepended on a copy.
    """
    images = [rgb_img]
    images.extend(
        show_cam_on_image(rgb_img, grayscale, use_rgb=True)
        for grayscale in grayscale_list
    )

    # Build a new list so the caller's `labels` is not mutated between calls.
    titles = ["Image", *labels]

    # One column per image instead of a fixed 4; squeeze=False keeps `axes`
    # two-dimensional even when there is a single column.
    _, axes = plt.subplots(1, len(images), figsize=(15, 5), squeeze=False)
    row = axes[0]

    # Hide every axis up front so a panel without a title shows no empty frame.
    for ax in row:
        ax.axis('off')

    for ax, img, title in zip(row, images, titles):
        ax.imshow(img)
        ax.set_title(title)

    plt.tight_layout()
    plt.show()

def execute_cam(
        img_path: str,
        model: torch.nn.Module,
        target_layers: List[torch.nn.Module],
        cam_type: CAMType,
        targets: Union[List[ClassifierOutputTarget], None] = None,
        visualization: bool = False,
        output_path: Union[str, None] = None
):
    """Compute a CAM for one image file.

    The image is loaded as RGB, resized to 224x224, scaled to [0, 1] and
    normalized with the mean/std values hard-coded below before being fed to
    the model.

    Args:
        img_path: Path of the image to explain.
        model: Classifier to explain.
        target_layers: Layers passed to the CAM explainer.
        cam_type: Which CAM variant to build (see ``get_cam``).
        targets: CAM targets. When empty or ``None``, the model's arg-max
            class for this image is used and its name is printed from the
            module-level ``labels`` mapping.
        visualization: When true, display the CAM overlaid on the image.
        output_path: When set (and ``visualization`` is true), also save the
            overlay to this path.

    Returns:
        ``(grayscale_cam, input_tensor)``: the explainer output and the
        preprocessed input tensor.
    """
    model_name = "unknown"
    target_layer_name = "unknown"
    try:
        model_name = model._get_name()
        target_layer_name = target_layers[0]._get_name()
    except (AttributeError, IndexError, KeyError, TypeError):
        # The names only feed the log line below; keep the placeholders.
        pass

    print(f"Executing CAM on {model_name} with target layer {target_layer_name}:")

    # The context manager closes the underlying file once pixels are copied.
    with Image.open(img_path) as pil_img:
        img = np.array(pil_img.convert("RGB"))
    img = cv2.resize(img, (224, 224))
    img = np.float32(img) / 255
    input_tensor = preprocess_image(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    # Forward to get target class if not specified
    if not targets:
        with torch.no_grad():
            predicted_class = model(input_tensor).max(1)[-1]
        targets = [ClassifierOutputTarget(predicted_class)]
        print(predicted_class)
        print(f"Target class: {labels[str(predicted_class.item())][1]}")

    # Named `explainer` so the imported `cam` module is not shadowed.
    explainer = get_cam(cam_type=cam_type, model=model, target_layers=target_layers)

    # Generate the CAM
    grayscale_cam = explainer(input_tensor=input_tensor, targets=targets)

    if visualization:
        # `img` is already the resized float RGB image in [0, 1], so it is the
        # background for the overlay (the old code read an undefined name).
        heatmap = grayscale_cam[0, :]
        visualize(grayscale=heatmap, rgb_img=img)

        if output_path:
            # show_cam_on_image returns a uint8 RGB array, which PIL can save
            # directly; the old code tried to scale the boolean flag instead.
            overlay = show_cam_on_image(img, heatmap, use_rgb=True)
            Image.fromarray(overlay).save(output_path)
            print(f"Saved output image to {output_path}")

    return grayscale_cam, input_tensor

# Class-index lookup used by execute_cam to print the predicted class name.
# The context manager guarantees the file handle is closed after parsing.
with open("../pytorch_grad_cam/utils/imagenet_class_index.json", encoding="utf-8") as labels_file:
    labels = json.load(labels_file)

In [2]:
class PerturbationConfidenceMetric:
    """Score change of a model when each input is perturbed using its CAM.

    ``perturbation`` is a callable that receives one un-batched input tensor
    (moved to CPU) and its CAM converted to a tensor, and returns the
    perturbed tensor.
    """

    def __init__(self, perturbation):
        self.perturbation = perturbation

    def __call__(self, input_tensor: torch.Tensor,
                 cams: np.ndarray,
                 targets: List[Callable],
                 model: torch.nn.Module,
                 return_visualization=False,
                 return_diff=True):
        """Evaluate the targets before and after perturbing the batch.

        Args:
            input_tensor: Batch of model inputs; the first dimension is the
                batch size.
            cams: One CAM per batch element, indexed along the first axis.
            targets: One callable per batch element, applied to that
                element's model output.
            model: Model to evaluate.
            return_visualization: When true, the perturbed batch is appended
                to the returned tuple.
            return_diff: When true, ``result`` is the score after
                perturbation minus the baseline score. When false, the
                baseline forward pass is skipped and ``result`` is the score
                after perturbation.

        Returns:
            ``(result, scores, scores_after_imputation)`` and, if
            ``return_visualization`` is true, ``perturbated_tensors`` as a
            fourth element. ``scores`` is ``None`` when ``return_diff`` is
            false.
        """
        # Bound unconditionally: the original left `scores` undefined when
        # return_diff was false and then failed on the return statement.
        scores = None
        if return_diff:
            with torch.no_grad():
                outputs = model(input_tensor)
                scores = [target(output).cpu().numpy()
                          for target, output in zip(targets, outputs)]
                scores = np.float32(scores)

        batch_size = input_tensor.size(0)
        perturbated_tensors = []
        for i in range(batch_size):
            cam = cams[i]
            # The perturbation runs on CPU; the result goes back to the
            # device of the original batch.
            tensor = self.perturbation(input_tensor[i, ...].cpu(),
                                       torch.from_numpy(cam))
            tensor = tensor.to(input_tensor.device)
            perturbated_tensors.append(tensor.unsqueeze(0))
        perturbated_tensors = torch.cat(perturbated_tensors)

        with torch.no_grad():
            outputs_after_imputation = model(perturbated_tensors)
        scores_after_imputation = [
            target(output).cpu().numpy() for target, output in zip(
                targets, outputs_after_imputation)]
        scores_after_imputation = np.float32(scores_after_imputation)

        if return_diff:
            result = scores_after_imputation - scores
        else:
            result = scores_after_imputation

        if return_visualization:
            return result, scores, scores_after_imputation, perturbated_tensors
        return result, scores, scores_after_imputation

def multiply_tensor_with_cam(input_tensor: torch.Tensor,
                             cam: torch.Tensor):
    """Return the product of a (normalized) input tensor and a pixel
    attribution map.
    """
    weighted = input_tensor * cam
    return weighted
        
class CamMultImageConfidenceChange(PerturbationConfidenceMetric):
    """Perturbation metric whose perturbation is ``multiply_tensor_with_cam``."""

    def __init__(self):
        super().__init__(multiply_tensor_with_cam)
        
class DropInConfidence(CamMultImageConfidenceChange):
    """Drop of the target score after perturbation, as a percentage of the
    baseline score; score increases count as a drop of 0.
    """

    def __init__(self):
        super().__init__()

    def __call__(self, *args, **kwargs):
        diff, scores_before, _ = super().__call__(*args, **kwargs)
        # The parent reports "after minus before"; negate to get the drop.
        drop = -diff
        clipped = np.maximum(drop, 0)
        return clipped / scores_before * 100


class IncreaseInConfidence(CamMultImageConfidenceChange):
    """Indicator metric: 1.0 where the target score rose after perturbation,
    0.0 otherwise.
    """

    def __init__(self):
        super().__init__()

    def __call__(self, *args, **kwargs):
        diff, _, _ = super().__call__(*args, **kwargs)
        rose = diff > 0
        return np.float32(rose)

# Body

## Choose model & method

In [3]:
# VGG16 with torchvision's default weights, switched to eval mode. CAMs are
# computed on features[28], which the run logs below report as a Conv2d layer.
model = models.vgg16(weights=models.VGG16_Weights.DEFAULT).eval()
target_layers = [model.features[28]]

# Alternative backbone, kept disabled:
# model = models.alexnet(weights=models.AlexNet_Weights.DEFAULT).eval()
# target_layers = [model.features[10]]

# Metric objects shared by the evaluation cells; all three perturb the input
# by multiplying it with the CAM.
cam_metric = CamMultImageConfidenceChange()
drop_in_conf_metric = DropInConfidence()
increase_in_conf_metric = IncreaseInConfidence()

## Average drop & Average increase

In [4]:
# Paths
# NOTE(review): input_folder is an absolute path on the author's machine;
# point it at a local copy of the validation images before running.
input_folder = r"C:\Users\HaPham\Documents\ThesisXAI\Code\CAM-combination\ILSVRC2012\ILSVRC2012_img_val"
imputated_folder = "../imputated_images"

# Start from an empty output folder: any previous contents are deleted.
if os.path.exists(imputated_folder):
    shutil.rmtree(imputated_folder)
os.makedirs(imputated_folder)

# Generate a list of image names within the specified range
start_idx = 1
end_idx = 50000
num_images_to_sample = 100  # Adjust this to how many random images you want

all_image_names = [
    f"ILSVRC2012_val_{i:08d}.JPEG" for i in range(start_idx, end_idx + 1)
]
# NOTE(review): no seed is set in this cell, so this draw presumably changes
# between runs; the assignment that follows replaces it with a fixed list.
random_image_names = random.sample(all_image_names, num_images_to_sample)
# Fixed list of image names that overrides the random sample drawn just above,
# so every evaluation cell processes the same images.
random_image_names = ['ILSVRC2012_val_00048014.JPEG', 'ILSVRC2012_val_00040662.JPEG', 'ILSVRC2012_val_00021194.JPEG', 'ILSVRC2012_val_00038840.JPEG', 'ILSVRC2012_val_00026519.JPEG', 'ILSVRC2012_val_00026939.JPEG', 'ILSVRC2012_val_00000022.JPEG', 'ILSVRC2012_val_00000165.JPEG', 'ILSVRC2012_val_00028945.JPEG', 'ILSVRC2012_val_00032674.JPEG', 'ILSVRC2012_val_00026695.JPEG', 'ILSVRC2012_val_00043106.JPEG', 'ILSVRC2012_val_00038333.JPEG', 'ILSVRC2012_val_00018162.JPEG', 'ILSVRC2012_val_00004863.JPEG', 'ILSVRC2012_val_00034851.JPEG', 'ILSVRC2012_val_00004517.JPEG', 'ILSVRC2012_val_00010810.JPEG', 'ILSVRC2012_val_00011824.JPEG', 'ILSVRC2012_val_00020485.JPEG', 'ILSVRC2012_val_00048173.JPEG', 'ILSVRC2012_val_00041393.JPEG', 'ILSVRC2012_val_00033662.JPEG', 'ILSVRC2012_val_00045303.JPEG', 'ILSVRC2012_val_00004455.JPEG', 'ILSVRC2012_val_00011500.JPEG', 'ILSVRC2012_val_00025962.JPEG', 'ILSVRC2012_val_00043584.JPEG', 'ILSVRC2012_val_00036038.JPEG', 'ILSVRC2012_val_00001159.JPEG', 'ILSVRC2012_val_00036157.JPEG', 'ILSVRC2012_val_00047597.JPEG', 'ILSVRC2012_val_00044337.JPEG', 'ILSVRC2012_val_00003755.JPEG', 'ILSVRC2012_val_00043447.JPEG', 'ILSVRC2012_val_00031518.JPEG', 'ILSVRC2012_val_00041929.JPEG', 'ILSVRC2012_val_00010950.JPEG', 'ILSVRC2012_val_00023940.JPEG', 'ILSVRC2012_val_00034458.JPEG', 'ILSVRC2012_val_00003772.JPEG', 'ILSVRC2012_val_00017173.JPEG', 'ILSVRC2012_val_00035194.JPEG', 'ILSVRC2012_val_00013968.JPEG', 'ILSVRC2012_val_00007289.JPEG', 'ILSVRC2012_val_00035626.JPEG', 'ILSVRC2012_val_00001925.JPEG', 'ILSVRC2012_val_00018556.JPEG', 'ILSVRC2012_val_00005887.JPEG', 'ILSVRC2012_val_00037546.JPEG', 'ILSVRC2012_val_00037983.JPEG', 'ILSVRC2012_val_00028321.JPEG', 'ILSVRC2012_val_00006292.JPEG', 'ILSVRC2012_val_00010227.JPEG', 'ILSVRC2012_val_00020722.JPEG', 'ILSVRC2012_val_00010561.JPEG', 'ILSVRC2012_val_00040482.JPEG', 'ILSVRC2012_val_00042051.JPEG', 'ILSVRC2012_val_00001760.JPEG', 'ILSVRC2012_val_00021865.JPEG', 'ILSVRC2012_val_00010828.JPEG', 
'ILSVRC2012_val_00043847.JPEG', 'ILSVRC2012_val_00036917.JPEG', 'ILSVRC2012_val_00047053.JPEG', 'ILSVRC2012_val_00002225.JPEG', 'ILSVRC2012_val_00014391.JPEG', 'ILSVRC2012_val_00023265.JPEG', 'ILSVRC2012_val_00025722.JPEG', 'ILSVRC2012_val_00035266.JPEG', 'ILSVRC2012_val_00000334.JPEG', 'ILSVRC2012_val_00009316.JPEG', 'ILSVRC2012_val_00037959.JPEG', 'ILSVRC2012_val_00015267.JPEG', 'ILSVRC2012_val_00045274.JPEG', 'ILSVRC2012_val_00005621.JPEG', 'ILSVRC2012_val_00009324.JPEG', 'ILSVRC2012_val_00036612.JPEG', 'ILSVRC2012_val_00012167.JPEG', 'ILSVRC2012_val_00013826.JPEG', 'ILSVRC2012_val_00039615.JPEG', 'ILSVRC2012_val_00003550.JPEG', 'ILSVRC2012_val_00018661.JPEG', 'ILSVRC2012_val_00037578.JPEG', 'ILSVRC2012_val_00032692.JPEG', 'ILSVRC2012_val_00022024.JPEG', 'ILSVRC2012_val_00011285.JPEG', 'ILSVRC2012_val_00017859.JPEG', 'ILSVRC2012_val_00025713.JPEG', 'ILSVRC2012_val_00027390.JPEG', 'ILSVRC2012_val_00045695.JPEG', 'ILSVRC2012_val_00038690.JPEG', 'ILSVRC2012_val_00016934.JPEG', 'ILSVRC2012_val_00027410.JPEG', 'ILSVRC2012_val_00039936.JPEG', 'ILSVRC2012_val_00025764.JPEG', 'ILSVRC2012_val_00024909.JPEG', 'ILSVRC2012_val_00003979.JPEG', 'ILSVRC2012_val_00035948.JPEG', 'ILSVRC2012_val_00044730.JPEG', 'ILSVRC2012_val_00041283.JPEG']

In [5]:
cam_type = CAMType.GRAD_CAM

# Running sums over all processed images; divided by num_images at the end.
total_drop_in_conf = 0
total_increase_in_conf = 0
num_images = 0  # Track number of processed images

# Process the selected images
for filename in random_image_names:
    img_path = os.path.join(input_folder, filename)

    # Names that are missing from the input folder are skipped.
    if not os.path.exists(img_path):
        continue

    # Execute CAM for the current image
    gray_scale, input_tensor = execute_cam(img_path=img_path, model=model, target_layers=target_layers, cam_type=cam_type)

    # Zero every CAM value below the median, so the multiplication inside the
    # metrics blanks those pixels.
    threshold = np.percentile(gray_scale, 50)
    gray_scale[gray_scale < threshold] = 0
    # Disabled alternative: gray_scale = gray_scale < threshold

    # Only the arg-max class index is needed here, so the forward pass runs
    # without building an autograd graph.
    with torch.no_grad():
        predicted_class = model(input_tensor).max(1)[-1]
    targets = [ClassifierOutputSoftmaxTarget(predicted_class)]

    scores, scores_before, scores_after, visualizations = cam_metric(
        input_tensor, gray_scale, targets, model, return_visualization=True
    )

    # Calculate Drop in Confidence and Increase in Confidence
    drop_in_conf = drop_in_conf_metric(input_tensor, gray_scale, targets, model)
    increase_in_conf = increase_in_conf_metric(input_tensor, gray_scale, targets, model)

    # Accumulate the scores for averaging
    total_drop_in_conf += drop_in_conf
    total_increase_in_conf += increase_in_conf
    num_images += 1  # Increment image count

    # Raw score difference (after minus before) for the single image in the batch
    score = scores[0]

    # Save the perturbed input, converted back to an image, to the imputated folder
    visualization = visualizations[0].cpu().numpy().transpose((1, 2, 0))
    visualization = deprocess_image(visualization)
    Image.fromarray(visualization).save(os.path.join(imputated_folder, filename))

    # Print individual results
    print(f"Image: {filename}")
    print(f"Confidence before imputation: {scores_before}")
    print(f"Confidence after imputation: {scores_after}")
    print(f"The confidence increase raw: {score}")
    print(f"The drop in confidence percentage: {drop_in_conf}%")
    print(f"The increase in confidence: {increase_in_conf}")
    print("----------------------------------------")

# Calculate and print averages after processing all images
if num_images > 0:
    avg_drop_in_conf = total_drop_in_conf / num_images
    avg_increase_in_conf = total_increase_in_conf / num_images * 100
    print(f"\nNumber of images processed: {num_images}")
    print(f"Average Drop in Confidence: {avg_drop_in_conf}%")
    print(f"Average Increase in Confidence: {avg_increase_in_conf}%")
else:
    print("No valid images found in the specified folder.")

Executing CAM on VGG with target layer Conv2d:
tensor([605])
Target class: iPod
Image: ILSVRC2012_val_00048014.JPEG
Confidence before imputation: [[0.9759646]]
Confidence after imputation: [[0.97425985]]
The confidence increase raw: [-0.00170475]
The drop in confidence percentage: [[0.17467359]]%
The increase in confidence: [[0.]]
----------------------------------------
Executing CAM on VGG with target layer Conv2d:
tensor([748])
Target class: purse
Image: ILSVRC2012_val_00040662.JPEG
Confidence before imputation: [[0.42008683]]
Confidence after imputation: [[0.01459827]]
The confidence increase raw: [-0.40548855]
The drop in confidence percentage: [[96.52493]]%
The increase in confidence: [[0.]]
----------------------------------------
Executing CAM on VGG with target layer Conv2d:
tensor([898])
Target class: water_bottle
Image: ILSVRC2012_val_00021194.JPEG
Confidence before imputation: [[0.24797332]]
Confidence after imputation: [[0.00338788]]
The confidence increase raw: [-0.244585

In [6]:
cam_type = CAMType.GRAD_CAM_PLUS_PLUS

# Running sums over all processed images; divided by num_images at the end.
total_drop_in_conf = 0
total_increase_in_conf = 0
num_images = 0  # Track number of processed images

# Process the selected images
for filename in random_image_names:
    img_path = os.path.join(input_folder, filename)

    # Names that are missing from the input folder are skipped.
    if not os.path.exists(img_path):
        continue

    # Execute CAM for the current image
    gray_scale, input_tensor = execute_cam(img_path=img_path, model=model, target_layers=target_layers, cam_type=cam_type)

    # Zero every CAM value below the median, so the multiplication inside the
    # metrics blanks those pixels.
    threshold = np.percentile(gray_scale, 50)
    gray_scale[gray_scale < threshold] = 0
    # Disabled alternative: gray_scale = gray_scale < threshold

    # Only the arg-max class index is needed here, so the forward pass runs
    # without building an autograd graph.
    with torch.no_grad():
        predicted_class = model(input_tensor).max(1)[-1]
    targets = [ClassifierOutputSoftmaxTarget(predicted_class)]

    scores, scores_before, scores_after, visualizations = cam_metric(
        input_tensor, gray_scale, targets, model, return_visualization=True
    )

    # Calculate Drop in Confidence and Increase in Confidence
    drop_in_conf = drop_in_conf_metric(input_tensor, gray_scale, targets, model)
    increase_in_conf = increase_in_conf_metric(input_tensor, gray_scale, targets, model)

    # Accumulate the scores for averaging
    total_drop_in_conf += drop_in_conf
    total_increase_in_conf += increase_in_conf
    num_images += 1  # Increment image count

    # Raw score difference (after minus before) for the single image in the batch
    score = scores[0]

    # Save the perturbed input, converted back to an image, to the imputated folder
    visualization = visualizations[0].cpu().numpy().transpose((1, 2, 0))
    visualization = deprocess_image(visualization)
    Image.fromarray(visualization).save(os.path.join(imputated_folder, filename))

    # Print individual results
    print(f"Image: {filename}")
    print(f"Confidence before imputation: {scores_before}")
    print(f"Confidence after imputation: {scores_after}")
    print(f"The confidence increase raw: {score}")
    print(f"The drop in confidence percentage: {drop_in_conf}%")
    print(f"The increase in confidence: {increase_in_conf}")
    print("----------------------------------------")

# Calculate and print averages after processing all images
if num_images > 0:
    avg_drop_in_conf = total_drop_in_conf / num_images
    avg_increase_in_conf = total_increase_in_conf / num_images * 100
    print(f"\nNumber of images processed: {num_images}")
    print(f"Average Drop in Confidence: {avg_drop_in_conf}%")
    print(f"Average Increase in Confidence: {avg_increase_in_conf}%")
else:
    print("No valid images found in the specified folder.")

Executing CAM on VGG with target layer Conv2d:
tensor([605])
Target class: iPod
Image: ILSVRC2012_val_00048014.JPEG
Confidence before imputation: [[0.9759646]]
Confidence after imputation: [[0.8933943]]
The confidence increase raw: [-0.08257031]
The drop in confidence percentage: [[8.460381]]%
The increase in confidence: [[0.]]
----------------------------------------
Executing CAM on VGG with target layer Conv2d:
tensor([748])
Target class: purse
Image: ILSVRC2012_val_00040662.JPEG
Confidence before imputation: [[0.42008683]]
Confidence after imputation: [[0.01470881]]
The confidence increase raw: [-0.405378]
The drop in confidence percentage: [[96.49863]]%
The increase in confidence: [[0.]]
----------------------------------------
Executing CAM on VGG with target layer Conv2d:
tensor([898])
Target class: water_bottle
Image: ILSVRC2012_val_00021194.JPEG
Confidence before imputation: [[0.24797332]]
Confidence after imputation: [[0.00360898]]
The confidence increase raw: [-0.24436434]
T

In [7]:
# cam_type = CAMType.SCORE_CAM

# # Initialize metrics
# total_drop_in_conf = 0
# total_increase_in_conf = 0
# num_images = 0  # Track number of processed images

# # Process the randomly selected images
# for filename in random_image_names:
#     img_path = os.path.join(input_folder, filename)

#     if os.path.exists(img_path):  # Ensure the file exists
#         # Execute CAM for the current image
#         gray_scale, input_tensor = execute_cam(img_path=img_path, model=model, target_layers=target_layers, cam_type=cam_type)
        
#         # Add threshold
#         threshold = np.percentile(gray_scale, 50)
        
#         gray_scale[gray_scale < threshold] = 0
#         # gray_scale = gray_scale < threshold

#         # Calculate predicted class and confidence score change
#         predicted_class = model(input_tensor).max(1)[-1]
#         targets = [ClassifierOutputSoftmaxTarget(predicted_class)]

#         scores, scores_before, scores_after, visualizations = cam_metric(
#             input_tensor, gray_scale, targets, model, return_visualization=True
#         )

#         # Calculate Drop in Confidence and Increase in Confidence
#         drop_in_conf = drop_in_conf_metric(input_tensor, gray_scale, targets, model)
#         increase_in_conf = increase_in_conf_metric(input_tensor, gray_scale, targets, model)

#         # Accumulate the scores for averaging
#         total_drop_in_conf += drop_in_conf
#         total_increase_in_conf += increase_in_conf
#         num_images += 1  # Increment image count

#         # Process the visualization for display and scoring
#         score = scores[0]

#         # Save images to imputated folder
#         visualization = visualizations[0].cpu().numpy().transpose((1, 2, 0))
#         visualization = deprocess_image(visualization)
#         Image.fromarray(visualization).save(os.path.join(imputated_folder, filename))

#         # Print individual results
#         print(f"Image: {filename}")
#         print(f"Confidence before imputation: {scores_before}")
#         print(f"Confidence after imputation: {scores_after}")
#         print(f"The confidence increase raw: {score}")
#         print(f"The drop in confidence percentage: {drop_in_conf}%")
#         print(f"The increase in confidence: {increase_in_conf}")
#         print("----------------------------------------")

# # Calculate and print averages after processing all images
# if num_images > 0:
#     avg_drop_in_conf = total_drop_in_conf / num_images
#     avg_increase_in_conf = total_increase_in_conf / num_images * 100
#     print(f"\nNumber of images processed: {num_images}")
#     print(f"Average Drop in Confidence: {avg_drop_in_conf}%")
#     print(f"Average Increase in Confidence: {avg_increase_in_conf}%")
# else:
#     print("No valid images found in the specified folder.")