# Prepare

In [1]:
import warnings
warnings.filterwarnings('ignore')
import cv2
import sys
import os
sys.path.append(os.path.abspath('..'))
from typing import Union, List
from enum import Enum
import json
import shutil
import random

from PIL import Image
import numpy as np

import torch
import torchvision.models as models

import matplotlib.pyplot as plt

import pytorch_grad_cam as cam
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget, ClassifierOutputSoftmaxTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.image import show_cam_on_image, \
    deprocess_image, \
    preprocess_image
class CAMType(Enum):
    """Selectable CAM implementations.

    Each member's value is the corresponding class exposed by the
    ``pytorch_grad_cam`` package (imported in this file as ``cam``), so
    ``member.value(model=..., target_layers=...)`` builds that CAM object;
    ``get_cam`` relies on exactly this.
    """
    GRAD_CAM = cam.GradCAM
    HI_RES_CAM = cam.HiResCAM
    GRAD_CAM_ELEMENT_WISE = cam.GradCAMElementWise
    ABLATION_CAM = cam.AblationCAM
    X_GRAD_CAM = cam.XGradCAM
    GRAD_CAM_PLUS_PLUS = cam.GradCAMPlusPlus
    SCORE_CAM = cam.ScoreCAM
    LAYER_CAM = cam.LayerCAM
    EIGEN_CAM = cam.EigenCAM
    EIGEN_GRAD_CAM = cam.EigenGradCAM
    KPCA_CAM = cam.KPCA_CAM
    RANDOM_CAM = cam.RandomCAM
    FULL_GRAD = cam.FullGrad

from typing import List, Callable

def get_cam(cam_type: CAMType, model: torch.nn.Module, target_layers: List[torch.nn.Module]):
    """Instantiate the CAM class selected by ``cam_type``.

    Args:
        cam_type: Enum member whose value is the CAM class to construct.
        model: Network the CAM object will be attached to.
        target_layers: Layers of ``model`` to compute the CAM from. Callers in
            this file always pass a list, which is what the annotation now says.

    Returns:
        A new instance of ``cam_type.value`` built with ``model`` and
        ``target_layers`` as keyword arguments.
    """
    return cam_type.value(model=model, target_layers=target_layers)

def visualize(grayscale: torch.Tensor, rgb_img: np.ndarray):
    """Render a CAM heatmap on top of an image and show it with matplotlib.

    Args:
        grayscale: Heatmap handed to ``show_cam_on_image`` as the mask.
            NOTE(review): annotated as a tensor, but ``execute_cam`` passes a
            slice of the CAM output, which looks like a NumPy array - confirm.
        rgb_img: Base image handed to ``show_cam_on_image``.
    """
    overlay = show_cam_on_image(rgb_img, grayscale, use_rgb=True)
    plt.imshow(overlay)
    plt.axis('off')
    plt.show()

def display_images_in_rows(rgb_img: np.ndarray, grayscale_list: List[torch.Tensor], labels: List[str]):
    """Show the source image followed by one CAM overlay per heatmap, in one row.

    Args:
        rgb_img: Base image. It is displayed unmodified in the first panel
            (titled "Image") and used as the background of every overlay.
        grayscale_list: Heatmaps; each is blended onto ``rgb_img`` with
            ``show_cam_on_image`` and shown in its own panel.
        labels: Titles for the overlay panels, in the same order as
            ``grayscale_list``. The caller's list is left untouched.
    """
    images = [rgb_img] + [
        show_cam_on_image(rgb_img, grayscale, use_rgb=True)
        for grayscale in grayscale_list
    ]

    # Build a fresh title list rather than inserting into the caller's list,
    # which would add another "Image" entry on every call.
    titles = ["Image", *labels]

    # One column per image so no panel is silently dropped and none is left
    # empty; the width scales so that four panels keep the original 15x5 size.
    n_panels = len(images)
    fig, axes = plt.subplots(
        1, n_panels, figsize=(3.75 * n_panels, 5), squeeze=False
    )

    # squeeze=False always yields a 2-D axes array, even for a single panel.
    for ax, img, title in zip(axes[0], images, titles):
        ax.imshow(img)
        ax.set_title(title)
        ax.axis('off')

    plt.tight_layout()
    plt.show()

def execute_cam(
        img_path: str,
        model: torch.nn.Module,
        target_layers: List[torch.nn.Module],
        cam_type: CAMType,
        targets: Union[List[ClassifierOutputTarget], None] = None,
        visualization: bool = False,
        output_path: Union[str, None] = None
    ):
    """Run one CAM method on a single image file.

    The image is loaded as RGB, resized to 224x224, scaled to [0, 1] and
    normalized by ``preprocess_image`` with the mean/std given below.

    Args:
        img_path: Path of the image to explain.
        model: Classifier to explain.
        target_layers: Layers of ``model`` the CAM is computed from.
        cam_type: Which CAM implementation to use.
        targets: CAM targets. When empty or None, the model's top-1 class for
            this image is used and printed together with its name from the
            module-level ``labels`` table.
        visualization: When True, display the CAM overlaid on the image.
        output_path: When set (and ``visualization`` is True), the overlay is
            also written to this path.

    Returns:
        Tuple ``(grayscale_cam, input_tensor)``: the CAM output for the image
        and the preprocessed tensor that was fed to the model.
    """
    model_name = "unknown"
    target_layer_name = "unknown"
    try:
        model_name = model._get_name()
        target_layer_name = target_layers[0]._get_name()
    except (AttributeError, IndexError, TypeError):
        # Best effort only: the names are used solely for the log line below.
        pass

    print(f"Executing CAM on {model_name} with target layer {target_layer_name}:")

    # Close the file handle as soon as the pixels have been copied out.
    with Image.open(img_path) as pil_img:
        img = np.array(pil_img.convert("RGB"))
    img = cv2.resize(img, (224, 224))
    img = np.float32(img) / 255
    input_tensor = preprocess_image(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    # Forward to get target class if not specified
    if not targets:
        with torch.no_grad():
            predicted_class = model(input_tensor).max(1)[-1]
        targets = [ClassifierOutputTarget(predicted_class)]
        print(predicted_class)
        print(f"Target class: {labels[str(predicted_class.item())][1]}")

    # Named so it does not shadow the ``cam`` module alias imported by this file.
    cam_extractor = get_cam(cam_type=cam_type, model=model, target_layers=target_layers)

    # Generate the CAM
    grayscale_cam = cam_extractor(input_tensor=input_tensor, targets=targets)

    if visualization:
        # ``img`` is already the resized [0, 1] float image, so it is the
        # overlay background; the previous code read an unassigned local here.
        visualize(grayscale=grayscale_cam[0, :], rgb_img=img)

        if output_path:
            # Save the rendered overlay itself (``visualization`` is only the
            # boolean flag and cannot be converted to an image).
            overlay = show_cam_on_image(img, grayscale_cam[0, :], use_rgb=True)
            Image.fromarray(overlay).save(output_path)
            print(f"Saved output image to {output_path}")

    return grayscale_cam, input_tensor

# Class-index table read by execute_cam: indexed by the class id as a string,
# with the printable class name at position 1 of each entry.
with open("../pytorch_grad_cam/utils/imagenet_class_index.json", encoding="utf-8") as class_index_file:
    labels = json.load(class_index_file)

In [2]:
class PerturbationConfidenceMetric:
    """Score how a model's target outputs change when inputs are perturbed by CAMs.

    ``perturbation`` is a callable ``(input_i, cam_i) -> tensor`` that receives
    one batch item (moved to the CPU) and its CAM as a tensor, and returns the
    perturbed item.
    """

    def __init__(self, perturbation):
        self.perturbation = perturbation

    def __call__(self, input_tensor: torch.Tensor,
                 cams: np.ndarray,
                 targets: List[Callable],
                 model: torch.nn.Module,
                 return_visualization=False,
                 return_diff=True):
        """Evaluate the targets before and after perturbing ``input_tensor``.

        Args:
            input_tensor: Batch fed to ``model``; indexed along dim 0.
            cams: One CAM per batch item, indexed along axis 0.
            targets: One callable per batch item, applied to that item's
                model output to obtain a score.
            model: Model to evaluate.
            return_visualization: Also return the perturbed batch.
            return_diff: When True, ``result`` is the score after perturbation
                minus the score before. When False, ``result`` is the score
                after perturbation and the unperturbed forward pass is skipped.

        Returns:
            ``(result, scores, scores_after_imputation)``, plus the perturbed
            batch as a fourth element when ``return_visualization`` is True.
            ``scores`` is None when ``return_diff`` is False, because the
            baseline scores are not computed in that mode.
        """
        # Bound up front so the return statements below are valid in both
        # modes; previously return_diff=False raised NameError.
        scores = None
        if return_diff:
            with torch.no_grad():
                outputs = model(input_tensor)
                scores = [target(output).cpu().numpy()
                          for target, output in zip(targets, outputs)]
                scores = np.float32(scores)

        batch_size = input_tensor.size(0)
        perturbated_tensors = []
        for i in range(batch_size):
            # The perturbation runs on the CPU; the result is moved back to
            # the device the input lives on.
            tensor = self.perturbation(input_tensor[i, ...].cpu(),
                                       torch.from_numpy(cams[i]))
            perturbated_tensors.append(tensor.to(input_tensor.device))
        perturbated_tensors = torch.stack(perturbated_tensors)

        with torch.no_grad():
            outputs_after_imputation = model(perturbated_tensors)
        scores_after_imputation = [
            target(output).cpu().numpy() for target, output in zip(
                targets, outputs_after_imputation)]
        scores_after_imputation = np.float32(scores_after_imputation)

        if return_diff:
            result = scores_after_imputation - scores
        else:
            result = scores_after_imputation

        if return_visualization:
            return result, scores, scores_after_imputation, perturbated_tensors
        return result, scores, scores_after_imputation

def multiply_tensor_with_cam(input_tensor: torch.Tensor,
                             cam: torch.Tensor):
    """Return the element-wise product of an input tensor and a CAM.

    The input is expected to be already normalized; the pixel attribution
    map acts as a multiplicative mask on it.
    """
    masked = torch.mul(input_tensor, cam)
    return masked
        
class CamMultImageConfidenceChange(PerturbationConfidenceMetric):
    """Perturbation metric whose perturbation multiplies the input by its CAM."""

    def __init__(self):
        super().__init__(multiply_tensor_with_cam)
        
class DropInConfidence(CamMultImageConfidenceChange):
    """Percentage drop in the target score after multiplying the input by its CAM.

    Increases in score are clipped to a drop of zero.
    """

    def __init__(self):
        super().__init__()

    def __call__(self, *args, **kwargs):
        diff, scores_before, scores_after = super().__call__(*args, **kwargs)
        # The parent reports (after - before); negate it so a drop is positive.
        drop = np.maximum(-diff, 0)
        return drop / scores_before * 100


class IncreaseInConfidence(CamMultImageConfidenceChange):
    """Indicator (1.0 or 0.0) of whether the target score rose after masking with the CAM."""

    def __init__(self):
        super().__init__()

    def __call__(self, *args, **kwargs):
        diff, _before, _after = super().__call__(*args, **kwargs)
        increased = diff > 0
        return np.float32(increased)

# Body

## Choose model & method

In [3]:
# VGG-16 with torchvision's default weights, switched to eval mode.
model = models.vgg16(weights=models.VGG16_Weights.DEFAULT).eval()
# Layer the CAMs are computed from (logged as "Conv2d" by execute_cam).
target_layers = [model.features[28]]

# Initial CAM method; the evaluation cells below reassign cam_type before use.
cam_type = CAMType.GRAD_CAM_PLUS_PLUS

## Model confidence with 0% and 100% of the image pixels kept

In [4]:
# Paths
# NOTE(review): absolute, machine-specific location of the validation images;
# it must be adjusted before running on another machine.
input_folder = r"C:\Users\HaPham\Documents\ThesisXAI\Code\CAM-combination\ILSVRC2012\ILSVRC2012_img_val"
imputated_folder = "../imputated_images"

# Start from an empty output folder: any existing one is deleted with all of
# its contents and then recreated.
if os.path.exists(imputated_folder):
    shutil.rmtree(imputated_folder)
os.makedirs(imputated_folder)

# Generate a list of image names within the specified range
start_idx = 1
end_idx = 50000
num_images_to_sample = 1  # Adjust this to how many random images you want

# File names follow the pattern ILSVRC2012_val_<8-digit index>.JPEG,
# with end_idx inclusive.
all_image_names = [
    f"ILSVRC2012_val_{i:08d}.JPEG" for i in range(start_idx, end_idx + 1)
]
# Sampled without replacement. No random seed is set in the visible code, so
# the selection differs between runs.
random_image_names = random.sample(all_image_names, num_images_to_sample)


In [5]:
# Baseline check: target-class confidence on the untouched image versus an
# input whose normalized tensor has been multiplied by an all-zero mask.
cam_type = CAMType.GRAD_CAM
cam_metric = CamMultImageConfidenceChange()
drop_in_conf_metric = DropInConfidence()
increase_in_conf_metric = IncreaseInConfidence()
imputated_folder = "../imputated_images"
for filename in random_image_names:
    img_path = os.path.join(input_folder, filename)

    gray_scale_original, input_tensor = execute_cam(img_path=img_path, model=model, target_layers=target_layers, cam_type=cam_type)

    # All-zero mask: multiplying by it zeroes the whole normalized input.
    gray_scale = np.zeros_like(gray_scale_original)

    # Load and preprocess the image for CAM overlay (file handle closed promptly)
    with Image.open(img_path) as pil_img:
        rgb_img = np.array(pil_img.convert('RGB').resize((224, 224))) / 255.0
    img_saliency = show_cam_on_image(rgb_img, gray_scale[0, :], use_rgb=True)

    # Calculate predicted class and confidence score change.
    # Only the arg-max index is needed, so skip building an autograd graph.
    with torch.no_grad():
        predicted_class = model(input_tensor).max(1)[-1]
    # targets = [ClassifierOutputTarget(predicted_class)]
    targets = [ClassifierOutputSoftmaxTarget(predicted_class)]
    scores, scores_before, scores_after, visualizations = cam_metric(
        input_tensor, gray_scale, targets, model, return_visualization=True
    )

    # Process the visualization for display and scoring
    score = scores[0]
    visualization = visualizations[0].cpu().numpy().transpose((1, 2, 0))
    visualization = deprocess_image(visualization)

    # Save the visualization
    # Image.fromarray(visualization).save(os.path.join(imputated_folder, filename))
    # Print individual results
    print(f"Image: {filename}")
    print(f"Confidence 100% img: {scores_before}")
    print(f"Confidence 0% img: {scores_after}")
    print("----------------------------------------")

Executing CAM on VGG with target layer Conv2d:
tensor([22])
Target class: bald_eagle
Image: ILSVRC2012_val_00039605.JPEG
Confidence 100% img: [[0.999977]]
Confidence 0% img: [[0.00022715]]
----------------------------------------


## Insertion

In [6]:
# Insertion curve: starting from a fully masked input, pixels are revealed in
# order of decreasing CAM value and the target-class confidence is recorded
# at every step; the area under the min-max normalized curve is reported.
cam_type = CAMType.GRAD_CAM
cam_metric = CamMultImageConfidenceChange()
drop_in_conf_metric = DropInConfidence()
increase_in_conf_metric = IncreaseInConfidence()
imputated_folder = "../imputated_images_insertion"

count = 0
sum_auc = 0
for filename in random_image_names:
    confidences = []
    img_path = os.path.join(input_folder, filename)

    gray_scale_original, input_tensor = execute_cam(img_path=img_path, model=model, target_layers=target_layers, cam_type=cam_type)

    # The predicted class depends only on the unmasked input, so it is computed
    # once per image (without an autograd graph) instead of once per step.
    with torch.no_grad():
        predicted_class = model(input_tensor).max(1)[-1]
    # targets = [ClassifierOutputTarget(predicted_class)]
    targets = [ClassifierOutputSoftmaxTarget(predicted_class)]

    for i in range(100, -1, -1):
        if i == 100:
            # Nothing revealed yet.
            gray_scale = np.zeros_like(gray_scale_original)
        elif i == 0:
            # Everything revealed.
            gray_scale = np.ones_like(gray_scale_original)
        else:
            # Keep the pixels whose CAM value is at or above the i-th percentile.
            threshold = np.percentile(gray_scale_original, i)
            gray_scale = gray_scale_original >= threshold

        scores, scores_before, scores_after, visualizations = cam_metric(
            input_tensor, gray_scale, targets, model, return_visualization=True
        )

        # Process the visualization for display and scoring
        # score = scores[0]
        # visualization = visualizations[0].cpu().numpy().transpose((1, 2, 0))
        # visualization = deprocess_image(visualization)

        # Save the visualization
        # Image.fromarray(visualization).save(os.path.join(imputated_folder, f"{os.path.splitext(filename)[0]}_{i}{os.path.splitext(filename)[1]}"))
        confidences.append(scores_after)

        # Print individual results
        print(f"Image: {filename}")
        print(f"Step: {i}\tConfidence: {scores_after}")
        # print("----------------------------------------")
    confidences = np.array(confidences).flatten()
    confidences = (confidences - confidences.min()) / (confidences.max() - confidences.min())
    fraction_revealed = np.linspace(0, 1, len(confidences))
    auc = np.trapz(confidences, fraction_revealed)
    count += 1
    sum_auc += auc
    print(f"Image: {filename}")
    print(f"AUC: {auc}")

print(f"Average AUC: {sum_auc/count}")

Executing CAM on VGG with target layer Conv2d:
tensor([22])
Target class: bald_eagle
Image: ILSVRC2012_val_00039605.JPEG
Step: 100	Confidence: [[0.00022715]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 99	Confidence: [[0.01567769]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 98	Confidence: [[0.00345962]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 97	Confidence: [[0.00307689]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 96	Confidence: [[0.00065779]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 95	Confidence: [[0.03986163]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 94	Confidence: [[0.15234204]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 93	Confidence: [[0.0494281]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 92	Confidence: [[0.06362353]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 91	Confidence: [[0.05731063]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 90	Confidence: [[0.07917179]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 89	Confidence: [[0.05182704]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 88	Confid

## Deletion

In [7]:
# Deletion curve: starting from the full input, pixels are removed in order of
# decreasing CAM value and the target-class confidence is recorded at every
# step; the area under the min-max normalized curve is reported.
cam_type = CAMType.GRAD_CAM
cam_metric = CamMultImageConfidenceChange()
drop_in_conf_metric = DropInConfidence()
increase_in_conf_metric = IncreaseInConfidence()
imputated_folder = "../imputated_images_deletion"

count = 0
sum_auc = 0
for filename in random_image_names:
    confidences = []
    img_path = os.path.join(input_folder, filename)

    gray_scale_original, input_tensor = execute_cam(img_path=img_path, model=model, target_layers=target_layers, cam_type=cam_type)

    # The predicted class depends only on the unmasked input, so it is computed
    # once per image (without an autograd graph) instead of once per step.
    with torch.no_grad():
        predicted_class = model(input_tensor).max(1)[-1]
    # targets = [ClassifierOutputTarget(predicted_class)]
    targets = [ClassifierOutputSoftmaxTarget(predicted_class)]

    for i in range(100, -1, -1):
        if i == 0:
            # Everything removed.
            gray_scale = np.zeros_like(gray_scale_original)
        elif i == 100:
            # Nothing removed yet. This branch used to repeat the i == 0 test
            # and could never run; the mask it yields equals the percentile
            # mask at i == 100 (every value is <= the maximum).
            gray_scale = np.ones_like(gray_scale_original)
        else:
            # Keep the pixels whose CAM value is at or below the i-th percentile.
            threshold = np.percentile(gray_scale_original, i)
            gray_scale = gray_scale_original <= threshold

        scores, scores_before, scores_after, visualizations = cam_metric(
            input_tensor, gray_scale, targets, model, return_visualization=True
        )

        # Process the visualization for display and scoring
        # score = scores[0]
        # visualization = visualizations[0].cpu().numpy().transpose((1, 2, 0))
        # visualization = deprocess_image(visualization)

        # Save the visualization
        # Image.fromarray(visualization).save(os.path.join(imputated_folder, filename))
        # Image.fromarray(visualization).save(os.path.join(imputated_folder, f"{os.path.splitext(filename)[0]}_{i}{os.path.splitext(filename)[1]}"))
        confidences.append(scores_after)

        # Print individual results
        print(f"Image: {filename}")
        print(f"Step: {i}\tConfidence: {scores_after}")
        # print("----------------------------------------")
    confidences = np.array(confidences).flatten()
    confidences = (confidences - confidences.min()) / (confidences.max() - confidences.min())
    # Despite its name, for deletion this axis is the fraction of pixels removed.
    fraction_revealed = np.linspace(0, 1, len(confidences))
    auc = np.trapz(confidences, fraction_revealed)
    count += 1
    sum_auc += auc
    print(f"Image: {filename}")
    print(f"AUC: {auc}")

# Average over all images; dividing the last image's auc by count was wrong
# whenever more than one image is sampled.
print(f"Average AUC: {sum_auc/count}")

Executing CAM on VGG with target layer Conv2d:
tensor([22])
Target class: bald_eagle
Image: ILSVRC2012_val_00039605.JPEG
Step: 100	Confidence: [[0.999977]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 99	Confidence: [[0.99994314]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 98	Confidence: [[0.99993086]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 97	Confidence: [[0.9998547]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 96	Confidence: [[0.999749]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 95	Confidence: [[0.9998429]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 94	Confidence: [[0.9999093]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 93	Confidence: [[0.9997695]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 92	Confidence: [[0.99951637]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 91	Confidence: [[0.9983398]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 90	Confidence: [[0.99423015]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 89	Confidence: [[0.9872519]]
Image: ILSVRC2012_val_00039605.JPEG
Step: 88	Confidence: [[0

## Test

## Grad-CAM++

In [8]:
# Insertion curve for Grad-CAM++: starting from a fully masked input, pixels
# are revealed in order of decreasing CAM value; the area under the min-max
# normalized confidence curve is reported.
cam_type = CAMType.GRAD_CAM_PLUS_PLUS
cam_metric = CamMultImageConfidenceChange()
drop_in_conf_metric = DropInConfidence()
increase_in_conf_metric = IncreaseInConfidence()
imputated_folder = "../imputated_images_insertion"

count = 0
sum_auc = 0
for filename in random_image_names:
    confidences = []
    img_path = os.path.join(input_folder, filename)

    gray_scale_original, input_tensor = execute_cam(img_path=img_path, model=model, target_layers=target_layers, cam_type=cam_type)

    # The predicted class depends only on the unmasked input, so it is computed
    # once per image (without an autograd graph) instead of once per step.
    with torch.no_grad():
        predicted_class = model(input_tensor).max(1)[-1]
    # targets = [ClassifierOutputTarget(predicted_class)]
    targets = [ClassifierOutputSoftmaxTarget(predicted_class)]

    for i in range(100, -1, -1):
        if i == 100:
            # Nothing revealed yet.
            gray_scale = np.zeros_like(gray_scale_original)
        elif i == 0:
            # Everything revealed.
            gray_scale = np.ones_like(gray_scale_original)
        else:
            # Keep the pixels whose CAM value is at or above the i-th percentile.
            threshold = np.percentile(gray_scale_original, i)
            gray_scale = gray_scale_original >= threshold

        scores, scores_before, scores_after, visualizations = cam_metric(
            input_tensor, gray_scale, targets, model, return_visualization=True
        )

        # Process the visualization for display and scoring
        # score = scores[0]
        # visualization = visualizations[0].cpu().numpy().transpose((1, 2, 0))
        # visualization = deprocess_image(visualization)

        # Save the visualization
        # Image.fromarray(visualization).save(os.path.join(imputated_folder, f"{os.path.splitext(filename)[0]}_{i}{os.path.splitext(filename)[1]}"))
        confidences.append(scores_after)

        # Print individual results
        # print(f"Image: {filename}")
        # print(f"Step: {i}\tConfidence: {scores_after}")
        # print("----------------------------------------")
    confidences = np.array(confidences).flatten()
    confidences = (confidences - confidences.min()) / (confidences.max() - confidences.min())
    fraction_revealed = np.linspace(0, 1, len(confidences))
    auc = np.trapz(confidences, fraction_revealed)
    count += 1
    sum_auc += auc
    print(f"Image: {filename}")
    print(f"AUC: {auc}")

print(f"Average AUC: {sum_auc/count}")

Executing CAM on VGG with target layer Conv2d:
tensor([22])
Target class: bald_eagle
Image: ILSVRC2012_val_00039605.JPEG
AUC: 0.8760039091715589
Average AUC: 0.8760039091715589


In [9]:
# Deletion curve for Grad-CAM++: starting from the full input, pixels are
# removed in order of decreasing CAM value; the area under the min-max
# normalized confidence curve is reported.
cam_type = CAMType.GRAD_CAM_PLUS_PLUS
cam_metric = CamMultImageConfidenceChange()
drop_in_conf_metric = DropInConfidence()
increase_in_conf_metric = IncreaseInConfidence()
imputated_folder = "../imputated_images_deletion"

count = 0
sum_auc = 0
for filename in random_image_names:
    confidences = []
    img_path = os.path.join(input_folder, filename)

    gray_scale_original, input_tensor = execute_cam(img_path=img_path, model=model, target_layers=target_layers, cam_type=cam_type)

    # The predicted class depends only on the unmasked input, so it is computed
    # once per image (without an autograd graph) instead of once per step.
    with torch.no_grad():
        predicted_class = model(input_tensor).max(1)[-1]
    # targets = [ClassifierOutputTarget(predicted_class)]
    targets = [ClassifierOutputSoftmaxTarget(predicted_class)]

    for i in range(100, -1, -1):
        if i == 0:
            # Everything removed.
            gray_scale = np.zeros_like(gray_scale_original)
        elif i == 100:
            # Nothing removed yet. This branch used to repeat the i == 0 test
            # and could never run; the mask it yields equals the percentile
            # mask at i == 100 (every value is <= the maximum).
            gray_scale = np.ones_like(gray_scale_original)
        else:
            # Keep the pixels whose CAM value is at or below the i-th percentile.
            threshold = np.percentile(gray_scale_original, i)
            gray_scale = gray_scale_original <= threshold

        scores, scores_before, scores_after, visualizations = cam_metric(
            input_tensor, gray_scale, targets, model, return_visualization=True
        )

        # Process the visualization for display and scoring
        # score = scores[0]
        # visualization = visualizations[0].cpu().numpy().transpose((1, 2, 0))
        # visualization = deprocess_image(visualization)

        # Save the visualization
        # Image.fromarray(visualization).save(os.path.join(imputated_folder, filename))
        # Image.fromarray(visualization).save(os.path.join(imputated_folder, f"{os.path.splitext(filename)[0]}_{i}{os.path.splitext(filename)[1]}"))
        confidences.append(scores_after)

        # Print individual results
        # print(f"Image: {filename}")
        # print(f"Step: {i}\tConfidence: {scores_after}")
        # print("----------------------------------------")
    confidences = np.array(confidences).flatten()
    confidences = (confidences - confidences.min()) / (confidences.max() - confidences.min())
    # Despite its name, for deletion this axis is the fraction of pixels removed.
    fraction_revealed = np.linspace(0, 1, len(confidences))
    auc = np.trapz(confidences, fraction_revealed)
    count += 1
    sum_auc += auc
    print(f"Image: {filename}")
    print(f"AUC: {auc}")

# Average over all images; dividing the last image's auc by count was wrong
# whenever more than one image is sampled.
print(f"Average AUC: {sum_auc/count}")

Executing CAM on VGG with target layer Conv2d:
tensor([22])
Target class: bald_eagle
Image: ILSVRC2012_val_00039605.JPEG
AUC: 0.3327720255306122
Average AUC: 0.3327720255306122
