# Setup

In [1]:
import warnings
warnings.filterwarnings('ignore')

import sys
import os
sys.path.append(os.path.abspath('..'))
from typing import List
import json
import numpy as np
import cv2

import torch
import torchvision.models as models

import matplotlib.pyplot as plt

import pytorch_grad_cam as cam
import pytorch_grad_cam.utils as utils
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget, ClassifierOutputSoftmaxTarget
from pytorch_grad_cam.utils.image import show_cam_on_image, \
    deprocess_image, \
    preprocess_image

def display_images_in_rows(rgb_img: np.ndarray, grayscale_list: List[torch.Tensor], labels: List[str]):
    """
    Show the input image followed by one CAM overlay per saliency map, in a single row.

    Args:
        rgb_img (np.ndarray): Image shown in the first panel and used as the
            background of every overlay (it is passed to ``show_cam_on_image``).
        grayscale_list (list): One saliency map per overlay; each is blended onto
            ``rgb_img`` with ``show_cam_on_image(..., use_rgb=True)``.
            NOTE(review): annotated as ``torch.Tensor``, but the CAM outputs used
            elsewhere in this notebook look like NumPy arrays - confirm the type.
        labels (list of str): Titles for the overlays, in the same order as
            ``grayscale_list``. The list is read only; it is never modified.
    """

    images = [rgb_img]

    for grayscale in grayscale_list:
        images.append(show_cam_on_image(rgb_img, grayscale, use_rgb=True))

    # Build a new title list rather than inserting into `labels`, so the caller's
    # list is left untouched and can safely be reused for another call.
    titles = ["Image"] + list(labels)

    # One column per image. 3.75 inches per panel reproduces the previous
    # (15, 5) figure for the usual 4-panel case. squeeze=False keeps `axes`
    # two-dimensional even when there is a single panel.
    num_panels = len(images)
    fig, axes = plt.subplots(1, num_panels, figsize=(3.75 * num_panels, 5), squeeze=False)

    # Loop through the images to display them; a panel without a matching label
    # is still drawn, just without a title.
    for idx, (ax, img) in enumerate(zip(axes[0], images)):
        ax.imshow(img)  # Show the image
        if idx < len(titles):
            ax.set_title(titles[idx])  # Set the title
        ax.axis('off')  # Hide axes

    plt.tight_layout()
    plt.show()

# Class-index lookup table. Later cells index it with the predicted class index
# converted to a string and print element [1] of the entry as the class name.
# The file is opened in a `with` block so the handle is closed after parsing.
with open("../pytorch_grad_cam/utils/imagenet_class_index.json") as class_index_file:
    labels = json.load(class_index_file)

In [2]:
import os
import random
from PIL import Image
import numpy as np
import shutil

# Input images and the folder that receives the saved visualizations
input_folder = r"../val_folder"
imputated_folder = "../imputated_images"

# Start from an empty output folder: delete a previous one (with its contents),
# then create it again.
if os.path.exists(imputated_folder):
    shutil.rmtree(imputated_folder)
os.makedirs(imputated_folder)

# Inclusive range of image numbers to draw from, and the sample size
start_idx = 1
end_idx = 50000
num_images_to_sample = 100  # Adjust this to how many random images you want

# Every candidate file name in the range, followed by a random subset drawn
# without replacement (no seed is set, so the subset differs between runs).
all_image_names = list(
    f"ILSVRC2012_val_{i:08d}.JPEG" for i in range(start_idx, end_idx + 1)
)
random_image_names = random.sample(all_image_names, k=num_images_to_sample)

In [3]:
import pandas as pd
from PIL import Image

import matplotlib.patches as patches

def draw_bb(img, bb):
    """
    Plot an image with a red rectangle outlining one bounding box.

    Args:
        img: Image handed to ``ax.imshow`` (rendered with the 'gray' colormap).
        bb: Four values unpacked as ``(x1, y1, x2, y2)``: the rectangle is
            anchored at ``(x1, y1)`` and spans to ``(x2, y2)``.
    """
    fig, ax = plt.subplots(1)
    ax.imshow(img, cmap='gray')

    # Corner coordinates -> anchor point plus extent, as Rectangle expects
    left, top, right, bottom = bb
    outline = patches.Rectangle(
        (left, top),
        right - left,
        bottom - top,
        linewidth=2,
        edgecolor='red',
        facecolor='none',
    )
    ax.add_patch(outline)

    plt.show()

def filter_images(csv_file, images_folder, num_samples=None):
    """
    Pick annotated images that contain exactly one, not-too-large bounding box.

    The CSV is read with pandas and must provide the columns ``ImageId`` and
    ``PredictionString``. ``PredictionString`` is split on whitespace and read in
    groups of five tokens: a class label followed by four integers that are
    interpreted as ``x_min, y_min, x_max, y_max``.

    Rows are visited in random order (the frame is shuffled first, without a
    seed). A row is kept when:
      * it describes exactly one bounding box,
      * ``{images_folder}/{ImageId}.JPEG`` exists on disk, and
      * the box covers less than 50% of the image area.

    Args:
        csv_file: Path of the annotation CSV.
        images_folder: Folder that contains the ``<ImageId>.JPEG`` files.
        num_samples: Stop as soon as this many rows have been collected.
            ``None`` (or 0) means no limit.

    Returns:
        list: The accepted rows, each a pandas Series as produced by
        ``DataFrame.iterrows``.
    """
    eligible_images = []
    data = pd.read_csv(csv_file)
    data = data.sample(frac=1).reset_index(drop=True)  # Shuffle data for randomness upfront

    for _, row in data.iterrows():
        image_id = row["ImageId"]
        predictions = row["PredictionString"].split()

        # Parse predictions into labels and coordinates
        bboxes = []
        for i in range(0, len(predictions), 5):
            class_label = predictions[i]
            coords = list(map(int, predictions[i + 1:i + 5]))
            bboxes.append((class_label, coords))

        # Keep only rows with exactly one bounding box. Rows with several boxes
        # are skipped as before; rows without any box are skipped too, because
        # consumers of the result unpack four coordinates from the row.
        if len(bboxes) != 1:
            continue

        # Load the image to get dimensions
        image_path = f"{images_folder}/{image_id}.JPEG"
        if not os.path.exists(image_path):
            print(f"Image not found: {image_path}")
            continue

        # Only the size is needed; the context manager closes the file handle
        # so that scanning many rows does not accumulate open files.
        with Image.open(image_path) as image:
            img_width, img_height = image.size

        # Calculate total bounding box area
        total_bbox_area = sum(
            (x_max - x_min) * (y_max - y_min)
            for _, (x_min, y_min, x_max, y_max) in bboxes
        )
        image_area = img_width * img_height

        # Skip if the object occupies 50% or more of the image
        if total_bbox_area / image_area >= 0.5:
            continue

        # Add eligible image and bounding box to the list
        eligible_images.append(row)

        # Stop if we've collected enough samples
        if num_samples and len(eligible_images) >= num_samples:
            break

    return eligible_images

# Matching CAM

## Drop in confidence

In [None]:
# Running totals for the averages printed at the end of this cell
total_drop_in_conf = 0
total_increase_in_conf = 0
num_images = 0  # Track number of processed images

input_folder = r"../val_folder"

# Fixed list of 100 validation images, built from their image numbers using the
# zero-padded "ILSVRC2012_val_<8 digits>.JPEG" naming scheme.
random_image_names = [
    f"ILSVRC2012_val_{image_number:08d}.JPEG"
    for image_number in (
        48014, 40662, 21194, 38840, 26519, 26939, 22, 165, 28945, 32674,
        26695, 43106, 38333, 18162, 4863, 34851, 4517, 10810, 11824, 20485,
        48173, 41393, 33662, 45303, 4455, 11500, 25962, 43584, 36038, 1159,
        36157, 47597, 44337, 3755, 43447, 31518, 41929, 10950, 23940, 34458,
        3772, 17173, 35194, 13968, 7289, 35626, 1925, 18556, 5887, 37546,
        37983, 28321, 6292, 10227, 20722, 10561, 40482, 42051, 1760, 21865,
        10828, 43847, 36917, 47053, 2225, 14391, 23265, 25722, 35266, 334,
        9316, 37959, 15267, 45274, 5621, 9324, 36612, 12167, 13826, 39615,
        3550, 18661, 37578, 32692, 22024, 11285, 17859, 25713, 27390, 45695,
        38690, 16934, 27410, 39936, 25764, 24909, 3979, 35948, 44730, 41283,
    )
]

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Pretrained AlexNet in eval mode on the chosen device; the CAMs below are
# computed on model.features[10].
model = models.alexnet(weights=models.AlexNet_Weights.DEFAULT)
model = model.eval().to(device)
target_layers = [model.features[10]]

# Metric helpers from the project's utils package
cam_metric = utils.CamMultImageConfidenceChange()
drop_in_conf_metric = utils.DropInConfidence()
increase_in_conf_metric = utils.IncreaseInConfidence()

# Evaluate every listed image that is present in input_folder
for filename in random_image_names:
    img_path = os.path.join(input_folder, filename)

    # Names without a file on disk are skipped without a message
    if not os.path.exists(img_path):
        continue

    # Load as RGB, resize to 224x224, scale to [0, 1], then normalise and batch
    img = np.array(Image.open(img_path).convert("RGB"))
    img = cv2.resize(img, (224, 224))
    img = np.float32(img) / 255
    input_tensor = preprocess_image(
        img,
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    input_tensor = input_tensor.to(device)

    # The explained class is the model's own top prediction
    with torch.no_grad():
        predicted_class = model(input_tensor).max(1)[1]
    targets = [ClassifierOutputTarget(predicted_class)]
    print(f"Target class: {labels[str(predicted_class.item())][1]}")

    # Two saliency maps for the same target; each CAM object is a temporary
    # that is used for exactly one call.
    grad_cam_pp_grayscale = cam.GradCAMPlusPlus(
        model=model, target_layers=target_layers
    )(input_tensor=input_tensor, targets=targets)
    score_cam_grayscale = cam.ScoreCAM(
        model=model, target_layers=target_layers
    )(input_tensor=input_tensor, targets=targets)

    # Combine the first map of each method, then restore the leading batch axis
    gray_scale = utils.combine_by_matching_important_pixels(
        grayscales=[grad_cam_pp_grayscale[0, :], score_cam_grayscale[0, :]],
        thresholds=[25, 25],
    )
    gray_scale = np.expand_dims(gray_scale, axis=0)

    # Zero out everything below the median of the combined map
    threshold = np.percentile(gray_scale, 50)
    gray_scale[gray_scale < threshold] = 0

    # display_images_in_rows(rgb_img=img, grayscale_list=[grad_cam_pp_grayscale[0, :], score_cam_grayscale[0, :], gray_scale[0, :]], labels=["GradCAM++", "ScoreCAM", "MatchingCAM"])

    # The metrics are evaluated with the softmax target for the same class
    targets = [ClassifierOutputSoftmaxTarget(predicted_class)]
    scores, scores_before, scores_after, visualizations = cam_metric(
        input_tensor, gray_scale, targets, model, return_visualization=True
    )

    # Calculate Drop in Confidence and Increase in Confidence
    drop_in_conf = drop_in_conf_metric(input_tensor, gray_scale, targets, model)
    increase_in_conf = increase_in_conf_metric(input_tensor, gray_scale, targets, model)

    # Accumulate the scores for averaging
    total_drop_in_conf += drop_in_conf
    total_increase_in_conf += increase_in_conf
    num_images += 1

    # First score and first visualization of the batch; the visualization is
    # moved to channel-last layout and converted to an image array.
    score = scores[0]
    visualization = deprocess_image(
        visualizations[0].cpu().numpy().transpose((1, 2, 0))
    )

    # Save the visualization under the original file name
    Image.fromarray(visualization).save(os.path.join(imputated_folder, filename))

    # Per-image report
    print(f"Image: {filename}")
    print(f"Confidence before imputation: {scores_before}")
    print(f"Confidence after imputation: {scores_after}")
    print(f"The confidence increase raw: {score}")
    print(f"The drop in confidence percentage: {drop_in_conf}%")
    print(f"The increase in confidence: {increase_in_conf}")
    print("----------------------------------------")

# Averages over all processed images (only defined when at least one was found)
if num_images == 0:
    print("No valid images found in the specified folder.")
else:
    avg_drop_in_conf = total_drop_in_conf / num_images
    avg_increase_in_conf = total_increase_in_conf / num_images * 100
    print(f"\nNumber of images processed: {num_images}")
    print(f"Average Drop in Confidence: {avg_drop_in_conf}%")
    print(f"Average Increase in Confidence: {avg_increase_in_conf}%")

In [5]:
# Running totals for the averages printed at the end of this cell
total_drop_in_conf = 0
total_increase_in_conf = 0
num_images = 0  # Track number of processed images

# NOTE(review): machine-specific absolute path; the relative alternative is kept below.
# input_folder = r"../val_folder"
input_folder = r"C:\Users\HaPham\Documents\ThesisXAI\Code\CAM-combination\ILSVRC2012\ILSVRC2012_img_val"

# Fixed list of 100 validation images, built from their image numbers using the
# zero-padded "ILSVRC2012_val_<8 digits>.JPEG" naming scheme.
random_image_names = [
    f"ILSVRC2012_val_{image_number:08d}.JPEG"
    for image_number in (
        48014, 40662, 21194, 38840, 26519, 26939, 22, 165, 28945, 32674,
        26695, 43106, 38333, 18162, 4863, 34851, 4517, 10810, 11824, 20485,
        48173, 41393, 33662, 45303, 4455, 11500, 25962, 43584, 36038, 1159,
        36157, 47597, 44337, 3755, 43447, 31518, 41929, 10950, 23940, 34458,
        3772, 17173, 35194, 13968, 7289, 35626, 1925, 18556, 5887, 37546,
        37983, 28321, 6292, 10227, 20722, 10561, 40482, 42051, 1760, 21865,
        10828, 43847, 36917, 47053, 2225, 14391, 23265, 25722, 35266, 334,
        9316, 37959, 15267, 45274, 5621, 9324, 36612, 12167, 13826, 39615,
        3550, 18661, 37578, 32692, 22024, 11285, 17859, 25713, 27390, 45695,
        38690, 16934, 27410, 39936, 25764, 24909, 3979, 35948, 44730, 41283,
    )
]

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# model = models.alexnet(weights=models.AlexNet_Weights.DEFAULT).eval().to(device)
# target_layers = [model.features[10]]

# Pretrained VGG16 in eval mode. The model must live on the same device as
# input_tensor (which is moved with .to(device) below); without this the forward
# pass fails with a device mismatch as soon as CUDA is available.
model = models.vgg16(weights=models.VGG16_Weights.DEFAULT).eval().to(device)
target_layers = [model.features[28]]

# Metric helpers from the project's utils package
cam_metric = utils.CamMultImageConfidenceChange()
drop_in_conf_metric = utils.DropInConfidence()
increase_in_conf_metric = utils.IncreaseInConfidence()

# Process the randomly selected images
for filename in random_image_names:
    img_path = os.path.join(input_folder, filename)

    if os.path.exists(img_path):  # Ensure the file exists
        # Load as RGB, resize to 224x224, scale to [0, 1], then normalise and batch
        img = np.array(Image.open(img_path).convert("RGB"))
        img = cv2.resize(img, (224, 224))
        img = np.float32(img) / 255
        input_tensor = preprocess_image(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]).to(device)

        # The explained class is the model's own top prediction
        with torch.no_grad():
            predicted_class = model(input_tensor).max(1)[-1]
            targets = [ClassifierOutputTarget(predicted_class)]
            print(predicted_class)
            print(f"Target class: {labels[str(predicted_class.item())][1]}")

        # Saliency map from plain GradCAM
        gray_scale = cam.GradCAM(model=model, target_layers=target_layers)(input_tensor=input_tensor, targets=targets)

        # Zero out everything below the median of the map
        threshold = np.percentile(gray_scale, 50)
        gray_scale[gray_scale < threshold] = 0

        # The metrics are evaluated with the softmax target for the same class
        targets = [ClassifierOutputSoftmaxTarget(predicted_class)]
        scores, scores_before, scores_after, visualizations = cam_metric(
            input_tensor, gray_scale, targets, model, return_visualization=True
        )

        # Calculate Drop in Confidence and Increase in Confidence
        drop_in_conf = drop_in_conf_metric(input_tensor, gray_scale, targets, model)
        increase_in_conf = increase_in_conf_metric(input_tensor, gray_scale, targets, model)

        # Accumulate the scores for averaging
        total_drop_in_conf += drop_in_conf
        total_increase_in_conf += increase_in_conf
        num_images += 1  # Increment image count

        # Process the visualization for display and scoring
        score = scores[0]
        visualization = visualizations[0].cpu().numpy().transpose((1, 2, 0))
        visualization = deprocess_image(visualization)

        # Save the visualization.
        # NOTE: imputated_folder is not defined in this cell; it has to exist
        # in the kernel already when this cell runs.
        Image.fromarray(visualization).save(os.path.join(imputated_folder, filename))

        # Print individual results (disabled; only the averages are reported)
        # print(f"Image: {filename}")
        # print(f"Confidence before imputation: {scores_before}")
        # print(f"Confidence after imputation: {scores_after}")
        # print(f"The confidence increase raw: {score}")
        # print(f"The drop in confidence percentage: {drop_in_conf}%")
        # print(f"The increase in confidence: {increase_in_conf}")
        # print("----------------------------------------")

# Calculate and print averages after processing all images
if num_images > 0:
    avg_drop_in_conf = total_drop_in_conf / num_images
    avg_increase_in_conf = total_increase_in_conf / num_images * 100
    print(f"\nNumber of images processed: {num_images}")
    print(f"Average Drop in Confidence: {avg_drop_in_conf}%")
    print(f"Average Increase in Confidence: {avg_increase_in_conf}%")
else:
    print("No valid images found in the specified folder.")

Using device: cpu
tensor([605])
Target class: iPod
tensor([748])
Target class: purse
tensor([898])
Target class: water_bottle
tensor([760])
Target class: refrigerator
tensor([980])
Target class: volcano
tensor([126])
Target class: isopod
tensor([209])
Target class: Chesapeake_Bay_retriever
tensor([937])
Target class: broccoli
tensor([203])
Target class: West_Highland_white_terrier
tensor([954])
Target class: banana
tensor([561])
Target class: forklift
tensor([850])
Target class: teddy
tensor([495])
Target class: china_cabinet
tensor([12])
Target class: house_finch
tensor([694])
Target class: paddlewheel
tensor([248])
Target class: Eskimo_dog
tensor([305])
Target class: dung_beetle
tensor([98])
Target class: red-breasted_merganser
tensor([763])
Target class: revolver
tensor([746])
Target class: puck
tensor([735])
Target class: poncho
tensor([211])
Target class: vizsla
tensor([807])
Target class: solar_dish
tensor([653])
Target class: milk_can
tensor([586])
Target class: half_track
tenso

In [6]:
# Running totals for the averages printed at the end of this cell
total_drop_in_conf = 0
total_increase_in_conf = 0
num_images = 0  # Track number of processed images

# NOTE(review): machine-specific absolute path; the relative alternative is kept below.
# input_folder = r"../val_folder"
input_folder = r"C:\Users\HaPham\Documents\ThesisXAI\Code\CAM-combination\ILSVRC2012\ILSVRC2012_img_val"

# Fixed list of 100 validation images, built from their image numbers using the
# zero-padded "ILSVRC2012_val_<8 digits>.JPEG" naming scheme.
random_image_names = [
    f"ILSVRC2012_val_{image_number:08d}.JPEG"
    for image_number in (
        48014, 40662, 21194, 38840, 26519, 26939, 22, 165, 28945, 32674,
        26695, 43106, 38333, 18162, 4863, 34851, 4517, 10810, 11824, 20485,
        48173, 41393, 33662, 45303, 4455, 11500, 25962, 43584, 36038, 1159,
        36157, 47597, 44337, 3755, 43447, 31518, 41929, 10950, 23940, 34458,
        3772, 17173, 35194, 13968, 7289, 35626, 1925, 18556, 5887, 37546,
        37983, 28321, 6292, 10227, 20722, 10561, 40482, 42051, 1760, 21865,
        10828, 43847, 36917, 47053, 2225, 14391, 23265, 25722, 35266, 334,
        9316, 37959, 15267, 45274, 5621, 9324, 36612, 12167, 13826, 39615,
        3550, 18661, 37578, 32692, 22024, 11285, 17859, 25713, 27390, 45695,
        38690, 16934, 27410, 39936, 25764, 24909, 3979, 35948, 44730, 41283,
    )
]

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# model = models.alexnet(weights=models.AlexNet_Weights.DEFAULT).eval().to(device)
# target_layers = [model.features[10]]

# Pretrained VGG16 in eval mode. The model must live on the same device as
# input_tensor (which is moved with .to(device) below); without this the forward
# pass fails with a device mismatch as soon as CUDA is available.
model = models.vgg16(weights=models.VGG16_Weights.DEFAULT).eval().to(device)
target_layers = [model.features[28]]

# Metric helpers from the project's utils package
cam_metric = utils.CamMultImageConfidenceChange()
drop_in_conf_metric = utils.DropInConfidence()
increase_in_conf_metric = utils.IncreaseInConfidence()

# Process the randomly selected images
for filename in random_image_names:
    img_path = os.path.join(input_folder, filename)

    if os.path.exists(img_path):  # Ensure the file exists
        # Load as RGB, resize to 224x224, scale to [0, 1], then normalise and batch
        img = np.array(Image.open(img_path).convert("RGB"))
        img = cv2.resize(img, (224, 224))
        img = np.float32(img) / 255
        input_tensor = preprocess_image(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]).to(device)

        # The explained class is the model's own top prediction
        with torch.no_grad():
            predicted_class = model(input_tensor).max(1)[-1]
            targets = [ClassifierOutputTarget(predicted_class)]
            print(predicted_class)
            print(f"Target class: {labels[str(predicted_class.item())][1]}")

        # Saliency map from GradCAM++
        gray_scale = cam.GradCAMPlusPlus(model=model, target_layers=target_layers)(input_tensor=input_tensor, targets=targets)

        # Zero out everything below the median of the map
        threshold = np.percentile(gray_scale, 50)
        gray_scale[gray_scale < threshold] = 0

        # The metrics are evaluated with the softmax target for the same class
        targets = [ClassifierOutputSoftmaxTarget(predicted_class)]
        scores, scores_before, scores_after, visualizations = cam_metric(
            input_tensor, gray_scale, targets, model, return_visualization=True
        )

        # Calculate Drop in Confidence and Increase in Confidence
        drop_in_conf = drop_in_conf_metric(input_tensor, gray_scale, targets, model)
        increase_in_conf = increase_in_conf_metric(input_tensor, gray_scale, targets, model)

        # Accumulate the scores for averaging
        total_drop_in_conf += drop_in_conf
        total_increase_in_conf += increase_in_conf
        num_images += 1  # Increment image count

        # Process the visualization for display and scoring
        score = scores[0]
        visualization = visualizations[0].cpu().numpy().transpose((1, 2, 0))
        visualization = deprocess_image(visualization)

        # Save the visualization.
        # NOTE: imputated_folder is not defined in this cell; it has to exist
        # in the kernel already when this cell runs.
        Image.fromarray(visualization).save(os.path.join(imputated_folder, filename))

        # Print individual results (disabled; only the averages are reported)
        # print(f"Image: {filename}")
        # print(f"Confidence before imputation: {scores_before}")
        # print(f"Confidence after imputation: {scores_after}")
        # print(f"The confidence increase raw: {score}")
        # print(f"The drop in confidence percentage: {drop_in_conf}%")
        # print(f"The increase in confidence: {increase_in_conf}")
        # print("----------------------------------------")

# Calculate and print averages after processing all images
if num_images > 0:
    avg_drop_in_conf = total_drop_in_conf / num_images
    avg_increase_in_conf = total_increase_in_conf / num_images * 100
    print(f"\nNumber of images processed: {num_images}")
    print(f"Average Drop in Confidence: {avg_drop_in_conf}%")
    print(f"Average Increase in Confidence: {avg_increase_in_conf}%")
else:
    print("No valid images found in the specified folder.")

Using device: cpu
tensor([605])
Target class: iPod
tensor([748])
Target class: purse
tensor([898])
Target class: water_bottle
tensor([760])
Target class: refrigerator
tensor([980])
Target class: volcano
tensor([126])
Target class: isopod
tensor([209])
Target class: Chesapeake_Bay_retriever
tensor([937])
Target class: broccoli
tensor([203])
Target class: West_Highland_white_terrier
tensor([954])
Target class: banana
tensor([561])
Target class: forklift
tensor([850])
Target class: teddy
tensor([495])
Target class: china_cabinet
tensor([12])
Target class: house_finch
tensor([694])
Target class: paddlewheel
tensor([248])
Target class: Eskimo_dog
tensor([305])
Target class: dung_beetle
tensor([98])
Target class: red-breasted_merganser
tensor([763])
Target class: revolver
tensor([746])
Target class: puck
tensor([735])
Target class: poncho
tensor([211])
Target class: vizsla
tensor([807])
Target class: solar_dish
tensor([653])
Target class: milk_can
tensor([586])
Target class: half_track
tenso

## Energy-Based Pointing Game

In [7]:
# Annotation CSV and image folder used by the pointing-game evaluation
images_folder = "../val_folder"  # Replace with the folder containing the images
csv_file = "../LOC_val_solution/LOC_val_solution.csv"  # Replace with your CSV file

# Collect at most 100 rows that pass filter_images
images = filter_images(csv_file, images_folder, num_samples=100)

FileNotFoundError: [Errno 2] No such file or directory: '../LOC_val_solution/LOC_val_solution.csv'

In [None]:
# Sum of the per-image proportions returned by utils.energy_point_game
total_proportion = 0

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Pretrained AlexNet in eval mode; the CAM is computed on model.features[10]
model = models.alexnet(weights=models.AlexNet_Weights.DEFAULT).eval().to(device)
target_layers = [model.features[10]]

for image in images:
    img_path = f"{images_folder}/{image['ImageId']}.JPEG"

    # Open the file once: read the original size (needed to rescale the box)
    # and decode the pixels, then let the context manager close the handle.
    with Image.open(img_path) as pil_image:
        orig_width, orig_height = pil_image.size
        img = np.array(pil_image.convert("RGB"))

    # Adjust bounding box to resized image: tokens 1..4 of PredictionString are
    # read as x_min, y_min, x_max, y_max and scaled to the 224x224 input.
    bbox = list(map(int, image["PredictionString"].split()[1:5]))
    x_min, y_min, x_max, y_max = bbox
    x_min = int(x_min * 224 / orig_width)
    y_min = int(y_min * 224 / orig_height)
    x_max = int(x_max * 224 / orig_width)
    y_max = int(y_max * 224 / orig_height)
    resized_bbox = [x_min, y_min, x_max, y_max]

    # Run CAM visualization: resize to 224x224, scale to [0, 1], normalise, batch
    img = cv2.resize(img, (224, 224))
    img = np.float32(img) / 255
    input_tensor = preprocess_image(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]).to(device)

    # The explained class is the model's own top prediction
    with torch.no_grad():
        predicted_class = model(input_tensor).max(1)[-1]
        targets = [ClassifierOutputSoftmaxTarget(predicted_class)]
        print(f"Target class: {labels[str(predicted_class.item())][1]}")

    gray_scale = cam.ScoreCAM(model=model, target_layers=target_layers)(input_tensor=input_tensor, targets=targets)

    # Draw resized bounding box
    # draw_bb(img=img, bb=resized_bbox)

    # Convert score CAM to tensor (first map of the batch)
    score_cam_tensor = torch.from_numpy(gray_scale[0, :])

    # Compute energy proportion
    proportion = utils.energy_point_game(bbox=resized_bbox, saliency_map=score_cam_tensor)

    total_proportion += proportion

    print(proportion)
    print("-----------------")

# Average over all evaluated images; guard against an empty selection, which
# would otherwise end the cell with a ZeroDivisionError.
if len(images) > 0:
    print("Proportion:", total_proportion / len(images))
else:
    print("No eligible images to evaluate.")