# Imports:

In [None]:
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
from torchvision import models
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision.datasets import Caltech101
from torchvision.transforms import transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
import random
from captum.attr import Occlusion
import cv2 as cv
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image

# Phase 1:

## Data Import:

In [27]:
# Local Datapath
dataset_path = "./caltech101"

# Transforms for normalization,  turning from PIL to tensor, resizing, transforming to RGB, and cropping
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    # Convert all images to RGB format before converting to a tensor
    transforms.Lambda(lambda x: x.convert('RGB')),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225])
])

# Create dataset object
caltech_dataset = Caltech101(
    root=dataset_path,
    download=False,
    transform=transform
)

# Create dataloader
caltech_dataloader = DataLoader(caltech_dataset, batch_size=64, shuffle=True)

In [28]:
# Splitting dataset into training and validation sets
train_size = int(0.8 * len(caltech_dataset))
val_size = len(caltech_dataset) - train_size
train_dataset, val_dataset = random_split(caltech_dataset, [train_size, val_size])

# Load Dataset using DataLoader
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

## Performing the Transfer learning:

### Load pre-trained models:

#### Resnet-34

In [16]:
resnet34 = models.resnet34(pretrained=True)

# Freeze all layers except the final layer
for param in resnet34.parameters():
    param.requires_grad = False

# Modify the final layer for 101 classes from FC to linear
num_ftrs = resnet34.fc.in_features
resnet34.fc = nn.Linear(num_ftrs, 101)

#### MobileNetV2

In [11]:
mobilenet_v2 = models.mobilenet_v2(pretrained=True)

# Freeze all layers except the final layer
for param in mobilenet_v2.parameters():
    param.requires_grad = False

# Modify the final layer for 101 classes from sequential to linear
num_ftrs = mobilenet_v2.classifier[1].in_features
mobilenet_v2.classifier[1] = nn.Linear(num_ftrs, 101)



### Train the last layer:

#### ResNet-34

In [21]:
# Number of epochs
num_epochs = 1

# Initialize Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet34.parameters(), lr=0.001)

# Train the last layer
for epoch in range(num_epochs):
    # Training
    resnet34.train()
    running_train_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = resnet34(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_train_loss += loss.item()

    avg_train_loss = running_train_loss / len(train_loader)

    # Validation
    resnet34.eval()
    running_val_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = resnet34(inputs)
            loss = criterion(outputs, labels)
            running_val_loss += loss.item()
            
            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

    avg_val_loss = running_val_loss / len(val_loader)
    accuracy = correct_predictions / total_samples

    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {avg_train_loss:.4f}, "
          f"Validation Loss: {avg_val_loss:.4f}, "
          f"Validation Accuracy: {accuracy:.4f}")

print("Done")

Epoch [1/1], Train Loss: 0.2165, Validation Loss: 0.3015, Validation Accuracy: 0.9159
Done


#### MobileNet V2

In [None]:
# Initialize Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mobilenet_v2.classifier[1].parameters(), lr=0.001)


# Train the last layer
for epoch in range(num_epochs):
    # Training
    mobilenet_v2.train()
    running_train_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = mobilenet_v2(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_train_loss += loss.item()

    avg_train_loss = running_train_loss / len(train_loader)

    # Validation
    mobilenet_v2.eval()
    running_val_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = mobilenet_v2(inputs)
            loss = criterion(outputs, labels)
            running_val_loss += loss.item()
            
            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

    avg_val_loss = running_val_loss / len(val_loader)
    accuracy = correct_predictions / total_samples

    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {avg_train_loss:.4f}, "
          f"Validation Loss: {avg_val_loss:.4f}, "
          f"Validation Accuracy: {accuracy:.4f}")

print("Done")

# Phase 2:

## The JSMA Attack function:

In [22]:
def jsma_attack(model, original_image, original_label, mask_labels, target_label, theta=-0.1, epsilon=0.1):
    """
    Implements the Jacobian-based Saliency Map Attack (JSMA).
    
    Args:
        model (nn.Module): The neural network model to attack.
        original_image: The original input image.
        original_label: The true label of the image.
        target_label: The desired adversarial target class.
        theta: The perturbation amount added to each pixel.
        gamma: A scalar between 0 and 1 that controls the maximum
                       number of pixels to modify.
    
    Returns:
        torch.Tensor: The perturbed, adversarial image.
        bool: True if the attack was successful, False otherwise.
    """
    # Set the model to evaluation mode
    model.eval()
    
    # Clone the original image and enable gradient tracking
    adversarial_image = original_image.clone().detach()
    adversarial_image.requires_grad = True
    
    # Max perturbations allowed based on gamma
    max_perturbations = int(np.prod(original_image.shape) * epsilon)
    
    # Keep track of modified pixels to prevent redundant changes
    modified_pixels = set()
    
    # Determine the number of classes for the Jacobian calculation
    output = model(adversarial_image)
    num_classes = output.shape[1]

    # Check if label needs to be masked
    if original_label in mask_labels:
        return adversarial_image.detach(), True
    
    # The attack is an iterative process
    for _ in tqdm(range(max_perturbations), desc="JSMA Attack Progress"):
        # Forward pass to get the output logits
        output = model(adversarial_image)
        
        # Check if the attack has succeeded
        if output.argmax(dim=1).item() == target_label:
            print("Attack successful! Model classified as target label.")
            return adversarial_image.detach(), True

        # Compute the Jacobian Matrix
        # Initialize the Jacobian tensor with zeros
        jacobian = torch.zeros(num_classes, np.prod(adversarial_image.shape))
        
        # For each class, compute the gradient of its logit with respect to the input image
        for c in range(num_classes):
            # Zero out previous gradients
            if adversarial_image.grad is not None:
                adversarial_image.grad.zero_()
            
            # Compute the gradient of the current class's output
            output[0, c].backward(retain_graph=True)
            
            # Flatten the gradient and store it in the Jacobian matrix
            jacobian[c] = adversarial_image.grad.view(-1).clone()

        # Construct the Saliency Map
        # Get the Jacobian for the target class and for all other classes
        target_jacobian = jacobian[target_label]
        other_jacobians = jacobian[np.arange(num_classes) != target_label].sum(dim=0)
        
        # Saliency map calculation based on the paper's formula
        saliency_map = target_jacobian * (other_jacobians + target_jacobian)
        
        # Mask out pixels that have already been modified
        for pixel_idx in modified_pixels:
            saliency_map[pixel_idx] = -1 # A negative value to ensure it's not chosen
        
        # Find the pixel with the highest saliency score
        pixel_to_change = torch.argmax(saliency_map)
        
        # If no valid pixel can be found, stop the attack
        if saliency_map[pixel_to_change] <= 0:
            print("No suitable pixels found. Attack failed.")
            return adversarial_image.detach(), False
        
        # Modify the selected pixel
        # Add the perturbation to the selected pixel
        adversarial_image.data.view(-1)[pixel_to_change] += theta
        
        # Clamp the pixel value to be within the valid range [0, 1]
        adversarial_image.data = torch.clamp(adversarial_image.data, 0, 1)
        
        # Add the modified pixel to the set
        modified_pixels.add(pixel_to_change.item())
        
        # Check if the adversarial image is still valid
        if torch.equal(original_image, adversarial_image):
            print("No change applied. Attack failed.")
            return adversarial_image.detach(), False
            
    # Attack failed if the loop completes without success
    print("Attack failed. Maximum perturbations reached.")
    return adversarial_image.detach(), False


## Perform the attack:

In [23]:
# Remove human and car like objects from categories
labels_to_remove = ["Faces", "Faces_easy", "Motorbikes", "car_side"]
class_names = caltech_dataset.categories
for c in labels_to_remove:
    class_names.remove(c)

In [24]:
# Set the model
model = resnet34

print("Performing a JSMA attack on the dataset...\n")

# Perform the attack on each image in the dataloader
try:
    for image, label in iter(caltech_dataloader):
        target_label = random.choice(class_names)

        # Perform the attack
        # Run the attack
        adversarial_image, success = jsma_attack(
            model=model,
            original_image=image,
            original_label=label,
            target_label=target_label,
            theta=1.0, # Perturbation strength
            epsilon=0.2,  # Max percentage of pixels to change
            mask_labels=labels_to_remove
        )
        if success:
            print(f"Success!!")
        else:
            pass
except:
    pass

print("Done")

Performing a JSMA attack on the dataset...

Done


# Phase 3

## Extra Functions:

In [31]:
def show_transformed_image(transformed_image_array):
    # Reverses the transformations and displays the image using matplotlib.pyplot.
    # Inverse the normalization
    mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
    std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
    
    # De-normalize: (image * std) + mean
    denormalized_image = transformed_image_array * std + mean
    
    # Transpose back from (C, H, W) to (H, W, C) for plotting
    plottable_image = denormalized_image.transpose((1, 2, 0))
    
    # Clip values to the valid [0, 1] range to prevent issues with display
    plottable_image = np.clip(plottable_image, 0, 1)
    
    # Return the original image
    return plottable_image


# Visualization function for Grad-CAM
# Edge detection
def get_canny_edge(img, threshold1=30, threshold2=80):
    # Turn the image from RGB to Grey scale
    gray = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
    gray *= 255
    gray = gray.astype(np.uint8)

    # Gaussian blur
    gray = cv.GaussianBlur(gray, (7,7), 0)

    # Get edge
    edge = 255 - cv.Canny(gray, threshold1, threshold2)
    edge = np.stack([edge]*3, axis=-1)/255

    return edge


# Plot the image and Grad-CAM
def plot_gradcam(img, visualization, title='Grad-CAM'):
    fig, ax = plt.subplot(1, 2, figsize=(10,10))

    ax[0].imshow(img)
    ax[0].set_title(title)

    ax[1].imshow(visualization)
    ax[1].set_title(title)

    for a in ax:
        a.set_xticks([])
        a.set_yticks([])

## Occlusion based salency map:

In [None]:
# Get a batch of images
for image, label in caltech_dataloader:
    display_image = image
    target = label
    break

In [30]:
# Define the occlusion object
occlusion = Occlusion(model)

# Set model to evaluation mode
model.eval()
with torch.no_grad():
    pred = model(display_image)

# Apply occlusion
attributions = occlusion.attribute(
    display_image,
    strides=16,
    sliding_window_shapes=(3, 16, 16)
)

# Convert to numpy and plot the saliency map
saliency_map = attributions.squeeze().cpu().detach().numpy()
print(saliency_map.shape) # (3, 224, 224)

# Select one channel as they are all the same
saliency_map = saliency_map[0]

print("Min:", saliency_map.min())
print("Max:", saliency_map.max())

(6464, 3, 224, 224)
Min: -2.085144
Max: 1.56184


In [48]:
show_transformed_image(np.array(display_image[0, 0]))

  show_transformed_image(np.array(display_image[0, 0]))


array([[[1.        , 0.95975546, 0.91200436],
        [1.        , 0.95975546, 0.91200436],
        [1.        , 0.95975546, 0.91200436],
        ...,
        [1.        , 0.95975546, 0.91200436],
        [1.        , 0.95975546, 0.91200436],
        [1.        , 0.95975546, 0.91200436]],

       [[1.        , 0.95975546, 0.91200436],
        [1.        , 0.95975546, 0.91200436],
        [1.        , 0.95975546, 0.91200436],
        ...,
        [1.        , 0.95975546, 0.91200436],
        [1.        , 0.95975546, 0.91200436],
        [1.        , 0.95975546, 0.91200436]],

       [[1.        , 0.95975546, 0.91200436],
        [1.        , 0.95975546, 0.91200436],
        [1.        , 0.95975546, 0.91200436],
        ...,
        [1.        , 0.95975546, 0.91200436],
        [1.        , 0.95975546, 0.91200436],
        [1.        , 0.95975546, 0.91200436]],

       ...,

       [[1.        , 0.95975546, 0.91200436],
        [1.        , 0.95975546, 0.91200436],
        [1.        , 0

In [None]:
# Iterate for images in batch
for image in display_image:
    image = np.round(np.clip(np.array(image * 255), 0, 255))
    # Create edge image
    grey_image = cv.cvtColor(image, cv.COLOR_RGB2GRAY)
    edge_image = cv.Canny(grey_image, 50, 100)

    fig, ax = plt.subplots(1, 2, figsize=(12, 5))

    # Display image and prediction
    ax[0].imshow(image)
    ax[0].axvline(x=target, color='r',label='Target')
    ax[0].axvline(x=pred, color='g',label='Prediction')
    ax[0].legend()

    ax[0].set_yticks([])
    ax[0].set_xlabel('Direction',size=15)

    # Display saliency map
    ax[1].imshow(edge_image , cmap='gray')
    ax[1].set_xticks([])
    ax[1].set_yticks([])

    # Normalize saliency map values
    abs_max = np.max(np.abs(saliency_map)) 

    # Overlay saliency map with color bar
    saliency_plot = ax[1].imshow(saliency_map, 
                                alpha=0.9,
                                cmap='coolwarm', 
                                vmin=-abs_max, 
                                vmax=abs_max)

    # Create colorbar
    cbar = fig.colorbar(saliency_plot, ax=ax[1], fraction=0.046, pad=0.04)
    cbar.set_label("Saliency Value")

## Grad-CAM:

In [51]:
# Set model to eval mode
model.eval()

# Define target layer and class
# Specific to ResNet-34
target_layer = [model.layer4]

# Get the GradCAM heatmap
cam = GradCAM(model=model, target_layers=target_layer)
heatmap = cam(input_tensor=image.squeeze().cpu().detach(), targets=target)

# Plot the heatmap and the image
edge = get_canny_edge(image)
visulaization = show_cam_on_image(edge, heatmap[0], use_rgb=True)

plot_gradcam(image, visulaization)

TypeError: 'Tensor' object is not callable

# Phase 4:

# Phase 5: