In [None]:
import os
import math
import csv
import pickle
from urllib import request
import scipy.stats as st

import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
import torchvision
from torchvision import datasets, models, transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score

# Prefer the first CUDA GPU, but fall back to the CPU so the notebook still
# runs (slowly) on machines without CUDA instead of failing at the first .to(device).
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# Download data: use gdown to fetch the CSV metadata and the image archive
# from Google Drive. Files that already exist locally are skipped so that
# re-running the notebook does not download everything again.
import gdown
ids = [
    '1XHEWIiTv9Czjn9RJ6IHu_fWXfrteks5l',
    '1gwa_5bTO3dchDlC3WZ3nt_0VvwBkvFfi',
    '1DCUuuy20k-dNbUnCyi0HI9O7qy8hOpE5'
]

outputs = [
    'train.csv',
    'test.csv',
    'img.zip'
]

for i, o in zip(ids, outputs):
    if os.path.exists(o):
        # Already downloaded by a previous run; delete the file to force a refresh.
        continue
    # Some gdown versions report a failed download by returning None instead
    # of raising; stop here rather than failing later on a missing file.
    if gdown.download(id=i, output=o, quiet=False) is None:
        raise RuntimeError(f"Download failed for {o!r} (Google Drive id {i})")

In [None]:
# Unzip the downloaded image file
!unzip -qq "/content/img.zip"

In [None]:
## load image metadata (Image_ID, true label, and target label) from a CSV file
def load_ground_truth(fname):
    """Read attack metadata from a CSV file and fetch the class-name table.

    Args:
        fname: Path of a CSV file with the columns ``ImageId``, ``TrueLabel``
            and ``TargetClass``. The two label columns are 1-based in the file.

    Returns:
        A 4-tuple ``(image_id_list, label_ori_list, label_tar_list, gt)``:
        the image ids as strings, the true labels and target labels as
        0-based Python ints, and the object unpickled from the remote
        ``imagenet1000_clsid_to_human.pkl`` file (indexed by class id by the
        callers in this notebook).

    Raises:
        KeyError: if a required column is missing.
        ValueError: if a label cannot be converted to an integer.
        urllib.error.URLError: if the class-name table cannot be downloaded.
    """
    # Read ids as text so that purely numeric ids are not parsed as integers;
    # callers concatenate them into file names.
    df = pd.read_csv(fname, dtype={'ImageId': str})

    image_id_list = df['ImageId'].tolist()
    # The CSV stores 1-based labels; shift to the 0-based indices the models use.
    label_ori_list = (df['TrueLabel'].astype(int) - 1).tolist()
    label_tar_list = (df['TargetClass'].astype(int) - 1).tolist()

    # SECURITY NOTE: unpickling data fetched over the network can execute
    # arbitrary code. The URL is pinned to a specific gist revision, but it
    # should be replaced by a vetted local copy (or a JSON file) when possible.
    with request.urlopen('https://gist.githubusercontent.com/yrevar/6135f1bd8dcf2e0cc683/raw/d133d61a09d7e5a3b36b8c111a8dd5c4b5d560ee/imagenet1000_clsid_to_human.pkl') as response:
        gt = pickle.load(response)
    return image_id_list,label_ori_list,label_tar_list, gt

## simple Module to normalize an image using given mean and standard deviation
class Normalize(nn.Module):
    """Channel-wise normalization ``(x - mean) / std`` as an ``nn.Module``.

    Args:
        mean: Per-channel means (sequence, scalar or tensor).
        std: Per-channel standard deviations (sequence, scalar or tensor).

    The statistics are stored as 1-D float32 buffers named ``mean`` and
    ``std``. They are registered as non-persistent buffers, so they follow
    ``module.to(...)`` but ``state_dict()`` stays empty.
    """

    def __init__(self, mean, std):
        super(Normalize, self).__init__()
        # torch.as_tensor avoids the legacy torch.Tensor(...) constructor, which
        # interprets a bare integer as a size and returns uninitialised memory.
        self.register_buffer(
            'mean', torch.as_tensor(mean, dtype=torch.float32).reshape(-1), persistent=False)
        self.register_buffer(
            'std', torch.as_tensor(std, dtype=torch.float32).reshape(-1), persistent=False)

    def forward(self, x):
        """Normalize ``x``, a 4-D tensor whose channel axis is dimension 1.

        The statistics are cast to the dtype and device of ``x`` on every
        call, so the module works even if it was never moved with ``.to()``.
        """
        mean = self.mean.to(device=x.device, dtype=x.dtype)[None, :, None, None]
        std = self.std.to(device=x.device, dtype=x.dtype)[None, :, None, None]
        return (x - mean) / std

In [None]:
# Data preparation and attack configuration

# Preprocessing: `trn` converts an image to a tensor, `norm` applies the
# per-channel mean/std normalization right before a model is called.
trn = transforms.Compose([transforms.ToTensor(),])
norm = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Image ids, 0-based true labels, 0-based target labels, class-name table
ids, origins, targets, gt = load_ground_truth('train.csv')

# Where the images live and how large they are
input_path = 'images/'
img_size = 299

# Attack hyper-parameters
batch_size = 20
max_iterations = 100
epsilon = 16   # L_inf norm bound, in 0-255 pixel units (divided by 255 where it is used)
lr = 2 / 255   # step size, already in [0, 1] pixel units

# Number of batches needed to cover every image once (the last one may be smaller)
epochs = math.ceil(len(ids) / batch_size)

In [None]:
# Load pretrained ImageNet classifiers in inference mode.
# In this notebook `resnet` is the model the perturbation is optimised against
# and `vgg` is the model the perturbed images are evaluated on.
resnet = models.resnet50(weights="IMAGENET1K_V1").eval()
vgg = models.vgg16_bn(weights="IMAGENET1K_V1").eval()

# Freeze the weights (only the perturbation needs gradients) and move both
# models to the compute device.
for model in (resnet, vgg):
    model.requires_grad_(False)
    model.to(device)

# Set random seeds for reproducibility. The attack draws random numbers from
# both torch and NumPy, so seeding torch alone is not enough.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
torch.backends.cudnn.deterministic = True
# Autotuning may pick different convolution algorithms from run to run.
torch.backends.cudnn.benchmark = False

In [None]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
from PIL import Image
from tqdm import tqdm

# Function for input diversity (DI-FGSM)
def input_diversity(image, prob=0.5, resize_max=330):
    """Randomly resize and pad a batch of images (DI-FGSM input transform).

    With probability ``1 - prob`` the batch is returned untouched. Otherwise
    it is resized to a random square size ``rnd`` in ``[max(H, W), resize_max)``,
    zero-padded at a random offset onto a ``resize_max x resize_max`` canvas
    and cropped back to the original ``H x W``. The crop keeps the top-left
    window of the canvas, so part of the enlarged image may be cut off.

    All random numbers come from torch's global generator, so the result is
    reproducible after ``torch.manual_seed``. Every step is differentiable
    with respect to ``image``.

    Args:
        image: Tensor of shape ``(N, C, H, W)``.
        prob: Probability of applying the transform.
        resize_max: Side length of the padded canvas (exclusive upper bound
            of the random resize). The default reproduces the original
            299 -> [299, 330) behaviour for 299x299 inputs.

    Returns:
        A tensor with the same shape as ``image``.
    """
    if torch.rand(1).item() > prob:
        return image  # Skip diversity with probability (1 - prob)

    height, width = image.shape[-2:]
    base = max(height, width)
    if resize_max <= base:
        # No room to enlarge the image; nothing to diversify.
        return image

    # Random resizing to rnd x rnd, with rnd in [base, resize_max)
    rnd = int(torch.randint(base, resize_max, (1,)).item())
    resized_image = F.interpolate(image, size=(rnd, rnd), mode='bilinear', align_corners=False)

    # Random padding up to resize_max x resize_max (pad_total is always >= 1)
    pad_total = resize_max - rnd
    pad_top = int(torch.randint(0, pad_total, (1,)).item())
    pad_bottom = pad_total - pad_top
    pad_left = int(torch.randint(0, pad_total, (1,)).item())
    pad_right = pad_total - pad_left
    padded_image = F.pad(resized_image, (pad_left, pad_right, pad_top, pad_bottom), mode='constant', value=0)

    # Crop back to the input resolution
    return padded_image[:, :, :height, :width]

# Pre-compute Gaussian kernel for TI-FGSM
def gaussian_kernel(size=5, sigma=1.0):
    """
    Build a normalized 2-D Gaussian kernel.

    The kernel is sampled on an integer grid of `size` points per axis
    (centred on zero for odd sizes) and scaled so its entries sum to one.

    Args:
        size: Number of taps along each axis.
        sigma: Standard deviation of the Gaussian, in grid units.

    Returns:
        A float32 tensor of shape (size, size).
    """
    first = -size // 2 + 1
    stop = size // 2 + 1
    coords = np.arange(first, stop)

    # Squared distance from the origin for every grid cell (row i, column j)
    col_sq = coords[None, :] ** 2
    row_sq = coords[:, None] ** 2
    sq_dist = col_sq + row_sq

    weights = np.exp(-sq_dist / (2 * sigma**2))
    weights = weights / weights.sum()
    return torch.tensor(weights, dtype=torch.float32)

# Apply Gaussian kernel to gradients
def apply_ti_gradient(grad, kernel):
    """
    Smooth a gradient batch with a depthwise convolution (TI-FGSM).

    Args:
        grad: Gradient tensor laid out as (N, C, H, W).
        kernel: Convolution weight of shape (C, 1, kH, kW); its first
            dimension is used as the number of groups, so every channel is
            filtered independently.

    Returns:
        The filtered gradient. Padding is kH // 2 and kW // 2, which keeps
        the spatial size unchanged for odd kernel sizes.
    """
    n_groups, _, kernel_h, kernel_w = kernel.size()
    same_padding = (kernel_h // 2, kernel_w // 2)
    return F.conv2d(input=grad, weight=kernel, padding=same_padding, groups=n_groups)

# Gaussian smoothing kernel used on the gradients (TI-FGSM)
kernel_size = 15
sigma = 1.0
# Shape (3, 1, kernel_size, kernel_size): the same 2-D kernel for each RGB
# channel, laid out as a depthwise convolution weight.
kernel = (
    gaussian_kernel(kernel_size, sigma)[None, None]
    .repeat(3, 1, 1, 1)
    .to(device)
)

# Momentum (decay) factor of the accumulated gradient (MI-FGSM)
momentum = 1.0

# Per-batch results: predictions of the evaluation model, attack target
# labels, and the original (true) labels
preds_ls = []
labels_ls = []
origin_ls = []

# Clear GPU cache
torch.cuda.empty_cache()

# Perturbation budget in [0, 1] pixel units (epsilon is given in 0-255 units)
eps = epsilon / 255
# Summed (not averaged) loss so each image's gradient is independent of the batch size
criterion = nn.CrossEntropyLoss(reduction='sum')

# Loop over batches (`epochs` is the number of batches)
for k in tqdm(range(epochs), total=epochs):
    start = k * batch_size
    batch_ids = ids[start:start + batch_size]
    batch_size_cur = len(batch_ids)  # The last batch may be smaller
    # True labels of the images in the current batch
    ori_idx = origins[start:start + batch_size_cur]

    # Load the images; convert to RGB so grayscale / RGBA files still give
    # 3 channels, and close each file as soon as it has been decoded.
    X_ori = torch.zeros(batch_size_cur, 3, img_size, img_size).to(device)
    for i, image_id in enumerate(batch_ids):
        with Image.open(os.path.join(input_path, image_id + '.png')) as img:
            X_ori[i] = trn(img.convert('RGB'))

    # Perturbation (the only tensor that needs gradients) and momentum buffer.
    # delta is created directly on X_ori's device so it stays a leaf tensor.
    delta = torch.zeros_like(X_ori, requires_grad=True)
    g = torch.zeros_like(X_ori)

    # The attack target is the class the surrogate model finds least likely
    # for the clean image
    with torch.no_grad():
        logits = resnet(norm(X_ori))
        least_likely_class = torch.argmin(logits, dim=1)

    labels = least_likely_class.to(device)

    # Adversarial attack loop
    for _ in range(max_iterations):
        # Apply input diversity (DI-FGSM)
        diversified_image = input_diversity(X_ori + delta, prob=0.5)
        # Loss of the surrogate model with respect to the target labels
        logits = resnet(norm(diversified_image))
        loss = criterion(logits, labels)
        # Gradient of the loss with respect to delta
        loss.backward()

        # Apply TI-FGSM (blur gradients)
        grad = apply_ti_gradient(delta.grad, kernel)

        # Add momentum (MI-FGSM). Each image's gradient is scaled by its own
        # mean absolute value, so one image with a large gradient cannot
        # dominate the others; the clamp guards against division by zero.
        grad = grad / grad.abs().mean(dim=(1, 2, 3), keepdim=True).clamp_min(1e-12)
        g = momentum * g + grad

        with torch.no_grad():
            # Targeted attack: step *down* the loss towards the target class
            delta -= lr * torch.sign(g)
            # Keep the perturbation within the L_inf budget ...
            delta.clamp_(-eps, eps)
            # ... and keep the perturbed image a valid image in [0, 1]
            delta.copy_((X_ori + delta).clamp_(0, 1) - X_ori)

        # Reset the gradient so it does not accumulate into the next iteration
        delta.grad.zero_()

    # Evaluate the perturbed images on the VGG model; no autograd graph needed
    with torch.no_grad():
        X_pur = norm(X_ori + delta)  # normalized perturbed images (model input)
        preds = torch.argmax(vgg(X_pur), dim=1)

    # Store Results
    preds_ls.append(preds.cpu().numpy())
    labels_ls.append(labels.cpu().numpy())
    origin_ls.append(ori_idx)


In [None]:
# Flatten the per-batch results into one row per image:
#   origin - true label, pred - VGG prediction, label - attack target
per_batch = {
    'origin': origin_ls,
    'pred': preds_ls,
    'label': labels_ls,
}
df = pd.DataFrame({
    column: [value for batch in batches for value in batch]
    for column, batches in per_batch.items()
})

df.head()

In [None]:
# Show accuracy.
# A notebook cell only displays its last expression, so both scores are
# collected into one Series instead of being evaluated one after the other
# (which silently discards the first).
pd.Series(
    {
        # fraction of predictions equal to the attack target label
        'pred == target label': accuracy_score(df['label'], df['pred']),
        # fraction of predictions still equal to the true label
        'pred == true label': accuracy_score(df['origin'], df['pred']),
    },
    name='accuracy',
)

In [None]:
# Write the results table to disk.
# index=True is the pandas default: the row index is written as a leading,
# unnamed column. NOTE(review): confirm whether the consumer of
# submission.csv expects that column.
df.to_csv('submission.csv', index=True)

In [None]:
# Visualization of some adversarial examples
def viz(img_A, img_B, origins, labels, gt, preds):
    """Plot image pairs side by side with their labels, one figure per pair.

    Args:
        img_A: Iterable of (C, H, W) tensors shown on the left, titled with
            the true label.
        img_B: Iterable of (C, H, W) tensors shown on the right, titled with
            the target label.
        origins: True class index of each pair.
        labels: Target class index of each pair.
        gt: Lookup from class index to a human-readable name.
        preds: Predicted class index of each pair; the caption reports
            'Success' when it equals the target and 'Failed' otherwise.
    """
    samples = zip(img_A, img_B, origins, labels, preds)
    for left, right, true_idx, target_idx, pred_idx in samples:
        fig, axes = plt.subplots(1, 2, figsize=(10,3))
        ax_left, ax_right = axes

        # imshow expects channels last: (C, H, W) -> (H, W, C)
        ax_left.imshow(left.permute(1, 2, 0))
        ax_left.set_title("True label: " + gt[true_idx])
        ax_right.imshow(right.permute(1, 2, 0))
        ax_right.set_title("Target: " + gt[target_idx])

        if pred_idx != target_idx:
            outcome = 'Failed'
        else:
            outcome = 'Success'
        fig.text(
            0.5, -0.05,
            f'Pred: {gt[pred_idx]} -> {outcome}',
            wrap=True, horizontalalignment='center', fontsize=12,
        )

        plt.show()

# Show the last batch. X_pur is the ImageNet-normalized model input, so its
# values are not displayable pixels; plot the adversarial image in pixel space
# (clean image + perturbation), clipped to the valid [0, 1] range for imshow.
X_adv_vis = (X_ori + delta).detach().clamp(0, 1).cpu()
viz(X_ori.detach().cpu(), X_adv_vis, ori_idx, labels.cpu().numpy(), gt, preds.cpu().numpy())