### Import Model Architectures
Custom Torch models need to be instantiated before they can be evaluated. The `model_architectures.py` file contains the model architectures, so we can abstract them away and focus only on the evaluations.

In [1]:
from model_architectures import *
import model_architectures
from attacks import * 
# Print available classes to verify our model architectures were imported
print(dir(model_architectures))

Using device: cuda
Using device: cuda
['BasicBlock', 'DataLoader', 'F', 'Load', 'ResNetCIFAR', 'ResNetMNIST', 'ResnetSVHN', 'TensorDataset', 'Tester', 'Visualizer', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', 'cifar_directory', 'conv3x3', 'current_directory', 'device', 'mnist_directory', 'nn', 'np', 'os', 'pd', 'pickle', 'plt', 'scipy', 'svhn_directory', 'torch']


### Loading the Model Weights
Using our model artifacts we load the weights back into the model so we have our pre-trained models to test our perturbations against.

In [2]:
# Build one ResNet (BasicBlock, [2, 2, 2, 2] layout) per dataset and move it to `device`.
mnist_resnet_model = ResNetMNIST(BasicBlock, [2, 2, 2, 2], num_classes=10, grayscale=True).to(device)
cifar_resnet_model = ResNetCIFAR(BasicBlock, [2, 2, 2, 2], num_classes=10, grayscale=False).to(device)
svhn_resnet_model = ResnetSVHN(BasicBlock, [2, 2, 2, 2], num_classes=10, grayscale=False).to(device)

# Restore the pre-trained weights. map_location=device remaps the stored tensors
# onto the current device, so a checkpoint saved on a GPU still loads when
# `device` is the CPU.
# NOTE(review): torch.load unpickles the file; only load trusted artifacts.
mnist_resnet_model.load_state_dict(torch.load("artifacts/resnet18_mnist_model.pth", map_location=device))
cifar_resnet_model.load_state_dict(torch.load("artifacts/resnet18_cifar_model.pth", map_location=device))
svhn_resnet_model.load_state_dict(torch.load("artifacts/resnet18_svhn_model.pth", map_location=device))

<All keys matched successfully>

In [3]:
# Set models to evaluation mode so layers such as the BatchNorm2d modules
# (visible in the printed architecture) use their stored running statistics.
# The last expression's repr (the SVHN model) is what the cell displays.
mnist_resnet_model.eval()
cifar_resnet_model.eval()
svhn_resnet_model.eval()

ResnetSVHN(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True

### Loading/Visualizing Data
Functionality to load the test dataset and labels as numpy arrays and visualize any given image from the numpy array has been implemented in the model_architectures.py file for easy access.

In [4]:
# Only the CIFAR-10 test split is loaded; the MNIST and SVHN loads are disabled,
# so any cell that uses mnist_* or svhn_* arrays needs them re-enabled first.
loader = Load()
# mnist_test_images, mnist_test_labels = loader.load_mnist_test_images()
cifar10_test_images, cifar10_test_labels = loader.load_cifar10_test_images()
# svhn_test_images, svhn_test_labels = loader.load_svhn_test_images()

In [5]:
# Create the visualizer used to display individual test images.
# The example viz.show(...) calls in this cell are commented out, so nothing is displayed.
viz = Visualizer()
# # MNIST
# i = 1
# print(f"MNIST Shape: {mnist_test_images[i].shape}")
# print(f"MNIST Label: {mnist_test_labels[i]}")
# viz.show(mnist_test_images[i])

# # CIFAR-10
# label_mapping = {
#     0: 'airplane',
#     1: 'automobile',
#     2: 'bird',
#     3: 'cat',
#     4: 'deer',
#     5: 'dog',
#     6: 'frog',
#     7: 'horse',
#     8: 'ship',
#     9: 'truck'
# }
# print(f"\nCIFAR-10 Shape: {cifar10_test_images[i].shape}")
# print(f"CIFAR-10 Label: {label_mapping[cifar10_test_labels[i]]}")
# viz.show(cifar10_test_images[i])

# # SVHN
# print(f"\nSVHN Shape: {svhn_test_images[i].shape}")
# print(f"SVHN Label: {svhn_test_labels[i]}")
# viz.show(svhn_test_images[i])


### Testing Our Models (Unperturbed Data)
The data is converted to PyTorch tensors and wrapped in a Data Loader so the model can be evaluated on it. Evaluation iterates over a Data Loader, so after perturbing the data we have to wrap it in a Data Loader again before evaluating the model. We can verify our models by evaluating them on the clean test sets and checking that the accuracies match our expected values: 99% for MNIST, 76% for CIFAR10, and 93% for SVHN.

In [6]:
# Only the first 256 CIFAR-10 test samples are wrapped in a loader, so every
# evaluation that uses test_loader_cifar10 runs on that subset, not the full split.
# test_loader_mnist = loader.convert_mnist_numpy_to_tensor(mnist_test_images[:256], mnist_test_labels[:256])
test_loader_cifar10 = loader.convert_cifar10_numpy_to_tensor(cifar10_test_images[:256], cifar10_test_labels[:256])
# test_loader_svhn = loader.convert_svhn_numpy_to_tensor(svhn_test_images[:256], svhn_test_labels[:256])
# Helper whose test(model, loader) call is used by the accuracy cells.
tester = Tester()

In [7]:

# acc_mnist = tester.test(mnist_resnet_model, test_loader_mnist)
# print(f'Test Accuracy MNIST: {acc_mnist * 100:.2f}%')

In [8]:
# acc_cifar10 = tester.test(cifar_resnet_model, test_loader_cifar10)
# print(f'Test Accuracy CIFAR10: {acc_cifar10 * 100:.2f}%')

In [9]:
# acc_svhn = tester.test(svhn_resnet_model, test_loader_svhn)
# print(f'Test Accuracy SVHN: {acc_svhn * 100:.2f}%')

### Perturbing an Image and Testing Accuracy
As a simple sanity check, we flip each image so that it is mirrored. We perturb the images with `test_flip`, wrap the perturbed arrays in a new Data Loader, and then test them against our model.


In [10]:
# def test_flip(images):
#     """
#     Flip images along the specified axis.

#     Parameters:
#     - images: numpy array with shape (num_images, channels, height, width)
#     - axis: Axis along which to flip the images (0 for vertical, 1 for horizontal)

#     Returns:
#     - Perturbed images
#     """
#     flip_axis = 1

#     perturbed_images = np.empty_like(images)
#     for i in range(images.shape[0]):
#         perturbed_image = np.flip(images[i, 0, :, :], axis=flip_axis)
#         perturbed_images[i, 0, :, :] = perturbed_image
#     return perturbed_images


# flipped_images_array = test_flip(mnist_test_images)
# # Show example of the image after being flipped
# viz.show(flipped_images_array[1])

# flipped_images_tensor = loader.convert_mnist_numpy_to_tensor(flipped_images_array, mnist_test_labels)
# acc_mnist_flipped = tester.test(mnist_resnet_model, flipped_images_tensor)
# print(f'Test Accuracy MNIST (Flipped): {acc_mnist_flipped * 100:.2f}%')
# # Accuracy drops 50%

### Fast Gradient Sign Method
The ``fgsm_attack`` function takes three
inputs: *image* is the original clean image ($x$), *epsilon* is
the pixel-wise perturbation amount ($\epsilon$), and *data_grad*
is the gradient of the loss w.r.t. the input image
($\nabla_{x} J(\mathbf{\theta}, \mathbf{x}, y)$). The function
then creates the perturbed image as

\begin{align}perturbed\_image = image + epsilon*sign(data\_grad)\end{align}

 \begin{align}= x + \epsilon * sign(\nabla_{x} J(\mathbf{\theta}, \mathbf{x}, y))\end{align}

In [11]:
def test_fgsm(model, test_loader, epsilon):
    correct = 0
    total = 0

    adv_examples = []
    batch = 0
    
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        batch += 1
        print(f"Batch: {batch}, Epsilon: {epsilon}, Correct: {correct}")
        for image, label in zip(images, labels):
            image = image.unsqueeze(0)
            label = label.unsqueeze(0)
            image.requires_grad = True
            output, _ = model(image)

            # print(outputs)

            _, init_pred = torch.max(output.data, 1)

            if not torch.equal(init_pred, label):
                continue
            
            loss = F.nll_loss(output, label)
            model.zero_grad()
            loss.backward()
            data_grad = image.grad.data
            perturbed_data = fgsm_attack(image, epsilon, data_grad)

            output_final, _ = model(perturbed_data)
            _, final_pred = torch.max(output_final.data, 1)
            if torch.equal(final_pred, label):
                correct += 1
                if epsilon == 0 and len(adv_examples) < 5:
                    adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
                    adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )
            else:
                # Save some adv examples for visualization later
                if len(adv_examples) < 5:
                    adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
                    adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )
            total +=1 

    accuracy = correct / total
    print(f"Epsilon: {epsilon}\tTest Accuracy = {correct} / {total} = {accuracy}")
    return accuracy, adv_examples

In [12]:
# Perturbation strengths swept by the FGSM experiment (0 is the unperturbed baseline).
epsilons = [0, .05, .1, .15, .2, .25, .5]
# Filled by the sweep loop (currently commented out): one entry per epsilon.
accuracies = []
examples = []

# # Run test for each epsilon
# for eps in epsilons:
#     acc, ex = test_fgsm(mnist_resnet_model, test_loader_mnist, eps)
#     accuracies.append(acc)
#     examples.append(ex)

# print(accuracies)

In [13]:
# plt.figure(figsize=(5,5))
# plt.plot(epsilons, accuracies, "*-")
# plt.yticks(np.arange(0, 1.1, step=0.1))
# plt.xticks(np.arange(0, .55, step=0.05))
# plt.title("Accuracy vs Epsilon")
# plt.xlabel("Epsilon")
# plt.ylabel("Accuracy")
# plt.show()


In [14]:
# cnt = 0
# plt.figure(figsize=(8,10))
# for i in range(len(epsilons)):
#     for j in range(len(examples[i])):
#         cnt += 1
#         plt.subplot(len(epsilons),len(examples[0]),cnt)
#         plt.xticks([], [])
#         plt.yticks([], [])
#         if j == 0:
#             plt.ylabel(f"Eps: {epsilons[i]}", fontsize=14)
#         orig,adv,ex = examples[i][j]
#         plt.title(f"{orig} -> {adv}")
#         plt.imshow(ex, cmap="gray")
# plt.tight_layout()
# plt.show()

In [15]:
def test_deepfool(model, test_loader, overshoot=0.02):
    correct = 0
    total = 0

    adv_examples = []
    batch = 0
    
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        batch += 1
        print(f"Batch: {batch}, Correct: {correct}")
        for image, label in zip(images, labels):
            image = image.unsqueeze(0)
            label = label.unsqueeze(0)
            image.requires_grad = True
            output, _ = model(image)

            # print(outputs)

            _, init_pred = torch.max(output.data, 1)

            if not torch.equal(init_pred, label):
                continue
            
            perturbed_image, final_pred, r_total, iter = deepfool_attack(image, model, overshoot=0.02, max_iterations=100)
            print(f"Perturbed Iteration: {iter}")
            if torch.equal(final_pred, label):
                correct += 1
            total +=1 
            # TODO: Remove
    accuracy = correct / total
    print(f"Test Accuracy = {correct} / {total} = {accuracy}")
    return accuracy, adv_examples

In [16]:
# NOTE(review): test_deepfool returns an (accuracy, adv_examples) tuple, so
# `accuracy` is bound to that whole tuple here, not to the float alone.
accuracy = test_deepfool(cifar_resnet_model, test_loader_cifar10)

Batch: 1, Correct: 0
Perturbed Iteration: 22
Perturbed Iteration: 15
Perturbed Iteration: 77
Perturbed Iteration: 14
Perturbed Iteration: 66
Perturbed Iteration: 28
Perturbed Iteration: 70
Perturbed Iteration: 57
Perturbed Iteration: 18
Perturbed Iteration: 100
Perturbed Iteration: 100
Perturbed Iteration: 100
Perturbed Iteration: 93
Perturbed Iteration: 32
Perturbed Iteration: 43
Perturbed Iteration: 56
Perturbed Iteration: 100
Perturbed Iteration: 48
Perturbed Iteration: 45
Perturbed Iteration: 15
Perturbed Iteration: 100
Perturbed Iteration: 41
Perturbed Iteration: 53
Perturbed Iteration: 93
Perturbed Iteration: 100
Perturbed Iteration: 100
Perturbed Iteration: 25
Perturbed Iteration: 38
Perturbed Iteration: 8
Perturbed Iteration: 56
Perturbed Iteration: 100
Perturbed Iteration: 100
Perturbed Iteration: 100
Perturbed Iteration: 61
Perturbed Iteration: 79
Perturbed Iteration: 100
Perturbed Iteration: 2
Perturbed Iteration: 72
Perturbed Iteration: 100
Perturbed Iteration: 100
Perturbe