In [1]:
%run pretrained-model.ipynb

CUDA Available:  True
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): Softmax(dim=1)
  )
)


In [2]:
# FGSM attack 
def fgsm(image, label, epsilon=0.1):
    """Craft an adversarial example with the Fast Gradient Sign Method.

    Takes one step of size ``epsilon`` along the sign of the gradient of the
    loss with respect to the input pixels, then clips the result to [0, 1].

    Relies on ``model``, ``device`` and ``F`` being defined earlier in the
    notebook (``F`` is presumably ``torch.nn.functional`` -- confirm in the
    setup notebook).

    image:     The input image X (not modified; a private copy is used)
    label:     The image label y
    epsilon:   The adversarial perturbation size

    return:    The adversarial image X*, detached from the autograd graph
    """

    # Differentiate with respect to a private leaf copy instead of flipping
    # `requires_grad` on the caller's tensor: this leaves the argument
    # untouched, works even when the caller passes a non-leaf tensor, and
    # prevents `.grad` from accumulating across repeated calls.
    adv_input = image.detach().clone().requires_grad_(True)

    # Obtain the gradient of the loss with respect to the input
    output = model(adv_input.to(device))
    # NOTE(review): nll_loss expects log-probabilities, but the model printed
    # by the setup cell ends in Softmax, so `output` looks like plain
    # probabilities. The loss is then -p_y, whose gradient has the same sign
    # as that of -log(p_y), so the FGSM step direction is unaffected.
    loss = F.nll_loss(output, label.to(device))
    model.zero_grad()
    loss.backward()

    # Shift every pixel by +/- epsilon in the direction that increases the loss
    perturbed = adv_input + epsilon * adv_input.grad.sign()

    return torch.clamp(perturbed, 0, 1).detach()  # Return the clipped image

In [3]:
# Evaluate the FGSM attack on a sample of the test set.
NUM_SAMPLES = 1000   # number of test batches to draw
EPSILON = 0.1        # FGSM perturbation size

correct = 0          # attacks that flipped an initially-correct prediction
num_drawn = 0        # batches actually drawn from the loader
test_loader_iter = iter(test_loader)

# zip() stops at the shorter iterable, so a loader with fewer than
# NUM_SAMPLES batches ends the loop cleanly instead of raising StopIteration.
for j, (image, label) in zip(range(NUM_SAMPLES), test_loader_iter):
    num_drawn += 1
    image_dev = image.to(device)
    label_dev = label.to(device)
    # Assumes the loader yields one sample per batch (argmax over the whole
    # output and .item() both require that) -- TODO confirm in the setup notebook.
    true_label = label_dev.item()

    # Prediction-only forward pass: no autograd graph needed
    with torch.no_grad():
        initial_prediction = torch.argmax(model(image_dev)).item()
    # Don't bother attacking if the image is already misclassified
    if initial_prediction != true_label:
        continue

    # Generate FGSM adversarial example
    adv_image = fgsm(image   = image_dev,
                     label   = label_dev,
                     epsilon = EPSILON)

    with torch.no_grad():
        final_prediction = torch.argmax(model(adv_image)).item()

    # The (untargeted) attack succeeded if the prediction no longer matches
    # the true label
    if final_prediction != true_label:
        correct += 1

# Despite its name, `accuracy` is the attack success fraction for this epsilon.
# NOTE(review): the denominator counts every drawn sample, including those
# skipped because they were already misclassified, so this understates the
# success rate among the images that were actually attacked.
accuracy = correct / num_drawn if num_drawn else 0.0

In [4]:
# Fraction of the sampled test images whose initially-correct prediction was
# flipped by the FGSM attack (higher means a more successful attack, not a
# more accurate model).
accuracy

0.736