In [1]:
import numpy as np
from torch.autograd import Variable
import torch as torch
import copy
from torch.autograd.gradcheck import zero_gradients
import os
os.chdir('..')
from dataloader import *
from models.vgg import VGG11

In [2]:
def adv_attack(image, label, model, I, num_classes=10, overshoot=0.02, max_iter=50):
    '''Craft a DeepFool adversarial example for a single image.

    Adapted from https://github.com/LTS4/DeepFool/tree/master/Python

    Args:
        image: input tensor with a leading batch dimension; only row 0 of the
            model output is used, so a batch of one image is expected.
        label: class index currently predicted for ``image``; the attack stops
            as soon as the prediction differs from it.
        model: callable returning a tuple whose first element is the logits
            tensor of shape (batch, classes).
        I: class indices ordered by descending logit; ``I[0]`` is treated as
            the original class and ``I[1:num_classes]`` as candidate targets.
        num_classes: how many entries of ``I`` are considered.
        overshoot: extra fraction applied to the accumulated perturbation so
            the sample actually crosses the decision boundary.
        max_iter: maximum number of DeepFool iterations.

    Returns:
        The perturbed image as a detached tensor on the same device as
        ``image``. If no usable direction is found the last image is returned.
    '''
    # Detach so the perturbation does not extend any graph owned by the caller.
    image = image.detach()
    pert_image = image.clone()
    w = np.zeros(image.shape)
    r_tot = np.zeros(image.shape)

    loop_i = 0

    x = pert_image.clone().requires_grad_(True)
    fs, _ = model(x)
    k_i = label

    while k_i == label and loop_i < max_iter:

        pert = np.inf
        # torch.autograd.grad differentiates w.r.t. the input only, so nothing
        # is accumulated into x.grad or into the model parameters' .grad.
        grad_orig = torch.autograd.grad(
            fs[0, int(I[0])], x, retain_graph=True)[0].cpu().numpy()

        for k in range(1, num_classes):
            cur_grad = torch.autograd.grad(
                fs[0, int(I[k])], x, retain_graph=True)[0].cpu().numpy()

            # set new w_k and new f_k
            w_k = cur_grad - grad_orig
            f_k = (fs[0, int(I[k])] - fs[0, int(I[0])]).item()

            w_k_norm = float(np.linalg.norm(w_k.flatten()))
            if w_k_norm == 0.0:
                # Identical gradients give no direction towards class k.
                continue
            pert_k = abs(f_k) / w_k_norm

            # determine which w_k to use
            if pert_k < pert:
                pert = pert_k
                w = w_k

        if not np.isfinite(pert):
            # No candidate class produced a usable direction; stop instead of
            # writing NaNs into the image.
            break

        # compute r_i and r_tot
        # Added 1e-4 for numerical stability
        r_i = (pert + 1e-4) * w / np.linalg.norm(w)
        r_tot = (r_tot + r_i).astype(np.float32)

        # Stay on the input's device/dtype instead of assuming CUDA.
        delta = torch.from_numpy(r_tot).to(device=image.device, dtype=image.dtype)
        pert_image = image + (1 + overshoot) * delta

        x = pert_image.clone().requires_grad_(True)
        fs, _ = model(x)
        k_i = np.argmax(fs.detach().cpu().numpy().flatten())

        loop_i += 1

    return pert_image

In [3]:
def deepfool_attack(model, device, dataloader, num_classes=10, target_class=3,
                    save_path='./data/adv_cat_deepfool.pt'):
    '''Run the DeepFool attack over a dataloader and report post-attack accuracy.

    Samples the model already misclassifies are skipped. Every other sample is
    perturbed with ``adv_attack`` and re-classified.

    Args:
        model: callable returning a tuple whose first element is the logits.
        device: device the data is moved to before the forward pass.
        dataloader: iterable of ``(data, target)`` pairs. Each batch must hold
            exactly one sample, because ``.item()`` is called on the target
            and on the prediction.
        num_classes: number of top-ranked classes handed to ``adv_attack``.
        target_class: successful adversarial images whose new prediction is
            this class are collected and saved (default 3).
        save_path: file the collected images are written to with ``torch.save``.

    Returns:
        ``(final_acc, adv_examples)``. ``final_acc`` is the fraction of attacked
        samples still classified correctly. ``adv_examples`` is a list of
        ``(initial_pred, final_pred, perturbed_image_as_numpy)`` tuples, one
        per attacked sample.
    '''
    # Put the model in inference mode before the first forward pass, not only
    # after the first attack.
    model.eval()

    # Accuracy counters
    correct = 0
    total = 0
    adv_examples = []

    # Collected per-sample and concatenated once after the loop.
    adv_cat_batches = []

    # Loop over all examples in test set
    for data, target in dataloader:
        data, target = data.to(device), target.to(device)

        # Only the ranking of the logits is needed here, so no graph is built.
        with torch.no_grad():
            f_image, _ = model(data)
        f_image = f_image.cpu().numpy().flatten()
        I = f_image.argsort()[::-1]

        init_pred = I[0]

        if init_pred.item() != target.item():  # initially was incorrect --> no need to generate adversary
            continue

        # Call Attack
        perturbed_data = adv_attack(data, init_pred, model, I, num_classes=num_classes, overshoot=0.02, max_iter=50)

        # Re-classify the perturbed image
        with torch.no_grad():
            output, _ = model(perturbed_data)

        # Check for success
        final_pred = output.max(1, keepdim=True)[1]
        if final_pred.item() == target.item():
            correct += 1  # still correct
        elif final_pred.item() == target_class:  # attack succeeded into the tracked class
            adv_cat_batches.append(perturbed_data.detach().cpu())

        adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
        adv_examples.append((init_pred.item(), final_pred.item(), adv_ex))
        total += 1

    # An empty tensor is saved when nothing was collected.
    adv_cat = torch.cat(adv_cat_batches, dim=0) if adv_cat_batches else torch.tensor([])
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    torch.save(adv_cat, save_path)

    # Accuracy over the attacked samples, i.e. the same `total` that is printed.
    final_acc = correct / float(total) if total else 0.0
    print("Test Accuracy = {} / {} = {}".format(correct, total, final_acc))

    # Return the accuracy and the adversarial examples
    return final_acc, adv_examples



In [4]:
if __name__ == '__main__':
    # Load the dataset (CIFAR-10, per the helper's name).
    trainloader, testloader, classes = load_cifar_data()

    # Pick the device: CUDA when available, otherwise CPU.
    print("CUDA Available: ", torch.cuda.is_available())
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    criterion = nn.CrossEntropyLoss()

    # Initialize the network
    model = VGG11()
    model.to(device)

    # Load the saved weights. map_location remaps the stored tensors onto
    # `device`, so a checkpoint written on a GPU still loads on a CPU-only host.
    model.load_state_dict(
        torch.load('./checkpoints/model.th', map_location=device)['state_dict'])
    model.eval()

Files already downloaded and verified
Files already downloaded and verified
CUDA Available:  True


In [5]:
'''generate and save adversarial examples'''
# NOTE(review): these two lists are initialised here, but nothing in this cell
# appends to them.
accuracies = []
examples = []

# Run the DeepFool attack over the test loader with the default num_classes.
# `acc` receives the returned accuracy and `ex` the returned list of
# adversarial examples; deepfool_attack also prints a summary line and writes
# the collected adversarial images to disk.
acc, ex = deepfool_attack(model, device, testloader)

Test Accuracy = 0 / 7867 = 0.0
