In [2]:
import numpy as np
from torch.autograd import Variable
import torch as torch
import copy
from torch.autograd.gradcheck import zero_gradients
import os
os.chdir('..')
from dataloader import *
from models.vgg import VGG11
import torch.optim as optim
import random

In [242]:
'''https://github.com/ast0414/adversarial-example/blob/master/craft.py'''
def compute_jacobian(inputs, output):
    num_classes = output.size()[1]

    jacobian = torch.zeros(num_classes, *inputs.size()).cuda()
    
    for i in range(num_classes):
        zero_gradients(inputs)
        output[0, i].backward(retain_graph=True)
        jacobian[i] = inputs.grad.data

    return torch.transpose(jacobian, dim0=0, dim1=1)


def saliency_map(jacobian, search_space, target_index):
    jacobian = jacobian.squeeze(0)
    alpha = jacobian[target_index]
    beta = jacobian.sum(0) - alpha

    mask1 = torch.ge(alpha, 0.0)
    mask2 = torch.le(beta, 0.0)

    mask = torch.mul(torch.mul(mask1, mask2), search_space)

    saliency_map = torch.mul(torch.mul(alpha, torch.abs(beta)), mask.float())
    saliency_map = saliency_map.sum(0).sum(0)
    
    row_idx, col_idx = (saliency_map == torch.max(saliency_map)).nonzero()[0]
    return row_idx, col_idx


def adv_attack(model, input_tensor, source_class, target_class, max_iter=100, clip_min=0.0, clip_max=1.0):

    # Make a clone since we will alter the values
    input_features = copy.deepcopy(input_tensor)
    x = Variable(input_features, requires_grad=True)
    count = 0

    output, _ = model.forward(x)
    _, source_class = torch.max(output.data, 1)
    
    # if attack is successful or reach the maximum number of iterations
    while (count < max_iter) and (source_class.item() != target_class.item()):
        
        search_space = (x.data[0] > clip_min) & (x.data[0] < clip_max)
        
        # Calculate Jacobian
        jacobian = compute_jacobian(x, output)
        
        # get the highest saliency map's index
        row_idx, col_idx = saliency_map(jacobian, search_space, target_class)
        
        # increase to its maximum value
        x.data[0, :, row_idx, col_idx] = clip_max
        
        # recompute prediction
        output, _ = model.forward(x)
        source_class = output.max(1, keepdim=True)[1]

        count += 1

    return x

In [243]:
def jsma_attack(model, device, dataloader):

    # Accuracy counter
    correct = 0
    total = 0
    adv_examples = []
    ct_save = 0
    adv_cat = torch.tensor([])
    
    # Loop over all examples in test set
    for data, target in dataloader:
        data, target = data.to(device), target.to(device)
        # Set requires_grad attribute of tensor. Important for Attack
        data.requires_grad = True
        
        # Forward pass the data through the model
        output, _ = model.forward(data)
        init_pred = output.max(1, keepdim=True)[1]  # get the index of the max log-probability
        
        if init_pred.item() != target.item():  # initially was incorrect --> no need to generate adversary
            continue
        
        # Call Attack
        # randomly select a target class
        target_class = init_pred
        while target_class == init_pred:
            target_class = torch.randint(0, output.size()[1], (1,)).to(device)
        perturbed_data = adv_attack(model, data, source_class=init_pred, target_class=target_class)
    
        # Re-classify the perturbed image
        model.eval()
        with torch.no_grad():
            output, _ = model(perturbed_data)

        # Check for success
        final_pred = output.max(1, keepdim=True)[1]
        if final_pred.item() == target.item():
            correct += 1  # still correct
        else:  # attack is successful
            if final_pred.item() == 3:
                adv_cat = torch.cat([adv_cat, perturbed_data.detach().cpu()], dim=0)
                torch.save(adv_cat, './data/adv_cat_jsma.pt')

        adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
        adv_examples.append((init_pred.item(), final_pred.item(), adv_ex))
        total += 1

    # Calculate final accuracy for this epsilon
    final_acc = correct / float(len(dataloader))
    print("Test Accuracy = {} / {} = {}".format(correct, total, final_acc))

    # Return the accuracy and an adversarial example
    return final_acc, adv_examples



In [233]:
if __name__ == '__main__':
    '''load cifar10 dataset'''
    trainloader, testloader, classes = load_cifar_data()

    '''define model'''
    print("CUDA Available: ", torch.cuda.is_available())
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    criterion = nn.CrossEntropyLoss()
    
    # Initialize the network
    model = VGG11()
    model.to(device)

    '''load saved model'''
    model.load_state_dict(torch.load('./checkpoints/model.th')['state_dict'])
    model.eval()

Files already downloaded and verified
Files already downloaded and verified
CUDA Available:  True


In [244]:
'''generate and save adversarial examples'''
accuracies = []
examples = []

acc, ex = jsma_attack(model, device, testloader)

Test Accuracy = 507 / 7867 = 0.0507
