In [1]:
import numpy as np
from torch.autograd import Variable
import torch as torch
import copy
from torch.autograd.gradcheck import zero_gradients
import os
os.chdir('..')
from dataloader import *
from models.vgg import VGG11
import torch.optim as optim

In [6]:
'''https://github.com/Harry24k/CW-pytorch/blob/master/CW.ipynb'''
def adv_attack(images, labels, model, device, targeted=False, c=1e-4, kappa=0, max_iter=1000, learning_rate=0.01):
    """Craft Carlini-Wagner (L2) style adversarial examples for ``images``.

    The perturbed image is parameterised as ``0.5 * (tanh(w) + 1)`` so that
    every pixel stays inside ``[0, 1]``; ``w`` is optimised with Adam to
    minimise ``sum((a - images) ** 2) + sum(c * f(a))``.

    Args:
        images: Batch of input images. NOTE(review): the tanh
            parameterisation implies pixel values in ``[0, 1]`` -- confirm
            the dataloader does not normalise with mean/std.
        labels: Integer class indices, one per image. They are the true labels
            when ``targeted`` is False and the desired labels when it is True.
        model: Callable returning a ``(logits, extra)`` tuple; only the
            logits are used.
        device: Device on which the optimisation runs.
        targeted: If True, push the prediction towards ``labels``; otherwise
            push it away from ``labels``.
        c: Weight of the classification term relative to the L2 distance.
        kappa: Confidence margin; the classification term stops decreasing
            once the logit gap exceeds ``kappa``.
        max_iter: Maximum number of Adam steps.
        learning_rate: Adam learning rate.

    Returns:
        A detached tensor with the same shape as ``images`` holding the
        adversarial images (values in ``[0, 1]``).
    """
    # Only ``w`` is optimised, so cut the inputs out of any autograd graph the
    # caller may have attached them to.
    images = images.detach().to(device)
    labels = labels.to(device)

    def f(x):
        """Margin between the labelled logit and the best other logit."""
        outputs, _ = model(x)

        # Build the mask on the same device as the logits; indexing a CPU
        # identity matrix with device-resident labels fails on CUDA.
        label_mask = torch.nn.functional.one_hot(
            labels.long(), num_classes=outputs.shape[1]
        ).bool()

        # Exclude the labelled class with -inf. Multiplying it by zero would
        # leave a 0 that wins the max whenever every other logit is negative,
        # which silently zeroes the gradient of this term.
        best_other = outputs.masked_fill(label_mask, float('-inf')).max(dim=1)[0]
        label_logit = torch.masked_select(outputs, label_mask)

        # Targeted: make the labelled (target) class beat every other class.
        if targeted:
            return torch.clamp(best_other - label_logit, min=-kappa)

        # Untargeted: make some other class beat the labelled (true) class.
        return torch.clamp(label_logit - best_other, min=-kappa)

    # Created directly as a leaf tensor on the right device so Adam can update it.
    # w = 0 starts the search from a uniform 0.5 image, not from ``images``.
    w = torch.zeros_like(images, requires_grad=True)
    optimizer = optim.Adam([w], lr=learning_rate)
    prev = 1e10

    # Guard against max_iter < 10, which would otherwise give a modulo by zero.
    check_every = max(max_iter // 10, 1)

    for step in range(max_iter):

        a = 0.5 * (torch.tanh(w) + 1)

        loss1 = torch.nn.functional.mse_loss(a, images, reduction='sum')
        loss2 = torch.sum(c * f(a))

        cost = loss1 + loss2

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # Early stop when the loss has gone up since the previous checkpoint.
        if step % check_every == 0:
            # Keep a plain float so the previous graph can be freed.
            cost_value = cost.item()
            if cost_value > prev:
                print('Attack Stopped due to CONVERGENCE....')
                return a.detach()
            prev = cost_value

        print('- Learning Progress : %2.2f %%        ' % ((step + 1) / max_iter * 100), end='\r')

    attack_images = 0.5 * (torch.tanh(w) + 1)

    return attack_images.detach()

In [9]:
def cw_attack(model, device, dataloader, target_class=3, save_path='./data/adv_cat_cw.pt'):
    """Run ``adv_attack`` on every correctly classified sample of ``dataloader``.

    Samples the model already misclassifies are skipped. Successful
    adversarial images that the model labels as ``target_class`` are
    accumulated and written to ``save_path`` after each new hit, so partial
    results survive an interrupted run.

    Args:
        model: Callable returning a ``(logits, extra)`` tuple; it is switched
            to eval mode before any forward pass.
        device: Device on which the data and the attack are placed.
        dataloader: Iterable of ``(data, target)`` pairs with a length. The
            ``.item()`` calls below require exactly one sample per batch.
        target_class: Predicted class index whose adversarial images are saved.
        save_path: Destination file for the saved adversarial images.

    Returns:
        ``(final_acc, adv_examples)`` where ``final_acc`` is the number of
        samples still classified correctly after the attack divided by
        ``len(dataloader)`` (``0.0`` for an empty loader), and
        ``adv_examples`` is a list of
        ``(initial_prediction, final_prediction, adversarial_image_ndarray)``
        tuples, one per attacked sample.
    """
    correct = 0
    total = 0
    adv_examples = []
    adv_cat = torch.tensor([])

    # Put the model in eval mode before the first forward pass, not only
    # after the first attack has already run.
    model.eval()

    for data, target in dataloader:
        data, target = data.to(device), target.to(device)

        # The attack optimises its own variable, so the clean forward pass
        # needs no autograd graph.
        with torch.no_grad():
            output, _ = model(data)
        init_pred = output.max(1, keepdim=True)[1]  # index of the largest logit

        # Already misclassified: nothing to attack.
        if init_pred.item() != target.item():
            continue

        perturbed_data = adv_attack(data, target, model, device)

        # Re-classify the perturbed image.
        with torch.no_grad():
            output, _ = model(perturbed_data)

        final_pred = output.max(1, keepdim=True)[1]
        if final_pred.item() == target.item():
            correct += 1  # attack failed, prediction unchanged
        elif final_pred.item() == target_class:
            # Attack succeeded and landed on the class of interest.
            adv_cat = torch.cat([adv_cat, perturbed_data.detach().cpu()], dim=0)
            torch.save(adv_cat, save_path)

        adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
        adv_examples.append((init_pred.item(), final_pred.item(), adv_ex))
        total += 1

    # Accuracy under attack over the whole loader; skipped samples count as
    # wrong because the model misclassified them before any attack.
    num_samples = len(dataloader)
    final_acc = correct / float(num_samples) if num_samples else 0.0

    # Print the denominator that was actually used so the equation is true.
    print("Test Accuracy = {} / {} = {}".format(correct, num_samples, final_acc))
    print("Attacked samples (initially correct) = {}".format(total))

    return final_acc, adv_examples



In [4]:
if __name__ == '__main__':
    # Load the CIFAR-10 loaders and class names from the project dataloader.
    trainloader, testloader, classes = load_cifar_data()

    # Pick the GPU when one is available, otherwise fall back to the CPU.
    print("CUDA Available: ", torch.cuda.is_available())
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Referenced through ``torch`` so the cell does not depend on ``nn``
    # leaking in from a star-import.
    criterion = torch.nn.CrossEntropyLoss()

    # Initialize the network
    model = VGG11()
    model.to(device)

    # Restore the trained weights. ``map_location`` lets a checkpoint holding
    # CUDA tensors load on a CPU-only machine.
    # NOTE: torch.load unpickles the file -- only load checkpoints you trust.
    checkpoint = torch.load('./checkpoints/model.th', map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

Files already downloaded and verified
Files already downloaded and verified
CUDA Available:  True


In [None]:
'''generate and save adversarial examples'''
# Empty result containers; this cell does not fill them.
accuracies, examples = [], []

# Attack the test set: ``acc`` is the accuracy under attack and ``ex`` the
# list of per-sample adversarial examples returned by cw_attack.
acc, ex = cw_attack(
    model,
    device,
    testloader,
)

Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE....
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONVERGENCE.... 
Attack Stopped due to CONV