In [1]:
""" Notebook to generate an L-infinity PGD attack with an LPIPS-regularized loss
(specifically to beat the Madry challenge)

Steps to get this done:
-1) Import the required modules
 0) Load the dataset, the normalizer, and the adversarially trained net
 1) Build the attack parameters
 2) Check efficacy on a small dataset
 3) Build the Madry dataset
"""
# NOTE(review): a bare `print` is a Python 2 statement that writes an empty line.
# Presumably it is placed last so that the string above is not echoed as the
# cell's output value -- confirm before removing.
print





In [2]:
# Universal import block 
# Block to get the relative imports working 
import os
import sys 
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import torch
import torch.nn as nn

import config
import prebuilt_loss_functions as plf
import loss_functions as lf 
import utils.pytorch_utils as utils
import utils.image_utils as img_utils
import cifar10.cifar_loader as cifar_loader
import cifar10.cifar_resnets as cifar_resnets
import adversarial_attacks as aa
import adversarial_training as advtrain
import adversarial_evaluation as adveval
import utils.checkpoints as checkpoints

In [3]:
# Block 0: dataset, normalizer, and adversarially trained network

# Validation split is requested with normalize=False; a separate normalizer
# object is built below and handed to the evaluation/attack objects instead.
val_loader = cifar_loader.load_cifar_data(
    'val',
    normalize=False,
    batch_size=128,
    use_gpu=True,
)

# Normalizer configured from the CIFAR-10 statistics stored in config
cifar_normer = utils.DifferentiableNormalize(
    mean=config.CIFAR10_MEANS,
    std=config.CIFAR10_STDS,
)

# Fresh resnet32 that receives the weights stored in 'half_trained_madry.th'
base_model = cifar_resnets.resnet32()
adv_trained_net = checkpoints.load_state_dict_from_filename(
    'half_trained_madry.th',
    base_model,
)

# Evaluation helper shared by every attack sweep further down
eval_obj = adveval.AdversarialEvaluation(adv_trained_net, cifar_normer)



Files already downloaded and verified


In [8]:
# Block 1: attack parameters
# Keyword arguments shared by every attack built in this notebook.
# NOTE(review): the 8/255 bound and 0.5/255 step look like pixel-scale values
# for images in [0, 1] -- confirm against aa.LInfPGD.
ATTACK_KWARGS = dict(
    l_inf_bound=8.0 / 255.0,
    step_size=0.5 / 255.0,
    num_iterations=20,
    random_init=True,
    signed=True,
    verbose=False,
)

# Wrapper in the shape expected by the attack_specific_params argument
ATTACK_SPECIFIC_PARAMS = dict(attack_kwargs=ATTACK_KWARGS)

def build_attack_loss(classifier, normalizer, lpips_penalty, use_gpu=True):
    """ Builds a regularized loss function for use in PGD
    Takes in (perturbed_examples, labels) and returns
    XEntropy(perturbed_examples, labels) + hyperparam * LPIPS(examples, perturbed_examples)
    ARGS:
        classifier: network passed through to plf.PerceptualXentropy
        normalizer: normalizer object passed through as the `normalizer` kwarg
        lpips_penalty: value used as the `regularization_constant`
                       (the `hyperparam` in the formula above)
        use_gpu: forwarded to plf.PerceptualXentropy; defaults to True, which
                 is the value that used to be hard-coded here, so existing
                 three-argument calls behave exactly as before
    RETURNS:
        the object returned by plf.PerceptualXentropy
    """
    return plf.PerceptualXentropy(classifier, normalizer=normalizer,
                                  regularization_constant=lpips_penalty,
                                  use_gpu=use_gpu)

# Sweep of LPIPS penalty strengths; one attack configuration is built per value
penalties = [0.0, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
attack_params = {}
for penalty in penalties:
    # The penalty is handed over negated as the regularization constant
    loss_obj = build_attack_loss(adv_trained_net, cifar_normer, -penalty)
    attack_obj = aa.LInfPGD(adv_trained_net, cifar_normer, loss_obj,
                            use_gpu=True)
    attack_param = advtrain.AdversarialAttackParameters(
        attack_obj,
        1.0,
        attack_specific_params=ATTACK_SPECIFIC_PARAMS,
    )
    # Keyed by the string form of the (positive) penalty, e.g. '0.01'
    attack_params[str(penalty)] = attack_param

# Left as the final expression so the returned value is shown as the cell output
eval_obj.evaluate_ensemble(val_loader, attack_params,
                           use_gpu=True, num_minibatches=10)

Starting minibatch 0...
	 (mb: 0) evaluating 10.0...
	 (mb: 0) evaluating 1.0...
	 (mb: 0) evaluating 0.1...
	 (mb: 0) evaluating 0.0...
	 (mb: 0) evaluating 0.01...
	 (mb: 0) evaluating 100.0...
	 (mb: 0) evaluating 0.001...
Starting minibatch 1...
	 (mb: 1) evaluating 10.0...
	 (mb: 1) evaluating 1.0...
	 (mb: 1) evaluating 0.1...
	 (mb: 1) evaluating 0.0...
	 (mb: 1) evaluating 0.01...
	 (mb: 1) evaluating 100.0...
	 (mb: 1) evaluating 0.001...
Starting minibatch 2...
	 (mb: 2) evaluating 10.0...
	 (mb: 2) evaluating 1.0...
	 (mb: 2) evaluating 0.1...
	 (mb: 2) evaluating 0.0...
	 (mb: 2) evaluating 0.01...
	 (mb: 2) evaluating 100.0...
	 (mb: 2) evaluating 0.001...
Starting minibatch 3...
	 (mb: 3) evaluating 10.0...
	 (mb: 3) evaluating 1.0...
	 (mb: 3) evaluating 0.1...
	 (mb: 3) evaluating 0.0...
	 (mb: 3) evaluating 0.01...
	 (mb: 3) evaluating 100.0...
	 (mb: 3) evaluating 0.001...
Starting minibatch 4...
	 (mb: 4) evaluating 10.0...
	 (mb: 4) evaluating 1.0...
	 (mb: 4) evalu

{'0.0': 0.496875,
 '0.001': 0.49609375,
 '0.01': 0.49921875,
 '0.1': 0.5,
 '1.0': 0.553125,
 '10.0': 0.74140625,
 '100.0': 0.82265625,
 'ground': 0.83359375}

In [None]:

# Step 2: l2 norms 
# --- make l2 + xentropy loss 
class L2Xentropy(lf.RegularizedLoss):
    """ Cross-entropy loss combined with an l2-distance regularization term """

    def __init__(self, classifier, normalizer=None,
                 regularization_constant=-100.0):
        """ Assembles the two component losses and their weights
        ARGS:
            classifier: network handed to lf.PartialXentropy
            normalizer: forwarded to lf.PartialXentropy (default None)
            regularization_constant: weight of the 'l2_reg' term
                                     (default -100.0); the 'xentropy' term
                                     always has weight 1.0
        """
        # Component losses, keyed by name; the l2 term is constructed with None
        component_losses = {
            'xentropy': lf.PartialXentropy(classifier, normalizer=normalizer),
            'l2_reg': lf.L2Regularization(None),
        }
        # Scalar weight for each component, using the same keys
        component_weights = {
            'xentropy': 1.0,
            'l2_reg': regularization_constant,
        }
        super(L2Xentropy, self).__init__(component_losses, component_weights)
# --- /make l2 + xentropy loss
# Build the l2-regularized loss and wrap it in the same L-inf PGD attack setup
l2_loss = L2Xentropy(adv_trained_net, normalizer=cifar_normer)
l2_attack_obj = aa.LInfPGD(adv_trained_net, cifar_normer, l2_loss,
                           use_gpu=True)
l2_param = advtrain.AdversarialAttackParameters(
    l2_attack_obj,
    1.0,
    attack_specific_params=ATTACK_SPECIFIC_PARAMS,
)
# Result is stored in `output` rather than displayed as the cell value
output = eval_obj.evaluate_ensemble(val_loader, {'l2': l2_param},
                                    use_gpu=True)



In [None]:
# Take the first item yielded by a fresh iterator over val_loader and unpack it
# into two names (presumably a batch of examples and their labels -- confirm).
mini_ex, mini_label = next(iter(val_loader))

In [None]:
from torch.autograd import Variable
# Disabled debugging aid: print(l2_loss.losses['l2_reg'].fix_im.is_cuda)

# Move the sampled minibatch onto the GPU and wrap it for autograd; only the
# examples request gradients, the labels do not.
vini_ex = Variable(mini_ex.cuda(), requires_grad=True)
vini_label = Variable(mini_label.cuda(), requires_grad=False)

# Call the l2-regularized loss directly on the wrapped minibatch
loss_out = l2_loss.forward(vini_ex, vini_label)

# Parenthesized form: prints the same for a single argument under Python 2 and
# is also valid Python 3 (the bare `print loss_out` statement is a SyntaxError
# there).
print(loss_out)
