In [1]:
import torch
import torch.backends.cudnn as cudnn
from torchvision import transforms, datasets


from networks.resnet_big import SupConResNet,LinearClassifier
from losses import SupConLoss
from adv_train import PGDCons , PGDConsMulti

from main_ce import set_loader
from adv_train import PGDAttack
import torch.optim as optim
import torch.nn as nn
from torchvision import transforms, datasets

from autoattack import AutoAttack

RuntimeError: module compiled against API version 0xe but this version of numpy is 0xd

In [2]:
def set_model_linear():
    model = SupConResNet(name='resnet18')
    criterion = torch.nn.CrossEntropyLoss()

    classifier = LinearClassifier(name='resnet18', num_classes=10)

    ckpt = torch.load('save/SupCon/cifar10_models/SupCon_cifar10_resnet18_lr_0.1_decay_0.0005_bsz_300_temp_0.07_trial_0,_pgdMultiTrue_pgd_train_steps10_normalizeout_cosine_warm/last.pth',
    map_location='cpu')
    state_dict = ckpt['model']
    classifier_state = torch.load('classifier_trades_NormalizeOut_bsz256.pth', map_location='cpu' )
    if torch.cuda.is_available():
        if torch.cuda.device_count() > 1:
            model.encoder = torch.nn.DataParallel(model.encoder, device_ids = [0,1])
        else:
            new_state_dict = {}
            for k, v in state_dict.items():
                k = k.replace("module.", "")
                new_state_dict[k] = v
            state_dict = new_state_dict
        model = model.cuda()
        classifier = classifier.cuda()
        criterion = criterion.cuda()
        cudnn.benchmark = True


        model.load_state_dict(state_dict)
        classifier.load_state_dict(classifier_state)

    return model, classifier, criterion


def set_loader_linear():

    mean = (0.4914, 0.4822, 0.4465)
    std = (0.2023, 0.1994, 0.2010)

    normalize = transforms.Normalize(mean=mean, std=std)

    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # normalize,
    ])

    val_transform = transforms.Compose([
        transforms.ToTensor(),
        # normalize,
    ])

    train_dataset = datasets.CIFAR10(root='./datasets/',
                                        transform=train_transform,
                                        download=True)
    val_dataset = datasets.CIFAR10(root='./datasets/',
                                    train=False,
                                    transform=val_transform)


    train_sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=16, shuffle=(train_sampler is None),
        num_workers=8, pin_memory=True, sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1000, shuffle=False,
        num_workers=8, pin_memory=True)

    return train_loader, val_loader


def set_optimizer( model):
    optimizer = optim.SGD(model.parameters(),
                          lr=0.1,
                          momentum=0.9,
                          weight_decay=0.0005)
    return optimizer

In [3]:
class ClassifierModel(nn.Module):
    """Linear classifier"""
    def __init__(self, encoder, linearClassifier):
        super(ClassifierModel, self).__init__()
        self.encoder = encoder
        self.linearClassifier = linearClassifier
        self.mu = torch.Tensor([0.4914, 0.4822, 0.4465]).float().view(3, 1, 1).cuda()
        self.sigma = torch.Tensor((0.2023, 0.1994, 0.2010)).float().view(3, 1, 1).cuda()

    def forward(self, x):
        x = (x - self.mu) / self.sigma
        return self.linearClassifier(self.encoder(x))

In [4]:
train_loader, val_loader = set_loader_linear()

# build model and criterion
model, classifier, criterion = set_model_linear()

# build optimizer
optimizer = set_optimizer(classifier)

# test_attack = PGDAttack(model, classifier, eps=8./255., alpha = 2./255., steps=50)

CModel = ClassifierModel(model.encoder, classifier)

Files already downloaded and verified


In [5]:
import os
save_dir = './results'
# create save dir
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# load attack    

adversary = AutoAttack(CModel, norm='Linf', eps=8./255., version='standard', log_path='./log_file.txt')
adversary.attacks_to_run = ['apgd-ce']
l = [x for (x, y) in val_loader]
x_test = torch.cat(l, 0)
l = [y for (x, y) in val_loader]
y_test = torch.cat(l, 0)

# example of custom version
# if version == 'custom':
#     adversary.attacks_to_run = ['apgd-ce', 'fab']
    # adversary.apgd.n_restarts = 2
    # adversary.fab.n_restarts = 2

# run attack and save images
with torch.no_grad():
        adv_complete = adversary.run_standard_evaluation(x_test, y_test,bs=500)

setting parameters for standard version
using standard version including apgd-ce
initial accuracy: 79.16%
apgd-ce - 1/16 - 195 out of 500 successfully perturbed
apgd-ce - 2/16 - 216 out of 500 successfully perturbed
apgd-ce - 3/16 - 208 out of 500 successfully perturbed
apgd-ce - 4/16 - 206 out of 500 successfully perturbed
apgd-ce - 5/16 - 209 out of 500 successfully perturbed
apgd-ce - 6/16 - 195 out of 500 successfully perturbed
apgd-ce - 7/16 - 192 out of 500 successfully perturbed
apgd-ce - 8/16 - 195 out of 500 successfully perturbed
apgd-ce - 9/16 - 222 out of 500 successfully perturbed
apgd-ce - 10/16 - 208 out of 500 successfully perturbed
apgd-ce - 11/16 - 211 out of 500 successfully perturbed
apgd-ce - 12/16 - 213 out of 500 successfully perturbed
apgd-ce - 13/16 - 205 out of 500 successfully perturbed
apgd-ce - 14/16 - 212 out of 500 successfully perturbed
apgd-ce - 15/16 - 202 out of 500 successfully perturbed
apgd-ce - 16/16 - 170 out of 416 successfully perturbed
robust 

In [8]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in val_loader:
        images, labels = data
        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)
        # calculate outputs by running images through the network
        outputs = CModel(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

Accuracy of the network on the 10000 test images: 84 %


In [10]:
test_attack.set_normalization_used(mean = (0.4914, 0.4822, 0.4465), std = (0.2023, 0.1994, 0.2010))

In [12]:
images, labels = next(iter(train_loader))
images = images.cuda(non_blocking=True)
labels = labels.cuda(non_blocking=True)
bsz = labels.shape[0]
adv_images = test_attack(images, labels)

In [12]:
with torch.no_grad():
    features = model.encoder(images)
output = classifier(features.detach())
loss = criterion(output, labels)
loss

tensor(2.3043, device='cuda:0', grad_fn=<NllLossBackward0>)

In [13]:
with torch.no_grad():
    features = model.encoder(adv_images)
output = classifier(features.detach())
loss = criterion(output, labels)
loss

tensor(2.3116, device='cuda:0', grad_fn=<NllLossBackward0>)

In [13]:
output = CModel(images)
loss = criterion(output, labels)
loss

tensor(0.7145, device='cuda:0', grad_fn=<NllLossBackward0>)

In [14]:
output = CModel(adv_images)
loss = criterion(output, labels)
loss

tensor(0.7714, device='cuda:0', grad_fn=<NllLossBackward0>)

In [6]:
features = model(images)
f1, f2 = torch.split(features, [bsz, bsz], dim=0) #f1 and f2 -> torch.Size([bsz, 128]
features = torch.cat([f1.unsqueeze(1), f2.unsqueeze(1)], dim=1) #torch.Size([bsz, 2 (view), 128(feature dim)])
loss = criterion(features, labels)
loss

tensor(3.7763, device='cuda:0', grad_fn=<MeanBackward0>)

In [24]:
features = model(images)
features.shape

torch.Size([16, 128])

In [9]:
multi_atc = PGDConsMulti(model, eps=8./255, alpha=2./225, steps=10, random_start=True)

In [10]:
attacks = multi_atc(images, labels, loss = criterion)

device:  cuda:0


In [11]:
len(attacks)

10

In [37]:
images.shape

torch.Size([16, 3, 32, 32])

In [12]:
attacks[0].shape

torch.Size([16, 3, 32, 32])

In [None]:
features = model(images)
f1, f2 = torch.split(features, [bsz, bsz], dim=0) #f1 and f2 -> torch.Size([bsz, 128]
features = torch.cat([f1.unsqueeze(1), f2.unsqueeze(1)], dim=1) #torch.Size([bsz, 2 (view), 128(feature dim)])
loss = criterion(features, labels)

In [13]:
attacks.append(images)
len(attacks)

11

In [14]:
attacks = torch.cat(attacks, dim=0)

In [16]:
attacks.shape

torch.Size([176, 3, 32, 32])

In [17]:
features = model(attacks)
features.shape

torch.Size([176, 128])

In [18]:
pgd_steps = 10

In [19]:
fs = torch.split(features, [bsz for i in range(2*pgd_steps + 2)], dim=0)

In [20]:
fs[0].shape

torch.Size([8, 128])

In [21]:
features = torch.cat([f.unsqueeze(1) for f in fs], dim=1) #torch.Size([bsz, 2 (view), 128(feature dim)])
loss = criterion(features, labels)
loss

tensor(7.4490, device='cuda:0', grad_fn=<MeanBackward0>)

In [22]:
features.shape

torch.Size([8, 22, 128])