In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch
import torchvision
import torchvision.transforms as transforms

import numpy as np

In [2]:
class ConvNet(nn.Module):
    """Small convolutional classifier for single-channel images.

    Architecture (attribute names and ``Sequential`` indices are part of the
    checkpoint layout and must not change):

    * ``layer1``: two unpadded 3x3 convolutions (1 -> 64 -> 64) with ReLU,
      followed by 2x2 max pooling.
    * ``layer2``: two unpadded 3x3 convolutions (64 -> 128 -> 128) with ReLU,
      followed by 2x2 max pooling.
    * ``layer3``: ``Linear(128 * 4 * 4, 256)`` + ReLU.
    * ``layer4``: ``Linear(256, 256)`` + ReLU + ``Dropout(0.5)``.
    * ``layer5``: ``Linear(256, num_classes)`` producing the logits.

    Because ``layer3`` expects ``128 * 4 * 4`` features, the input's spatial
    size must reduce to 4x4 after ``layer2`` (this holds for 28x28 images).

    Args:
        num_classes: Number of output logits.
        return_intermediate: When true, ``forward`` returns
            ``(logits, activations)`` instead of just ``logits``.
        minus_max: When true, the per-sample maximum logit is subtracted from
            every logit of that sample, so the largest logit in each row is 0.
    """

    def __init__(self, num_classes, return_intermediate=False, minus_max=False):
        super().__init__()
        self.return_intermediate = return_intermediate
        self.minus_max = minus_max

        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2))
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.layer3 = nn.Sequential(
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(),
        )

        self.layer4 = nn.Sequential(
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Dropout(0.5)
        )

        self.layer5 = nn.Sequential(
            nn.Linear(256, num_classes)
        )

        # Plain Python list kept for convenient iteration over the hidden
        # stages. The modules are already registered through the attributes
        # above, so this list adds nothing to the state_dict.
        self.layers = [self.layer1, self.layer2, self.layer3, self.layer4]

    def forward(self, x):
        """Compute logits for a batch of images.

        Args:
            x: Input batch with one channel, i.e. shaped ``(N, 1, H, W)``,
                where ``H`` and ``W`` reduce to 4x4 after ``layer2``.

        Returns:
            ``logits`` of shape ``(N, num_classes)``; or, when
            ``return_intermediate`` is set, the tuple ``(logits, relus)``
            where ``relus`` lists the outputs of ``layer1``, the flattened
            output of ``layer2``, and the outputs of ``layer3`` and ``layer4``.

        Raises:
            RuntimeError: If the flattened feature size does not match what
                ``layer3`` expects (wrong input spatial size).
        """
        relus = list()

        output = self.layer1(x)
        relus.append(output)

        output = self.layer2(output)
        # Flatten everything except the batch axis. Using view(-1, 2048) here
        # would silently fold surplus features into extra batch rows whenever
        # the feature count is a multiple of 2048; keeping the batch axis
        # fixed makes a wrongly sized input fail loudly in layer3 instead.
        output = torch.flatten(output, start_dim=1)
        relus.append(output)

        output = self.layer3(output)
        relus.append(output)

        output = self.layer4(output)
        relus.append(output)

        logits = self.layer5(output)

        if self.minus_max:
            # Shift each row so that its largest logit becomes exactly 0.
            logits = logits - torch.max(logits, dim=1, keepdim=True)[0]

        if self.return_intermediate:
            return logits, relus
        return logits

In [3]:
from advertorch.attacks import GradientSignAttack
from advertorch.utils import predict_from_logits


# Baseline robustness evaluation: attack the plain model (minus_max=False)
# with a single-step gradient-sign attack and report accuracy on the
# perturbed MNIST test set.
torch.manual_seed(1)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ConvNet(num_classes=10, return_intermediate=False, minus_max=False).to(device)

MODEL_PATH = 'model_best.pth.tar'
# map_location keeps the load working on CPU-only machines even when the
# checkpoint tensors were saved from a CUDA device.
checkpoint = torch.load(MODEL_PATH, map_location=device)
model.load_state_dict(checkpoint['state_dict'])

# Evaluation mode disables the Dropout in layer4.
model.eval()


transform = transforms.ToTensor()
testset = torchvision.datasets.MNIST(root='./data/mnist', train=False, download=True, transform=transform)
# Page-locked host memory only helps host-to-GPU copies, so request it only
# when a CUDA device is actually in use.
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=64, pin_memory=(device.type == 'cuda'), shuffle=False)


adversary = GradientSignAttack(
    model, loss_fn=nn.CrossEntropyLoss(reduction='sum'), eps=0.3,
    clip_min=0.0, clip_max=1.0, targeted=False)

# run an evaluation of our model against adversarial examples
correct = 0
total = 0
for xs, ys in test_loader:
    xs, ys = xs.to(device), ys.to(device)
    # Crafting the adversarial batch must stay outside no_grad: the attack
    # is gradient-based.
    adv = adversary.perturb(xs, ys)
    # Scoring the adversarial batch needs no autograd graph.
    with torch.no_grad():
        preds = predict_from_logits(model(adv))
    correct += (preds == ys).sum().item()
    total += len(xs)

acc = float(correct) / total
print(" [*] Model Robustness Evaluation: Accuracy %.3f" % (acc * 100))

 [*] Model Robustness Evaluation: Accuracy 9.800


In [4]:
from advertorch.attacks import GradientSignAttack
from advertorch.utils import predict_from_logits


# Same robustness evaluation as the baseline, but with minus_max=True so the
# model subtracts each sample's maximum logit before returning.
# NOTE(review): that subtraction shifts every logit of a row by the same
# amount, which leaves softmax cross-entropy and the per-row ordering of the
# logits mathematically unchanged; the reported accuracy is therefore
# expected to match the minus_max=False run.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ConvNet(num_classes=10, return_intermediate=False, minus_max=True).to(device)


MODEL_PATH = 'model_best.pth.tar'
# map_location keeps the load working on CPU-only machines even when the
# checkpoint tensors were saved from a CUDA device.
checkpoint = torch.load(MODEL_PATH, map_location=device)
model.load_state_dict(checkpoint['state_dict'])


# Evaluation mode disables the Dropout in layer4.
model.eval()

transform = transforms.ToTensor()
testset = torchvision.datasets.MNIST(root='./data/mnist', train=False, download=True, transform=transform)
# Page-locked host memory only helps host-to-GPU copies, so request it only
# when a CUDA device is actually in use.
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=64, pin_memory=(device.type == 'cuda'), shuffle=False)


adversary = GradientSignAttack(
    model, loss_fn=nn.CrossEntropyLoss(reduction='sum'), eps=0.3,
    clip_min=0.0, clip_max=1.0, targeted=False)

# run an evaluation of our model against adversarial examples
correct = 0
total = 0
for xs, ys in test_loader:
    xs, ys = xs.to(device), ys.to(device)
    # Crafting the adversarial batch must stay outside no_grad: the attack
    # is gradient-based.
    adv = adversary.perturb(xs, ys)
    # Scoring the adversarial batch needs no autograd graph.
    with torch.no_grad():
        preds = predict_from_logits(model(adv))
    correct += (preds == ys).sum().item()
    total += len(xs)

acc = float(correct) / total
print(" [*] Model Robustness Evaluation: Accuracy %.3f" % (acc * 100))

 [*] Model Robustness Evaluation: Accuracy 9.800
