## 1. Requirement

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from pandas import DataFrame

from art.attacks import FastGradientMethod
from art.attacks import DeepFool
from art.attacks import SaliencyMapMethod
from art.attacks import ProjectedGradientDescent
from art.classifiers import PyTorchClassifier
from art.utils import load_mnist

## 2. Model

In [2]:
class MnistModel(nn.Module):
    """Small CNN classifier: two conv/pool stages followed by two linear layers.

    The layer sizes assume single-channel 28x28 inputs (MNIST): both
    convolutions use kernel 5 with padding 2, so they keep the spatial size,
    and each 2x2 max-pool in ``forward`` halves it (28 -> 14 -> 7). The
    flattened feature vector therefore has 64 * 7 * 7 entries.
    """

    def __init__(self):
        super().__init__()
        # Grayscale input -> 32 feature maps.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, padding=2)
        # The 32 maps produced by conv1 are the input channels of conv2.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, padding=2)
        # The linear layers consume the conv features flattened to one
        # vector per sample (64 channels of 7x7 after the second pooling).
        self.fc1 = nn.Linear(64 * 7 * 7, 1024)
        self.fc2 = nn.Linear(1024, 10)

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1)."""
        # Stage 1: conv -> ReLU -> 2x2 max-pool.
        stage1 = F.relu(self.conv1(x))
        stage1 = F.max_pool2d(stage1, 2)

        # Stage 2: conv -> ReLU -> 2x2 max-pool.
        stage2 = F.relu(self.conv2(stage1))
        stage2 = F.max_pool2d(stage2, 2)

        # Flatten so the features fit the first linear layer.
        flat = stage2.view(-1, 64 * 7 * 7)

        hidden = F.relu(self.fc1(flat))
        # Dropout as a regulariser against overfitting; it is only active
        # while the module is in training mode (self.training is True).
        hidden = F.dropout(hidden, training=self.training)

        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

## 3. Load MNIST dataset

In [3]:
# load_mnist() hands back the train split, the test split and the pixel value
# bounds that are later passed to the ART classifiers as clip_values.
(x_train, y_train), (x_test, y_test), min_pixel_value, max_pixel_value = load_mnist()

# Swap axes 1 and 3 and cast to float32 so the arrays can be fed to the model.
# NOTE(review): assuming load_mnist() returns (N, 28, 28, 1), swapping 1<->3
# yields (N, 1, 28, 28) but also exchanges height and width. The saved
# checkpoints may have been trained on this layout - confirm before changing.
x_train, x_test = (
    np.swapaxes(images, 1, 3).astype(np.float32) for images in (x_train, x_test)
)

## 4. Load the models

In [4]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
is_cuda = torch.cuda.is_available()
if is_cuda:
    device = torch.device('cuda')
    print("CUDA available!")
else:
    device = torch.device('cpu')

CUDA available!


In [5]:
# Loaded models, in the order the cells append them:
# 0-clean (um), 1-fgsm, 2-pgd, 3-jsma, 4-deepfool.
models = []

model_path = './model/mnist_um_art.pth'
model = MnistModel().to(device)
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
# A freshly constructed nn.Module is in training mode, and
# MnistModel.forward() applies F.dropout(training=self.training).
# Switch to eval mode so inference and attack generation are deterministic.
model.eval()
models.append(model)

print("clean model loaded")

clean model loaded


In [6]:
# Model #1: checkpoint stored as mnist_fgsm_art.pth.
model_path = './model/mnist_fgsm_art.pth'
model = MnistModel().to(device)
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
# A freshly constructed nn.Module is in training mode, and
# MnistModel.forward() applies F.dropout(training=self.training).
# Switch to eval mode so inference is deterministic.
model.eval()
models.append(model)

print("Linf trained model1 loaded")

Linf trained model1 loaded


In [7]:
# Model #2: checkpoint stored as mnist_pgd_art.pth.
model_path = './model/mnist_pgd_art.pth'
model = MnistModel().to(device)
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
# A freshly constructed nn.Module is in training mode, and
# MnistModel.forward() applies F.dropout(training=self.training).
# Switch to eval mode so inference is deterministic.
model.eval()
models.append(model)

print("Linf trained model2 loaded")

Linf trained model2 loaded


In [8]:
# Model #3: checkpoint stored as mnist_jsma_art.pth.
model_path = './model/mnist_jsma_art.pth'
model = MnistModel().to(device)
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
# A freshly constructed nn.Module is in training mode, and
# MnistModel.forward() applies F.dropout(training=self.training).
# Switch to eval mode so inference is deterministic.
model.eval()
models.append(model)

print("L0 trained model loaded")

L0 trained model loaded


In [9]:
# Model #4: checkpoint stored as mnist_deepfool_art.pth.
model_path = './model/mnist_deepfool_art.pth'
model = MnistModel().to(device)
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
# A freshly constructed nn.Module is in training mode, and
# MnistModel.forward() applies F.dropout(training=self.training).
# Switch to eval mode so inference is deterministic.
model.eval()
models.append(model)

print("L2 trained model loaded")

L2 trained model loaded


## 5. Create the ART classifier

In [10]:
# A single loss object is shared by every wrapped model.
criterion = nn.CrossEntropyLoss()

# classifiers[k] wraps models[k]: 0-um, 1-fgsm, 2-pgd, 3-jsma, 4-deepfool
classifiers = []
for model in models:
    # Each model gets its own optimizer bound to its own parameters.
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    classifier = PyTorchClassifier(
        model=model,
        loss=criterion,
        optimizer=optimizer,
        input_shape=(1, 28, 28),
        nb_classes=10,
        # Pixel bounds as reported by load_mnist().
        clip_values=(min_pixel_value, max_pixel_value),
    )
    classifiers.append(classifier)

print("classifiers created")

classifiers created


## 6. Evaluate classifier on benign test examples

In [11]:
# Accuracy of every classifier on the unperturbed test set; benign_accuracy[k]
# belongs to classifiers[k].
benign_accuracy = []

# y_test is compared through argmax, i.e. treated as one-hot rows; the true
# class indices do not depend on the classifier, so compute them once.
y_test_labels = np.argmax(y_test, axis=1)

# Legend follows the order in which the models were appended to `models`.
print('model #:\t0-um, 1-fgsm, 2-pgd, 3-jsma, 4-deepfool\n')
for idx, classifier in enumerate(classifiers):
    # Bind the loop variable as `classifier`, the name used below. The earlier
    # misspelling `classifer` left every iteration scoring the stale global
    # `classifier`, i.e. the last model built.
    predictions = classifier.predict(x_test)
    accuracy = np.sum(np.argmax(predictions, axis=1) == y_test_labels) / len(y_test)
    print('model #{}\tAccuracy on benign test examples: {}%'.format(idx, accuracy * 100))
    benign_accuracy.append(accuracy)

model #:	0-um, 1-fgsm, 2-fgsm, 3-pgd, 4-jsma, 5-deepfool

model #0	Accuracy on benign test examples: 99.26%
model #1	Accuracy on benign test examples: 99.25%
model #2	Accuracy on benign test examples: 99.15%
model #3	Accuracy on benign test examples: 99.27%
model #4	Accuracy on benign test examples: 99.27%


## 7. Generate adversarial test examples

In [12]:
# Adversarial versions of x_test; this cell and the following ones append to
# the list in the order FGSM, PGD, JSMA, DeepFool.
adv_attacks = []
# All attacks are crafted against classifiers[0], the wrapper around the
# first loaded model (the "clean" checkpoint).
clean = classifiers[0]

# Fast Gradient Method with eps=0.3, applied to all of x_test.
Linf1_attack = FastGradientMethod(classifier=clean, eps=0.3)
Linf1_x_test_adv = Linf1_attack.generate(x=x_test)
adv_attacks.append(Linf1_x_test_adv)

print('FGSM example generated')

FGSM example generated


In [13]:
# Projected Gradient Descent with eps=0.3 against the clean classifier,
# applied to all of x_test; the result becomes adv_attacks[1].
# Every other attack parameter is left at the library default.
Linf2_attack = ProjectedGradientDescent(classifier=clean, eps=0.3)
Linf2_x_test_adv = Linf2_attack.generate(x=x_test)
adv_attacks.append(Linf2_x_test_adv)

print('PGD example generated')

PGD example generated


In [14]:
# Saliency Map Method (JSMA) against the clean classifier with library-default
# parameters, applied to all of x_test; the result becomes adv_attacks[2].
L0_attack = SaliencyMapMethod(classifier=clean)
L0_x_test_adv = L0_attack.generate(x=x_test)
adv_attacks.append(L0_x_test_adv)

print('JSMA example generated')

JSMA example generated


In [15]:
# DeepFool against the clean classifier with library-default parameters,
# applied to all of x_test; the result becomes adv_attacks[3].
L2_attack = DeepFool(classifier=clean)
L2_x_test_adv = L2_attack.generate(x=x_test)
adv_attacks.append(L2_x_test_adv)

print('Deepfool example generated')

Deepfool example generated


## 8. Evaluate each model's accuracy on the adversarial examples

In [18]:
# models_accuracy[i][j] is the accuracy of classifiers[i] on adv_attacks[j].
models_accuracy = []

# Legends follow the append order of `models` and `adv_attacks`. The previous
# model legend listed six entries although only five models are loaded.
print('model #:\t0-um, 1-fgsm, 2-pgd, 3-jsma, 4-deepfool')
print('test #:\t0-fgsm, 1-pgd, 2-jsma, 3-deepfool')

# y_test is compared through argmax, i.e. treated as one-hot rows; the true
# class indices are the same for every model and attack, so compute them once.
y_test_labels = np.argmax(y_test, axis=1)

# test order: 0-fgsm, 1-pgd, 2-jsma, 3-deepfool
for i, classifier in enumerate(classifiers):
    print('\nmodel #{}'.format(i))
    model_accuracy = []
    for j, x_test_adv in enumerate(adv_attacks):
        predictions = classifier.predict(x_test_adv)
        accuracy = np.sum(np.argmax(predictions, axis=1) == y_test_labels) / len(y_test)
        print('Accuracy on model with adversarial test #{} examples: {}%'.format(j, accuracy * 100))
        model_accuracy.append(accuracy)
    models_accuracy.append(model_accuracy)

model #:	0-um, 1-fgsm, 2-fgsm, 3-pgd, 4-jsma, 5-deepfool
test #:	0-fgsm, 1-pgd, 2-jsma, 3-deepfool

model #0
Accuracy on model with adversarial test #0 examples: 3.02%
Accuracy on model with adversarial test #1 examples: 0.74%
Accuracy on model with adversarial test #2 examples: 52.959999999999994%
Accuracy on model with adversarial test #3 examples: 40.550000000000004%

model #1
Accuracy on model with adversarial test #0 examples: 98.63%
Accuracy on model with adversarial test #1 examples: 85.88%
Accuracy on model with adversarial test #2 examples: 48.05%
Accuracy on model with adversarial test #3 examples: 79.19%

model #2
Accuracy on model with adversarial test #0 examples: 86.50999999999999%
Accuracy on model with adversarial test #1 examples: 98.59%
Accuracy on model with adversarial test #2 examples: 68.92%
Accuracy on model with adversarial test #3 examples: 86.66%

model #3
Accuracy on model with adversarial test #0 examples: 11.31%
Accuracy on model with adversarial test #1 ex

## 9. Result

In [20]:
# Summary table: one column per model (in the order the models were loaded),
# one row per attack (in the order the adversarial sets were generated).
# pandas is imported in the notebook's import cell instead of mid-notebook.
columns = ['clean', 'Linf1', 'Linf2', 'L0', 'L2']
idx = ['FGSM', 'PGD', 'JSMA', 'Deepfool']

# models_accuracy[k] is the list of per-attack accuracies of model k, so the
# k-th column name maps to the k-th entry. Deriving the dict from `columns`
# keeps the labels and positions from drifting apart.
data = {name: models_accuracy[position] for position, name in enumerate(columns)}

pd.DataFrame(data, columns=columns, index=idx)

Unnamed: 0,clean,Linf1,Linf2,L0,L2
FGSM,0.0302,0.9863,0.8651,0.1131,0.8408
PGD,0.0074,0.8588,0.9859,0.0314,0.6292
JSMA,0.5296,0.4805,0.6892,0.7189,0.7995
Deepfool,0.4055,0.7919,0.8666,0.6754,0.9874
