In [1]:
import keras
from keras.datasets import mnist
from keras.models import model_from_json
from keras.utils import np_utils
from keras.optimizers import RMSprop

from keras import backend as K

import numpy as np
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
def generate_p(x, label, model):
    """Return the sign of the gradient of one class output w.r.t. the model input.

    Parameters
    ----------
    x : numpy.ndarray
        A single sample holding 784 values; it is reshaped to ``(1, 784)``
        before being fed to the backend function.
    label : int-like
        Class index; converted with ``int()`` and used to select one column
        of ``model.output``.
    model : Keras model
        Must expose ``input`` and ``output`` tensors.

    Returns
    -------
    numpy.ndarray
        ``np.sign`` applied to the backend function's result. The function is
        built with a one-element output list, so the result presumably carries
        an extra leading axis, i.e. shape (1, 1, 784) -- TODO confirm.
    """
    target_score = model.output[:, int(label)]

    # Symbolic gradient of the selected class output with respect to the input.
    input_grad = K.gradients(target_score, model.input)[0]
    grad_fn = K.function([model.input], [input_grad])

    single_batch = x.reshape(1, 784)
    return np.sign(grad_fn([single_batch]))

In [3]:
def generate_adv(x, label, model, eps):
    """Build an adversarial version of ``x`` for the class ``label``.

    The sample is moved by ``eps`` against the gradient sign returned by
    ``generate_p``, clipped to the range [0, 1] and returned with shape
    ``(1, 784)``.

    Parameters
    ----------
    x : numpy.ndarray
        Sample holding 784 values.
    label : int-like
        Class index forwarded to ``generate_p``.
    model : Keras model
        Forwarded to ``generate_p``.
    eps : float
        Magnitude of the perturbation applied to every input value.

    Returns
    -------
    numpy.ndarray
        The perturbed sample, shape ``(1, 784)``.
    """
    direction = generate_p(x, label, model)

    # Step against the gradient sign, then keep every value inside [0, 1].
    shifted = x - eps * direction
    bounded = shifted.clip(min=0, max=1)

    return bounded.reshape(1, 784)

In [4]:
def predict(x, model):
    """Classify a single sample and report the winning class and its score.

    Parameters
    ----------
    x : numpy.ndarray
        Sample holding 784 values; reshaped to ``(1, 784)`` before prediction.
    model : object
        Anything with a ``predict(batch, batch_size=...)`` method returning
        one row of per-class scores.

    Returns
    -------
    tuple
        ``(index of the highest score, highest score in the first row)``.
    """
    single_batch = x.reshape(1, 784)
    scores = model.predict(single_batch, batch_size=1)

    best_class = np.argmax(scores)
    best_score = max(scores[0])

    return best_class, best_score

In [5]:
# Perturbation magnitude passed as `eps` when generating adversarial examples.
eps = 0.3

In [6]:
# Number of classes used when one-hot encoding the labels.
num_classes = 10

In [7]:
# Only the first `max_n` samples of each split are turned into adversarial examples.
max_n = 100

In [8]:
# Load MNIST, flatten every image to a 784-value float32 vector and divide by 255.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# -1 lets NumPy infer the number of samples instead of hard-coding 60000 / 10000.
X_train = X_train.reshape(-1, 784).astype('float32') / 255
X_test  = X_test.reshape(-1, 784).astype('float32') / 255

print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# One-hot encode the integer labels into `num_classes` columns.
y_train_catego = keras.utils.to_categorical(y_train, num_classes)
y_test_catego = keras.utils.to_categorical(y_test, num_classes)

60000 train samples
10000 test samples


In [9]:
# Load the model architecture from its JSON description.
# The context manager makes sure the file handle is closed after reading.
with open('mnist_mlp_model.json') as model_file:
    model = model_from_json(model_file.read())

# Load the trained weights.
model.load_weights('mnist_mlp_weights.h5')

model.summary();

# Compile so that evaluate()/fit() report the loss and accuracy.
model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

W0108 12:04:11.156069 140736235160512 deprecation_wrapper.py:119] From /Users/ryuto/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0108 12:04:11.181239 140736235160512 deprecation_wrapper.py:119] From /Users/ryuto/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0108 12:04:11.204370 140736235160512 deprecation_wrapper.py:119] From /Users/ryuto/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:131: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0108 12:04:11.205335 140736235160512 deprecation_wrapper.py:119] From /Users/ryuto/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               401920    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5130      
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Baseline: loss and accuracy of the loaded model on the complete clean test set.
score = model.evaluate(x=X_test, y=y_test_catego, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss :', test_loss)
print('Test accuracy :', test_accuracy)

Test loss : 0.10931812194355357
Test accuracy : 0.9842


In [11]:
def generate_adv_list(x_list, y_list, model, eps):
    """Generate one adversarial example per (sample, label) pair.

    Parameters
    ----------
    x_list : iterable of numpy.ndarray
        Samples, each forwarded to ``generate_adv``.
    y_list : iterable of int-like
        Labels paired element-wise with ``x_list`` via ``zip``.
    model : Keras model
        Forwarded to ``generate_adv``.
    eps : float
        Perturbation magnitude forwarded to ``generate_adv``.

    Returns
    -------
    numpy.ndarray
        Array built from the adversarial samples, each flattened to 784
        values, in input order.
    """
    return np.array([
        generate_adv(sample, target, model, eps).reshape(784)
        for sample, target in zip(x_list, y_list)
    ])

In [12]:
# Adversarial counterparts of the first `max_n` test samples.
adv_test = generate_adv_list(
    x_list=X_test[:max_n],
    y_list=y_test[:max_n],
    model=model,
    eps=eps,
)
adv_test.shape

(100, 784)

In [13]:
# Loss and accuracy on the adversarial test samples, scored against the true labels.
score = model.evaluate(x=adv_test, y=y_test_catego[:max_n], verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss :', test_loss)
print('Test accuracy :', test_accuracy)

Test loss : 15.22770034790039
Test accuracy : 0.0


In [14]:
# Reference point: the same first `max_n` test samples without any perturbation.
score = model.evaluate(x=X_test[:max_n], y=y_test_catego[:max_n], verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss :', test_loss)
print('Test accuracy :', test_accuracy)

Test loss : 4.2783838143805045e-05
Test accuracy : 1.0


In [15]:
# Adversarial counterparts of the first `max_n` training samples.
adv_train = generate_adv_list(
    x_list=X_train[:max_n],
    y_list=y_train[:max_n],
    model=model,
    eps=eps,
)
adv_train.shape

(100, 784)

In [17]:
# Hyperparameters passed to model.fit() for the adversarial training run.
batch_size = 128
epochs = 20

In [21]:
# Continue training the loaded model on the adversarial training samples,
# using the full clean test set as validation data.
# NOTE: this updates `model` in place, so every evaluation cell executed after
# this one measures the retrained weights.
history = model.fit(adv_train, y_train_catego[:max_n],
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(X_test, y_test_catego))
score = model.evaluate(X_test, y_test_catego, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Save the architecture as JSON; the context manager flushes and closes the
# file so the JSON is completely written before the cell finishes.
model_json_str = model.to_json()
with open('advs_train_model.json', 'w') as json_file:
    json_file.write(model_json_str)
model.save_weights('advs_train_weights.h5');
print('model saved.')


W0108 12:49:36.702812 140736235160512 deprecation.py:323] From /Users/ryuto/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 100 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test loss: 0.3247232620380819
Test accuracy: 0.917
model saved.


In [22]:
# Loss and accuracy of the retrained model on the complete clean test set.
score = model.evaluate(x=X_test, y=y_test_catego, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss :', test_loss)
print('Test accuracy :', test_accuracy)

Test loss : 0.3247232620380819
Test accuracy : 0.917


In [23]:
# Loss and accuracy of the retrained model on the previously generated adversarial test samples.
score = model.evaluate(x=adv_test, y=y_test_catego[:max_n], verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss :', test_loss)
print('Test accuracy :', test_accuracy)

Test loss : 4.980520210266113
Test accuracy : 0.38


In [24]:
# Loss and accuracy of the retrained model on the first `max_n` clean test samples.
score = model.evaluate(x=X_test[:max_n], y=y_test_catego[:max_n], verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss :', test_loss)
print('Test accuracy :', test_accuracy)

Test loss : 0.2931195068359375
Test accuracy : 0.92
