In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
from six.moves import xrange

import logging
import tensorflow as tf
from tensorflow.python.platform import flags

from cleverhans.utils_mnist import data_mnist
from cleverhans.utils import to_categorical
from cleverhans.utils import set_log_level
from cleverhans.utils_tf import model_train, model_eval, batch_eval
from cleverhans.attacks import FastGradientMethod
from cleverhans.attacks_tf import jacobian_graph, jacobian_augmentation

from cleverhans_tutorials.tutorial_models import make_basic_cnn, MLP
from cleverhans_tutorials.tutorial_models import Flatten, Linear, ReLU, Softmax
from cleverhans.utils import TemporaryLogLevel

  from ._conv import register_converters as _register_converters


In [2]:
def setup_tutorial():
    """Seed TensorFlow's random number generator for the tutorial.

    Returns:
        bool: always ``True``, so that callers can check the result.
    """
    # A fixed seed makes repeated runs more reproducible.
    tf_seed = 1234
    tf.set_random_seed(tf_seed)
    return True

In [3]:
def prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test,
              nb_epochs, batch_size, learning_rate, rng):
    """Build, train and evaluate the model that plays the black-box oracle.

    Args:
        sess: TensorFlow session handed to ``model_train`` / ``model_eval``.
        x: input placeholder the model graph is built on.
        y: label placeholder handed to ``model_train`` / ``model_eval``.
        X_train, Y_train: training inputs and labels.
        X_test, Y_test: inputs and labels used for the accuracy evaluation.
        nb_epochs: forwarded to ``model_train`` as ``'nb_epochs'``.
        batch_size: batch size used for both training and evaluation.
        learning_rate: forwarded to ``model_train`` as ``'learning_rate'``.
        rng: random number generator forwarded to ``model_train``.

    Returns:
        tuple: ``(model, predictions, accuracy)`` where ``predictions`` is
        ``model(x)`` and ``accuracy`` is the value returned by ``model_eval``
        on the test data.
    """
    # Define the CNN; the rest of the tutorial treats it as the black box.
    model = make_basic_cnn()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    # Train the black box on the training data.
    model_train(
        sess, x, y, predictions, X_train, Y_train,
        args={
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        },
        rng=rng)

    # Measure accuracy on legitimate (unperturbed) test data.
    accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                          args={'batch_size': batch_size})
    report = 'Test accuracy of black-box on legitimate test examples: ' + str(accuracy)
    print(report)

    return model, predictions, accuracy

In [4]:
def substitute_model(img_rows=28, img_cols=28, nb_classes=10):
    """Build the substitute model the attacker trains.

    The stack is: Flatten, two (Linear(200), ReLU) pairs, Linear(nb_classes)
    and a final Softmax, wrapped in an ``MLP``.

    Args:
        img_rows: second entry of the input shape passed to ``MLP``.
        img_cols: third entry of the input shape passed to ``MLP``.
        nb_classes: width of the last ``Linear`` layer.

    Returns:
        The ``MLP`` built from the layer list and the input shape
        ``(None, img_rows, img_cols, 1)``.
    """
    hidden_width = 200

    # A plain fully connected network: flatten, two hidden layers, softmax.
    layers = [Flatten()]
    for _ in range(2):
        layers += [Linear(hidden_width), ReLU()]
    layers += [Linear(nb_classes), Softmax()]

    return MLP(layers, (None, img_rows, img_cols, 1))

In [5]:
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda,
              rng):
    """Train the attacker's substitute model with Jacobian-based augmentation.

    Training and dataset augmentation alternate ``data_aug`` times. After
    every round except the last, ``jacobian_augmentation`` produces a new
    ``X_sub`` and the second half of it is labelled by querying the black box
    (``bbox_preds``) and keeping only the argmax label.

    Args:
        sess: TensorFlow session used for training, augmentation and queries.
        x: input placeholder shared by the substitute and the black box.
        y: label placeholder handed to ``model_train``.
        bbox_preds: black-box output tensor evaluated through ``batch_eval``.
        X_sub: initial substitute training inputs.
        Y_sub: initial substitute labels; converted with ``to_categorical``
            before each training round.
        nb_classes: number of classes; used for the substitute's output
            layer, ``jacobian_graph`` and ``to_categorical``.
        nb_epochs_s: epochs per substitute training round.
        batch_size: batch size for training and for black-box queries.
        learning_rate: learning rate for substitute training.
        data_aug: number of training rounds.
        lmbda: augmentation step size; its sign is chosen per round.
        rng: random number generator forwarded to ``model_train``.

    Returns:
        tuple: ``(model_sub, preds_sub)`` with ``preds_sub = model_sub(x)``.
    """
    # Define the attacker's model. nb_classes is forwarded so the output
    # layer has the same width that jacobian_graph and to_categorical use.
    model_sub = substitute_model(nb_classes=nb_classes)
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        with TemporaryLogLevel(logging.WARNING, "cleverhans.utils.tf"):
            model_train(sess, x, y, preds_sub, X_sub,
                        to_categorical(Y_sub, nb_classes),
                        init_all=False, args=train_params, rng=rng)

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Step direction: -1 for the first three rounds, +1 afterwards.
            # NOTE(review): the sign flips once and never flips back --
            # confirm whether a periodic alternation was intended.
            lmbda_coef = 1 if rho >= 3 else -1
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          lmbda_coef * lmbda)

            print("Labeling substitute training data.")
            # Duplicate the labels, then overwrite the second half with the
            # black box's answers for the second half of the augmented X_sub.
            Y_sub = np.hstack([Y_sub, Y_sub])
            half = len(X_sub) // 2
            X_sub_prev = X_sub[half:]
            eval_params = {'batch_size': batch_size}
            bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                                  args=eval_params)[0]
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[half:] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub

In [6]:
def mnist_blackbox(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_classes=10, batch_size=128,
                   learning_rate=0.001, nb_epochs=1, holdout=150, data_aug=6,
                   nb_epochs_s=1, lmbda=0.1):
    """Run the MNIST black-box attack tutorial end to end.

    A black-box model is trained on MNIST, a substitute model is trained from
    the first ``holdout`` test samples plus black-box labels, adversarial
    examples are crafted on the substitute with the Fast Gradient Sign Method
    and the black box is evaluated on them.

    Args:
        train_start, train_end, test_start, test_end: forwarded to
            ``data_mnist`` to select the data ranges.
        nb_classes: number of classes; sizes the label placeholder and is
            forwarded to ``train_sub``.
        batch_size: batch size for training and evaluation.
        learning_rate: learning rate for both models.
        nb_epochs: training epochs for the black box.
        holdout: number of leading test samples handed to the adversary; they
            are removed from the evaluation set.
        data_aug: number of substitute training rounds.
        nb_epochs_s: epochs per substitute training round.
        lmbda: step size for the Jacobian-based augmentation.

    Returns:
        dict: accuracies under the keys ``'bbox'`` (black box on clean test
        data), ``'sub'`` (substitute on clean test data) and
        ``'bbox_on_sub_adv_ex'`` (black box on adversarial examples crafted
        with the substitute).
    """
    set_log_level(logging.DEBUG)
    accuracies = {}

    # Call the setup outside the assert so the seeding still happens when
    # assertions are stripped (python -O).
    tutorial_ready = setup_tutorial()
    assert tutorial_ready

    sess = tf.Session()
    try:
        # Get MNIST data
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                      train_end=train_end,
                                                      test_start=test_start,
                                                      test_end=test_end)
        # The adversary only gets the first `holdout` test samples; the rest
        # stays as the evaluation set.
        X_sub = X_test[:holdout]
        Y_sub = np.argmax(Y_test[:holdout], axis=1)
        X_test = X_test[holdout:]
        Y_test = Y_test[holdout:]

        # Define input and output TF placeholders
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        y = tf.placeholder(tf.float32, shape=(None, nb_classes))

        rng = np.random.RandomState([2017, 8, 30])

        # Simulate the black-box model. prep_bbox trains it here; in a real
        # attack only the outputs of a deployed application would be available.
        print("Preparing the black-box model.")
        prep_bbox_out = prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test,
                                  nb_epochs, batch_size, learning_rate,
                                  rng=rng)
        model, bbox_preds, accuracies['bbox'] = prep_bbox_out

        # Train the attacker's substitute model.
        print("Training the substitute model.")
        train_sub_out = train_sub(sess, x, y, bbox_preds, X_sub, Y_sub,
                                  nb_classes, nb_epochs_s, batch_size,
                                  learning_rate, data_aug, lmbda, rng=rng)
        model_sub, preds_sub = train_sub_out

        # Evaluate the substitute model on clean test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_sub, X_test, Y_test, args=eval_params)
        accuracies['sub'] = acc

        # The attack is the Fast Gradient Sign Method (FGSM); the adversarial
        # examples are generated with the attacker's substitute model.
        fgsm_par = {'eps': 0.3, 'ord': np.inf, 'clip_min': 0., 'clip_max': 1.}
        fgsm = FastGradientMethod(model_sub, sess=sess)
        x_adv_sub = fgsm.generate(x, **fgsm_par)

        # Evaluate the black box on the adversarial examples crafted with the
        # substitute.
        accuracy = model_eval(sess, x, y, model(x_adv_sub), X_test, Y_test, args=eval_params)
        print('Test accuracy of oracle on adversarial examples generated using the substitute: ' + str(accuracy))
        accuracies['bbox_on_sub_adv_ex'] = accuracy
    finally:
        # Release the session's resources whether or not the run succeeded.
        sess.close()

    return accuracies

In [None]:
# Run the black-box attack tutorial end to end with its default arguments.
mnist_blackbox()

Extracting /tmp/train-images-idx3-ubyte.gz
Extracting /tmp/train-labels-idx1-ubyte.gz
Extracting /tmp/t10k-images-idx3-ubyte.gz
Extracting /tmp/t10k-labels-idx1-ubyte.gz
X_train shape: (60000, 28, 28, 1)
X_test shape: (10000, 28, 28, 1)
Preparing the black-box model.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Defined TensorFlow model graph.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



[INFO 2018-05-30 06:21:29,915 cleverhans] Epoch 0 took 122.29941654205322 seconds
[INFO 2018-05-30 06:21:29,916 cleverhans] Completed model training.


Test accuracy of black-box on legitimate test examples: 0.9825380710659899
Training the substitute model.
Defined TensorFlow model graph for the substitute.
Substitute training epoch #0


[INFO 2018-05-30 06:21:33,905 cleverhans] Epoch 0 took 0.033161163330078125 seconds
[INFO 2018-05-30 06:21:33,909 cleverhans] Completed model training.


Augmenting substitute training data.
Labeling substitute training data.
Substitute training epoch #1


[INFO 2018-05-30 06:21:38,686 cleverhans] Epoch 0 took 0.03509664535522461 seconds
[INFO 2018-05-30 06:21:38,688 cleverhans] Completed model training.


Augmenting substitute training data.
Labeling substitute training data.
Substitute training epoch #2


[INFO 2018-05-30 06:21:50,994 cleverhans] Epoch 0 took 0.0447850227355957 seconds
[INFO 2018-05-30 06:21:50,996 cleverhans] Completed model training.


Augmenting substitute training data.
Labeling substitute training data.
Substitute training epoch #3


[INFO 2018-05-30 06:22:26,987 cleverhans] Epoch 0 took 0.07931804656982422 seconds
[INFO 2018-05-30 06:22:26,988 cleverhans] Completed model training.


Augmenting substitute training data.
Labeling substitute training data.
Substitute training epoch #4


[INFO 2018-05-30 06:28:07,103 cleverhans] Epoch 0 took 0.42368578910827637 seconds
[INFO 2018-05-30 06:28:07,122 cleverhans] Completed model training.


Augmenting substitute training data.
