In [None]:
!pip install adversarial-robustness-toolbox

In [None]:
import numpy as np
import tensorflow as tf
# Switch TensorFlow to graph mode before any Keras model is built.
# NOTE(review): presumably required by ART's KerasClassifier wrapper under
# TF2 — confirm against the installed ART version.
tf.compat.v1.disable_eager_execution()
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam

# Import the ART wrapper and the dataset utility
from art.estimators.classification.keras import KerasClassifier
from art.utils import load_mnist

# Load the MNIST dataset
(X_train, y_train), (X_test, y_test), min_pixel_value, max_pixel_value = load_mnist()

nb_classes = 10

# Define the model to be attacked: a small CNN with a single 7x7 convolution
# filter, 4x4 max-pooling and a softmax output layer
model = Sequential([
    Conv2D(1, kernel_size=(7, 7), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=(4, 4)),
    Flatten(),
    Dense(nb_classes, activation='softmax'),
])
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss=categorical_crossentropy,
    metrics=['accuracy'],
)

# Wrap the Keras model for ART (the model outputs probabilities, not logits)
# and train it through the wrapper
victim_classifier = KerasClassifier(model, clip_values=(0, 1), use_logits=False)
victim_classifier.fit(X_train, y_train, batch_size=128, nb_epochs=5)

In [None]:
# Define the template of the model that will receive the stolen behaviour.
# It is only compiled and wrapped here, not trained.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.5),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(nb_classes, activation='softmax'),
])
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss=categorical_crossentropy,
    metrics=['accuracy'],
)

# Wrap the untrained template for ART (softmax output, so use_logits=False)
thieved_classifier = KerasClassifier(model, clip_values=(0, 1), use_logits=False)

In [None]:
# Import the attack
from art.attacks.extraction.copycat_cnn import CopycatCNN

# Configure the Copycat CNN extraction attack against the victim classifier
attack = CopycatCNN(
    classifier=victim_classifier,
    batch_size_fit=16,
    batch_size_query=16,
    nb_epochs=10,
    nb_stolen=1000,
)

# The attack returns the trained surrogate model
thieved_classifier = attack.extract(x=X_train, thieved_classifier=thieved_classifier)

# Show the result: the share of the first 100 training images on which the
# surrogate predicts the same class as the victim
eval_images = X_train[:100]
victim_preds = victim_classifier.predict(x=eval_images).argmax(axis=1)
thieved_preds = thieved_classifier.predict(x=eval_images).argmax(axis=1)
acc = np.mean(victim_preds == thieved_preds)
print('Accuracy of the surrogate model: {}%'.format(acc * 100))

In [None]:
# Decode the one-hot test labels once; both accuracy figures compare against them
true_labels = np.argmax(y_test, axis=1)

# Show the score on the unmodified X_test
preds = victim_classifier.predict(X_test)
acc = np.mean(np.argmax(preds, axis=1) == true_labels)
print('\nAccuracy on benign test examples: {}%'.format(acc * 100))

# Import the attack
from art.attacks.evasion import FastGradientMethod

attack = FastGradientMethod(estimator=victim_classifier, eps=0.1)

# Running the attack yields the adversarial examples
X_test_adv = attack.generate(x=X_test)

# Show the score on the perturbed X_test
preds = victim_classifier.predict(X_test_adv)
acc = np.mean(np.argmax(preds, axis=1) == true_labels)
print('\nAccuracy on adversarial test examples: {}%'.format(acc * 100))

# Plot the first generated adversarial example as a 28x28 image
from matplotlib import pyplot as plt
first_adv_image = X_test_adv[0].reshape(28, 28)
plt.matshow(first_adv_image)
plt.clim(0, 1)

In [None]:
# Import the attack
from art.attacks.evasion.carlini import CarliniL2Method

# This is a targeted attack, but the targets can also be chosen at random
from art.utils import random_targets

# Here the attack tries to minimise the L2 norm of the perturbation
attack = CarliniL2Method(
    classifier=victim_classifier,
    targeted=True,
    max_iter=10,
)

# Target labels handed to the attack through the `y` keyword
target_labels = random_targets(y_test, victim_classifier.nb_classes)
params = {'y': target_labels}

# Running the attack yields the adversarial examples
X_test_adv = attack.generate(x=X_test, **params)

In [None]:
import lightgbm as lgb

# ラッパーおよびユーティリティをインポートする
from art.estimators.classification import LightGBMClassifier
from art.utils import load_mnist

# MNISTデータセットをロードする
(X_train, y_train), (X_test, y_test), \
    min_pixel_value, max_pixel_value = load_mnist()

# 今回は5枚の画像にのみ摂動を加える
X_test = X_test[0:5]
y_test = y_test[0:5]

nb_samples_train = X_train.shape[0]
nb_samples_test = X_test.shape[0]
X_train = X_train.reshape((nb_samples_train, 28 * 28))
X_test = X_test.reshape((nb_samples_test, 28 * 28))

# 攻撃対象のモデルを訓練する
params = {'objective': 'multiclass',
          'metric': 'multi_logloss',
          'num_class': 10}

lgb_train = lgb.Dataset(X_train, label=np.argmax(y_train, axis=1))
lgb_test = lgb.Dataset(X_test, label=np.argmax(y_test, axis=1))
model = lgb.train(params=params, train_set=lgb_train, num_boost_round=100, 
                  valid_sets=[lgb_test])

victim_classifier = LightGBMClassifier(model=model,
                                       clip_values=(min_pixel_value, max_pixel_value))

In [None]:
# 攻撃手法をインポートする
from art.attacks.evasion import ZooAttack

attack = ZooAttack(classifier=victim_classifier,
                   confidence=0.5,
                   targeted=False,
                   learning_rate=1e-1,
                   max_iter=200,
                   binary_search_steps=100,
                   initial_const=1e-1,
                   nb_parallel=250,
                   batch_size=1,
                   variable_h=0.01)

# 攻撃の結果としてAdversarial Exampleを得る
X_test_adv = attack.generate(x=X_test)

In [None]:
# 防御手法をインポートする
from art.defences.trainer.adversarial_trainer import AdversarialTrainer

adv_tranier = AdversarialTrainer(victim_classifier, attack)
adv_tranier.fit(X_train, y_train, batch_size=100, nb_epochs=2)