In [1]:
import tensorflow as tf

import numpy as np

from art.attacks.evasion import FastGradientMethod
from art.estimators.classification import TensorFlowV2Classifier

In [2]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

In [3]:
mnist_model = tf.keras.models.load_model('models/mnist_model')

In [4]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [5]:
classifier = TensorFlowV2Classifier(
    model=mnist_model,
    nb_classes=10,
    input_shape=mnist_model.input_shape,
    loss_object=loss_fn
)

In [6]:
attack = FastGradientMethod(estimator=classifier)

In [7]:
x_test_adv = attack.generate(x=x_test)
x_train_adv = attack.generate(x=x_train)

In [8]:
mnist_model.fit(x_train_adv, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x149b7972828>

In [9]:
mnist_model.evaluate(x_test_adv,  y_test, verbose=2)

313/313 - 0s - loss: 0.3095 - accuracy: 0.9746


[0.3094581961631775, 0.9746000170707703]

In [10]:
classifier2 = TensorFlowV2Classifier(
    model=mnist_model,
    nb_classes=10,
    input_shape=mnist_model.input_shape,
    loss_object=loss_fn
)

In [11]:
predictions = classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))

Accuracy on adversarial test examples: 97.46000000000001%


In [12]:
mnist_model.evaluate(x_test,  y_test, verbose=2)

313/313 - 0s - loss: 0.6880 - accuracy: 0.7808


[0.6880194544792175, 0.7807999849319458]

In [13]:
x_test_all = np.concatenate((x_test, x_test_adv))
x_train_all = np.concatenate((x_train, x_train_adv))
y_test_all = np.concatenate((y_test, y_test))
y_train_all = np.concatenate((y_train, y_train))

In [14]:
all_model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)
])

predictions = all_model(x_train[:1]).numpy()
predictions

tf.nn.softmax(predictions).numpy()

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
loss_fn(y_train[:1], predictions).numpy()

all_model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])


In [15]:
all_model.fit(x_train_all, y_train_all, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x149ef005cc0>

In [16]:
all_model.evaluate(x_test_adv,  y_test, verbose=2)
all_model.evaluate(x_test,  y_test, verbose=2)

313/313 - 0s - loss: 0.3441 - accuracy: 0.9775
313/313 - 0s - loss: 0.0868 - accuracy: 0.9753


[0.08676230162382126, 0.9753000140190125]