In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
import numpy as np

# Load the MNIST train/test splits, then divide the pixel values by 255
# (presumably 8-bit intensities, which puts them in [0, 1])
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

# Create a simple model
def create_model():
    """Build and compile a small dense classifier for 28x28 inputs.

    The network flattens each 28x28 input, feeds it through one 128-unit
    ReLU layer and ends in a 10-way softmax. It is compiled with the Adam
    optimizer, sparse categorical cross-entropy loss (integer class labels)
    and an accuracy metric.

    Returns:
        The compiled ``Sequential`` model (untrained).
    """
    classifier = Sequential()
    classifier.add(Flatten(input_shape=(28, 28)))
    classifier.add(Dense(128, activation='relu'))
    classifier.add(Dense(10, activation='softmax'))

    classifier.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Generate adversarial examples with the Fast Gradient Sign Method (FGSM)
def generate_adversarial_example(model, x, y, epsilon=0.1):
    """Craft adversarial examples with the Fast Gradient Sign Method (FGSM).

    Each input is moved by ``epsilon`` in the direction of the sign of the
    loss gradient with respect to that input, i.e. the direction that
    increases the model's loss on the true label the most (to first order).

    Args:
        model: Callable Keras model that maps a batch of inputs to class
            probabilities and accepts a ``training`` keyword argument.
        x: Batch of inputs (array-like); converted to ``float32``. Values are
            expected to lie in [0, 1], since the result is clipped to that
            range.
        y: Integer class labels for ``x`` (sparse, not one-hot).
        epsilon: Step size of the perturbation, applied per input element.

    Returns:
        A ``float32`` NumPy array with the same shape as ``x`` holding the
        perturbed inputs, clipped to [0, 1].
    """
    x_tensor = tf.convert_to_tensor(x, dtype=tf.float32)
    y_tensor = tf.convert_to_tensor(y)

    with tf.GradientTape() as tape:
        # x_tensor is a plain tensor, not a tf.Variable, so the tape only
        # tracks it when asked to explicitly.
        tape.watch(x_tensor)
        # Run in inference mode: the attack must target the deterministic
        # network, and layers such as Dropout or BatchNormalization must not
        # randomize the output or update their statistics here.
        prediction = model(x_tensor, training=False)
        # Per-example losses; tape.gradient differentiates their sum, which
        # yields each example's own input gradient as long as the examples do
        # not interact inside the model.
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_tensor, prediction)

    gradient = tape.gradient(loss, x_tensor)
    adversarial_example = x_tensor + epsilon * tf.sign(gradient)
    adversarial_example = tf.clip_by_value(adversarial_example, 0, 1)  # Keep in [0,1] range
    return adversarial_example.numpy()

# Evaluate model on adversarial examples
def evaluate_adversarial(model, x, y, epsilon=0.1):
    """Print the model's accuracy on adversarially perturbed copies of ``x``.

    The adversarial inputs are generated against ``model`` itself via
    ``generate_adversarial_example`` and then scored with ``model.evaluate``.

    Args:
        model: Compiled Keras model whose ``evaluate`` returns
            ``(loss, accuracy)``.
        x: Clean inputs to perturb.
        y: Integer class labels for ``x``.
        epsilon: Perturbation step size forwarded to the generator.

    Returns:
        None. The accuracy is only printed.
    """
    perturbed_inputs = generate_adversarial_example(model, x, y, epsilon)
    # The loss is not reported; only the accuracy metric is of interest here
    loss_value, accuracy = model.evaluate(perturbed_inputs, y, verbose=0)
    print(f"Accuracy on adversarial examples: {accuracy * 100:.2f}%")

# Adversarial training function
def adversarial_training(model, x_train, y_train, epochs=5, epsilon=0.1, *, batch_size=1):
    """Fine-tune ``model`` on clean inputs and adversarial copies of them.

    The training data is walked in order (no shuffling) in consecutive
    mini-batches. For every mini-batch, adversarial examples are first
    crafted against the model's current weights, then the model takes one
    gradient step on the clean batch followed by one on the adversarial
    batch.

    Args:
        model: Compiled Keras model; updated in place.
        x_train: Training inputs, sliceable along the first axis.
        y_train: Integer class labels aligned with ``x_train``.
        epochs: Number of full passes over ``x_train``.
        epsilon: Perturbation step size used to craft adversarial examples.
        batch_size: Number of samples per training step (keyword-only).
            The default of 1 reproduces the original one-sample-at-a-time
            behaviour; larger values (e.g. 32 or 64) need far fewer
            gradient computations and ``train_on_batch`` calls per epoch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    num_samples = len(x_train)
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        for start in range(0, num_samples, batch_size):
            stop = start + batch_size  # slicing clamps the final, shorter batch
            x = x_train[start:stop]
            y = y_train[start:stop]
            # Craft the attack before the clean update so that it targets the
            # same weights the clean step starts from.
            x_adv = generate_adversarial_example(model, x, y, epsilon)
            # Train on both original and adversarial examples
            model.train_on_batch(x, y)
            model.train_on_batch(x_adv, y)

# Baseline: build a fresh model and fit it on clean data only
model = create_model()
model.fit(x=x_train, y=y_train, epochs=3, validation_split=0.1)

# Baseline accuracy on the untouched test set
_, clean_accuracy = model.evaluate(x=x_test, y=y_test, verbose=0)
print(f"Accuracy on clean test data: {clean_accuracy * 100:.2f}%")

# Baseline accuracy under attack (before any adversarial training)
evaluate_adversarial(model, x_test, y_test, epsilon=0.1)

# Continue training the same model on clean + adversarial examples
adversarial_training(model, x_train, y_train, epochs=3, epsilon=0.1)

# Clean test accuracy again, to see what adversarial training cost
_, adv_trained_clean_accuracy = model.evaluate(x=x_test, y=y_test, verbose=0)
print(f"Accuracy on clean test data after adversarial training: {adv_trained_clean_accuracy * 100:.2f}%")

# Accuracy under attack again, now against the adversarially trained model
evaluate_adversarial(model, x_test, y_test, epsilon=0.1)


Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy on clean test data: 97.02%
Accuracy on adversarial examples: 13.94%
Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy on clean test data after adversarial training: 95.28%
Accuracy on adversarial examples: 73.40%
