In [1]:
import torch
import numpy as np
from art.attacks.evasion import FastGradientMethod
from art.estimators.classification import PyTorchClassifier
from your_training_script import Net  # Replace with the name of your training script
import torch.nn as nn
from art.utils import load_mnist

def _accuracy(predictions, labels):
    """Return the fraction of samples whose arg-max class agrees in both arrays.

    Both arguments are reduced with ``np.argmax(..., axis=1)``, so each must be
    a 2-D array with one row per sample (per-class scores or one-hot labels).
    """
    return np.mean(np.argmax(predictions, axis=1) == np.argmax(labels, axis=1))


# Step 1: Load the trained model
model = Net()
# map_location="cpu" keeps the load working on a CPU-only machine even when the
# checkpoint was saved from a CUDA device.
# NOTE(review): torch.load unpickles the file -- only load checkpoints that come
# from a trusted source.
model.load_state_dict(torch.load("mnist_model.pth", map_location="cpu"))
model.eval()  # inference mode: the attack and evaluation must not use training-time behaviour

# Step 2: Load the MNIST dataset
# load_mnist also returns the pixel range of the data it produced; reuse it as
# the clip range instead of hard-coding it, so adversarial examples are clipped
# to exactly the range of the inputs that are fed to the model.
(_, _), (x_test, y_test), min_pixel_value, max_pixel_value = load_mnist()
# Move the channel axis from last to second position (N, H, W, C) -> (N, C, H, W)
# to match the classifier's input_shape of (1, 28, 28).
x_test = np.transpose(x_test, (0, 3, 1, 2)).astype(np.float32)

# Step 3: Define the ART classifier
classifier = PyTorchClassifier(
    model=model,
    clip_values=(min_pixel_value, max_pixel_value),
    loss=nn.CrossEntropyLoss(),
    optimizer=None,  # Not needed for evaluation
    input_shape=(1, 28, 28),
    nb_classes=10,
)

# Step 4: Perform FGSM Attack
epsilon = 0.1  # Define perturbation strength
attack = FastGradientMethod(estimator=classifier, eps=epsilon)

# Generate adversarial examples (no labels are passed to the attack)
x_test_adv = attack.generate(x=x_test)

# Step 5: Evaluate the model on adversarial examples
predictions_adv = classifier.predict(x_test_adv)
accuracy_adv = _accuracy(predictions_adv, y_test)

print("Accuracy on adversarial test examples (FGSM, epsilon={}): {:.2f}%".format(epsilon, accuracy_adv * 100))

# Step 6: Evaluate the model on benign test examples (optional, for comparison)
predictions_benign = classifier.predict(x_test)
accuracy_benign = _accuracy(predictions_benign, y_test)

print("Accuracy on benign test examples: {:.2f}%".format(accuracy_benign * 100))


ModuleNotFoundError: No module named 'torch'

In [None]:
# To run this code as a standalone script, execute: python "robustness evaluation.py"
