# In [2]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

from art.estimators.classification import PyTorchClassifier
from art.utils import load_mnist
from art.attacks.evasion import FastGradientMethod  # Import the attack method


class Net(nn.Module):
    """Compact convolutional classifier emitting 10 raw class scores.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max-pooling, feed a
    two-layer fully connected head. The attribute names (``conv_1``,
    ``conv_2``, ``fc_1``, ``fc_2``) become the state-dict keys, so they must
    stay as they are for previously saved weights to load.
    """

    def __init__(self):
        super().__init__()
        # For a 28x28 single-channel input the spatial size evolves as
        # 28 -> 24 (conv) -> 12 (pool) -> 8 (conv) -> 4 (pool), which is where
        # the 4 * 4 * 10 input width of the first linear layer comes from.
        self.conv_1 = nn.Conv2d(1, 4, kernel_size=5, stride=1)
        self.conv_2 = nn.Conv2d(4, 10, kernel_size=5, stride=1)
        self.fc_1 = nn.Linear(4 * 4 * 10, 100)
        self.fc_2 = nn.Linear(100, 10)

    def forward(self, x):
        """Run the network on batch ``x`` and return un-normalised logits."""
        # Both convolutional stages share the same conv -> ReLU -> pool shape.
        for conv in (self.conv_1, self.conv_2):
            x = F.max_pool2d(F.relu(conv(x)), kernel_size=2, stride=2)
        # Flatten to (rows, 160); the width is taken from fc_1 itself.
        flat = x.view(-1, self.fc_1.in_features)
        hidden = F.relu(self.fc_1(flat))
        return self.fc_2(hidden)


# Evaluation script: load a pre-trained MNIST model, wrap it in an ART
# classifier, measure accuracy on clean and FGM-perturbed test data, and log
# the three accuracy figures to TensorBoard.


def _accuracy(predictions, labels):
    """Return the fraction of rows where arg-max prediction equals arg-max label.

    Both arguments are 2-D arrays with one row per sample; ``labels`` is
    treated as one-hot (its arg-max over axis 1 is the class index).
    """
    return np.mean(np.argmax(predictions, axis=1) == np.argmax(labels, axis=1))


# Step 1: Load the MNIST dataset

(x_train, y_train), (x_test, y_test), min_pixel_value, max_pixel_value = load_mnist()

# Step 1a: Swap axes to PyTorch's NCHW format

x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32)
x_test = np.transpose(x_test, (0, 3, 1, 2)).astype(np.float32)

# Step 2: Create the model

model = Net()

# Step 3: Load the model's state dict from the specified path.
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine as well.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
model_path = "mnist_model.pth"
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()

# Step 3a: Define the loss function and optimizer passed to PyTorchClassifier.
# The loss is what the gradient-based attack differentiates; the optimizer is
# presumably only used if the classifier is trained through ART -- confirm.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Step 4: Create the ART classifier
classifier = PyTorchClassifier(
    model=model,
    clip_values=(min_pixel_value, max_pixel_value),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(1, 28, 28),
    nb_classes=10,
)

# Step 5: Setup TensorBoard writer with a timestamped log directory
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
log_dir = f"./runs/mnist_evaluation_{timestamp}"

# The writer is used as a context manager so it is flushed and closed even if
# prediction or attack generation raises part-way through.
with SummaryWriter(log_dir=log_dir) as writer:
    # Step 6: Evaluate the ART classifier on benign test examples
    predictions = classifier.predict(x_test)
    accuracy = _accuracy(predictions, y_test)
    print("Accuracy on benign test examples: {}%".format(accuracy * 100))

    # Log test accuracy to TensorBoard
    writer.add_scalar("Accuracy/test", accuracy, 1)

    # Step 7: Evaluate on a small batch of training data for monitoring
    predictions_train = classifier.predict(x_train[:100])  # Predict on a small batch
    train_accuracy = _accuracy(predictions_train, y_train[:100])
    writer.add_scalar("Accuracy/train", train_accuracy, 1)

    # Step 8: Attack the model using Fast Gradient Method (FGM)
    attack = FastGradientMethod(estimator=classifier, eps=0.1)
    x_test_adv = attack.generate(x=x_test)  # Generate adversarial examples

    # Step 9: Evaluate on adversarial examples
    predictions_adv = classifier.predict(x_test_adv)
    accuracy_adv = _accuracy(predictions_adv, y_test)
    print("Accuracy on adversarial test examples: {}%".format(accuracy_adv * 100))

    # Log adversarial accuracy to TensorBoard
    writer.add_scalar("Accuracy/adversarial_test", accuracy_adv, 1)

# Output:
# Accuracy on benign test examples: 97.94%
# Accuracy on adversarial test examples: 65.36999999999999%
