In [36]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, KFold
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [37]:
class SoftmaxRegression:
    '''
    Multinomial logistic (softmax) regression trained with mini-batch gradient descent.

    Parameters
    ----------
    learning_rate: step size applied to every gradient update
    num_iters: number of full passes over the training data made by fit
    batch_size: maximum number of samples per mini-batch

    Attributes
    ----------
    weights: array of size [num_features, num_classes]; None until fit is called
    bias: array of size [num_classes]; None until fit is called
    '''

    def __init__(self, learning_rate=0.01, num_iters=1000, batch_size=100):
        self.learning_rate = learning_rate
        self.num_iters = num_iters
        self.batch_size = batch_size
        self.weights = None
        self.bias = None

    def softmax(self, logits):
        '''
        Parameters
        ----------
        logits: array of raw class scores of size [num_samples, num_classes]

        Returns the row-wise softmax probabilities (each row sums to 1).
        '''
        # Subtracting the row maximum leaves the result mathematically unchanged
        # but keeps np.exp from overflowing to inf (and the ratio from becoming nan).
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=1, keepdims=True)

    def _logits(self, X):
        # Linear class scores for every row of X.
        return np.dot(X, self.weights) + self.bias

    def fit(self, X, y):
        '''
        Parameters
        ----------
        X: array of inputs of size [num_samples, num_features]
        y: array of integer class labels of size [num_samples]

        Re-initialises weights (standard normal draws from NumPy's global RNG) and
        bias (zeros), then runs num_iters passes of mini-batch gradient descent.
        Batches are taken in the order the rows appear in X (no shuffling).
        '''
        num_samples, num_features = X.shape
        y = np.asarray(y)
        # Size the one-hot encoding from the largest label rather than the number
        # of distinct labels, so a label set with gaps (e.g. {0, 2}) does not
        # index past the end of the identity matrix.
        num_classes = int(y.max()) + 1 if y.size else 0
        y_hot = np.eye(num_classes)[y]

        self.weights = np.random.randn(num_features, num_classes)
        self.bias = np.zeros(num_classes)

        for _ in range(self.num_iters):
            # Mini-batch gradient descent
            for start in range(0, num_samples, self.batch_size):
                stop = start + self.batch_size
                X_batch = X[start:stop]
                y_batch = y_hot[start:stop]

                probs = self.softmax(self._logits(X_batch))
                error = probs - y_batch

                # Average over the rows actually present: the final batch can be
                # shorter than batch_size, and the bias gradient already uses a mean.
                d_weights = np.dot(X_batch.T, error) / len(X_batch)
                d_bias = np.mean(error, axis=0)

                self.weights -= self.learning_rate * d_weights
                self.bias -= self.learning_rate * d_bias

    def predict(self, X):
        '''
        Parameters
        ----------
        X: array of inputs of size [num_samples, num_features]

        Returns the index of the highest-scoring class for every row of X.
        '''
        # Softmax is monotonic within a row, so the arg-max of the raw scores is
        # the arg-max of the probabilities.
        return np.argmax(self._logits(X), axis=1)

    def score(self, X, y):
        '''
        Parameters
        ----------
        X: array of inputs of size [num_samples, num_features]
        y: array of targets of size [num_samples]

        Use trained model to compute predictions for given inputs. Use the provided labels to compute accuracy.
        '''
        return np.count_nonzero(self.predict(X) == y) / len(y)

    def fgsm_adversarial_example(self, X, y, epsilon=0.1):
        '''
        Parameters
        ----------
        X: array of inputs of size [num_samples, num_features]
        y: array of true integer labels of size [num_samples]
        epsilon: magnitude of the per-feature perturbation

        Fast Gradient Sign Method: moves every feature by epsilon in the direction
        that increases the cross-entropy loss, then clips the result to [0, 1].
        Returns a new float array of the same shape as X; X itself is not modified.
        '''
        num_samples, _ = X.shape
        num_classes = self.weights.shape[1]
        y_hot = np.eye(num_classes)[np.asarray(y)[:num_samples]]  # one-hot encoding of true labels

        probs = self.softmax(self._logits(X))
        # Gradient of the cross-entropy loss w.r.t. the input, for all rows at once:
        # (probs - y_hot) @ weights.T has one row per sample.
        grad = np.dot(probs - y_hot, self.weights.T)

        # Ensure the adversarial examples remain within the valid input range
        return np.clip(X + epsilon * np.sign(grad), 0, 1)

    def ce_loss(self, y_hat, y):
        '''
        Parameters
        ----------
        y_hat: array of predicted probabilities of size [num_samples, num_classes]
        y: array of true one-hot vectors of size [num_samples, num_classes]

        Returns -mean(y * log(y_hat)) taken over ALL entries, i.e. the usual
        per-sample cross-entropy divided by num_classes.
        '''
        # Floor the probabilities so an exact 0 gives a finite log instead of
        # -inf (which would turn 0 * log(0) into nan).
        safe_probs = np.maximum(y_hat, 1e-12)
        return -np.mean(y * np.log(safe_probs))

In [38]:
import torch
from torchvision import datasets, transforms
import numpy as np

# Fetch both MNIST splits into ./data (downloading if needed); one ToTensor
# transform object is shared by the two dataset wrappers.
to_tensor = transforms.ToTensor()
mnist_train = datasets.MNIST('./data', train=True, download=True, transform=to_tensor)
mnist_test = datasets.MNIST('./data', train=False, download=True, transform=to_tensor)

# Training arrays: each 28x28 image is flattened into one row and divided by 255.0;
# labels are taken as a NumPy array.
train_pixels = mnist_train.data.numpy()
X_train = train_pixels.reshape(-1, 28*28) / 255.0
y_train = mnist_train.targets.numpy()

In [39]:
# Seed NumPy's global RNG so the random weight initialisation inside fit()
# (np.random.randn) gives the same model on every Restart & Run All.
np.random.seed(0)

# Train the model
model = SoftmaxRegression(learning_rate=0.1, num_iters=1000, batch_size=100)
model.fit(X_train, y_train)

# Prepare data for testing (same flatten-and-divide-by-255 preprocessing as the training set)
X_test = mnist_test.data.numpy().reshape(-1, 28*28) / 255.0
y_test = mnist_test.targets.numpy()

# Report accuracy on the clean test set; the recorded cell output shows an
# "Accuracy:" line that no statement in the cell was producing.
print("Accuracy:", model.score(X_test, y_test))

Accuracy: 0.9243


In [48]:
# Build FGSM-perturbed copies of the test inputs (epsilon = 0.01).
adv_examples = model.fgsm_adversarial_example(X_test, y_test, epsilon=0.01)

# Predicted labels for the clean inputs, then for the perturbed ones.
original_predictions, adv_predictions = (
    model.predict(inputs) for inputs in (X_test, adv_examples)
)

# Accuracy against the true test labels, in the same clean-then-perturbed order.
original_accuracy, adv_accuracy = (
    model.score(inputs, y_test) for inputs in (X_test, adv_examples)
)

for caption, accuracy in (
    ("Original Accuracy:", original_accuracy),
    ("Adversarial Example Accuracy:", adv_accuracy),
):
    print(caption, accuracy)

Original Accuracy: 0.9243
Adversarial Example Accuracy: 0.6719


### As we can see, accuracy on the FGSM adversarial examples is much lower than on the clean test set, even though each pixel was perturbed by at most ε = 0.01.