# EXAM SCRIPTS

## Imports

In [None]:
# all imports in all notebooks
try:
    import secmlt
    import sklearn
    import matplotlib
    import foolbox
except ImportError:
    print("Installing the dependancies")
    %pip install git+https://github.com/pralab/secml-torch
    %pip install scikit-learn
    %pip install matplotlib
    %pip install foolbox

import torch
import secmlt
import numpy as np
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Subset, TensorDataset
from secmlt.models.pytorch.base_pytorch_nn import BasePytorchClassifier
from secmlt.models.pytorch.base_pytorch_trainer import BasePyTorchTrainer
from secmlt.metrics.classification import Accuracy

from secmlt.adv.backends import Backends
from secmlt.adv.evasion.perturbation_models import LpPerturbationModels
from secmlt.adv.evasion.pgd import PGD
from secmlt.adv.evasion.fmn import FMN

from secmlt.trackers.trackers import Tracker
from secmlt.trackers.trackers import MULTI_SCALAR #for class Sample2DTracker

import torchvision.datasets
from torchvision.models import get_model
import requests, io
from PIL import Image
import torchvision.transforms as T
import json
from secmlt.optimization.constraints import MaskConstraint

import random

## Functions implemented by the professor

In [None]:
def plot_data(X, y, title="Data Distribution"):
    """
    Plot 2D data points colored by their class labels.

    Args:
        X: Input features as tensor or numpy array; columns 0 and 1 are
            used as the x1 and x2 coordinates.
        y: Labels as tensor or numpy array, used to color the points.
        title: Plot title
    """
    # Convert tensors to numpy if needed. `.cpu()` is a no-op for CPU
    # tensors and avoids the error `.numpy()` raises for tensors that live
    # on another device.
    if torch.is_tensor(X):
        X = X.detach().cpu().numpy()
    if torch.is_tensor(y):
        y = y.detach().cpu().numpy()

    plt.figure(figsize=(8, 6))
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title(title)
    plt.show()

# -----------------------------------------------------------------------------
def plot_decision_regions(model, X, y, title="Decision Regions"):
    """
    Plot decision regions for a PyTorch model.

    The regions are obtained by classifying a 200x200 grid that covers the
    range of X (plus a 0.5 margin on every side). The model is switched to
    eval mode by this function and is left in that mode.

    Args:
        model: PyTorch model (model=Net()); its output is reduced with
            argmax over dimension 1 to get the predicted class.
        X: Input features as tensor; columns 0 and 1 are plotted.
        y: Labels as tensor
        title: Plot title
    """
    # Convert tensors to numpy for plotting (`.cpu()` is a no-op on CPU
    # tensors and keeps the conversion valid for tensors on other devices).
    X_np = X.detach().cpu().numpy()
    y_np = y.detach().cpu().numpy()

    # Create a mesh grid
    x_min, x_max = X_np[:, 0].min() - 0.5, X_np[:, 0].max() + 0.5
    y_min, y_max = X_np[:, 1].min() - 0.5, X_np[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                        np.linspace(y_min, y_max, 200))

    # Build the grid on the same device as the model's parameters
    # (falls back to the CPU for a model without parameters).
    first_param = next(iter(model.parameters()), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    X_mesh = torch.tensor(np.c_[xx.ravel(), yy.ravel()],
                          dtype=torch.float32, device=device)

    # Get predictions for each point in the mesh
    with torch.no_grad():
        model.eval()
        Z = torch.argmax(model(X_mesh), dim=1)
    Z = Z.cpu().numpy().reshape(xx.shape)

    # Plot decision regions
    plt.figure(figsize=(10, 8))
    plt.contourf(xx, yy, Z, alpha=0.4, cmap='viridis')

    # Plot data points
    plt.scatter(X_np[:, 0], X_np[:, 1], c=y_np,
                cmap='viridis')

    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title(title)
    plt.show()

# -----------------------------------------------------------------------------
def plot_attack_path(model, X, y, x_init, deltas, epsilon=None, title="Attack Path on Decision Regions"):
    """
    Plot the path followed by an attack on top of the model's decision regions.

    Draws the decision regions and the data points, the path
    ``x_init + deltas`` as a black line, the initial sample as a large green
    circle and the last point of the path as a red cross. When ``epsilon`` is
    given, the L2 ball of that radius centered on the initial sample is drawn
    as well.

    The model is switched to eval mode by this function and is left in that
    mode.

    Args:
        model: Trained PyTorch model.
        X: Input features (torch.Tensor) for decision region background.
        y: Labels (torch.Tensor) for coloring points.
        x_init: Initial sample (torch.Tensor, shape [2] or [1,2]).
        deltas: Attack deltas (torch.Tensor, shape [2, N] or [N, 2]). A tensor
            whose first dimension is 2 is read as [2, N]; this includes the
            ambiguous [2, 2] case.
        epsilon: Optional radius of the L2 ball to plot; ``None`` (the
            default) skips the ball.
        title: Plot title.
    """
    import matplotlib.pyplot as plt

    # Prepare mesh grid for decision regions (`.cpu()` is a no-op on CPU
    # tensors and keeps the conversion valid for tensors on other devices).
    X_np = X.detach().cpu().numpy()
    y_np = y.detach().cpu().numpy()
    x_min, x_max = X_np[:, 0].min() - 0.5, X_np[:, 0].max() + 0.5
    y_min, y_max = X_np[:, 1].min() - 0.5, X_np[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                         np.linspace(y_min, y_max, 200))

    # Evaluate the grid on the same device as the model's parameters
    # (falls back to the CPU for a model without parameters).
    first_param = next(iter(model.parameters()), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    X_mesh = torch.tensor(np.c_[xx.ravel(), yy.ravel()],
                          dtype=torch.float32, device=device)
    with torch.no_grad():
        model.eval()
        Z = torch.argmax(model(X_mesh), dim=1)
    Z = Z.cpu().numpy().reshape(xx.shape)

    plt.figure(figsize=(10, 8))
    plt.contourf(xx, yy, Z, alpha=0.4, cmap='viridis')
    plt.scatter(X_np[:, 0], X_np[:, 1], c=y_np, cmap='viridis', edgecolor='k', s=30, alpha=0.7)

    # Initial sample, flattened to a length-2 vector
    x_init_np = x_init.detach().cpu().numpy()
    if x_init_np.ndim == 2:
        x_init_np = x_init_np[0]

    # Plot L2 epsilon ball if epsilon is provided
    if epsilon is not None:
        circle = plt.Circle((x_init_np[0], x_init_np[1]), epsilon, color='red', fill=False, linestyle='--', linewidth=2, label=f'L2 Ball (ε={epsilon})')
        plt.gca().add_patch(circle)

    # Compute attack path: x_init + deltas, always stored as [N, 2]
    deltas_np = deltas.detach().cpu().numpy()
    if deltas_np.shape[0] == 2:
        # [2, N] layout: add the initial point column-wise, then transpose
        path = (x_init_np.reshape(2, 1) + deltas_np).T
    else:
        path = x_init_np + deltas_np
    plt.plot(path[:, 0], path[:, 1], color='black', linewidth=2, marker='o', markersize=4, label='Attack Path')

    # Initial sample as a green large circle, last sample as a red cross
    last_sample = path[-1]
    plt.scatter(x_init_np[0], x_init_np[1], marker='o', color='green', s=250, edgecolor='black', linewidths=2, label='Initial Sample', zorder = 2)
    plt.scatter(last_sample[0], last_sample[1], marker='x', color='red', s=200, linewidths=4, label='Final Sample', zorder = 2)

    ax = plt.gca()
    # Equal aspect ratio so that the L2 ball is drawn as a circle
    ax.set_aspect('equal', adjustable='box')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.show()

# -----------------------------------------------------------------------------
class Sample2DTracker(Tracker):
    """Tracker that records the adversarial sample passed to every `track` call.

    The recorded samples are kept in `self.tracked`, in call order.
    """

    def __init__(self) -> None:
        """Create adversarial 2D sample tracker."""
        # Registered with the base Tracker under the name "Sample" and the
        # MULTI_SCALAR tracker type.
        super().__init__("Sample", MULTI_SCALAR)

        # One entry per track() call.
        self.tracked = []

    def track(
        self,
        iteration: int,
        loss: torch.Tensor,
        scores: torch.Tensor,
        x_adv: torch.Tensor,
        delta: torch.Tensor,
        grad: torch.Tensor,
    ) -> None:
        """Append `x_adv` to the tracked samples; the other arguments are ignored.

        NOTE(review): the tensor is stored as received (no clone/detach) --
        presumably the attack passes a detached copy; confirm against the
        caller.
        """
        self.tracked.append(x_adv)

# -----------------------------------------------------------------------------
def secmlt_plot_attack_path(model, X, y, x_init, x_advs, epsilon=None, title="Attack Path on Decision Regions"):
    """
    Plot the adversarial samples visited by an attack on top of the model's decision regions.

    Draws the decision regions and the data points, the path that starts at
    ``x_init`` and goes through the points in ``x_advs`` as a black line, the
    initial sample as a large green circle and the last point of the path as
    a red cross. When ``epsilon`` is given, the L2 ball of that radius
    centered on the initial sample is drawn as well.

    The model's train/eval mode is not changed by this function.

    Args:
        model: Trained PyTorch model.
        X: Input features (torch.Tensor) for decision region background.
        y: Labels (torch.Tensor) for coloring points.
        x_init: Initial sample (torch.Tensor, shape [2] or [1,2]).
        x_advs: Adversarial samples along the path (torch.Tensor). Its
            dimensions are reversed and singleton dimensions are dropped, and
            the result is stacked under ``x_init`` as rows of 2D points
            (e.g. a [1, 2, N] tensor becomes N points).
        epsilon: Optional radius of the L2 ball to plot; ``None`` (the
            default) skips the ball.
        title: Plot title.
    """
    import matplotlib.pyplot as plt

    # Reverse the dimensions explicitly: `.T` is deprecated for tensors whose
    # number of dimensions is not 2. detach()/cpu() keep the numpy conversion
    # valid for tensors that require grad or live on another device.
    x_advs = x_advs.detach().cpu()
    x_advs = x_advs.permute(*reversed(range(x_advs.ndim))).squeeze().numpy()

    # Prepare mesh grid for decision regions
    X_np = X.detach().cpu().numpy()
    y_np = y.detach().cpu().numpy()
    x_min, x_max = X_np[:, 0].min() - 0.5, X_np[:, 0].max() + 0.5
    y_min, y_max = X_np[:, 1].min() - 0.5, X_np[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                         np.linspace(y_min, y_max, 200))

    # Evaluate the grid on the same device as the model's parameters
    # (falls back to the CPU for a model without parameters).
    first_param = next(iter(model.parameters()), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    X_mesh = torch.tensor(np.c_[xx.ravel(), yy.ravel()],
                          dtype=torch.float32, device=device)
    with torch.no_grad():
        Z = torch.argmax(model(X_mesh), dim=1)
    Z = Z.cpu().numpy().reshape(xx.shape)

    plt.figure(figsize=(10, 8))
    plt.contourf(xx, yy, Z, alpha=0.4, cmap='viridis')
    plt.scatter(X_np[:, 0], X_np[:, 1], c=y_np, cmap='viridis', edgecolor='k', s=30, alpha=0.7)

    # Initial sample, flattened to a length-2 vector
    x_init_np = x_init.detach().cpu().numpy()
    if x_init_np.ndim == 2:
        x_init_np = x_init_np[0]

    # Plot L2 epsilon ball if epsilon is provided
    if epsilon is not None:
        circle = plt.Circle((x_init_np[0], x_init_np[1]), epsilon, color='red', fill=False, linestyle='--', linewidth=2, label=f'L2 Ball (ε={epsilon})')
        plt.gca().add_patch(circle)

    # Concatenate the initial point to the attack path
    path = np.vstack([x_init_np, x_advs])
    plt.plot(path[:, 0], path[:, 1], color='black', linewidth=2, marker='o', markersize=4, label='Attack Path')

    # Initial sample as a green large circle, last sample as a red cross
    last_sample = path[-1]
    plt.scatter(path[0,0], path[0,1], marker='o', color='green', s=250, edgecolor='black', linewidths=2, label='Initial Sample', zorder = 2)
    plt.scatter(last_sample[0], last_sample[1], marker='x', color='red', s=200, linewidths=4, label='Final Sample', zorder = 2)

    ax = plt.gca()
    # Equal aspect ratio so that the L2 ball is drawn as a circle
    ax.set_aspect('equal', adjustable='box')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.show()

# -----------------------------------------------------------------------------
def compare_attack_paths(model, X, y, x_init, path1, path2, epsilon=None, label1="PGD", label2="FMN"):
    """
    Plot the paths of two attacks on the same sample on top of the model's decision regions.

    Draws the decision regions and the data points, both attack paths
    (each starting at ``x_init``), the initial sample as a large green circle
    and the final sample of each path as a cross (red for the first path,
    blue for the second). When ``epsilon`` is given, the L2 ball of that
    radius centered on the initial sample is drawn as well. The plot title is
    fixed to "Comparison between attacks".

    The model's train/eval mode is not changed by this function.

    Args:
        model: Trained PyTorch model.
        X: Input features (torch.Tensor) for decision region background.
        y: Labels (torch.Tensor) for coloring points.
        x_init: Initial sample (torch.Tensor, shape [2] or [1,2]).
        path1: Adversarial samples visited by the first attack
            (torch.Tensor). Its dimensions are reversed and singleton
            dimensions are dropped, and the result is stacked under
            ``x_init`` as rows of 2D points (e.g. [1, 2, N] becomes N points).
        path2: Same as ``path1``, for the second attack.
        epsilon: Optional radius of the L2 ball to plot; ``None`` (the
            default) skips the ball.
        label1: Legend label of the first attack.
        label2: Legend label of the second attack.
    """

    def _as_points(samples):
        # Reverse the dimensions explicitly: `.T` is deprecated for tensors
        # whose number of dimensions is not 2. detach()/cpu() keep the numpy
        # conversion valid for tensors that require grad or live elsewhere.
        samples = samples.detach().cpu()
        return samples.permute(*reversed(range(samples.ndim))).squeeze().numpy()

    path1 = _as_points(path1)
    path2 = _as_points(path2)

    # Prepare mesh grid for decision regions
    X_np = X.detach().cpu().numpy()
    y_np = y.detach().cpu().numpy()
    x_min, x_max = X_np[:, 0].min() - 0.5, X_np[:, 0].max() + 0.5
    y_min, y_max = X_np[:, 1].min() - 0.5, X_np[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                         np.linspace(y_min, y_max, 200))

    # Evaluate the grid on the same device as the model's parameters
    # (falls back to the CPU for a model without parameters).
    first_param = next(iter(model.parameters()), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    X_mesh = torch.tensor(np.c_[xx.ravel(), yy.ravel()],
                          dtype=torch.float32, device=device)
    with torch.no_grad():
        Z = torch.argmax(model(X_mesh), dim=1)
    Z = Z.cpu().numpy().reshape(xx.shape)

    plt.figure(figsize=(10, 8))
    plt.contourf(xx, yy, Z, alpha=0.4, cmap='viridis')
    plt.scatter(X_np[:, 0], X_np[:, 1], c=y_np, cmap='viridis', edgecolor='k', s=30, alpha=0.7)

    # Initial sample, flattened to a length-2 vector
    x_init_np = x_init.detach().cpu().numpy()
    if x_init_np.ndim == 2:
        x_init_np = x_init_np[0]

    # Plot L2 epsilon ball if epsilon is provided
    if epsilon is not None:
        circle = plt.Circle((x_init_np[0], x_init_np[1]), epsilon, color='red', fill=False, linestyle='--', linewidth=2, label=f'L2 Ball (ε={epsilon})')
        plt.gca().add_patch(circle)

    # Concatenate the initial point to the attack paths
    path1 = np.vstack([x_init_np, path1])
    path2 = np.vstack([x_init_np, path2])
    # Different color and line style per attack, so the two paths can be told
    # apart both on the plot and in the legend.
    plt.plot(path1[:, 0], path1[:, 1], color='black', linewidth=2, marker='o', markersize=4, label=label1)
    plt.plot(path2[:, 0], path2[:, 1], color='dimgray', linestyle='--', linewidth=2, marker='o', markersize=4, label=label2)

    # Initial sample as a green large circle, final samples as crosses
    last_sample = path1[-1]
    last_sample2 = path2[-1]
    plt.scatter(path1[0,0], path1[0,1], marker='o', color='green', s=250, edgecolor='black', linewidths=2, label='Initial Sample', zorder = 2)
    plt.scatter(last_sample[0], last_sample[1], marker='x', color='red', s=200, linewidths=4, label=label1 + ' Final Sample', zorder = 2)
    plt.scatter(last_sample2[0], last_sample2[1], marker='x', color='blue', s=200, linewidths=4, label=label2 + ' Final Sample', zorder = 2)

    ax = plt.gca()
    # Equal aspect ratio so that the L2 ball is drawn as a circle
    ax.set_aspect('equal', adjustable='box')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.title("Comparison between attacks")
    plt.legend()
    plt.tight_layout()
    plt.show()

# -----------------------------------------------------------------------------



## Important functions or classes 

Creating a dataloader

In [None]:
# Create the training and the test set: three 2D Gaussian blobs (one per
# class) centered in (0, 1), (0, 0) and (1, 1).
X_tr, y_tr = make_blobs(n_samples=500,
                  centers=[(0, 1), (0, 0), (1, 1)],
                  n_features=2,
                  cluster_std=0.2,
                  random_state=999)

# The test set uses a DIFFERENT seed than the training set: with the same
# seed both calls draw from the same random stream, so the test set would
# reuse the noise (and partly the exact points) of the training set and the
# test accuracy would be measured on leaked data.
X_ts, y_ts = make_blobs(n_samples=100,
                  centers=[(0, 1), (0, 0), (1, 1)],
                  n_features=2,
                  cluster_std=0.2,
                  random_state=1000)

# Visualize the data
#plot_data(X_tr, y_tr, title="Training Data Distribution")

# Convert to PyTorch tensors (float features, integer class labels)
X_tr = torch.FloatTensor(X_tr)
y_tr = torch.LongTensor(y_tr)

X_ts = torch.FloatTensor(X_ts)
y_ts = torch.LongTensor(y_ts)

# Creating dataloader
tr_dataset = TensorDataset(X_tr, y_tr)
tr_dataloader = DataLoader(tr_dataset, batch_size=32, shuffle=False)

ts_dataset = TensorDataset(X_ts, y_ts)
ts_dataloader = DataLoader(ts_dataset, batch_size=32, shuffle=False)

Neural Network definition and usage

In [None]:
class Net(torch.nn.Module):
    """Fully connected classifier: 2 input features -> 16 -> 16 -> 3 class scores.

    The sizes of consecutive layers must match (the output size of one layer
    is the input size of the next), since every layer is a matrix product.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        self.fc1 = nn.Linear(2, 16)   # input (2D dataset) -> first hidden layer
        self.fc2 = nn.Linear(16, 16)  # first hidden layer -> second hidden layer
        self.fc3 = nn.Linear(16, 3)   # second hidden layer -> one score per class (3 classes)
        # relu(x) = max(0, x): the non-linearity of the network
        self.relu = nn.ReLU()
        # self.sigmoid = torch.nn.Sigmoid()  # in case the required activation function is the sigmoid

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of samples.

        Each sample gets a vector of 3 unnormalized scores (no activation is
        applied after the last layer); the predicted label is the argmax of
        that vector.
        """
        hidden = self.relu(self.fc1(x))       # first layer + ReLU
        hidden = self.relu(self.fc2(hidden))  # second layer + ReLU
        return self.fc3(hidden)               # output layer, no activation
   
    
# Build the network, train it through the SecML-Torch wrapper and measure its
# accuracy on the test dataloader.
model = Net()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01) # lr = learning rate: size of the step applied at each parameter update
# Alternative, in case the SGD optimizer is required:
# optimizer = torch.optim.SGD(model2.parameters(), lr=0.1, momentum=0.9)
criterion = torch.nn.CrossEntropyLoss() # cross-entropy loss, computed on the raw network outputs (logits)
trainer = BasePyTorchTrainer(optimizer=optimizer, loss=criterion, epochs=100)

# The wrapper shares `model`: training it updates the weights of `model` itself
secmlt_model = BasePytorchClassifier(model, trainer=trainer)
secmlt_model.train(tr_dataloader) 

# Accuracy
accuracy = Accuracy()(secmlt_model, ts_dataloader)
print("Test accuracy: {:.2f}%".format(accuracy * 100))
#plot_decision_regions(secmlt_model.model, X_ts, y_ts, title="Decision Regions with SecML Torch")


# 01 - EVASION ATTACKS

Difference between targeted and untargeted evasion attacks:
- TARGETED: 
    - As a parameter I put yt (target class)
    - The loss is computed between outputs and yt
    - delta.delta = delta - alpha * gradient
- UNTARGETED:
    - As a parameter I get y, the original class
    - The loss is computed between outputs and y
    - delta.delta = delta + alpha * gradient
- In SecML library, the difference is made by
    - UNTARGETED: y_target=None
    - TARGETED: y_target = class I want as target

In [None]:
# Untargeted L2 PGD attack against `secmlt_model`, run on the first test sample.
epsilon = 0.7   # maximum L2 norm of the perturbation
alpha = 1e-2    # step size of each PGD iteration
num_iter=500    # number of PGD iterations
sample_tracker = Sample2DTracker()  # records the adversarial sample at every tracked iteration

# ATTACK
PGD_attack = PGD(
    perturbation_model=LpPerturbationModels.L2 ,
    epsilon=epsilon,
    num_steps=num_iter,
    step_size=alpha,
    random_start=False, # set to True to start from a random point instead
    #########################
    y_target=None, #### UNTARGETED
    #########################
    backend=Backends.NATIVE, # not needed for the course
    #trackers=None,
    trackers=[sample_tracker],
    #ub = 2,
    #lb = -2,
)

# To attack the whole dataloader:
#adv_dataloader = PGD_attack(secmlt_model, ts_dataloader)

# x_adv = adv_dataloader.dataset[0][0]
# y_pred = model(x_adv).argmax(dim=-1)
# print("Predicted label:", y_pred.item(), "\nReal label:",y_ts[0].item())

# To attack a single sample: wrap it in its own one-element dataloader
single_sample_loader = DataLoader(TensorDataset(X_ts[0:1,:], y_ts[0:1]), shuffle=False)
adv_dataloader = PGD_attack(secmlt_model, single_sample_loader)
#secmlt_plot_attack_path(secmlt_model.model, X_ts, y_ts, X_ts[0,:], sample_tracker.get(), epsilon, title="Attack Path on Decision Regions")


# 02 - MINIMUM NORM AND MAXIMUM CONFIDENCE ATTACKS

## Minimum-norm

Without SecML library, in the untargeted_l2_min_norm(...), I have  
```
outputs = model(x + delta)  
if outputs.argmax(dim=-1) != y:  
    break 
```
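Using SecML, we use the Fast Minimum-Norm (FMN) attack (Pintor et al., "Fast Minimum-norm Adversarial Attacks through Adaptive Norm Constraints", NeurIPS 2021).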

In [None]:
# Untargeted L2 minimum-norm attack (FMN) on the first test sample.
# No epsilon is passed to FMN in this cell.
path_tracker = Sample2DTracker()  # records the adversarial sample at every tracked iteration
alpha = 1e-2   # step size
num_iter=500   # number of iterations
perturbation_model = LpPerturbationModels.L2

# One-element dataloader that holds only the first test sample
single_sample_loader = DataLoader(TensorDataset(X_ts[0:1,:], y_ts[0:1]), batch_size=1, shuffle=False)
fmn_attack = FMN(
    perturbation_model=perturbation_model,
    num_steps=num_iter,
    step_size=alpha,
    y_target=None,  # untargeted
    lb=-2,
    ub=2,
    backend = Backends.NATIVE,
    trackers=[path_tracker]
)

adv_dataloader_mn = fmn_attack(secmlt_model, single_sample_loader)
# Adversarial sample = features of the first element of the returned dataset
x_adv_mn = adv_dataloader_mn.dataset[0][0]
# NOTE: `model` is the module-level network at the time this cell runs
y_pred_mn = model(x_adv_mn).argmax(dim=-1)
print("Predicted label:", y_pred_mn.item(), "\nReal label:",y_ts[0].item())
#secmlt_plot_attack_path(secmlt_model.model, X_ts, y_ts, X_ts[0,:], path_tracker.get(), None, title="Attack Path on Decision Regions")

Difference with the maximum-confidence attack (computed with PGD)

In [None]:
# Untargeted L2 maximum-confidence attack (PGD with a fixed budget epsilon)
# on the first test sample, to be compared with the FMN path.
path_tracker_pgd = Sample2DTracker()  # records the adversarial sample at every tracked iteration
alpha = 1e-2   # step size
num_iter=500   # number of iterations
epsilon = 0.7  # maximum L2 norm of the perturbation
# One-element dataloader that holds only the first test sample
single_sample_loader = DataLoader(TensorDataset(X_ts[0:1,:], y_ts[0:1]), batch_size=1, shuffle=False)

pgd_attack = PGD(
    perturbation_model=LpPerturbationModels.L2,
    num_steps=num_iter,
    step_size=alpha,
    y_target=None,  # untargeted
    epsilon=epsilon,
    lb=-2,
    ub=2,
    backend = Backends.NATIVE,
    trackers=[path_tracker_pgd]
)

adv_dataloader_mc = pgd_attack(secmlt_model, single_sample_loader)
# Adversarial sample = features of the first element of the returned dataset
x_adv_mc = adv_dataloader_mc.dataset[0][0]
# NOTE: `model` is the module-level network at the time this cell runs
y_pred_mc = model(x_adv_mc).argmax(dim=-1)
print("Predicted label:", y_pred_mc.item(), "\nReal label:",y_ts[0].item())

#compare_attack_paths(secmlt_model.model, X_ts, y_ts, X_ts[0,:], path_tracker_pgd.get(), path_tracker.get(), epsilon=epsilon)

### Norms
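- L1: promotes sparse perturbations (few features are changed); in 2D its ball is a diamond (a square rotated by 45°) inscribed in the L2 ball of the same radius
- L2: Euclidean norm; in 2D its ball is a circle
- LINF: infinity norm (bounds the largest change of any single feature); in 2D its ball is the axis-aligned square that encloses the L2 ball of the same radius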

# 03 - SECURITY EVALUATIONS OF ML MODELS

To compare 2 ML models, we create another network to allow comparisons

In [None]:
# 2nd network
class Net2(torch.nn.Module):
    """Smaller fully connected classifier: 2 input features -> 4 -> 4 -> 3 class scores."""

    def __init__(self):
        super().__init__()
        nn = torch.nn
        self.fc1 = nn.Linear(2, 4)  # input -> first hidden layer
        self.fc2 = nn.Linear(4, 4)  # first hidden layer -> second hidden layer
        self.fc3 = nn.Linear(4, 3)  # second hidden layer -> one score per class
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw class scores (no activation after the last layer)."""
        hidden = self.relu(self.fc1(x))       # first layer + ReLU
        hidden = self.relu(self.fc2(hidden))  # second layer + ReLU
        return self.fc3(hidden)               # output layer, no activation
    
# Train the two networks to compare (Net and the smaller Net2) on the same
# training data, then print the accuracy of both on the test data.
tr_dataloader1 = DataLoader(TensorDataset(X_tr, y_tr), batch_size=32, shuffle=False)
ts_dataloader1 = DataLoader(TensorDataset(X_ts, y_ts), batch_size=5, shuffle=False)

# Model 1: Net
model1 = Net()
optimizer = torch.optim.Adam(model1.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
trainer = BasePyTorchTrainer(optimizer=optimizer, loss=criterion, epochs=50)
secmlt_model1 = BasePytorchClassifier(model1, trainer=trainer)
secmlt_model1.train(tr_dataloader1)

# Model 2: Net2, with its own optimizer and trainer (the loss object is reused)
model2 = Net2()
optimizer = torch.optim.Adam(model2.parameters(), lr=0.01)
trainer = BasePyTorchTrainer(optimizer=optimizer, loss=criterion, epochs=50)
secmlt_model2 = BasePytorchClassifier(model2, trainer=trainer)
secmlt_model2.train(tr_dataloader1)

accuracy1 = Accuracy()(secmlt_model1, ts_dataloader1)
accuracy2 = Accuracy()(secmlt_model2, ts_dataloader1)
print("Test accuracy model 1: {:.2f}%".format(accuracy1 * 100))
print("Test accuracy model 2: {:.2f}%".format(accuracy2 * 100))

## Security Evaluation Function and plot

### Maximum-confidence

In [None]:
# Security evaluation: the attack is repeated with increasing strength, so
# that the model is tested over a whole range of perturbation budgets.
def security_evaluation(model, dataloader, epsilons, iterations=50, stepsize=1e-1):
    """
    Measure the accuracy of a model under PGD attacks of increasing budget.

    For every value in ``epsilons`` an untargeted L2 PGD attack (maximum
    confidence, lb=-2 and ub=2) is run on the whole dataloader and the
    accuracy of the model on the resulting adversarial samples is recorded.

    Args:
        model: Model to attack and evaluate (wrapped for SecML-Torch).
        dataloader: Samples to attack.
        epsilons: Iterable of perturbation budgets, one attack per value.
        iterations: Number of PGD steps of every attack.
        stepsize: Step size of every attack.

    Returns:
        List with one robust-accuracy value per entry of ``epsilons``, in the
        same order.
    """
    def _robust_accuracy(budget):
        # A fresh attack is built for every budget
        pgd = PGD(
            perturbation_model=LpPerturbationModels.L2,
            num_steps=iterations,
            step_size=stepsize,
            y_target=None,
            epsilon=budget,
            lb=-2, ub=2,
            backend=Backends.NATIVE)
        attacked_loader = pgd(model, dataloader)
        return Accuracy()(model, attacked_loader)

    return [_robust_accuracy(budget) for budget in epsilons]

# 10 evenly spaced budgets between 0 and 3 (both included)
epsilons=np.linspace(0, 3, 10)
# Same budgets and same test data for both models, so the curves are comparable
sec_eval_model1 = security_evaluation(secmlt_model1, ts_dataloader1, epsilons=epsilons, iterations = 100)
sec_eval_model2 = security_evaluation(secmlt_model2, ts_dataloader1, epsilons=epsilons, iterations = 100)

In [None]:
# plt.plot(epsilons, sec_eval_model1, label='SecEval 1')
# plt.plot(epsilons, sec_eval_model2, label='SecEval 2')
# plt.xlabel('Epsilon')
# plt.ylabel('Robust Accuracy')
# plt.ylim(0, 1)
# plt.legend()
# plt.show()

### Minimum-norm

In [None]:
def min_norm_security_evaluation(model, dataloader, iterations=300, stepsize=1e-1):
    """
    Build a security evaluation curve from a single minimum-norm (FMN) attack.

    An untargeted L2 FMN attack (lb=-2, ub=2) is run once on the whole
    dataloader. The L2 distance between every adversarial sample and its
    original sample is then used as the budget at which that sample is
    evaded: the robust accuracy at distance ``d`` is the fraction of samples
    whose distance is strictly larger than ``d``.

    The accuracy on the adversarial samples is printed as a sanity check: the
    curve is only meaningful if the attacks were successful.

    Args:
        model: Model to attack and evaluate (wrapped for SecML-Torch).
        dataloader: Samples to attack; its dataset must expose ``tensors``
            (e.g. a TensorDataset) with the features in position 0.
        iterations: Number of FMN steps.
        stepsize: Step size of the attack.

    Returns:
        Tuple ``(robust_accuracy, distances)``: ``distances`` is the list of
        perturbation norms sorted in increasing order and ``robust_accuracy``
        the list of robust-accuracy values at those distances (therefore
        non-increasing).
    """
    fmn_attack = FMN(
        perturbation_model=LpPerturbationModels.L2,
        num_steps=iterations,
        step_size=stepsize,
        y_target=None,
        lb=-2,
        ub=2
    )
    adv_loader = fmn_attack(model, dataloader)
    # ensure that all attacks are successful
    print(f"Accuracy after attack:{Accuracy()(model, adv_loader) * 100:.2f}")

    x_adv = adv_loader.dataset.tensors[0]
    x_clean = dataloader.dataset.tensors[0]
    distances = torch.norm(x_adv - x_clean, dim=-1, p=2)
    n_samples = x_adv.shape[0]

    # For every budget (taken in increasing order) count the samples that are
    # NOT evaded yet, i.e. that need a strictly larger perturbation. Counting
    # them directly keeps the curve correct also when several samples share
    # the same distance (e.g. several zero-norm perturbations).
    sorted_distances = torch.sort(distances).values
    robust_accuracy = [(distances > d).sum().item() / n_samples for d in sorted_distances]
    return robust_accuracy, sorted_distances.tolist()

# Minimum-norm security evaluation of both models on the same test data:
# each call returns the robust-accuracy values and the matching distances.
min_norm_seceval_1, dist1 = min_norm_security_evaluation(secmlt_model1, ts_dataloader1)
min_norm_seceval_2, dist2 = min_norm_security_evaluation(secmlt_model2, ts_dataloader1)

In [None]:
# plt.plot(dist1, min_norm_seceval_1, label='MN SecEval 1')
# plt.plot(dist2, min_norm_seceval_2, label='MN SecEval 2')
# plt.xlabel('Epsilon')
# plt.ylabel('Robust Accuracy')
# plt.ylim(0, 1)
# plt.legend()
# plt.show()

# 05 - DEFENDING AGAINST ADVERSARIAL EXAMPLES

### Using FGSM

Here we use FGSM (Fast Gradient Sign Method), which computes the gradient of the loss with respect to the input, takes its sign, and moves the sample by epsilon along it (i.e. to the boundary of the LINF box of radius epsilon) -> a single large-step attack (non-iterative).

In [113]:
# Adversarial training with FGSM: at every step the batch is replaced by its
# one-step adversarial version and the network is trained on it.
epochs = 1000
lr = 1e-3
epsilon = 0.3 # we are estimating the amount of noise the attack could use

model= Net()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()

for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in tr_dataloader:
        # FGSM: Compute adversarial samples with 1-iteration attack
        model.eval()
        # Copy of the batch that tracks gradients with respect to the input
        x = inputs.clone().detach().requires_grad_(True)
        outputs = model(x)
        loss = criterion(outputs, labels)
        loss.backward()
        gradient = x.grad.data # gradient of the loss w.r.t. the input: direction that increases the error
        # Single step of size epsilon along the sign of the gradient
        inputs_adv = x.data + epsilon * torch.sign(gradient)

        # Compute loss and backpropagate as usual
        model.train()
        # Also discards the parameter gradients accumulated by the backward
        # pass of the attack above
        optimizer.zero_grad()
        outputs = model(inputs_adv)
        loss = criterion(outputs, labels)
        loss.backward()                 
        optimizer.step()
        # Sum of the per-sample losses (the batch loss is scaled by the batch size)
        running_loss += loss.item() * inputs.size(0)
    
    # Average adversarial loss per training sample, printed every 100 epochs
    epoch_loss = running_loss / len(tr_dataloader.dataset)
    if (epoch+1) % 100 == 0:
        print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

# Wrap the adversarially trained network for SecML-Torch
at_model = BasePytorchClassifier(model)

Epoch 100, Loss: 0.5008
Epoch 200, Loss: 0.4953
Epoch 300, Loss: 0.4867
Epoch 400, Loss: 0.4847
Epoch 500, Loss: 0.4840
Epoch 600, Loss: 0.4740
Epoch 700, Loss: 0.4771
Epoch 800, Loss: 0.4687
Epoch 900, Loss: 0.4686
Epoch 1000, Loss: 0.4742


In [116]:
#plot_decision_regions(model, X_ts, y_ts, title="Decision Regions after Adversarial Training")

### Using LINF PGD

In [111]:
from secmlt.adv.evasion.pgd import PGD
from secmlt.adv.backends import Backends
from secmlt.adv.evasion.perturbation_models import LpPerturbationModels

# Attack used to craft the adversarial samples during training:
# LINF PGD, 10 steps of size 0.01, epsilon 0.6, lb=-2 and ub=2.
pgd_attack_ex2 = PGD(
    perturbation_model=LpPerturbationModels.LINF,
    num_steps=10,
    step_size=0.01,
    epsilon=0.6,
    lb=-2, ub=2,
    backend=Backends.NATIVE
)

epochs = 2000
lr = 1e-3

model= Net()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()
# NOTE: the training loop below is written by hand; the trainer is only handed
# to the wrapper and its training routine is never called in this cell.
trainer = BasePyTorchTrainer(optimizer=optimizer, loss=criterion, epochs=epochs)

# Wrapper required by the attack; it shares `model`, so the attack always
# targets the current weights.
at_model = BasePytorchClassifier(model, trainer=trainer)

for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in tr_dataloader:
        # Compute adversarial samples with PGD
        model.eval()
        # The loader holds the whole mini-batch in ONE batch: its size comes
        # from the inputs instead of a hard-coded 32, so the code keeps
        # working if the batch size of tr_dataloader changes.
        # Presumably the loader returned by the attack reuses this batch
        # size, so next(iter(...)) yields one adversarial sample per label
        # -- confirm against the SecML-Torch version in use.
        current_tr_dataloader = DataLoader(TensorDataset(inputs, labels), batch_size=inputs.size(0))
        inputs_adv = pgd_attack_ex2(at_model, current_tr_dataloader)
        x_adv, y_adv = next(iter(inputs_adv))

        # Compute loss and backpropagate as usual
        model.train()
        optimizer.zero_grad()
        outputs = model(x_adv)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Sum of the per-sample losses (the batch loss is scaled by the batch size)
        running_loss += loss.item() * inputs.size(0)

    # Average adversarial loss per training sample, printed every 100 epochs
    epoch_loss = running_loss / len(tr_dataloader.dataset)
    if (epoch+1) % 100 == 0:
        print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

# Wrap the adversarially trained network for SecML-Torch
adv_model = BasePytorchClassifier(model)

Epoch 100, Loss: 0.0881
Epoch 200, Loss: 0.0865
Epoch 300, Loss: 0.0851
Epoch 400, Loss: 0.0840
Epoch 500, Loss: 0.0833
Epoch 600, Loss: 0.0828
Epoch 700, Loss: 0.0815
Epoch 800, Loss: 0.0810
Epoch 900, Loss: 0.0803
Epoch 1000, Loss: 0.0802
Epoch 1100, Loss: 0.0813
Epoch 1200, Loss: 0.0790
Epoch 1300, Loss: 0.0779
Epoch 1400, Loss: 0.0774
Epoch 1500, Loss: 0.0770
Epoch 1600, Loss: 0.0773
Epoch 1700, Loss: 0.0761
Epoch 1800, Loss: 0.0759
Epoch 1900, Loss: 0.0752
Epoch 2000, Loss: 0.0749


In [115]:
#plot_decision_regions(model, X_ts, y_ts, title="Decision Regions after Adversarial Training")