<a href="https://colab.research.google.com/github/manuelboi/MLsec_project/blob/main/MLsec_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Models:
1. Fixing Data Augmentation to Improve Adversarial Robustness
2. Robust Learning Meets Generative Models: Can Proxy Distributions Improve Adversarial Robustness?
3. MMA Training: Direct Input Space Margin Maximization through Adversarial Training

# General imports

In [None]:
%%capture

# Various
import matplotlib.pyplot as plt
import time
import importlib.util
import re

# Pytorch
import torch
from torch.utils.data import DataLoader, Subset
from torch.nn import CrossEntropyLoss
from torch.optim import SGD, Optimizer
from torch.optim.lr_scheduler import StepLR
import torchvision
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms

# RobustBench
!pip install git+https://github.com/manuelboi/robustbench
from robustbench.utils import load_model
from robustbench.eval import benchmark
from robustbench.data import load_cifar10
from robustbench.model_zoo.enums import ThreatModel

# Smoothing
!git clone https://github.com/matteoturnu/smoothing.git
!conda create -n smoothing
!conda activate smoothing
!conda install pytorch torchvision cudatoolkit=10.0 -c pytorch
!conda install scipy pandas statsmodels matplotlib seaborn
!pip install setGPU
from smoothing.code.core import Smooth
from smoothing.code.train import train, test
from smoothing.code.train_utils import AverageMeter, accuracy

# Setup

In [2]:
%%capture

# Seed PyTorch's global RNG so repeated runs of the notebook are comparable
seed = 0
torch.manual_seed(seed)

# Human-readable CIFAR-10 class names, indexed by label id
labels_dct = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat", 4: "deer", 5: "dog", 6: "frog", 7: "horse", 8: "ship", 9: "truck"}

# Use GPU if available, otherwise use CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("Computing device used: ", device)

# Training images are kept in the [0, 1] range produced by ToTensor().
# The previous Normalize((0.5,)*3, (0.5,)*3) step rescaled them to [-1, 1], which
# did not match the test images returned by load_cifar10 below (no such rescaling
# is applied to x_test in this notebook) and made `sigma` mean a different noise
# level during training than during certification.
transform = transforms.Compose([transforms.ToTensor()])

# Full CIFAR-10 training split (downloaded to ./data on first use)
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

# Optional quick-run switch: set to an integer to train on only the first
# `n_train_samples` images, or leave as None to use the whole training split.
n_train_samples = None
if n_train_samples is None:
  train_data = trainset
else:
  train_data = Subset(trainset, list(range(n_train_samples)))

# Loader feeding the smoothed-model training
batch_size = 64
trainloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=2)

# Load test samples for predict and certify
n_test_samples_benchmark = 1000
x_test, y_test = load_cifar10(n_test_samples_benchmark)
x_test, y_test = x_test.to(device), y_test.to(device)

# Experiment parameters shared by every model section below
epochs = 3 # used for training
sigma = 0.25 # gaussian noise standard deviation
n_examples = 20 # to be perturbed by AutoAttack
eps_L2 = 0.5 # perturbation budget for the L2 norm
eps_Linf = 8/255 # perturbation budget for the Linf norm
version = 'custom'
attacks_to_run=['apgd-ce', 'apgd-dlr', 'fab'] # Attacks to run on the models

# Functions

Function that bundles the download and configuration of a model

In [3]:
def load_configure_model(name, dataset, threat_model, device):
  """Fetch a pretrained model and get it ready for evaluation.

  Args:
    name: model identifier forwarded to `load_model` as `model_name`.
    dataset: dataset identifier forwarded to `load_model`.
    threat_model: threat model identifier forwarded to `load_model`.
    device: device the model is moved to.

  Returns:
    The loaded model, placed on `device` and switched to eval mode.
  """
  # Download (or reuse) the checkpoint and move it to the target device in one go
  net = load_model(model_name=name, dataset=dataset, threat_model=threat_model).to(device)

  # Evaluation mode
  net.eval()
  return net

Function for smooth model training

In [4]:
def train_model(trainloader, model, epochs, sigma, device, lr=1e-3):
  """Fine-tune `model` with the smoothing `train` routine.

  The model is trained IN PLACE: the returned object is the same instance that
  was passed in, so any reference to the "stock" model now points to the
  fine-tuned weights as well.

  Args:
    trainloader: iterable of training batches handed to `train`.
    model: torch module to fine-tune.
    epochs: number of passes over `trainloader`.
    sigma: noise level forwarded to `train`.
    device: device on which the loss is placed and that is forwarded to `train`.
    lr: SGD learning rate. Defaults to 1e-3, which is what `SGD(model.parameters())`
      resolves to on PyTorch versions where `lr` is optional.

  Returns:
    The same `model`, switched back to eval mode.
  """
  criterion = CrossEntropyLoss().to(device)
  optimizer = SGD(model.parameters(), lr=lr)
  scheduler = StepLR(optimizer, step_size=30)

  for epoch in range(epochs):
    train(trainloader, model, criterion, optimizer, epoch, sigma, device)
    # Advance the schedule once per epoch, after the optimizer has stepped;
    # passing the epoch index to step() is deprecated.
    scheduler.step()

  # The result is used straight away for certification and AutoAttack, so make
  # sure layers such as BatchNorm/Dropout are back in inference behaviour
  # regardless of the mode `train` left the model in.
  model.eval()

  return model

Function for measuring the performance of the smoothed model

In [5]:
def certify(model, sigma, x_text, y_test, L, device=None):
  """Certify every test sample with a smoothed classifier and report the results.

  Wraps `model` in `Smooth`, calls its `certify` method on each sample, prints
  the per-sample radii, the predicted and true classes, the mean radius and the
  accuracy, and returns the two summary numbers.

  Args:
    model: base classifier handed to `Smooth`.
    sigma: noise level handed to `Smooth`.
    x_text: test inputs, iterated sample by sample. The (misspelled) name is
      kept so existing keyword callers keep working; it is the x_test batch.
    y_test: tensor of ground-truth labels, aligned with `x_text`.
    L: norm identifier forwarded to `Smooth.certify`.
    device: device forwarded to `Smooth.certify`. Defaults to the device of
      `x_text` (assumes `x_text` is a tensor).

  Returns:
    Tuple `(accuracy, mean_radius)` of Python floats, accuracy in [0, 1].
  """
  n_classes = 10
  alpha = 0.05 # (1 - alpha) is the confidence level (in this case is 95%)
  n0 = 10 # number of samples for selection
  n = 20 # number of samples for estimation (certify) (too few samples but computation time is strongly affected with more)

  # Use the argument that was actually passed in: the body used to read the
  # notebook-level globals `x_test` and `device` instead of its own inputs.
  x_test = x_text
  if device is None:
    device = x_test.device

  smooth_model = Smooth(model, n_classes, sigma)

  top_classes = list()
  radiuses = list()
  for x, _ in zip(x_test, y_test):
    # A separate predict() call used to run here, but its result was
    # overwritten immediately by certify(), so it only cost extra forward passes.
    top_class, radius = smooth_model.certify(x, n0, n, alpha, batch_size=n0, device=device, L=L)
    top_classes.append(top_class)
    radiuses.append(radius)

  # Put the predictions next to the labels so the comparison is device-safe
  top_classes = torch.tensor(top_classes, dtype=torch.float64).to(y_test.device)
  correct_fraction = torch.mean(top_classes == y_test, dtype=torch.float64)
  print("Radiuses list: ", radiuses)
  radius = torch.mean(torch.Tensor(radiuses), dtype=torch.float64)


  print("Top classes: ", top_classes)
  print("Y classes: ", y_test)
  print(f"Average radious found by certify: {float(radius.item())}")
  percentage = float(correct_fraction.item()) * 100
  print(f"Accuracy found by certify function: {percentage:.2f}%")

  return float(correct_fraction.item()), float(radius.item())

Function that bundles the evaluation of a model: certification followed by AutoAttack

In [6]:
def test_model(model, sigma, x_test, y_test, norm, threat_model, n_examples, eps, batch_size, device, version, attacks_to_run):
  # Model prediction and certification on smoothed samples
  certify(model, sigma, x_test, y_test, norm)

  # AutoAttack on given model
  benchmark(model,
            threat_model=threat_model,
            n_examples=n_examples,
            eps=eps,
            batch_size=batch_size,
            device=device,
            version=version,
            attacks_to_run=attacks_to_run)

# Fixing Data Augmentation to Improve Adversarial Robustness (WideResNet-70-16)

L2

In [None]:
# Fetch the pretrained L2 checkpoint and put it on the compute device
model_1_L2 = load_configure_model(
    name='Rebuffi2021Fixing_70_16_cutmix_extra',
    dataset='cifar10',
    threat_model='L2',
    device=device,
)

# Baseline: certification + AutoAttack on the model as downloaded
test_model(
    model=model_1_L2, sigma=sigma, x_test=x_test, y_test=y_test, norm='L2',
    threat_model=ThreatModel.L2, n_examples=n_examples, eps=eps_L2,
    batch_size=batch_size, device=device, version=version, attacks_to_run=attacks_to_run,
)

# Noise training; train_model returns the very object it was given,
# so smoothed_model_1_L2 and model_1_L2 refer to the same (now fine-tuned) model
smoothed_model_1_L2 = train_model(
    trainloader=trainloader, model=model_1_L2, epochs=epochs, sigma=sigma, device=device,
)

# Same evaluation again, now on the fine-tuned model
test_model(
    model=smoothed_model_1_L2, sigma=sigma, x_test=x_test, y_test=y_test, norm='L2',
    threat_model=ThreatModel.L2, n_examples=n_examples, eps=eps_L2,
    batch_size=batch_size, device=device, version=version, attacks_to_run=attacks_to_run,
)

Linf

In [None]:
# Fetch the pretrained Linf checkpoint and put it on the compute device
model_1_Linf = load_configure_model(
    name='Rebuffi2021Fixing_70_16_cutmix_extra',
    dataset='cifar10',
    threat_model='Linf',
    device=device,
)

# Baseline: certification + AutoAttack on the model as downloaded
test_model(
    model=model_1_Linf, sigma=sigma, x_test=x_test, y_test=y_test, norm='Linf',
    threat_model=ThreatModel.Linf, n_examples=n_examples, eps=eps_Linf,
    batch_size=batch_size, device=device, version=version, attacks_to_run=attacks_to_run,
)

# Noise training; train_model returns the very object it was given,
# so smoothed_model_1_Linf and model_1_Linf refer to the same (now fine-tuned) model
smoothed_model_1_Linf = train_model(
    trainloader=trainloader, model=model_1_Linf, epochs=epochs, sigma=sigma, device=device,
)

# Same evaluation again, now on the fine-tuned model
test_model(
    model=smoothed_model_1_Linf, sigma=sigma, x_test=x_test, y_test=y_test, norm='Linf',
    threat_model=ThreatModel.Linf, n_examples=n_examples, eps=eps_Linf,
    batch_size=batch_size, device=device, version=version, attacks_to_run=attacks_to_run,
)

# Robust Learning Meets Generative Models: Can Proxy Distributions Improve Adversarial Robustness? (ResNet-18)

L2

In [7]:
# Fetch the pretrained L2 checkpoint and put it on the compute device
model_2_L2 = load_configure_model(
    name='Sehwag2021Proxy_R18',
    dataset='cifar10',
    threat_model='L2',
    device=device,
)

# Baseline: certification + AutoAttack on the model as downloaded
test_model(
    model=model_2_L2, sigma=sigma, x_test=x_test, y_test=y_test, norm='L2',
    threat_model=ThreatModel.L2, n_examples=n_examples, eps=eps_L2,
    batch_size=batch_size, device=device, version=version, attacks_to_run=attacks_to_run,
)

# Noise training; train_model returns the very object it was given,
# so smoothed_model_2_L2 and model_2_L2 refer to the same (now fine-tuned) model
smoothed_model_2_L2 = train_model(
    trainloader=trainloader, model=model_2_L2, epochs=epochs, sigma=sigma, device=device,
)

# Same evaluation again, now on the fine-tuned model
test_model(
    model=smoothed_model_2_L2, sigma=sigma, x_test=x_test, y_test=y_test, norm='L2',
    threat_model=ThreatModel.L2, n_examples=n_examples, eps=eps_L2,
    batch_size=batch_size, device=device, version=version, attacks_to_run=attacks_to_run,
)

Downloading models/cifar10/L2/Sehwag2021Proxy_R18.pt (gdrive_id=1zPjjZj9wujBNkAmHHHIikem6_aIjMhXG).


Downloading...
From (original): https://drive.google.com/uc?id=1zPjjZj9wujBNkAmHHHIikem6_aIjMhXG
From (redirected): https://drive.google.com/uc?id=1zPjjZj9wujBNkAmHHHIikem6_aIjMhXG&confirm=t&uuid=a1acc821-0a70-4fde-92cc-4c3db520fb7a
To: /content/models/cifar10/L2/Sehwag2021Proxy_R18.pt
100%|██████████| 44.8M/44.8M [00:01<00:00, 29.8MB/s]
  checkpoint = torch.load(model_path, map_location=torch.device('cpu'))


Radiuses list:  [0.027892358752471248, 0.2710835305139194, 0.2710835305139194, 0.0, 0.2710835305139194, 0.2710835305139194, 0.1437700531119232, 0.0, 0.2710835305139194, 0.0, 0.0, 0.2710835305139194, 0.2710835305139194, 0.2710835305139194, 0.2710835305139194, 0.2710835305139194, 0.2710835305139194, 0.0, 0.06267171043754752, 0.2710835305139194, 0.0, 0.1437700531119232, 0.2710835305139194, 0.2710835305139194, 0.10062108582463443, 0.2710835305139194, 0.1963528800840948, 0.2710835305139194, 0.0, 0.2710835305139194, 0.2710835305139194, 0.0, 0.06267171043754752, 0.2710835305139194, 0.0, 0.2710835305139194, 0.0, 0.1963528800840948, 0.2710835305139194, 0.0, 0.10062108582463443, 0.1437700531119232, 0.0, 0.2710835305139194, 0.0, 0.2710835305139194, 0.0, 0.0, 0.1437700531119232, 0.2710835305139194, 0.2710835305139194, 0.06267171043754752, 0.2710835305139194, 0.0, 0.1963528800840948, 0.1963528800840948, 0.1437700531119232, 0.1437700531119232, 0.0, 0.1963528800840948, 0.06267171043754752, 0.19635288


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.10/dist-packages/autoattack/checks.py", line 100, in check_dynamic
    sys.settrace(tracefunc)



Clean accuracy: 100.00%
using custom version including apgd-ce, apgd-dlr, fab.



sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/local/lib/python3.10/dist-packages/autoattack/checks.py", line 102, in check_dynamic
    sys.settrace(None)



initial accuracy: 100.00%
apgd-ce - 1/1 - 2 out of 20 successfully perturbed
robust accuracy after APGD-CE: 90.00% (total time 8.5 s)
apgd-dlr - 1/1 - 0 out of 18 successfully perturbed
robust accuracy after APGD-DLR: 90.00% (total time 17.5 s)
fab - 1/1 - 0 out of 18 successfully perturbed
robust accuracy after FAB: 90.00% (total time 101.7 s)
max L2 perturbation: 0.50000, nan in tensor: 0, max: 1.00000, min: 0.00000
robust accuracy: 90.00%
Adversarial accuracy: 90.00%




Epoch: [0][0/782]	Time 0.361 (0.361)	Data 0.132 (0.132)	Loss 0.1065 (0.1065)	Acc@1 98.438 (98.438)	Acc@5 100.000 (100.000)
Epoch: [0][1/782]	Time 0.026 (0.194)	Data 0.002 (0.067)	Loss 0.0960 (0.1012)	Acc@1 100.000 (99.219)	Acc@5 100.000 (100.000)
Epoch: [0][2/782]	Time 0.051 (0.146)	Data 0.003 (0.046)	Loss 0.1182 (0.1069)	Acc@1 96.875 (98.438)	Acc@5 100.000 (100.000)
Epoch: [0][3/782]	Time 0.055 (0.123)	Data 0.001 (0.035)	Loss 0.1349 (0.1139)	Acc@1 98.438 (98.438)	Acc@5 100.000 (100.000)
Epoch: [0][4/782]	Time 0.056 (0.110)	Data 0.002 (0.028)	Loss 0.1200 (0.1151)	Acc@1 95.312 (97.812)	Acc@5 100.000 (100.000)
Epoch: [0][5/782]	Time 0.056 (0.101)	Data 0.003 (0.024)	Loss 0.1406 (0.1193)	Acc@1 96.875 (97.656)	Acc@5 100.000 (100.000)
Epoch: [0][6/782]	Time 0.056 (0.094)	Data 0.003 (0.021)	Loss 0.0913 (0.1153)	Acc@1 100.000 (97.991)	Acc@5 100.000 (100.000)
Epoch: [0][7/782]	Time 0.058 (0.090)	Data 0.002 (0.019)	Loss 0.1605 (0.1210)	Acc@1 92.188 (97.266)	Acc@5 100.000 (100.000)
Epoch: [0][8/7

Linf

In [None]:
# Fetch the pretrained Linf checkpoint and put it on the compute device
model_2_Linf = load_configure_model(
    name='Sehwag2021Proxy_R18',
    dataset='cifar10',
    threat_model='Linf',
    device=device,
)

# Baseline: certification + AutoAttack on the model as downloaded
test_model(
    model=model_2_Linf, sigma=sigma, x_test=x_test, y_test=y_test, norm='Linf',
    threat_model=ThreatModel.Linf, n_examples=n_examples, eps=eps_Linf,
    batch_size=batch_size, device=device, version=version, attacks_to_run=attacks_to_run,
)

# Noise training; train_model returns the very object it was given,
# so smoothed_model_2_Linf and model_2_Linf refer to the same (now fine-tuned) model
smoothed_model_2_Linf = train_model(
    trainloader=trainloader, model=model_2_Linf, epochs=epochs, sigma=sigma, device=device,
)

# Same evaluation again, now on the fine-tuned model.
# The positional form of this call left out `device`, which shifted `version`
# and `attacks_to_run` one slot to the left and raised a TypeError for the
# missing last argument; keyword arguments make every slot explicit.
test_model(
    model=smoothed_model_2_Linf, sigma=sigma, x_test=x_test, y_test=y_test, norm='Linf',
    threat_model=ThreatModel.Linf, n_examples=n_examples, eps=eps_Linf,
    batch_size=batch_size, device=device, version=version, attacks_to_run=attacks_to_run,
)

Downloading models/cifar10/Linf/Sehwag2021Proxy_R18.pt (gdrive_id=1-ZgoSlD_AMhtXdnUElilxVXnzK2DcHuu).


Downloading...
From (original): https://drive.google.com/uc?id=1-ZgoSlD_AMhtXdnUElilxVXnzK2DcHuu
From (redirected): https://drive.google.com/uc?id=1-ZgoSlD_AMhtXdnUElilxVXnzK2DcHuu&confirm=t&uuid=2c132801-935f-48b2-b23f-2d5dbeb09588
To: /content/models/cifar10/Linf/Sehwag2021Proxy_R18.pt
100%|██████████| 44.8M/44.8M [00:02<00:00, 15.2MB/s]


# MMA Training: Direct Input Space Margin Maximization through Adversarial Training (WideResNet-28-4)

L2

In [None]:
# Fetch the pretrained L2 checkpoint and put it on the compute device
model_3_L2 = load_configure_model(
    name='Ding2020MMA',
    dataset='cifar10',
    threat_model='L2',
    device=device,
)

# Baseline: certification + AutoAttack on the model as downloaded
test_model(
    model=model_3_L2, sigma=sigma, x_test=x_test, y_test=y_test, norm='L2',
    threat_model=ThreatModel.L2, n_examples=n_examples, eps=eps_L2,
    batch_size=batch_size, device=device, version=version, attacks_to_run=attacks_to_run,
)

# Noise training; train_model returns the very object it was given,
# so smoothed_model_3_L2 and model_3_L2 refer to the same (now fine-tuned) model
smoothed_model_3_L2 = train_model(
    trainloader=trainloader, model=model_3_L2, epochs=epochs, sigma=sigma, device=device,
)

# Same evaluation again, now on the fine-tuned model
test_model(
    model=smoothed_model_3_L2, sigma=sigma, x_test=x_test, y_test=y_test, norm='L2',
    threat_model=ThreatModel.L2, n_examples=n_examples, eps=eps_L2,
    batch_size=batch_size, device=device, version=version, attacks_to_run=attacks_to_run,
)

Linf

In [None]:
# Fetch the pretrained Linf checkpoint and put it on the compute device
model_3_Linf = load_configure_model(
    name='Ding2020MMA',
    dataset='cifar10',
    threat_model='Linf',
    device=device,
)

# Baseline: certification + AutoAttack on the model as downloaded
test_model(
    model=model_3_Linf, sigma=sigma, x_test=x_test, y_test=y_test, norm='Linf',
    threat_model=ThreatModel.Linf, n_examples=n_examples, eps=eps_Linf,
    batch_size=batch_size, device=device, version=version, attacks_to_run=attacks_to_run,
)

# Noise training; train_model returns the very object it was given,
# so smoothed_model_3_Linf and model_3_Linf refer to the same (now fine-tuned) model
smoothed_model_3_Linf = train_model(
    trainloader=trainloader, model=model_3_Linf, epochs=epochs, sigma=sigma, device=device,
)

# Same evaluation again, now on the fine-tuned model.
# The positional form of this call left out `device`, which shifted `version`
# and `attacks_to_run` one slot to the left and raised a TypeError for the
# missing last argument; keyword arguments make every slot explicit.
test_model(
    model=smoothed_model_3_Linf, sigma=sigma, x_test=x_test, y_test=y_test, norm='Linf',
    threat_model=ThreatModel.Linf, n_examples=n_examples, eps=eps_Linf,
    batch_size=batch_size, device=device, version=version, attacks_to_run=attacks_to_run,
)