# Pessimistic and Optimistic Bound on the Robustness Radius

In this notebook we try to study the different bounds of the robustness radius given by several methods (lip certificate, adv attacks, CROWN).

In [1]:
import os
os.environ["KERAS_BACKEND"] = "torch"

In [2]:
import keras
from deel.lip.layers import (
    SpectralDense,
    SpectralConv2D,
    ScaledL2NormPooling2D,
    FrobeniusDense,
)
from keras.models import Sequential
# from deel.lip.model import Sequential
from deel.lip.activations import GroupSort
from deel.lip.losses import MulticlassHKR, MulticlassKR
from keras.layers import Input, Flatten, Dense
from keras.optimizers import Adam
from keras.datasets import mnist
from keras.utils import to_categorical
import numpy as np
import keras.ops as K
from keras.utils import to_categorical
import matplotlib.pyplot as plt

### Load MNIST08 Dataset

the dataset has ten classes.

In [3]:
# first we select the two classes
selected_classes = [0, 8]  # must be two classes as we perform binary classification


def prepare_data(x, y, class_a=0, class_b=8):
    """
    This function convert the MNIST data to make it suitable for our binary classification
    setup.
    """
    # select items from the two selected classes
    mask = (y == class_a) + (
        y == class_b
    )  # mask to select only items from class_a or class_b
    x = x[mask]
    y = y[mask]
    x = x.astype("float32")
    y = y.astype("float32")
    # convert from range int[0,255] to float32[-1,1]
    x /= 255
    x = x.reshape((-1, 28, 28, 1))
    # change label to binary classification {-1,1}
    y[y == class_a] = 1.0
    y[y == class_b] = 0.0
    return x, y.reshape((-1, 1))


# now we load the dataset
(x_train, y_train_ord), (x_test, y_test_ord) = mnist.load_data()
# prepare the data
x_train, y_train = prepare_data(
    x_train, y_train_ord, selected_classes[0], selected_classes[1]
)
x_test, y_test = prepare_data(
    x_test, y_test_ord, selected_classes[0], selected_classes[1]
)
y_test_ord = y_test[:,0]
y_train_ord = y_train[:,0]
y_test = to_categorical(y_test)
y_train = to_categorical(y_train)

In [4]:
x_train = np.transpose(x_train,(0,3,1,2))
x_test = np.transpose(x_test,(0,3,1,2))

In [None]:
vanilla_model = keras.models.load_model("/home/aws_install/robustess_project/lip_models/demo3_FC_vanilla_MNIST08_channelfirst_False_disj_Neurons.keras")
vanilla_model.compile(
    # decreasing alpha and increasing min_margin improve robustness (at the cost of accuracy)
    # note also in the case of lipschitz networks, more robustness require more parameters.
    loss=MulticlassHKR(alpha=100, min_margin=0.25),
    optimizer=Adam(1e-4),
    metrics=["accuracy", MulticlassKR()],)
vanilla_model.summary()

In [29]:
layer = vanilla_model.layers[-1]

In [30]:
new_dense = Dense(units=4, activation=None, use_bias=True)

In [31]:
vanilla_model_bis = Sequential(vanilla_model.layers[:-1] + [new_dense])

In [32]:
new_dense(layer.input) # compile and erase weights

<KerasTensor shape=(None, 4), dtype=float32, sparse=False, ragged=False, name=keras_tensor_26>

In [33]:
w_temp = np.zeros((16,4), dtype = 'float32')

In [34]:
b_temp = np.zeros((4,))
b_temp[2:] = -10000

In [35]:
w = layer.get_weights()[0] #(16,2)

In [36]:
w.shape

(16, 2)

In [37]:
w_temp[:,:2] = w

In [38]:
new_dense.set_weights([w_temp,b_temp])

In [39]:
vanilla_model_bis.summary()


In [40]:
vanilla_model_bis.layers[-1].get_weights()[0]-w_temp

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]], dtype=float32)

In [41]:
vanilla_model_bis.predict(x_test[0:1])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step


array([[-2.5456893e+00,  2.5312362e+00, -1.0000000e+04, -1.0000000e+04]],
      dtype=float32)

In [6]:
vanilla_model.predict(x_test[0:1])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step


array([[-2.5456893,  2.5312362]], dtype=float32)

In [7]:
# strategy: first
# we select a sample from each class.
images_list = []
labels_list = []
# select only a few element from the test set
# selected = np.random.choice(len(y_test_ord), 500)
sub_y_test_ord = y_test_ord[:400]
sub_x_test = x_test[:400]
# drop misclassified elements
misclassified_mask = K.equal(
    K.argmax(vanilla_model.predict(sub_x_test, verbose=0), axis=-1), sub_y_test_ord
)
sub_x_test = sub_x_test[misclassified_mask.detach().cpu().numpy()]
sub_y_test_ord = sub_y_test_ord[misclassified_mask.detach().cpu().numpy()]
for i in range(2):
    # select the 20 firsts elements of the ith label
    label_mask = sub_y_test_ord == i
    x = sub_x_test[label_mask][:100]
    y = sub_y_test_ord[label_mask][:100]
    # convert it to tensor for use with foolbox
    images = K.convert_to_tensor(x.astype("float32"), dtype="float32")
    labels = K.convert_to_tensor(y, dtype="int64")
    # repeat the input 10 times, one per misclassification target
    for j in range(100):
        images_list.append(images[j])
        labels_list.append(labels[j])
images = K.convert_to_tensor(images_list)
labels = K.convert_to_tensor(labels_list)
labels.shape

torch.Size([200])

images shape = (nombre classes, nbr de points , channels, dim img 1, dim img 2)

### Get the Pessimistic Radius via Lip Constraints (l2 norm)

In [22]:
# def compute_binary_certificate(images, model, L=1):    
#     values = model(images)[:,0]
#     certificates = np.abs(values.detach().cpu().numpy())/L
#     return certificates   

In [8]:
def compute_certificate(images, model, L=1):    
    values, _ = K.top_k(model(images), k=2)
    certificates = (values[:, 0] - values[:, 1]) / (np.sqrt(2)*L)
    return certificates.detach().cpu().numpy()   

In [9]:
lip_radius = compute_certificate(images, vanilla_model)

In [10]:
lip_radius[:10]

array([1.8217466, 2.0785058, 2.117814 , 3.9895344, 3.5977595, 3.214799 ,
       2.146869 , 2.9655879, 3.3513803, 1.6075324], dtype=float32)

### Get Optimistic Radius via AutoAttacks

In [24]:
import keras.ops as K
import matplotlib.pyplot as plt
import torchattacks
import torch
import torch.nn as nn
import torchattacks
from robustbench.utils import clean_accuracy

  from .autonotebook import tqdm as notebook_tqdm


In [25]:
lip_radius[:1]

array([1.4315052], dtype=float32)

In [26]:
import torch

In [29]:
var = torch.tensor(images[:1].cuda(), requires_grad=True)

y = vanilla_model(var)

  var = torch.tensor(images[:1].cuda(), requires_grad=True)


In [30]:
select_output = y[0, 0]
select_output.backward()
gradient_vanilla = var.grad.cpu().detach().numpy()

In [31]:
var_bis = torch.tensor(images[:1].cuda(), requires_grad=True)

y_bis = vanilla_model_bis(var_bis)

  var_bis = torch.tensor(images[:1].cuda(), requires_grad=True)


In [32]:
select_output = y_bis[0, 0]
select_output.backward()
gradient_bis = var_bis.grad.cpu().detach().numpy()

In [33]:
gradient_bis-gradient_vanilla

array([[[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

FGSM ne donne pas la même attaque sur les deux modèles

In [34]:
atk = torchattacks.FGSM(vanilla_model,eps=1)
adv_image_bis1 = atk(images[:1], labels[:1])

atk = torchattacks.FGSM(vanilla_model_bis,eps=1)
adv_image_bis2 = atk(images[:1], labels[:1])

(adv_image_bis2 - adv_image_bis1).min()

tensor(0., device='cuda:0')

Pareil PGD l2

In [35]:
atk = torchattacks.PGDL2(vanilla_model_bis, eps=2, alpha=2/5, steps=10, random_start=False)
adv_image_bis1 = atk(images[:1], labels[:1])

atk = torchattacks.PGDL2(vanilla_model, eps=2, alpha=2/5, steps=10, random_start=False)
adv_image_bis2 = atk(images[:1], labels[:1])

(adv_image_bis2 - adv_image_bis1).min()

tensor(0., device='cuda:0')

Attention quand on vient chercher si l'attaque a fonctionné il faut faire argmax sur les deux premiers neuronnes uniquement !

In [36]:
K.argmax(vanilla_model(adv_image_bis1)[:2], axis=1) == labels[:1]

tensor([False], device='cuda:0')

In [56]:
lip_radius[:1]

array([1.4315052], dtype=float32)

In [136]:
atk = torchattacks.AutoAttack(vanilla_model_bis, norm='L2', eps=1.521, version="standard", n_classes=2, verbose=True, seed=1)
adv_image_2 = atk(images[:1], labels[:1])

-------------------------- running L2-attack with epsilon 1.5210 --------------------------
initial accuracy: 100.00%
parameters:  10 2 1 1
iteration: 0 - Best loss: 0.146943
iteration: 1 - Best loss: 0.593290
iteration: 2 - Best loss: 0.689178
iteration: 3 - Best loss: 0.689902
iteration: 4 - Best loss: 0.689902
iteration: 5 - Best loss: 0.689902
iteration: 6 - Best loss: 0.689902
iteration: 7 - Best loss: 0.689902
iteration: 8 - Best loss: 0.689902
iteration: 9 - Best loss: 0.689902
restart 0 - robust accuracy: 100.00% - cum. time: 0.1 s
-------------------------- running L2-attack with epsilon 1.5210 --------------------------
initial accuracy: 100.00%
parameters:  10 2 1 1
iteration: 0 - Best loss: -0.000184
iteration: 1 - Best loss: -0.000021
iteration: 2 - Best loss: -0.000001
iteration: 3 - Best loss: -0.000000
iteration: 4 - Best loss: -0.000000
iteration: 5 - Best loss: -0.000000
iteration: 6 - Best loss: -0.000000
iteration: 7 - Best loss: -0.000000
iteration: 8 - Best loss: 

In [128]:
atk = torchattacks.AutoAttack(vanilla_model_bis, norm='L2', eps=1.522, version="standard", n_classes=4, verbose=True, seed=1)
adv_image_4 = atk(images[:1], labels[:1])

-------------------------- running L2-attack with epsilon 1.5220 --------------------------
initial accuracy: 100.00%
parameters:  10 2 1 1
iteration: 0 - Best loss: 0.146959
iteration: 1 - Best loss: 0.593803
iteration: 2 - Best loss: 0.689830
iteration: 3 - Best loss: 0.690554
iteration: 4 - Best loss: 0.690554
iteration: 5 - Best loss: 0.690554
iteration: 6 - Best loss: 0.690554
iteration: 7 - Best loss: 0.690554
iteration: 8 - Best loss: 0.690554
iteration: 9 - Best loss: 0.690554
restart 0 - robust accuracy: 100.00% - cum. time: 0.0 s
-------------------------- running L2-attack with epsilon 1.5220 --------------------------
initial accuracy: 100.00%
parameters:  10 2 1 1
iteration: 0 - Best loss: -0.000184
iteration: 1 - Best loss: -0.000021
iteration: 2 - Best loss: -0.000001
iteration: 3 - Best loss: 0.000000
iteration: 4 - Best loss: 0.000000
iteration: 5 - Best loss: 0.000000
iteration: 6 - Best loss: 0.000000
iteration: 7 - Best loss: 0.000000
iteration: 8 - Best loss: 0.000

In [137]:
(adv_image_4 - adv_image_2).max()

tensor(0.2026, device='cuda:0', grad_fn=<MaxBackward1>)

In [130]:
K.argmax(vanilla_model(adv_image_2), axis=1)

tensor([1], device='cuda:0', dtype=torch.int32)

In [131]:
K.argmax(vanilla_model(adv_image_4), axis=1) == labels[:1]

tensor([False], device='cuda:0')

In [132]:
labels[:1]

tensor([0], device='cuda:0')

In [133]:
vanilla_model_bis(adv_image)

tensor([[ 9.8077e-01, -1.0437e+00, -1.0000e+04, -1.0000e+04]], device='cuda:0',
       grad_fn=<AddBackward0>)

In [None]:
K.argmax(vanilla_model_bis(adv_image), axis=1)

tensor([2], device='cuda:0', dtype=torch.int32)

In [None]:
def single_compute_optimistic_radius_PGD(image, target, certificate, model, n_iter = 10):

    # we find a multiple of the certificate such as we find an adversarial attack
    max_iter = 30
    optimistic_radius = 0
    k=0
    while (optimistic_radius == 0):
        k = k+1
        eps_current = k*certificate[0]
        atk_van = torchattacks.PGDL2(model, eps=eps_current, alpha=eps_current/5, steps=10, random_start=True)
        adv_image = atk_van(image, target)
        # return 0 if the attack doesn't work
        optimistic_radius = (image - adv_image).square().sum(dim=(1, 2, 3)).sqrt() if (keras.ops.argmax(vanilla_model(adv_image), axis=1) != target) else 0
    print("We found an adversarial attack for eps=", k*certificate)
    initial_eps = k*certificate[0]
    # We use dichotomy algorithm to fine the smallest optimistic radius
    print("Launching Dichotomy :")
    d_up = k*certificate[0]
    d_low = 0
    attack_succeeded = False
    i= 0
    while (i<n_iter or attack_succeeded == False) and i<max_iter:
        eps_current = (d_up+d_low)/2
        atk_van = torchattacks.PGDL2(model, eps=eps_current, alpha=eps_current/5, steps=10, random_start=True)
        adv_image = atk_van(image, target)
        # return 0 if the attack doesn't work
        optimistic_radius = (image - adv_image).square().sum(dim=(1, 2, 3)).sqrt() if (keras.ops.argmax(vanilla_model(adv_image), axis=1) != target) else 0
        if optimistic_radius == 0:
            attack_succeeded = False
            d_low = eps_current
        else:
            attack_succeeded = True
            d_up = eps_current
        print(i, eps_current, attack_succeeded)
        i = i+1
    # traitement non convergence
    if i==max_iter:
        eps_current = initial_eps
    return eps_current


In [None]:
single_compute_optimistic_radius_PGD(images[2:3], labels[2:3], lip_radius[2:3], vanilla_model, n_iter = 10)

We found an adversarial attack for eps= [5.998551]
Launching Dichotomy :
0 2.9992754 False
1 4.4989133 True
2 3.7490945 True
3 3.374185 True
4 3.1867304 True
5 3.0930028 False
6 3.1398666 False
7 3.1632986 True
8 3.1515827 True
9 3.1457248 True


np.float32(3.1457248)

In [None]:
def single_compute_optimistic_radius_AA(image, target, certificate, model, n_iter = 10):
    # we find a multiple of the certificate such as we find an adversarial attack
    optimistic_radius = 0
    k=0
    while (optimistic_radius == 0):
        k = k+1
        eps_current = k*certificate[0]
        atk = torchattacks.AutoAttack(model, norm='L2', eps=eps_current, version="standard", n_classes=2)
        print(type(atk._autoattack))
        atk._autoattack.attacks.pop(1)
        print(atk._autoattack.attacks)
        # print(type(atk._attacks))
        # atk.apgd_t = False  # Désactive APGD targeted
        # atk.apgd_ce = True  # Active APGD Cross-Entropy
        # atk.apgd_mt = False  # Désactiver APGD Multi-targeted
        atk = torchattacks.AutoAttack(model, norm='L2', eps=eps_current, version="standard", n_classes=2)
        adv_image = atk(image, target)
        optimistic_radius = (image - adv_image).square().sum(dim=(1, 2, 3)).sqrt()
        print(optimistic_radius)
    print("We found an adversarial attack for eps=", k*certificate)

    # We use dichotomy algorithm to fine the smallest optimistic radius
    print("Launching Dichotomy :")
    d_up = k*certificate[0]
    d_low = 0
    attack_succeeded = False
    i= 0
    while (i<n_iter or attack_succeeded == False):
        eps_current = (d_up+d_low)/2
        atk = torchattacks.AutoAttack(model, norm='L2', eps=eps_current)
        adv_image = atk(image, target)
        optimistic_radius = (image - adv_image).square().sum(dim=(1, 2, 3)).sqrt()
        if optimistic_radius == 0:
            attack_succeeded = False
            d_low = eps_current
        else:
            attack_succeeded = True
            d_up = eps_current
        print(i, eps_current, attack_succeeded)
        i = i+1
    return eps_current

In [None]:
single_compute_optimistic_radius_AA(images[2:3], labels[2:3], lip_radius[2:3], vanilla_model, n_iter = 10)

<class 'torchattacks.wrappers.multiattack.MultiAttack'>
[APGD(model_name=Sequential, device=cuda:0, attack_mode=default, targeted=False, normalization_used=False, eps=2.9992754459381104, steps=10, norm=L2, n_restarts=1, seed=1744118861.6536021, loss=ce, eot_iter=1, thr_decr=0.75, verbose=False), FAB(model_name=Sequential, device=cuda:0, attack_mode=default, targeted=False, normalization_used=False, norm=L2, n_restarts=1, eps=2.9992754459381104, alpha_max=0.1, eta=1.05, beta=0.9, steps=10, verbose=False, seed=1744118861.6536927, target_class=None, multi_targeted=True, n_target_classes=1), Square(model_name=Sequential, device=cuda:0, attack_mode=default, targeted=False, normalization_used=False, norm=L2, n_queries=5000, eps=2.9992754459381104, p_init=0.8, n_restarts=1, seed=1744118861.6537397, verbose=False, loss=margin, rescale_schedule=True)]


IndexError: index -3 is out of bounds for dimension 1 with size 2

In [None]:
eps_PGD = []
eps_AA = []

In [None]:
for i in range(2,10):
    eps_AA.append(single_compute_optimistic_radius_AA(images[i:i+1], labels[i:i+1], lip_radius[i:i+1], vanilla_model, n_iter = 10))

IndexError: index -3 is out of bounds for dimension 1 with size 2

In [None]:
# for i in range(len(images)):
for i in range(10):
    eps_AA.append(single_compute_optimistic_radius_PGD(images[i:i+1], labels[i:i+1], lip_radius[i:i+1], vanilla_model, n_iter = 10))

We found an adversarial attack for eps= [2.3915052]
Launching Dichotomy :
0 1.1957526 False
1 1.7936289 False
2 2.092567 True
3 1.943098 False
4 2.0178325 False
5 2.0551996 True
6 2.0365162 False
7 2.045858 False
8 2.0505288 True
9 2.0481935 False
10 2.0493612 True
We found an adversarial attack for eps= [0.47378108]
Launching Dichotomy :
0 0.23689054 False
1 0.3553358 False
2 0.41455844 False
3 0.44416976 False
4 0.45897543 True
5 0.4515726 True
6 0.44787118 True
7 0.44602048 True
8 0.44509512 False
9 0.4455578 False
10 0.44578916 True
We found an adversarial attack for eps= [1.2187829]
Launching Dichotomy :
0 0.60939145 False
1 0.9140872 False
2 1.0664351 False
3 1.142609 False
4 1.180696 False
5 1.1997395 True
6 1.1902177 True
7 1.1854569 False
8 1.1878374 False
9 1.1890275 False
10 1.1896226 False
11 1.1899202 True
We found an adversarial attack for eps= [1.2469541]
Launching Dichotomy :
0 0.62347704 False
1 0.9352156 False
2 1.0910848 True
3 1.0131502 False
4 1.0521176 False
5 1.0

In [None]:
# Print results
print("Image #     Certificate     Distance to adversarial")
print("---------------------------------------------------")
for i in range(10):
    print(f"Image {i}        {lip_radius[i]:.3f}                {eps_AA[i]:.2f}")

Image #     Certificate     Distance to adversarial
---------------------------------------------------
Image 0        0.342                2.01
Image 1        0.068                0.45
Image 2        0.203                2.05
Image 3        0.178                0.45
Image 4        0.294                2.02
Image 5        0.296                0.46
Image 6        0.070                1.21
Image 7        0.274                1.08
Image 8        0.219                1.71
Image 9        0.580                1.72


In [None]:
eps_AA

[np.float32(2.0128279),
 np.float32(0.44785672),
 np.float32(2.0478597),
 np.float32(0.45296064)]

### Generating Dataframe

In [None]:
import pandas as pd

In [None]:
# penser à tout convertir en numpy
total_points = 200

# Création du DataFrame avec une colonne d'index de 1 à 200
df = pd.DataFrame({
    'Index': np.arange(1, total_points + 1),
    'Label_GT': labels,  
    'Label_Predit': torch.argmax(vanilla_model(images), dim=1),  
    'Constante_Lipschitz': np.ones(total_points), 
    'Epsilon_Robuste': lip_radius,
    'Epsilon_Adv_AA': np.random.rand(total_points),
    'Epsilon_Adv_PGD': np.random.rand(total_points)
})

# Affichage des premières lignes du DataFrame
print(df.head())

   Index  Label_GT  Label_Predit  Constante_Lipschitz  Epsilon_Robuste  \
0      1         6             3             0.350912         0.372034   
1      2         6             9             0.191955         0.136486   
2      3         8             5             0.192445         0.531047   
3      4         7             7             0.536921         0.979334   
4      5         9             6             0.710001         0.329805   

   Epsilon_Adv_AA  Epsilon_Adv_PGD  
0        0.223536         0.717368  
1        0.694969         0.661540  
2        0.109589         0.149326  
3        0.061458         0.299948  
4        0.087260         0.351164  
