# Notebook contains summary of adversarial robustness of different model

In [1]:
import sys, os
d = os.path.dirname(os.getcwd())
sys.path.insert(0, d)

In [2]:
import torch
print(torch.__version__)
print(torch.cuda.is_available())

import torch.nn as nn
import torch.nn.functional as F
from torch import optim

import torchvision
from torchvision.datasets import CIFAR10
from torchvision.transforms import transforms

from torch.utils.data import DataLoader

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from utils.attacks import fgsm, pgd
from utils.utils import plot_images
from utils.train_utils import eval_epoch
from models.resnet import ResNet, SparseResNet
from models.resnet_FReLU import FResNet, FSparseResNet

1.7.0
True


In [3]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [4]:
test_transform = transforms.Compose([
    transforms.ToTensor(),
])

ds = CIFAR10('../data', train=False, target_transform=None, download=True, transform=test_transform)
test_loader = DataLoader(ds, 1000, shuffle=True)
for X, y in test_loader:
    X, y = X.to(device), y.to(device)
    break

Files already downloaded and verified


In [5]:
def clean_acc(model, loader):
    clean_correct = 0
    for X, y in loader:
        X, y = X.to(device), y.to(device)  
        clean_correct += (model(X).max(dim=1)[1] == y).sum().item()

    return round(clean_correct / len(loader.dataset), 4)

def adv_acc(model, loader, loss_fn, attack_method, attack_params):
    adv_correct = [0] * len(attack_params)
    for X, y in loader:
        X, y = X.to(device), y.to(device)
        for i, p in enumerate(attack_params):
            noise = attack_method(model, X, y, loss_fn, epsilon=p["epsilon"], 
                                  alpha=p["alpha"], num_iter=p["num_iter"])
            adv_correct[i] += (model(X+noise).max(dim=1)[1] == y).sum().item()

    return [round(a/len(loader.dataset), 4) for a in adv_correct]

In [6]:
attack1 = {
    "id": 0,
    "name": "eps=8,a=2,i=10",
    "epsilon": 8/255,
    "alpha": 2/255,
    "num_iter": 10
}

attack2 = {
    "id": 1,
    "name": "eps=4,a=2,i=10",
    "epsilon": 4/255,
    "alpha": 2/255,
    "num_iter": 10
}

params = [attack1, attack2]

models = ['ResNet', 'SparseResNet', 'Robust ResNet', 'Robust SparseResNet', 'ResNet_FReLU', 'SparseResNet_FReLU']
pd_clean, pd_adv = [], []

# Models trained on clean images
- ResNet
    - 8 convolutional layers
    - 5 linear layers
    - activation ReLU after all layers
- SparseResNet
    - 8 convolutional layers
        - with KWiners2d actiation where k set to 10%
    - 5 **sparse** linear layers, only 10% connections is active
        - with KWiners actiation where k set to 10%

**Model ResNet**

In [7]:
model = ResNet().to(device)
model.load_state_dict(torch.load("../saved/resnet.pt", map_location=device))

<All keys matched successfully>

In [8]:
acc_clean = clean_acc(model, test_loader)
pd_clean.append(acc_clean)

acc_adver = adv_acc(model, test_loader, nn.CrossEntropyLoss(), pgd, params)
pd_adv.append(acc_adver)

print(f"clean accuracy: {acc_clean}")
for a, p in zip(acc_adver, params):
    print(f"{p['name']}: {a}")

clean accuracy: 0.9226
eps=8,a=2,i=10: 0.002
eps=4,a=2,i=10: 0.0991


**SparseResNet**

In [9]:
model = SparseResNet().to(device)
model.load_state_dict(torch.load("../saved/sparse_resnet.pt", map_location=device))

<All keys matched successfully>

In [10]:
acc_clean = clean_acc(model, test_loader)
pd_clean.append(acc_clean)

acc_adver = adv_acc(model, test_loader, nn.CrossEntropyLoss(), pgd, params)
pd_adv.append(acc_adver)

print(f"clean accuracy: {acc_clean}")
for a, p in zip(acc_adver, params):
    print(f"{p['name']}: {a}")

clean accuracy: 0.9037
eps=8,a=2,i=10: 0.3083
eps=4,a=2,i=10: 0.5418


In [11]:
d = {
    'model': models[:2], 
    'clean img accuracy': pd_clean, 
}
pd_adv = np.array(pd_adv)
for p in params:
    d[p["name"]] = pd_adv[:, p["id"]]
    
df = pd.DataFrame(data=d)
df

Unnamed: 0,model,clean img accuracy,"eps=8,a=2,i=10","eps=4,a=2,i=10"
0,ResNet,0.9226,0.002,0.0991
1,SparseResNet,0.9037,0.3083,0.5418


### Clean Images

- both models, ResNet and his sparse version has high accuracy on clean image, which was over 90%

### White Box Attack

- model **ResNet**
    - attack pgd (epsilon=8/255) lowered accuracy to about 0.2%
    - attack pgd (epsilon=4/255) lowered accuracy to about 1%
    
 
- model **SparseResNet**
    - attack pgd (epsilon=8/255) lowered accuracy to about 30%
    - attack pgd (epsilon=4/255) lowered accuracy to about 54%
    
    
#### As we can see sparse version of ResNet done a great job, so let see what happens when on adversarial training

# Adversarial trained models

### attack pgd wth params:
- epsilon = 8/255
- alpha = 2/255
- num_iter = 10

In [12]:
pd_clean, pd_adv = [], []

**Robust ResNet**

In [13]:
model = ResNet().to(device)
model.load_state_dict(torch.load("../saved/resnet_robust_eps8.pt", map_location=device))

<All keys matched successfully>

In [14]:
acc_clean = clean_acc(model, test_loader)
pd_clean.append(acc_clean)

acc_adver = adv_acc(model, test_loader, nn.CrossEntropyLoss(), pgd, params)
pd_adv.append(acc_adver)

print(f"clean accuracy: {acc_clean}")
for a, p in zip(acc_adver, params):
    print(f"{p['name']}: {a}")

clean accuracy: 0.729
eps=8,a=2,i=10: 0.4411
eps=4,a=2,i=10: 0.5959


**Robust SparseResNet**

In [15]:
model = SparseResNet().to(device)
model.load_state_dict(torch.load("../saved/sparse_resnet_robust_eps8.pt", map_location=device))

<All keys matched successfully>

In [16]:
acc_clean = clean_acc(model, test_loader)
pd_clean.append(acc_clean)

acc_adver = adv_acc(model, test_loader, nn.CrossEntropyLoss(), pgd, params)
pd_adv.append(acc_adver)

print(f"clean accuracy: {acc_clean}")
for a, p in zip(acc_adver, params):
    print(f"{p['name']}: {a}")

clean accuracy: 0.7441
eps=8,a=2,i=10: 0.7108
eps=4,a=2,i=10: 0.743


In [17]:
d = {
    'model': models[2:4], 
    'clean img accuracy': pd_clean, 
}
pd_adv = np.array(pd_adv)
for p in params:
    d[p["name"]] = pd_adv[:, p["id"]]
    
df1 = pd.DataFrame(data=d)
df1

Unnamed: 0,model,clean img accuracy,"eps=8,a=2,i=10","eps=4,a=2,i=10"
0,Robust ResNet,0.729,0.4411,0.5959
1,Robust SparseResNet,0.7441,0.7108,0.743


- models were trained adversarialy with attack pgd with params (epsilon=8/255, alpha=2/255, iter=10)

### Clean Images

- Adversarial training gave accuracy over 70% in both case

### White Box Attack

- model **ResNet**
    - attack pgd (epsilon=8/255) lowered accuracy to about 44%
    - attack pgd (epsilon=4/255) lowered accuracy to about 59%


- model **SparseResNet**
    - attack pgd (epsilon=8/255) lowered accuracy to about 71%
    - attack pgd (epsilon=4/255) lowered accuracy to about 74%


# Adversarial trained models
### attack pgd with params
- epsilon = 4/255
- alpha = 2/255
- num_iter = 10

In [18]:
pd_clean, pd_adv = [], []

**Robust ResNet**

In [19]:
model = ResNet().to(device)
model.load_state_dict(torch.load("../saved/resnet_robust_eps4.pt", map_location=device))

<All keys matched successfully>

In [20]:
acc_clean = clean_acc(model, test_loader)
pd_clean.append(acc_clean)

acc_adver = adv_acc(model, test_loader, nn.CrossEntropyLoss(), pgd, params)
pd_adv.append(acc_adver)

print(f"clean accuracy: {acc_clean}")
for a, p in zip(acc_adver, params):
    print(f"{p['name']}: {a}")

clean accuracy: 0.8227
eps=8,a=2,i=10: 0.3617
eps=4,a=2,i=10: 0.6169


**Robust SparseResNet**

In [21]:
model = SparseResNet().to(device)
model.load_state_dict(torch.load("../saved/sparse_resnet_robust_eps4.pt", map_location=device))

<All keys matched successfully>

In [22]:
acc_clean = clean_acc(model, test_loader)
pd_clean.append(acc_clean)

acc_adver = adv_acc(model, test_loader, nn.CrossEntropyLoss(), pgd, params)
pd_adv.append(acc_adver)

print(f"clean accuracy: {acc_clean}")
for a, p in zip(acc_adver, params):
    print(f"{p['name']}: {a}")

clean accuracy: 0.8318
eps=8,a=2,i=10: 0.5817
eps=4,a=2,i=10: 0.7176


In [23]:
d = {
    'model': models[2:4], 
    'clean img accuracy': pd_clean, 
}
pd_adv = np.array(pd_adv)
for p in params:
    d[p["name"]] = pd_adv[:, p["id"]]
    
df2 = pd.DataFrame(data=d)
df2

Unnamed: 0,model,clean img accuracy,"eps=8,a=2,i=10","eps=4,a=2,i=10"
0,Robust ResNet,0.8227,0.3617,0.6169
1,Robust SparseResNet,0.8318,0.5817,0.7176


- models were trained adversarialy with attack pgd with params (epsilon=4/255, alpha=2/255, iter=10)

### Clean Images

- Adversarial training gave accuracy over 83% in both case

### White Box Attack

- model **ResNet**
    - attack pgd (epsilon=8/255) lowered accuracy to about 36%
    - attack pgd (epsilon=4/255) lowered accuracy to about 61%


- model **SparseResNet**
    - attack pgd (epsilon=8/255) lowered accuracy to about 58%
    - attack pgd (epsilon=4/255) lowered accuracy to about 71%


# Models with activation function FlattenReLU
- ResNet
    - 8 convolutional layers
        - ReLU after all layers
    - 5 linear layers
        - **FalttenReLU** after all layers
- SparseResNet
    - 8 convolutional layers
        - with KWiners2d actiation where k set to 10%
    - 5 sparse linear layers, only 10% connections is active
        - **FalttenReLU** after all layers

In [24]:
pd_clean, pd_adv = [], []

In [25]:
model = FResNet().to(device)
model.load_state_dict(torch.load("../saved/resnet_FReLU.pt", map_location=device))

<All keys matched successfully>

In [26]:
acc_clean = clean_acc(model, test_loader)
pd_clean.append(acc_clean)

acc_adver = adv_acc(model, test_loader, nn.CrossEntropyLoss(), pgd, params)
pd_adv.append(acc_adver)

print(f"clean accuracy: {acc_clean}")
for a, p in zip(acc_adver, params):
    print(f"{p['name']}: {a}")

clean accuracy: 0.9016
eps=8,a=2,i=10: 0.0059
eps=4,a=2,i=10: 0.1597


In [27]:
model = FSparseResNet().to(device)
model.load_state_dict(torch.load("../saved/sparse_resnet_FReLU.pt", map_location=device))

<All keys matched successfully>

In [28]:
acc_clean = clean_acc(model, test_loader)
pd_clean.append(acc_clean)

acc_adver = adv_acc(model, test_loader, nn.CrossEntropyLoss(), pgd, params)
pd_adv.append(acc_adver)

print(f"clean accuracy: {acc_clean}")
for a, p in zip(acc_adver, params):
    print(f"{p['name']}: {a}")

clean accuracy: 0.8516
eps=8,a=2,i=10: 0.4364
eps=4,a=2,i=10: 0.5583


In [29]:
d = {
    'model': models[4:], 
    'clean img accuracy': pd_clean, 
}
pd_adv = np.array(pd_adv)
for p in params:
    d[p["name"]] = pd_adv[:, p["id"]]
    
df3 = pd.DataFrame(data=d)
df3

Unnamed: 0,model,clean img accuracy,"eps=8,a=2,i=10","eps=4,a=2,i=10"
0,ResNet_FReLU,0.9016,0.0059,0.1597
1,SparseResNet_FReLU,0.8516,0.4364,0.5583


### Clean Images

- Adversarial training gave accuracy over 85% in both case

### White Box Attack

- model **ResNet**
    - attack pgd (epsilon=8/255) lowered accuracy to about 0.5%
    - attack pgd (epsilon=4/255) lowered accuracy to about 15%


- model **SparseResNet**
    - attack pgd (epsilon=8/255) lowered accuracy to about 43%
    - attack pgd (epsilon=4/255) lowered accuracy to about 55%
