In [1]:
import sys
sys.path.insert(0, '../')

import os
import torch
import numpy as np
import pandas as pd

from resnet import ResNet18
from train_resnet import get_cifar10_data

In [2]:
# %%capture

# Attack configurations to evaluate. Each name is interpolated verbatim into the
# projection file name when results are loaded, so it must match the files on
# disk exactly.
# NOTE(review): names appear to encode norm / epsilon / step count -- confirm
# against the script that generated the projection files.
ALL_ATTACKS = [
    'nothing',
    'linf-gold-eps0.001-step10',
    'linf-gold-eps0.01-step10',
    'linf-gold-eps0.1-step10',
    'l2-gold-eps0.001-step10',
    'l2-gold-eps0.01-step10',
    'l2-gold-eps0.1-step10',
]

# Defense configurations to evaluate; used in file names the same way as the
# attack names above.
ALL_DEFENSES = [
    'nothing',
    'linf-eps0.001-step10',
    'linf-eps0.01-step10',
    'linf-eps0.1-step10',
    'l2-eps0.001-step10',
    'l2-eps0.01-step10',
    'l2-eps0.1-step10',
]

# Directory holding the precomputed 'attack-*-defense-*.pt' projection files.
PREFIX = '../data/projected/'

# Number of leading test-set examples kept for evaluation.
NUM_TEST_SAMPLES = 1000

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
resnet = ResNet18().to(device)
# map_location remaps the checkpoint's tensors onto the selected device, so a
# checkpoint written from a GPU run still loads when only the CPU is available.
resnet.load_state_dict(
    torch.load('../logdirs/cifar10-40k_best.pt', map_location=device)
)
resnet.eval()  # evaluation mode: the model is only used for inference below

# Only the third value returned by get_cifar10_data is used (the test split).
_, _, testset = get_cifar10_data(bs=None, as_loader=False, path='../data')
testset = torch.utils.data.Subset(testset, list(range(NUM_TEST_SAMPLES)))
# Gather the label (second element of every sample) into one tensor that lives
# on the same device as the model.
testlabels = torch.stack([sample[1] for sample in testset]).to(device)


Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Classifier accuracy for every (attack, defense) combination, keyed by the
# (attack_name, defense_name) tuple.
# NOTE(review): in the recorded output every column other than 'nothing-defense'
# sits near 0.1 -- worth confirming that the stored projections are in the
# format/preprocessing the classifier expects.
data = {}

for attack_name in ALL_ATTACKS:
    for defense_name in ALL_DEFENSES:
        # Locate and load the stored projections for this combination.
        path = os.path.join(
            PREFIX,
            'attack-{}-defense-{}.pt'.format(attack_name, defense_name),
        )
        projections = torch.from_numpy(torch.load(path)['projections']).to(device)

        # Pair each projection with its label and iterate in fixed-size batches
        # (no shuffling, so the order matches `testlabels`).
        batches = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(projections, testlabels),
            batch_size=64,
        )

        n_correct, n_seen = 0, 0
        with torch.no_grad():  # inference only; no gradients needed
            for inputs, targets in batches:
                predicted = resnet(inputs).argmax(dim=1)
                n_correct += (predicted == targets).sum().item()
                n_seen += targets.size(0)

        # Sanity check: every evaluation must cover exactly 1000 examples.
        assert n_seen == 1000
        data[(attack_name, defense_name)] = n_correct / float(n_seen)
        

In [4]:
# Header row: an unnamed column for the attack label, then one column per defense.
columns = [''] + [defense + '-defense' for defense in ALL_DEFENSES]

# One table row per attack: the row label followed by the accuracy measured
# under each defense, in the same order as the header.
df_data = [
    [attack_name + '-attack']
    + [data[(attack_name, defense_name)] for defense_name in ALL_DEFENSES]
    for attack_name in ALL_ATTACKS
]

df = pd.DataFrame(df_data, columns=columns)

# Bare last expression so the notebook renders the table.
df

Unnamed: 0,Unnamed: 1,nothing-defense,linf-eps0.001-step10-defense,linf-eps0.01-step10-defense,linf-eps0.1-step10-defense,l2-eps0.001-step10-defense,l2-eps0.01-step10-defense,l2-eps0.1-step10-defense
0,nothing-attack,0.87,0.096,0.111,0.118,0.1,0.106,0.102
1,linf-gold-eps0.001-step10-attack,0.718,0.099,0.112,0.112,0.102,0.116,0.102
2,linf-gold-eps0.01-step10-attack,0.133,0.092,0.085,0.095,0.101,0.103,0.122
3,linf-gold-eps0.1-step10-attack,0.002,0.092,0.108,0.102,0.093,0.106,0.104
4,l2-gold-eps0.001-step10-attack,0.869,0.118,0.092,0.127,0.1,0.103,0.089
5,l2-gold-eps0.01-step10-attack,0.836,0.101,0.093,0.094,0.101,0.101,0.105
6,l2-gold-eps0.1-step10-attack,0.463,0.105,0.11,0.085,0.092,0.091,0.115
