In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from advertorch.context import ctx_noparamgrad_and_eval
from advertorch.attacks import LinfPGDAttack
import numpy as np
import random
import vgg

# CIFAR-10 train / test splits, stored under ./data (downloaded if missing).
# ToTensor() is the only transform applied to either split.
cifar10_train = datasets.CIFAR10(
    root="data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
cifar10_test = datasets.CIFAR10(
    root="data",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Batches of 100; only the training split is reshuffled on each pass.
train_loader = DataLoader(dataset=cifar10_train, batch_size=100, shuffle=True)
test_loader = DataLoader(dataset=cifar10_test, batch_size=100, shuffle=False)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

Files already downloaded and verified
Files already downloaded and verified


In [2]:
def epoch(loader, model, opt=None):
    """Run one full pass over ``loader``; train if ``opt`` is given, else evaluate.

    Args:
        loader: Iterable yielding ``(X, y)`` batches. It must expose a
            ``dataset`` attribute supporting ``len()``, which is used to
            normalise the returned statistics.
        model: Module called as ``model(X)``. The index of the maximum along
            dim 1 of its output is taken as the predicted class.
        opt: Optimizer. When it is not ``None`` the model is switched to
            training mode and updated after every batch. When it is ``None``
            the model is switched to evaluation mode and the pass runs with
            autograd disabled.

    Returns:
        ``(error_rate, mean_loss)``: the number of misclassified samples and
        the summed cross-entropy loss, each divided by ``len(loader.dataset)``.
    """
    training = opt is not None
    model.train(training)  # train(False) is the same switch as eval()
    criterion = nn.CrossEntropyLoss()
    total_loss, total_err = 0., 0.

    # Gradients are only needed when the weights are about to be updated;
    # disabling autograd during evaluation avoids building an unused graph.
    with torch.set_grad_enabled(training):
        for X, y in loader:
            X, y = X.to(device), y.to(device)
            yp = model(X)
            loss = criterion(yp, y)

            if training:
                opt.zero_grad()
                loss.backward()
                opt.step()

            total_err += (yp.max(dim=1)[1] != y).sum().item()
            # The criterion averages over the batch, so scale back up by the
            # batch size before accumulating.
            total_loss += loss.item() * X.shape[0]

    return total_err / len(loader.dataset), total_loss / len(loader.dataset)

In [3]:
def epoch_adversarial(loader, model, attack, opt=None, attack_prob=None, **kwargs):
    """Run one pass over ``loader`` on (possibly) adversarially perturbed inputs.

    Args:
        loader: Iterable yielding ``(X, y)`` batches. It must expose a
            ``dataset`` attribute supporting ``len()``.
        model: Module called as ``model(X)``. The index of the maximum along
            dim 1 of its output is taken as the predicted class.
        attack: Object with a ``perturb(X, y)`` method returning the
            perturbed batch.
        opt: Optimizer. When it is not ``None`` the model is trained and each
            batch is attacked with probability ``attack_prob``. When it is
            ``None`` the model is evaluated and every batch is attacked.
        attack_prob: Probability of perturbing a training batch. ``None`` or
            ``0`` means training batches are never attacked (and no random
            number is drawn). Ignored during evaluation.
        **kwargs: Accepted for call-site compatibility; not used.

    Returns:
        ``(error_rate, mean_loss)`` measured on the inputs actually fed to
        the model, each normalised by ``len(loader.dataset)``.
    """
    training = opt is not None
    model.train(training)  # train(False) is the same switch as eval()
    criterion = nn.CrossEntropyLoss()
    total_loss, total_err = 0., 0.

    for X, y in loader:
        X, y = X.to(device), y.to(device)

        if training:
            # Short-circuit first so that a falsy attack_prob never consumes
            # a draw from the `random` stream.
            use_attack = bool(attack_prob) and random.random() <= attack_prob
        else:
            use_attack = True

        if use_attack:
            # Crafting the perturbation needs autograd enabled, so this must
            # stay outside any no-grad region. The advertorch context is now
            # used on the evaluation path too, matching the training path;
            # judging by its name it freezes parameter gradients and forces
            # eval mode while the attack runs -- TODO confirm against the
            # advertorch docs.
            with ctx_noparamgrad_and_eval(model):
                X = attack.perturb(X, y)

        # The scoring pass only needs a graph when we are going to backprop.
        with torch.set_grad_enabled(training):
            yp = model(X)
            loss = criterion(yp, y)

        if training:
            opt.zero_grad()
            loss.backward()
            opt.step()

        total_err += (yp.max(dim=1)[1] != y).sum().item()
        # The criterion averages over the batch, so scale back up by the
        # batch size before accumulating.
        total_loss += loss.item() * X.shape[0]

    return total_err / len(loader.dataset), total_loss / len(loader.dataset)

### Experiment 0: Training a Clean VGG16 on CIFAR10

In [4]:
# Fix the numpy and torch seeds before the model is created.
np.random.seed(0)
torch.manual_seed(0)

# Baseline: VGG16 wrapped in DataParallel and trained on unperturbed images only.
model_clean = nn.DataParallel(vgg.__dict__["vgg16"]()).to(device)
opt = optim.SGD(params=model_clean.parameters(), lr=1e-1)

# The PGD attack is used here purely to measure robustness on the test split.
attack = LinfPGDAttack(
    predict=model_clean,
    loss_fn=nn.CrossEntropyLoss(reduction="sum"),
    eps=0.05,
    nb_iter=10,
    eps_iter=0.01,
)

for t in range(50):
    # One clean training pass, then a clean and an attacked pass over the test split.
    train_err, train_loss = epoch(train_loader, model_clean, opt)
    test_err, test_loss = epoch(test_loader, model_clean)
    adv_err, adv_loss = epoch_adversarial(test_loader, model_clean, attack)
    # Columns: train error, clean test error, adversarial test error.
    print("%.6f,%.6f,%.6f" % (train_err, test_err, adv_err))

torch.save(model_clean.state_dict(), "stochastic_clean.pt")

0.891740,0.845200,0.907100
0.840900,0.853700,0.970600
0.770560,0.787900,0.968000
0.684820,0.632500,0.986600
0.607000,0.582100,0.995800
0.532240,0.474900,0.997600
0.451360,0.395000,0.997800
0.382500,0.367600,0.998400
0.328140,0.325300,0.998600
0.281540,0.294000,0.999300
0.242060,0.286500,0.995100
0.207160,0.274400,0.998900
0.176620,0.253200,0.990300
0.150920,0.248800,0.978400
0.128440,0.263600,0.980000
0.108860,0.236300,0.935200
0.091960,0.235800,0.953000
0.078200,0.238900,0.920200
0.064960,0.240200,0.877700
0.056180,0.232900,0.885600
0.046300,0.225800,0.861300
0.037260,0.231000,0.834900
0.037800,0.234900,0.862500
0.030040,0.226700,0.812000
0.024660,0.235300,0.847500
0.023680,0.224200,0.786800
0.020220,0.238800,0.842500
0.020880,0.226000,0.781800
0.017560,0.220800,0.751300
0.016240,0.219500,0.754000
0.016700,0.221200,0.749900
0.012780,0.212500,0.716900
0.015860,0.215200,0.707100
0.010020,0.220500,0.753000
0.012320,0.226000,0.739000
0.010200,0.221400,0.726300
0.011400,0.216500,0.689100
0

### Experiment 1: Stochastic Adversarial Training with 20% Attack Probability

In [4]:
# Seed every RNG this run draws from. Python's `random` has to be seeded as
# well: epoch_adversarial() calls random.random() to decide which training
# batches are attacked, so without it the attacked batches differ between runs.
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

# VGG16 wrapped in DataParallel, optimised with plain SGD (lr=0.1).
model_attacked1 = vgg.__dict__["vgg16"]()
model_attacked1 = nn.DataParallel(model_attacked1)
model_attacked1.to(device)
opt = optim.SGD(model_attacked1.parameters(), lr=1e-1)

# The same attack object crafts training perturbations and drives the
# adversarial test pass.
attack = LinfPGDAttack(
    predict=model_attacked1, 
    loss_fn=nn.CrossEntropyLoss(reduction="sum"), 
    eps=0.05, nb_iter=10, eps_iter=0.01
)

for t in range(50):
    # Each training batch is attacked with probability 0.2.
    train_err, train_loss = epoch_adversarial(train_loader, model_attacked1, attack, opt, 0.2)
    test_err, test_loss = epoch(test_loader, model_attacked1)
    adv_err, adv_loss = epoch_adversarial(test_loader, model_attacked1, attack)
    # Columns: train error, clean test error, adversarial test error.
    print("%.6f,%.6f,%.6f" % (train_err, test_err, adv_err))

# The state dict is taken from the DataParallel wrapper.
torch.save(model_attacked1.state_dict(), "stochastic_attacked1.pt")

0.891660,0.872900,0.900700
0.891900,0.900000,0.900000
0.883980,0.835300,0.888700
0.856780,0.867900,0.902200
0.813020,0.897100,0.903900
0.776940,0.690600,0.847500
0.703360,0.697200,0.901700
0.661420,0.577700,0.893400
0.629760,0.569200,0.908000
0.597300,0.506500,0.851500
0.568220,0.478300,0.849100
0.543560,0.455700,0.864300
0.514280,0.436200,0.885500
0.472540,0.398600,0.889200
0.464460,0.411000,0.860300
0.435200,0.373300,0.880600
0.423100,0.377500,0.865400
0.394220,0.354400,0.872300
0.367820,0.364600,0.892400
0.360420,0.334800,0.882400
0.315600,0.348800,0.886200
0.293380,0.365400,0.885000
0.290080,0.333400,0.878400
0.253440,0.328300,0.876100
0.240940,0.323400,0.867500
0.242320,0.329400,0.874700
0.233120,0.310200,0.887500
0.201820,0.316500,0.889400
0.182300,0.320800,0.876100
0.170960,0.323800,0.875700
0.162540,0.319500,0.886500
0.169700,0.318100,0.878500
0.168140,0.358300,0.871700
0.176200,0.324600,0.861900
0.151040,0.323400,0.864100
0.146060,0.347400,0.883700
0.157460,0.317600,0.861100
0

### Experiment 2: Stochastic Adversarial Training with 40% Attack Probability

In [5]:
# Seed every RNG this run draws from. Python's `random` has to be seeded as
# well: epoch_adversarial() calls random.random() to decide which training
# batches are attacked, so without it the attacked batches differ between runs.
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

# VGG16 wrapped in DataParallel, optimised with plain SGD (lr=0.1).
model_attacked2 = vgg.__dict__["vgg16"]()
model_attacked2 = nn.DataParallel(model_attacked2)
model_attacked2.to(device)
opt = optim.SGD(model_attacked2.parameters(), lr=1e-1)

# The same attack object crafts training perturbations and drives the
# adversarial test pass.
attack = LinfPGDAttack(
    predict=model_attacked2, 
    loss_fn=nn.CrossEntropyLoss(reduction="sum"), 
    eps=0.05, nb_iter=10, eps_iter=0.01
)

for t in range(50):
    # Each training batch is attacked with probability 0.4.
    train_err, train_loss = epoch_adversarial(train_loader, model_attacked2, attack, opt, 0.4)
    test_err, test_loss = epoch(test_loader, model_attacked2)
    adv_err, adv_loss = epoch_adversarial(test_loader, model_attacked2, attack)
    # Columns: train error, clean test error, adversarial test error.
    print("%.6f,%.6f,%.6f" % (train_err, test_err, adv_err))

# The state dict is taken from the DataParallel wrapper.
torch.save(model_attacked2.state_dict(), "stochastic_attacked2.pt")

0.895620,0.900000,0.900000
0.888580,0.899700,0.900000
0.900900,0.900000,0.899900
0.901900,0.893500,0.903100
0.894720,0.900400,0.900400
0.900200,0.900000,0.900000
0.880800,0.779900,0.875000
0.865740,0.801000,0.861500
0.865780,0.785000,0.859200
0.817280,0.825900,0.859700
0.778580,0.686700,0.809800
0.745660,0.637000,0.815200
0.723680,0.603600,0.798600
0.694100,0.558600,0.815600
0.670500,0.587800,0.846600
0.659840,0.578900,0.800800
0.635220,0.516100,0.802400
0.622920,0.498600,0.784100
0.603140,0.486400,0.808800
0.587740,0.456400,0.807900
0.568200,0.437600,0.829100
0.560960,0.412200,0.838600
0.534280,0.444000,0.823900
0.539920,0.413800,0.818400
0.507300,0.402500,0.829700
0.506800,0.403500,0.805700
0.481340,0.377700,0.854300
0.461240,0.409800,0.822100
0.457700,0.376300,0.821100
0.441340,0.360300,0.846200
0.434300,0.409100,0.826700
0.408640,0.372200,0.830700
0.411300,0.357500,0.834900
0.361280,0.359700,0.834100
0.370920,0.365300,0.846200
0.353560,0.357600,0.841300
0.346200,0.363400,0.832800
0

### Experiment 3: Stochastic Adversarial Training with 60% Attack Probability

In [6]:
# Seed every RNG this run draws from. Python's `random` has to be seeded as
# well: epoch_adversarial() calls random.random() to decide which training
# batches are attacked, so without it the attacked batches differ between runs.
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

# VGG16 wrapped in DataParallel, optimised with plain SGD (lr=0.1).
model_attacked3 = vgg.__dict__["vgg16"]()
model_attacked3 = nn.DataParallel(model_attacked3)
model_attacked3.to(device)
opt = optim.SGD(model_attacked3.parameters(), lr=1e-1)

# The same attack object crafts training perturbations and drives the
# adversarial test pass.
attack = LinfPGDAttack(
    predict=model_attacked3, 
    loss_fn=nn.CrossEntropyLoss(reduction="sum"), 
    eps=0.05, nb_iter=10, eps_iter=0.01
)

for t in range(50):
    # Each training batch is attacked with probability 0.6.
    train_err, train_loss = epoch_adversarial(train_loader, model_attacked3, attack, opt, 0.6)
    test_err, test_loss = epoch(test_loader, model_attacked3)
    adv_err, adv_loss = epoch_adversarial(test_loader, model_attacked3, attack)
    # Columns: train error, clean test error, adversarial test error.
    print("%.6f,%.6f,%.6f" % (train_err, test_err, adv_err))

# The state dict is taken from the DataParallel wrapper.
torch.save(model_attacked3.state_dict(), "stochastic_attacked3.pt")

0.899640,0.858200,0.884400
0.890140,0.861400,0.885600
0.899380,0.888700,0.897100
0.894360,0.839800,0.875100
0.859080,0.770500,0.828500
0.830380,0.784700,0.837500
0.796180,0.714200,0.815900
0.773720,0.686800,0.797400
0.758400,0.653000,0.790300
0.745520,0.637400,0.785000
0.725960,0.591100,0.801500
0.718800,0.592900,0.763700
0.704680,0.558400,0.784500
0.697740,0.551300,0.780500
0.680380,0.577800,0.781000
0.669780,0.507200,0.766900
0.662760,0.496800,0.790300
0.647200,0.483500,0.770800
0.638640,0.484300,0.761100
0.629460,0.503300,0.805400
0.620920,0.456800,0.779500
0.616820,0.436100,0.774800
0.606240,0.443400,0.762200
0.598380,0.439700,0.766500
0.585180,0.419600,0.778100
0.570840,0.423600,0.775800
0.560080,0.522100,0.832100
0.548760,0.427700,0.779800
0.541320,0.434600,0.760900
0.525600,0.427300,0.762800
0.513240,0.392900,0.799900
0.509420,0.390200,0.811000
0.488920,0.406000,0.794800
0.488220,0.431000,0.776600
0.471800,0.451300,0.825700
0.450520,0.395300,0.797200
0.446480,0.404900,0.805400
0

### Experiment 4: Stochastic Adversarial Training with 80% Attack Probability

In [4]:
# Seed every RNG this run draws from. Python's `random` has to be seeded as
# well: epoch_adversarial() calls random.random() to decide which training
# batches are attacked, so without it the attacked batches differ between runs.
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

# VGG16 wrapped in DataParallel, optimised with plain SGD (lr=0.1).
model_attacked4 = vgg.__dict__["vgg16"]()
model_attacked4 = nn.DataParallel(model_attacked4)
model_attacked4.to(device)
opt = optim.SGD(model_attacked4.parameters(), lr=1e-1)

# The same attack object crafts training perturbations and drives the
# adversarial test pass.
attack = LinfPGDAttack(
    predict=model_attacked4, 
    loss_fn=nn.CrossEntropyLoss(reduction="sum"), 
    eps=0.05, nb_iter=10, eps_iter=0.01
)

for t in range(50):
    # Each training batch is attacked with probability 0.8.
    train_err, train_loss = epoch_adversarial(train_loader, model_attacked4, attack, opt, 0.8)
    test_err, test_loss = epoch(test_loader, model_attacked4)
    adv_err, adv_loss = epoch_adversarial(test_loader, model_attacked4, attack)
    # Columns: train error, clean test error, adversarial test error.
    print("%.6f,%.6f,%.6f" % (train_err, test_err, adv_err))

# The state dict is taken from the DataParallel wrapper.
torch.save(model_attacked4.state_dict(), "stochastic_attacked4.pt")

0.902500,0.894000,0.901900
0.879380,0.900000,0.900000
0.902100,0.900000,0.900000
0.904020,0.900000,0.900000
0.899960,0.900200,0.900400
0.900740,0.900000,0.900000
0.902400,0.900000,0.900000
0.900020,0.900000,0.900000
0.901400,0.900000,0.900000
0.902260,0.900000,0.900000
0.899880,0.900000,0.900000
0.900880,0.900000,0.900000
0.904000,0.900000,0.900000
0.900340,0.900000,0.900000
0.900760,0.900000,0.900000
0.886580,0.900000,0.900000
0.868320,0.900000,0.900000
0.866840,0.900000,0.900000
0.838080,0.761200,0.814900
0.802540,0.753700,0.812200
0.787480,0.728700,0.800200
0.776500,0.665900,0.786100
0.765500,0.684400,0.781100
0.761900,0.642200,0.757900
0.754160,0.624700,0.768400
0.747320,0.632800,0.774000
0.737400,0.585000,0.762600
0.728820,0.558900,0.758500
0.723040,0.566000,0.749100
0.715080,0.543500,0.750300
0.709180,0.574400,0.749200
0.696880,0.535100,0.741900
0.690200,0.545200,0.765000
0.686440,0.538900,0.724300
0.672860,0.502500,0.749000
0.673820,0.505800,0.736500
0.663560,0.507300,0.740100
0

### Experiment 5: Stochastic Adversarial Training with 100% Attack Probability

In [5]:
# Seed every RNG this run draws from. Python's `random` is seeded as well:
# epoch_adversarial() calls random.random() once per training batch whenever
# attack_prob is truthy, so seeding keeps that stream identical across runs.
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)

# VGG16 wrapped in DataParallel, optimised with plain SGD (lr=0.1).
model_attacked5 = vgg.__dict__["vgg16"]()
model_attacked5 = nn.DataParallel(model_attacked5)
model_attacked5.to(device)
opt = optim.SGD(model_attacked5.parameters(), lr=1e-1)

# The same attack object crafts training perturbations and drives the
# adversarial test pass.
attack = LinfPGDAttack(
    predict=model_attacked5, 
    loss_fn=nn.CrossEntropyLoss(reduction="sum"), 
    eps=0.05, nb_iter=10, eps_iter=0.01
)

for t in range(50):
    # attack_prob=1.0: every training batch is attacked.
    train_err, train_loss = epoch_adversarial(train_loader, model_attacked5, attack, opt, 1.0)
    test_err, test_loss = epoch(test_loader, model_attacked5)
    adv_err, adv_loss = epoch_adversarial(test_loader, model_attacked5, attack)
    # Columns: train error, clean test error, adversarial test error.
    print("%.6f,%.6f,%.6f" % (train_err, test_err, adv_err))

# The state dict is taken from the DataParallel wrapper.
torch.save(model_attacked5.state_dict(), "stochastic_attacked5.pt")

0.903420,0.900000,0.900000
0.894160,0.837400,0.852600
0.892960,0.899900,0.900000
0.875420,0.811900,0.842500
0.836680,0.753400,0.805500
0.827180,0.756300,0.818300
0.809380,0.714800,0.797100
0.794100,0.710300,0.786000
0.788840,0.687600,0.783600
0.783140,0.716800,0.794400
0.777680,0.671800,0.768700
0.770620,0.661400,0.761000
0.769440,0.653100,0.756300
0.762760,0.639600,0.757300
0.758040,0.634500,0.751900
0.752920,0.639700,0.748300
0.749980,0.607900,0.740700
0.744340,0.602200,0.745000
0.740280,0.596900,0.733600
0.734180,0.591200,0.732500
0.730640,0.573400,0.740800
0.725300,0.546700,0.735900
0.720660,0.578700,0.730400
0.714760,0.548900,0.723300
0.711060,0.544000,0.722700
0.705700,0.556500,0.726300
0.697880,0.538900,0.736500
0.693160,0.518300,0.723100
0.687600,0.539900,0.719200
0.681460,0.524100,0.715500
0.676240,0.531900,0.725000
0.670580,0.521000,0.728900
0.661180,0.524400,0.717400
0.655880,0.529200,0.736800
0.650320,0.499800,0.736600
0.647020,0.491300,0.729900
0.635400,0.495500,0.736500
0