In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from advertorch.context import ctx_noparamgrad_and_eval
from advertorch.attacks import LinfPGDAttack
import numpy as np
import random
import vgg

cifar10_train = datasets.CIFAR10("data", train=True, download=True, transform=transforms.ToTensor())
cifar10_test = datasets.CIFAR10("data", train=False, download=True, transform=transforms.ToTensor())
train_loader = DataLoader(cifar10_train, batch_size=100, shuffle=True)
test_loader = DataLoader(cifar10_test, batch_size=100, shuffle=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Files already downloaded and verified
Files already downloaded and verified


In [2]:
def epoch(loader, model, opt=None):
    total_loss, total_err = 0.,0.
    
    # Training Mode
    if opt:
        model.train()
        
        for X, y in loader:        
            X, y = X.to(device), y.to(device)
            yp = model(X)
            loss = nn.CrossEntropyLoss()(yp, y)
            opt.zero_grad()
            loss.backward()
            opt.step()

            total_err += (yp.max(dim=1)[1] != y).sum().item()
            total_loss += loss.item() * X.shape[0]
        
    # Evaluation Mode
    else:
        model.eval()
    
        for X, y in loader:
            X, y = X.to(device), y.to(device)
            yp = model(X)
            loss = nn.CrossEntropyLoss()(yp, y)
            
            total_err += (yp.max(dim=1)[1] != y).sum().item()
            total_loss += loss.item() * X.shape[0]
        
    return total_err / len(loader.dataset), total_loss / len(loader.dataset)

In [3]:
def epoch_adversarial(loader, model, attack, opt=None, train_prob=None, **kwargs):
    total_loss, total_err = 0.,0.
    processed_data_size = 0
    
    # Training Mode
    if opt:
        model.train()
        
        for i, data in enumerate(loader):
            
            # train using adversarial images with a certain prob
            if train_prob and random.random() <= train_prob:
                X, y = data
                X, y = X.to(device), y.to(device)
                with ctx_noparamgrad_and_eval(model):
                    X = attack.perturb(X, y)
                yp = model(X)
                loss = nn.CrossEntropyLoss()(yp, y)
                opt.zero_grad()
                loss.backward()
                opt.step()
            
                total_err += (yp.max(dim=1)[1] != y).sum().item()
                total_loss += loss.item() * X.shape[0]
                processed_data_size += loader.batch_size
        
    # Evaluation Mode
    else:
        model.eval()
        
        for i, data in enumerate(loader):
            X, y = data
            X, y = X.to(device), y.to(device)
            X = attack.perturb(X, y)
            yp = model(X)
            loss = nn.CrossEntropyLoss()(yp, y)
        
            total_err += (yp.max(dim=1)[1] != y).sum().item()
            total_loss += loss.item() * X.shape[0]
            processed_data_size += loader.batch_size
            
    return total_err / processed_data_size, total_loss / processed_data_size

### Experiment 0: Training a Clean VGG16 on CIFAR10

In [4]:
torch.manual_seed(0)
np.random.seed(0)

model_clean = vgg.__dict__["vgg16"]()
model_clean = nn.DataParallel(model_clean)
model_clean.to(device)
opt = optim.SGD(model_clean.parameters(), lr=1e-1)
attack = LinfPGDAttack(
    predict=model_clean, 
    loss_fn=nn.CrossEntropyLoss(reduction="sum"), 
    eps=0.05, nb_iter=10, eps_iter=0.01
)

for t in range(50):
    train_err, train_loss = epoch(train_loader, model_clean, opt)
    test_err, test_loss = epoch(test_loader, model_clean)
    adv_err, adv_loss = epoch_adversarial(test_loader, model_clean, attack)
    print("%.6f,%.6f,%.6f" % (train_err, test_err, adv_err))

torch.save(model_clean.state_dict(), "transferFromClean_clean.pt")

0.890440,0.900000,0.900000
0.882520,0.900000,0.900000
0.831180,0.841200,0.926400
0.750760,0.679400,0.983100
0.679600,0.640100,0.995800
0.609220,0.548700,0.995500
0.527440,0.448900,0.996700
0.442060,0.432100,0.994700
0.376500,0.346100,0.999600
0.320500,0.314100,0.999700
0.275820,0.287000,0.998900
0.239780,0.299800,0.999100
0.201460,0.280100,0.996300
0.174040,0.254600,0.985200
0.150280,0.247600,0.971600
0.127420,0.263100,0.955600
0.109640,0.231800,0.946800
0.092240,0.235000,0.919500
0.075480,0.242100,0.918800
0.064780,0.242900,0.923600
0.056820,0.243200,0.877300
0.045880,0.235500,0.862300
0.041060,0.249000,0.822700
0.036420,0.229800,0.811600
0.029940,0.229000,0.798000
0.028020,0.242800,0.833400
0.025700,0.226700,0.824800
0.021320,0.218000,0.769200
0.019120,0.229400,0.786700
0.018740,0.219600,0.734000
0.015720,0.225700,0.749200
0.013500,0.216100,0.695800
0.013320,0.219900,0.709900
0.011880,0.217900,0.686700
0.013380,0.227800,0.716200
0.009320,0.223200,0.688000
0.011460,0.213100,0.673600
0

### Experiment 1: Transfer Learning Trained with 20% Attacked Images

In [5]:
torch.manual_seed(0)
np.random.seed(0)

model_attacked1 = vgg.__dict__["vgg16"]()
model_attacked1 = nn.DataParallel(model_attacked1)
model_attacked1.load_state_dict(torch.load("transferFromClean_clean.pt"))
model_attacked1.to(device)
opt = optim.SGD(model_attacked1.parameters(), lr=1e-1)
attack = LinfPGDAttack(
    predict=model_attacked1, 
    loss_fn=nn.CrossEntropyLoss(reduction="sum"), 
    eps=0.05, nb_iter=10, eps_iter=0.01
)

for t in range(50):
    train_err, train_loss = epoch_adversarial(train_loader, model_attacked1, attack, opt, 0.2)
    test_err, test_loss = epoch(test_loader, model_attacked1)
    adv_err, adv_loss = epoch_adversarial(test_loader, model_attacked1, attack)
    print("%.6f,%.6f,%.6f" % (train_err, test_err, adv_err))

torch.save(model_attacked1.state_dict(), "transferFromClean_attacked1.pt")

0.901099,0.900000,0.900000
0.907115,0.857600,0.908700
0.899907,0.900000,0.900000
0.901262,0.900000,0.900000
0.899072,0.900000,0.900000
0.885789,0.838600,0.877600
0.873500,0.846500,0.865600
0.861613,0.830700,0.848300
0.856237,0.802900,0.839500
0.843737,0.819100,0.848200
0.847619,0.799400,0.838400
0.845385,0.805200,0.843300
0.840841,0.793700,0.840900
0.832522,0.763600,0.820200
0.832252,0.756500,0.824200
0.826489,0.756000,0.815700
0.822258,0.762300,0.828700
0.814259,0.716700,0.802900
0.812427,0.754000,0.826600
0.810926,0.700400,0.790800
0.804271,0.709400,0.795800
0.806019,0.706200,0.799000
0.803077,0.701500,0.783500
0.801932,0.693300,0.787200
0.802532,0.724700,0.803800
0.798085,0.694700,0.791000
0.794839,0.689100,0.784600
0.789647,0.695800,0.781300
0.792157,0.685300,0.780700
0.793542,0.688100,0.780900
0.786286,0.685000,0.786500
0.793053,0.687700,0.779300
0.789623,0.683500,0.778600
0.786275,0.687400,0.782300
0.785294,0.698400,0.791100
0.783619,0.727000,0.801100
0.778990,0.690800,0.779800
0

### Experiment 2: Transfer Learning Trained with 40% Attacked Images

In [6]:
torch.manual_seed(0)
np.random.seed(0)

model_attacked2 = vgg.__dict__["vgg16"]()
model_attacked2 = nn.DataParallel(model_attacked2)
model_attacked2.load_state_dict(torch.load("transferFromClean_clean.pt"))
model_attacked2.to(device)
opt = optim.SGD(model_attacked2.parameters(), lr=1e-1)
attack = LinfPGDAttack(
    predict=model_attacked2, 
    loss_fn=nn.CrossEntropyLoss(reduction="sum"), 
    eps=0.05, nb_iter=10, eps_iter=0.01
)

for t in range(50):
    train_err, train_loss = epoch_adversarial(train_loader, model_attacked2, attack, opt, 0.4)
    test_err, test_loss = epoch(test_loader, model_attacked2)
    adv_err, adv_loss = epoch_adversarial(test_loader, model_attacked2, attack)
    print("%.6f,%.6f,%.6f" % (train_err, test_err, adv_err))

torch.save(model_attacked2.state_dict(), "transferFromClean_attacked2.pt")

0.901702,0.900000,0.900000
0.902919,0.900000,0.900000
0.896733,0.884500,0.899600
0.861714,0.820200,0.848200
0.845766,0.800500,0.835100
0.838874,0.798200,0.841000
0.829379,0.744500,0.826300
0.821340,0.715200,0.801900
0.816782,0.715800,0.801100
0.810969,0.713000,0.801200
0.804631,0.697800,0.789200
0.797277,0.690900,0.782100
0.795950,0.696000,0.788900
0.791547,0.678100,0.784600
0.789787,0.693000,0.778100
0.792147,0.681200,0.782200
0.784175,0.676100,0.780000
0.787150,0.676700,0.776200
0.777671,0.665000,0.777300
0.776000,0.661600,0.770800
0.777500,0.654900,0.776800
0.774231,0.643400,0.760600
0.775810,0.643000,0.763700
0.768989,0.641400,0.761000
0.769899,0.646300,0.772200
0.767128,0.651900,0.766200
0.766319,0.612900,0.765600
0.762654,0.619600,0.751800
0.761579,0.623900,0.755300
0.764000,0.608100,0.757300
0.758293,0.643000,0.765400
0.759519,0.613500,0.745500
0.758627,0.621800,0.742800
0.752958,0.638200,0.763700
0.752562,0.604400,0.761100
0.754322,0.597300,0.752700
0.747747,0.581500,0.748100
0

### Experiment 3: Transfer Learning Trained with 60% Attacked Images

In [7]:
torch.manual_seed(0)
np.random.seed(0)

model_attacked3 = vgg.__dict__["vgg16"]()
model_attacked3 = nn.DataParallel(model_attacked3)
model_attacked3.load_state_dict(torch.load("transferFromClean_clean.pt"))
model_attacked3.to(device)
opt = optim.SGD(model_attacked3.parameters(), lr=1e-1)
attack = LinfPGDAttack(
    predict=model_attacked3, 
    loss_fn=nn.CrossEntropyLoss(reduction="sum"), 
    eps=0.05, nb_iter=10, eps_iter=0.01
)

for t in range(50):
    train_err, train_loss = epoch_adversarial(train_loader, model_attacked3, attack, opt, 0.6)
    test_err, test_loss = epoch(test_loader, model_attacked3)
    adv_err, adv_loss = epoch_adversarial(test_loader, model_attacked3, attack)
    print("%.6f,%.6f,%.6f" % (train_err, test_err, adv_err))

torch.save(model_attacked3.state_dict(), "transferFromClean_attacked3.pt")

0.892290,0.875800,0.893800
0.854829,0.797300,0.839000
0.839489,0.753400,0.820800
0.820392,0.720500,0.813000
0.808822,0.717800,0.798600
0.799100,0.706000,0.804300
0.797902,0.695300,0.788400
0.791163,0.699400,0.783800
0.787993,0.689600,0.787100
0.785604,0.676100,0.782000
0.780301,0.680500,0.772100
0.776096,0.665600,0.769900
0.773017,0.659400,0.766700
0.773421,0.645700,0.770900
0.768944,0.647700,0.766200
0.767421,0.646000,0.759900
0.764032,0.635000,0.762400
0.766336,0.636500,0.773400
0.764441,0.621200,0.761200
0.756391,0.603900,0.750500
0.756815,0.614100,0.764600
0.754211,0.594800,0.744700
0.750000,0.608600,0.750800
0.747258,0.586000,0.742300
0.745036,0.594300,0.744100
0.743973,0.593500,0.748100
0.742785,0.585500,0.753200
0.742345,0.565500,0.738900
0.737357,0.583000,0.737200
0.735449,0.578900,0.741500
0.732867,0.586500,0.748600
0.730693,0.574200,0.737400
0.727752,0.581800,0.736100
0.724118,0.585600,0.731000
0.722852,0.571400,0.750100
0.723285,0.564400,0.730900
0.721126,0.550500,0.741900
0

### Experiment 4: Transfer Learning Trained with 80% Attacked Images

In [8]:
torch.manual_seed(0)
np.random.seed(0)

model_attacked4 = vgg.__dict__["vgg16"]()
model_attacked4 = nn.DataParallel(model_attacked4)
model_attacked4.load_state_dict(torch.load("transferFromClean_clean.pt"))
model_attacked4.to(device)
opt = optim.SGD(model_attacked4.parameters(), lr=1e-1)
attack = LinfPGDAttack(
    predict=model_attacked3, 
    loss_fn=nn.CrossEntropyLoss(reduction="sum"), 
    eps=0.05, nb_iter=10, eps_iter=0.01
)

for t in range(50):
    train_err, train_loss = epoch_adversarial(train_loader, model_attacked4, attack, opt, 0.8)
    test_err, test_loss = epoch(test_loader, model_attacked4)
    adv_err, adv_loss = epoch_adversarial(test_loader, model_attacked4, attack)
    print("%.6f,%.6f,%.6f" % (train_err, test_err, adv_err))

torch.save(model_attacked4.state_dict(), "transferFromClean_attacked4.pt")

0.868317,0.837400,0.869500
0.841003,0.772300,0.824600
0.821820,0.766000,0.822700
0.803672,0.706300,0.792100
0.797816,0.695100,0.778300
0.793341,0.702800,0.800100
0.787343,0.683400,0.785300
0.784373,0.676500,0.787000
0.779874,0.681500,0.782500
0.775311,0.651900,0.769700
0.772304,0.645500,0.762700
0.767333,0.649100,0.764800
0.768209,0.632600,0.758100
0.759877,0.620500,0.754400
0.759203,0.628400,0.756400
0.757017,0.615100,0.752700
0.755062,0.597000,0.748700
0.750811,0.616700,0.755900
0.747070,0.605200,0.749500
0.744049,0.592500,0.741800
0.743317,0.588900,0.748000
0.739724,0.576800,0.732200
0.734510,0.588400,0.734800
0.734355,0.571400,0.734600
0.730305,0.556800,0.735700
0.727322,0.557800,0.732800
0.726482,0.566500,0.734300
0.723309,0.544500,0.738500
0.719880,0.568300,0.733300
0.716985,0.556100,0.726700
0.716448,0.559300,0.738400
0.709194,0.548800,0.730600
0.708834,0.558300,0.731300
0.706724,0.557400,0.724000
0.702675,0.542000,0.744900
0.700304,0.542200,0.728800
0.698473,0.525000,0.727300
0

### Experiment 5: Transfer Learning Trained with 100% Attacked Images

In [9]:
torch.manual_seed(0)
np.random.seed(0)

model_attacked5 = vgg.__dict__["vgg16"]()
model_attacked5 = nn.DataParallel(model_attacked5)
model_attacked5.load_state_dict(torch.load("transferFromClean_clean.pt"))
model_attacked5.to(device)
opt = optim.SGD(model_attacked5.parameters(), lr=1e-1)
attack = LinfPGDAttack(
    predict=model_attacked5, 
    loss_fn=nn.CrossEntropyLoss(reduction="sum"), 
    eps=0.05, nb_iter=10, eps_iter=0.01
)

for t in range(50):
    train_err, train_loss = epoch_adversarial(train_loader, model_attacked5, attack, opt, 1.0)
    test_err, test_loss = epoch(test_loader, model_attacked5)
    adv_err, adv_loss = epoch_adversarial(test_loader, model_attacked5, attack)
    print("%.6f,%.6f,%.6f" % (train_err, test_err, adv_err))

torch.save(model_attacked5.state_dict(), "stochastic_attacked5.pt")

0.875720,0.830300,0.860000
0.835620,0.752900,0.817500
0.814960,0.727200,0.804400
0.798880,0.695800,0.790000
0.792400,0.685100,0.776000
0.787680,0.688900,0.792300
0.781900,0.666500,0.777400
0.776440,0.660100,0.767400
0.772660,0.649100,0.768300
0.770180,0.643800,0.774200
0.764660,0.623300,0.750300
0.760100,0.623300,0.752000
0.756920,0.607300,0.746100
0.753160,0.596000,0.750100
0.748000,0.608900,0.745600
0.742800,0.600300,0.742500
0.743680,0.578400,0.737300
0.736600,0.589700,0.745200
0.733060,0.576800,0.735300
0.731740,0.578000,0.740800
0.727280,0.565700,0.741700
0.723300,0.546400,0.732300
0.721460,0.572400,0.730500
0.716460,0.545100,0.726400
0.714760,0.549600,0.730100
0.712820,0.544700,0.735000
0.707500,0.544100,0.731600
0.705540,0.530600,0.729400
0.701660,0.557400,0.735900
0.699120,0.534700,0.726600
0.693440,0.548400,0.733700
0.690660,0.540400,0.733200
0.687660,0.538800,0.727500
0.684440,0.551900,0.733100
0.681020,0.522200,0.726900
0.675860,0.512300,0.730300
0.673320,0.506100,0.731900
0