In [1]:
import numpy as np
import torch
import adversary.cw as cw
from adversary.jsma import SaliencyMapMethod
from adversary.fgsm import Attack
import torchvision
import torch.nn.functional as F
import torch.utils.data as Data
from models.mnist_model import MnistModel, MLP
from torchvision import transforms
import mnist_undercover_train

%reload_ext autoreload
%autoreload 2

In [7]:
# --- Experiment configuration -------------------------------------------------
# Fix the NumPy and PyTorch RNG seeds so that weight initialisation and
# DataLoader shuffling can be reproduced after Restart Kernel -> Run All.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Second argument of mnist_undercover_train.train(...) in the training cell
# (presumably the perturbation budget used while training -- confirm in that module).
train_eps = 0.15
# First argument of mnist_undercover_train.train(...): number of training epochs.
# NOTE(review): the training log recorded in this notebook ends at epoch 19, so it
# may come from a run with a different value -- re-run to confirm.
classifier_train_epochs = 30
# When True the training cell retrains the undercover classifier; set to False
# to skip training and rely on an existing checkpoint.
train_classifier = True

In [8]:
# Checkpoint file that is later read with torch.load and whose 'net' entry is
# loaded into MnistModel.
MNIST_UNDERCOVER_CKPT = './checkpoint/mnist_undercover.pth'

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [9]:
# Optionally (re)train the undercover classifier; skipped when the
# train_classifier flag is off.
if train_classifier:
    mnist_undercover_train.train(classifier_train_epochs, train_eps, device)

==> Preparing data..
==> Building model..
epoch: 0, train loss: 777.35, train acc: 0.5285
epoch: 1, train loss: 345.98, train acc: 0.8427
epoch: 2, train loss: 246.70, train acc: 0.9024
epoch: 3, train loss: 192.03, train acc: 0.9334
epoch: 4, train loss: 159.94, train acc: 0.9478
epoch: 5, train loss: 138.84, train acc: 0.9566
epoch: 6, train loss: 122.79, train acc: 0.9632
epoch: 7, train loss: 111.13, train acc: 0.9675
epoch: 8, train loss: 102.59, train acc: 0.9703
epoch: 9, train loss: 95.31, train acc: 0.9732
epoch: 10, train loss: 89.57, train acc: 0.9754
epoch: 11, train loss: 84.00, train acc: 0.9776
epoch: 12, train loss: 80.24, train acc: 0.9786
epoch: 13, train loss: 76.82, train acc: 0.9801
epoch: 14, train loss: 73.96, train acc: 0.9813
epoch: 15, train loss: 71.10, train acc: 0.9826
epoch: 16, train loss: 68.54, train acc: 0.9832
epoch: 17, train loss: 65.89, train acc: 0.9842
epoch: 18, train loss: 63.39, train acc: 0.9851
epoch: 19, train loss: 62.17, train acc: 0.9854

In [10]:
# Preprocessing applied to MNIST images: tensor conversion only.
transform_test = transforms.Compose([transforms.ToTensor()])

# Detector network together with the loss and optimizer used to train it
# in a later cell.
mlp = MLP().to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    mlp.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=5e-4,
)

# Restore the undercover classifier from its checkpoint.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
checkpoint = torch.load(MNIST_UNDERCOVER_CKPT, map_location=torch.device(device))
undercoverNet = MnistModel().to(device)
undercoverNet.load_state_dict(checkpoint['net'])

<All keys matched successfully>

In [11]:
# MNIST training split (downloaded into ./data on first use), served in
# shuffled batches of 512.
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform_test,
)
trainloader = Data.DataLoader(trainset, batch_size=512, shuffle=True, num_workers=4)
trainiter = iter(trainloader)

# MNIST test split, served in fixed order in batches of 512.
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)
testloader = Data.DataLoader(testset, batch_size=512, shuffle=False, num_workers=4)
testiter = iter(testloader)

# Use BIM attack

In [31]:
# Perturbation budget used at evaluation time: passed as `eps` to the BIM
# (i_fgsm) sample construction and as the last argument of the fgsm calls in
# the detector training / test cells below.
eval_eps = 0.08

In [32]:
# Attack helper constructed from undercoverNet and F.cross_entropy (presumably
# the model to attack and the loss to ascend -- confirm in adversary.fgsm).
# Its i_fgsm() and fgsm() methods are used by the cells below.
undercover_gradient_attacker = Attack(undercoverNet, F.cross_entropy)

In [28]:
# construct bim adversarial samples
# Step size and step count of the iterative attack. The count is
# min(eps + 4, 1.25 * eps) with eps expressed on the 0-255 pixel scale,
# truncated to an integer; it does not change between batches, so it is
# computed once.
bim_alpha = 1 / 255
bim_iterations = int(min(eval_eps * 255 + 4, 1.25 * eval_eps * 255))


def collect_adversarial_pairs(loader):
    """Run the BIM attack on every batch of `loader` and keep the successful attacks.

    A sample is kept only when undercoverNet classifies the clean input
    correctly and misclassifies its adversarial counterpart.

    Args:
        loader: iterable yielding (inputs, class_labels) batches.

    Returns:
        (normal_x, adversarial_x): two CPU tensors of equal length holding the
        kept clean inputs and their adversarial versions, in matching order.
    """
    normal_batches, adversarial_batches = [], []
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        # Plain predictions need no autograd graph.
        with torch.no_grad():
            y_pred = undercoverNet(x).argmax(dim=1)

        x_adv = undercover_gradient_attacker.i_fgsm(
            x, y, eps=eval_eps, alpha=bim_alpha, iteration=bim_iterations)
        with torch.no_grad():
            y_pred_adv = undercoverNet(x_adv).argmax(dim=1)

        selected = (y == y_pred) & (y != y_pred_adv)
        normal_batches.append(x[selected].detach().cpu())
        adversarial_batches.append(x_adv[selected].detach().cpu())
    return torch.cat(normal_batches, dim=0), torch.cat(adversarial_batches, dim=0)


def make_dba_loader(normal_x, adversarial_x, batch_size):
    """Wrap clean (label 0) and adversarial (label 1) samples in a shuffled DataLoader."""
    inputs = torch.cat([normal_x, adversarial_x], dim=0)
    labels = torch.cat([torch.zeros(normal_x.shape[0]).long(),
                        torch.ones(adversarial_x.shape[0]).long()], dim=0)
    return Data.DataLoader(Data.TensorDataset(inputs, labels),
                           batch_size=batch_size, shuffle=True, num_workers=4)


# --------------------train---------------------
normal_x, adversarial_x = collect_adversarial_pairs(trainloader)
dba_trainloader = make_dba_loader(normal_x, adversarial_x, batch_size=512)
dba_trainiter = iter(dba_trainloader)

# ----------------test---------------------
normal_x, adversarial_x = collect_adversarial_pairs(testloader)
dba_testloader = make_dba_loader(normal_x, adversarial_x, batch_size=1024)
dba_testiter = iter(dba_testloader)

In [29]:
# train the mlp
# Only `mlp` is optimised here; undercoverNet acts as a fixed feature extractor.
epochs = 10
for epoch in range(epochs):
    train_loss = 0
    correct, total = 0, 0
    for x, y in dba_trainloader:
        x, y = x.to(device), y.to(device)

        # Second output of undercoverNet(..., dba=True) for the input and for its
        # FGSM-perturbed copy. These passes run without autograd so that
        # loss.backward() below does not back-propagate through undercoverNet
        # and pile up gradients on its parameters.
        with torch.no_grad():
            _, V1 = undercoverNet(x, dba=True)
        # NOTE(review): `y` here is the detector label (0 = normal, 1 = adversarial)
        # coming from dba_trainloader, not a digit class. If fgsm() treats its second
        # argument as the class label, this leaks the detector target into the
        # features -- confirm against adversary.fgsm and consider using
        # undercoverNet's own prediction instead (in the test cell as well).
        undercover_adv = undercover_gradient_attacker.fgsm(x, y, False, eval_eps)
        with torch.no_grad():
            _, V2 = undercoverNet(undercover_adv, dba=True)
        # Detector input: both vectors plus their difference and element-wise product.
        V = torch.cat([V1, V2, V1 - V2, V1 * V2], dim=-1)

        optimizer.zero_grad()
        y_pred = mlp(V)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        total += y.size(0)
        correct += y_pred.argmax(dim=1).eq(y).sum().item()
    acc = correct / total
    print('epoch: %d, train loss: %.2f, train acc: %.4f' % (epoch, train_loss, acc))

epoch: 0, train loss: 30.56, train acc: 0.5416
epoch: 1, train loss: 8.94, train acc: 0.5561
epoch: 2, train loss: 5.02, train acc: 0.5615
epoch: 3, train loss: 4.41, train acc: 0.5202
epoch: 4, train loss: 4.08, train acc: 0.5867
epoch: 5, train loss: 4.10, train acc: 0.5680
epoch: 6, train loss: 4.09, train acc: 0.5623
epoch: 7, train loss: 4.08, train acc: 0.6050
epoch: 8, train loss: 4.05, train acc: 0.6402
epoch: 9, train loss: 4.02, train acc: 0.6345


In [30]:
# test
# Evaluate the detector on dba_testloader; label 1 (adversarial) is the positive class.
total, correct = 0, 0
FP, TP, FN, TN = 0, 0, 0, 0
for x, y in dba_testloader:
    x, y = x.to(device), y.to(device)
    with torch.no_grad():
        _, V1 = undercoverNet(x, dba=True)
    # NOTE(review): `y` is the detector label (0/1), i.e. the ground truth being
    # predicted. Passing it to fgsm() may leak it into the features -- confirm
    # against adversary.fgsm (the training cell does the same).
    undercover_adv = undercover_gradient_attacker.fgsm(x, y, False, eval_eps)
    with torch.no_grad():
        _, V2 = undercoverNet(undercover_adv, dba=True)
        V = torch.cat([V1, V2, V1 - V2, V1 * V2], dim=-1)
        y_pred = mlp(V).argmax(dim=1)

    # Count confusion-matrix cells with boolean masks instead of relying on the
    # inf/nan results of dividing integer tensors, which depend on the PyTorch
    # version. Assumes binary 0/1 predictions.
    pred_adv, true_adv = y_pred == 1, y == 1
    TP += (pred_adv & true_adv).sum().item()
    FP += (pred_adv & ~true_adv).sum().item()
    TN += (~pred_adv & ~true_adv).sum().item()
    FN += (~pred_adv & true_adv).sum().item()

    total += y.size(0)
    correct += y_pred.eq(y).sum().item()
print('TP=', TP, 'FP=', FP, 'TN=', TN, 'FN=', FN)

# Report 0.0 instead of raising ZeroDivisionError when a denominator is empty
# (e.g. the detector never predicts the positive class).
Prec = TP/(TP+FP) if (TP+FP) else 0.0
Recall = TP/(TP+FN) if (TP+FN) else 0.0
F1 = 2*Recall*Prec/(Recall + Prec) if (Recall + Prec) else 0.0
n_samples = TP+TN+FP+FN
print('Precision=', Prec)
print('Recall=', Recall)
print('F1=', F1)
print('Accuracy=', (TP+TN)/n_samples if n_samples else 0.0)

TP= 106 FP= 43 TN= 204 FN= 141
Precision= 0.7114093959731543
Recall= 0.4291497975708502
F1= 0.5353535353535354
Accuracy= 0.6275303643724697
