In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms, utils
from sklearn.decomposition import PCA
import pickle
import matplotlib.pyplot as plt
import numpy as np
from numpy import linalg as LA
import argparse
from numpy import ma
import scipy
import random
import time
import os.path
from sklearn.linear_model import LogisticRegression
import pandas as pd
from skimage.measure import compare_ssim as ssim
from skimage import feature

from network.cnn_net import cnn_net
from network.ann import ann_net
from network.adv_cnn_net import adv_cnn_net
from network.adv_ann import adv_ann_net
import utils.cw_final as cw_final
import utils.cw as cw


def train_binary(args, model, device, train_loader, optimizer, epoch, target_class,
                 repeat_factor=13):
    """Train `model` for one epoch as a one-vs-rest detector for `target_class`.

    Every image is replaced by its Canny edge map (sigma=1.8) and flattened
    to 784 features. Labels are remapped to 0 for `target_class` and 1 for
    every other class. Each `target_class` sample is then appended
    `repeat_factor` extra times to the batch.

    Args:
        args: namespace providing `log_interval`.
        model: network taking (N, 784) inputs and returning log-probabilities.
        device: device the batch is moved to before the forward pass.
        train_loader: loader yielding (image batch, label batch) pairs.
        optimizer: optimizer stepping `model`'s parameters.
        epoch: epoch number, only used in the progress log.
        target_class: the class this detector should label 0.
        repeat_factor: extra copies appended per `target_class` sample
            (presumably to offset the one-vs-rest class imbalance).
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Remember the loader's batch length; `data` grows after oversampling.
        batch_size = data.size(0)

        # Canny edge detection; skimage works on CPU numpy arrays.
        for i in range(batch_size):
            edges = feature.canny(data[i].reshape(28, 28).numpy(), sigma=1.8)
            data[i] = torch.Tensor(edges.astype(float))

        data = data.reshape(-1, 784)  # Remove this line while training cnn
        data, target = data.to(device), target.to(device)

        # Binary labels: 0 = target class, 1 = any other class.
        is_target = target == target_class
        target = (~is_target).long()

        # Oversample the target class. The copies are built from the device
        # tensors so the concatenation below never mixes CPU and GPU tensors.
        extra_data = data[is_target].repeat_interleave(repeat_factor, dim=0)
        extra_target = torch.zeros(extra_data.size(0), dtype=torch.long,
                                   device=target.device)
        data = torch.cat((data, extra_data), 0)
        target = torch.cat((target, extra_target), 0)

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * batch_size, len(train_loader.dataset),
                       100. * batch_idx / len(train_loader), loss.item()))

    
def test_binary(args, model, device, test_loader, target_class):
    """Evaluate a one-vs-rest detector for `target_class` on `test_loader`.

    Images are converted to Canny edge maps (sigma=1.8) and flattened to 784
    features; labels are remapped to 0 for `target_class` and 1 otherwise.
    Prints the average NLL loss and the accuracy.

    Args:
        args: unused; kept for signature parity with the other helpers.
        model: network taking (N, 784) inputs and returning log-probabilities.
        device: device the batch is moved to before the forward pass.
        test_loader: loader yielding (image batch, label batch) pairs.
        target_class: the class the detector labels 0.

    Returns:
        Accuracy over the whole dataset, in percent.
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            # Canny edge detection; skimage works on CPU numpy arrays.
            for i in range(data.size(0)):
                edges = feature.canny(data[i].reshape(28, 28).numpy(), sigma=1.8)
                data[i] = torch.Tensor(edges.astype(float))

            # Binary labels: 0 = target class, 1 = any other class.
            target = (target != target_class).long()

            data = data.reshape(data.size(0), 784)  # Remove this line while training cnn
            data, target = data.to(device), target.to(device)

            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1)  # index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    num_samples = len(test_loader.dataset)
    test_loss /= num_samples
    accuracy = 100. * correct / num_samples

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, num_samples, accuracy))

    return accuracy

    
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one epoch of NLL-loss training of `model` over `train_loader`.

    A progress line is printed for every batch whose index is a multiple of
    `args.log_interval`.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        # inputs = inputs.reshape(-1, 784)  # enable when training the fully connected net
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        if step % args.log_interval != 0:
            continue

        seen = step * len(inputs)
        percent_done = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, len(train_loader.dataset), percent_done, batch_loss.item()))


def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            #data = data.reshape(-1, 784) # Remove this line while training cnn
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


def _detector_flags(detector, image, device):
    """Return True when `detector` rejects the Canny edge map of a 28x28 `image`."""
    # sigma = 1.8 gives best results
    edges = feature.canny(image, sigma=1.8).astype(float)
    edge_input = torch.Tensor(edges).reshape(1, 784).to(device)
    detector.eval()
    # Detector class 0 means "consistent with the predicted digit"; anything
    # else is treated as an adversarial flag.
    return detector(edge_input).argmax(dim=1).item() != 0


def calc_confusion(data, target, adv_imgs, classifier, detectors):
    """Count detector outcomes on adversarial and natural images.

    Each image is classified by `classifier`; the detector belonging to the
    predicted class then judges the image's Canny edge map. Adversarial
    images count as true positive when flagged and false negative otherwise.
    Natural images that the classifier gets right count as false positive
    when flagged and true negative otherwise; natural images the classifier
    misclassifies are skipped.

    Args:
        data: batch of natural images, reshaped to (N, 28, 28) per sample.
        target: labels compared against the classifier's predictions on `data`.
        adv_imgs: sequence of 28x28 adversarial images (numpy arrays).
        classifier: model returning per-class scores for (N, 1, 28, 28) input.
        detectors: per-class detectors indexed by predicted class.

    Returns:
        `[true_neg, false_neg, false_pos, true_pos]`.
    """
    true_pos = 0
    false_pos = 0
    true_neg = 0
    false_neg = 0
    device = data.device

    with torch.no_grad():
        for adv_im in adv_imgs:
            adv_input = torch.as_tensor(adv_im, dtype=torch.float32,
                                        device=device).reshape(1, 1, 28, 28)
            # .item() gives a plain int that is safe to use as a list index.
            predicted = classifier(adv_input).argmax(dim=1).item()
            if _detector_flags(detectors[predicted], adv_im, device):
                true_pos += 1
            else:
                false_neg += 1

        predictions = classifier(data).argmax(dim=1)  # index of the max log-probability
        # Iterate over the batch actually passed in rather than a global batch size.
        for i in range(data.size(0)):
            predicted = predictions[i].item()
            if predicted != target[i].item():  # ignoring natural errors
                continue

            image = data[i].reshape(28, 28).cpu().numpy()
            if _detector_flags(detectors[predicted], image, device):
                false_pos += 1
            else:
                true_neg += 1

    conf_mat = [true_neg, false_neg, false_pos, true_pos]

    return conf_mat


def get_data(data_loader, targeted):
    """Fetch the first batch of `data_loader`.

    When `targeted` is true, every label in the returned batch is replaced
    by a random class in 0..9 that differs from the original label.
    """
    batches = iter(data_loader)
    data, target = next(batches)

    if not targeted:
        return data, target

    # Rejection sampling: redraw until the candidate differs from the true label.
    for idx, true_label in enumerate(target.tolist()):
        candidate = random.randint(0, 9)
        while candidate == true_label:
            candidate = random.randint(0, 9)
        target[idx] = candidate

    return data, target

In [6]:
# Experiment settings. The parser is fed an empty argument list below, so
# inside the notebook every option takes its default value.
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing')
parser.add_argument('--epochs', type=int, default=50, metavar='N',
                    help='number of epochs to train (default: 50)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
# NOTE(review): with action='store_true' and default=True this flag is always
# true, so CUDA is never used. Several cells call .numpy() directly on
# tensors, which only works on CPU; confirm before changing the default.
parser.add_argument('--no-cuda', action='store_true', default=True,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
parser.add_argument('--save-model', action='store_true', default=False,
                    help='For Saving the current Model')
args = parser.parse_args([])

# The test set must split into whole batches; necessary for correct
# calculation of the confusion matrix.
assert (10000 % args.test_batch_size) == 0

use_cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)

if use_cuda:
    device = torch.device("cuda")
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    device = torch.device("cpu")
    kwargs = {}

# Normalize((0,), (1,)) leaves pixel values as produced by ToTensor; the
# usual MNIST statistics (0.1307, 0.3081) are intentionally not applied.
mnist_train = datasets.MNIST(
    'data/mnist_digit',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0,), (1,)),
    ]),
)
train_loader = torch.utils.data.DataLoader(
    mnist_train, batch_size=args.batch_size, shuffle=True, **kwargs)

mnist_test = datasets.MNIST(
    'data/mnist_digit',
    train=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0,), (1,)),
    ]),
)
# No shuffling: the first test batch is the same on every fetch.
test_loader = torch.utils.data.DataLoader(
    mnist_test, batch_size=args.test_batch_size, shuffle=False, **kwargs)

# One binary detector per digit class, in two wrappers: `detectors` is used
# by calc_confusion and `adv_detectors` is handed to the cw_final attack.
# Both are initialised from the same checkpoint.
adv_detectors = []
detectors = []
for i in range(10):
    checkpoint_path = ("models/mnist_ann_canny/"
                       + "mnist_binary/mnist_ann_canny_binary_sigma_1.8_"
                       + str(i) + ".pth")
    # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
    state = torch.load(checkpoint_path, map_location=device)

    detector = ann_net().to(device)
    detector.load_state_dict(state)
    detector.eval()
    detectors.append(detector)

    adv_detector = adv_ann_net().to(device)
    adv_detector.load_state_dict(state)
    adv_detector.eval()
    adv_detectors.append(adv_detector)

# Settings passed to the L2Adversary constructors in the attack cells.
INPUT_BOX = (0.0, 1.0)
OPT_LR = 0.01
SEARCH_STEPS = 9

# `model` is the network the attacks are run against; `classifier` is the
# network calc_confusion uses for predictions. Both load the same weights.
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model = adv_cnn_net().to(device)
model.load_state_dict(torch.load("mnist_cnn.pt", map_location=device))
model.eval()

classifier = cnn_net().to(device)
classifier.load_state_dict(torch.load("mnist_cnn.pt", map_location=device))
classifier.eval()

if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model = nn.DataParallel(model)


In [None]:
# Clean model accuracy: train the plain CNN from scratch and save its weights.
# NOTE: this rebinds the global `model`, which the attack cells pass to the
# adversaries; re-run the setup cell before running an attack afterwards.
model = cnn_net().to(device)  # train()/test() move each batch to `device`

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
for epoch in range(1, args.epochs + 1):
    train(args, model, device, train_loader, optimizer, epoch)
    test(args, model, device, test_loader)

torch.save(model.state_dict(), "mnist_cnn.pt")

print("Final Model Accuracy:")
test(args, model, device, test_loader)


Final Model Accuracy:

Test set: Average loss: 0.0359, Accuracy: 9878/10000 (99%)

In [None]:
# Binary classifiers: train one edge-map detector per digit class and report
# its accuracy.
# NOTE: this rebinds the global `model`, which the attack cells pass to the
# adversaries; re-run the setup cell before running an attack afterwards.

for m in range(10):
    print("class: ", m)
    checkpoint_path = "models/mnist_ann_canny/mnist_ann_canny_binary_sigma_1.8_" + str(m) + ".pt"

    # Fresh network per class, so detector m does not start from the weights
    # of detector m-1.
    model = ann_net().to(device)
    # Resume from an existing checkpoint when there is one. load_state_dict
    # is the call that copies weights into the model; state_dict() only
    # exports them.
    if os.path.isfile(checkpoint_path):
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.train()

    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    for epoch in range(1, args.epochs + 1):
        train_binary(args, model, device, train_loader, optimizer, epoch, m)
        test_binary(args, model, device, test_loader, m)

    print("Accruacy for binary Classifier " + str(m), test_binary(args, model, device, test_loader, m))
    torch.save(model.state_dict(), checkpoint_path)

Accruacy for binary Classifier 0 98
Accruacy for binary Classifier 1 100
Accruacy for binary Classifier 2 97
Accruacy for binary Classifier 3 97
Accruacy for binary Classifier 4 97
Accruacy for binary Classifier 5 96
Accruacy for binary Classifier 6 98
Accruacy for binary Classifier 7 98
Accruacy for binary Classifier 8 95
Accruacy for binary Classifier 9 96

In [None]:
# UD case: Untargeted
#       1) create adv imgs for trained model without defence for different confidence values
#       2) Mix with equal number of natural imgs
#       3) Calculate accuracy with defence

conf_list = [0, 10, 20]
data, target = get_data(test_loader, False)
data, target = data.to(device), target.to(device)
num_samples = data.size(0)

for conf in conf_list:
    adversary = cw.L2Adversary(targeted=False,
                               confidence=conf,
                               search_steps=SEARCH_STEPS,
                               box=INPUT_BOX,
                               optimizer_lr=OPT_LR)
    adv, l2 = adversary(model, data, target, to_numpy=False)

    adv_imgs = []
    success_l2 = []
    for i in range(num_samples):
        if l2[i] != np.inf:    # check if attack was successful
            adv_imgs.append(adv[i].reshape(28, 28).detach().cpu().numpy())
            success_l2.append(float(l2[i]))

    with open("adv_imgs/UD_untargeted_mnist_adv_imgs_cw_conf_"+str(conf)+".pkl", "wb") as fp:
        pickle.dump(adv_imgs, fp)

    print("Confidence: ", conf)
    print("Attack Success Rate: ", len(adv_imgs) / num_samples)
    # Average only over successful attacks; failed ones carry an infinite
    # norm and would make the mean infinite.
    print("AVG l2: ", np.mean(success_l2) if success_l2 else float("nan"))

    #conf_mat => [true_neg, false_neg, false_pos, true_pos]
    conf_mat = calc_confusion(data, target, adv_imgs, classifier, detectors)

    print("True Positive: ", conf_mat[3])
    print("False Positive: ", conf_mat[2])
    print("True Negative: ", conf_mat[0])
    print("False Negative: ", conf_mat[1])

In [None]:
# UD case: Targeted attack

conf_lst = [0, 10]
data, target = get_data(test_loader, True)
# `target` now holds the random attack targets. test_loader is not shuffled,
# so a second, untargeted fetch returns the same batch with its ground-truth
# labels, which calc_confusion needs to skip natural misclassifications.
_, true_target = get_data(test_loader, False)
data, target = data.to(device), target.to(device)
true_target = true_target.to(device)
num_samples = data.size(0)

for conf in conf_lst:

    adversary = cw.L2Adversary(targeted=True,
                               confidence=conf,
                               search_steps=SEARCH_STEPS,
                               box=INPUT_BOX,
                               optimizer_lr=OPT_LR)

    adv, l2 = adversary(model, data, target, to_numpy=False)

    adv_imgs = []
    success_l2 = []
    for i in range(num_samples):
        if l2[i] != np.inf:    # check if attack was successful
            adv_imgs.append(adv[i].reshape(28, 28).detach().cpu().numpy())
            success_l2.append(float(l2[i]))

    with open("adv_imgs/UD_targeted_mnist_adv_imgs_cw_conf_"+str(conf)+".pkl", "wb") as fp:   #Pickling
        pickle.dump(adv_imgs, fp)

    print("Confidence: ", conf)
    print("Attack Success Rate: ", len(adv_imgs) / num_samples)
    # Average only over successful attacks; failed ones carry an infinite
    # norm and would make the mean infinite.
    print("AVG l2: ", np.mean(success_l2) if success_l2 else float("nan"))

    #conf_mat => [true_neg, false_neg, false_pos, true_pos]
    conf_mat = calc_confusion(data, true_target, adv_imgs, classifier, detectors)

    print("True Positive: ", conf_mat[3])
    print("False Positive: ", conf_mat[2])
    print("True Negative: ", conf_mat[0])
    print("False Negative: ", conf_mat[1])


In [None]:
# KD case: Targeted attack
# The adversary is also given the per-class detectors (adv_detectors).
# Success rate and mean l2 norm are reported and saved per confidence value.

conf_list = [0, 10, 20]
data, target = get_data(test_loader, True)
data, target = data.to(device), target.to(device)
num_samples = data.size(0)

for conf in conf_list:
    adversary = cw_final.L2Adversary(targeted=True,
                                     confidence=conf,
                                     search_steps=SEARCH_STEPS,
                                     box=INPUT_BOX,
                                     optimizer_lr=OPT_LR)

    adv_imgs = []
    l2_norms = []
    success = 0

    adv, batch_l2_norm = adversary(model, adv_detectors, data, target, to_numpy=False)

    for i in range(num_samples):
        if batch_l2_norm[i] != np.inf:    # attack succeeded for this sample
            success += 1
            adv_imgs.append(adv[i].reshape(28, 28).detach().cpu().numpy())
            l2_norms.append(float(batch_l2_norm[i]))

    # Report and save inside the loop so every confidence value is covered,
    # not only the last one.
    print("success rate: ", success / num_samples)
    print("mean l2 norm:", np.mean(l2_norms) if l2_norms else float("nan"))

    # The original file names appended an undefined `target_class`; the
    # suffix is dropped because attack targets are drawn per sample.
    with open("adv_imgs/cw_mnist_adv_imgs_conf_"+str(conf)+"_test_loader_KD_targeted_sigma_1.8"
              ".pkl", "wb") as fp:   #Pickling
        pickle.dump(adv_imgs, fp)
    with open("adv_imgs/cw_mnist_l2_norms_conf_"+str(conf)+"_test_loader_KD_targeted_sigma_1.8"
              ".pkl", "wb") as fp:   #Pickling
        pickle.dump(l2_norms, fp)

In [None]:
# KD Case:
# untargeted attack: success rate, avg l2 norms
# The adversary is also given the per-class detectors (adv_detectors).

conf_list = [0, 10, 20]
data, target = get_data(test_loader, False)
data, target = data.to(device), target.to(device)
num_samples = data.size(0)

for conf in conf_list:

    adversary = cw_final.L2Adversary(targeted=False,
                                     confidence=conf,
                                     search_steps=SEARCH_STEPS,
                                     box=INPUT_BOX,
                                     optimizer_lr=OPT_LR)

    adv_imgs = []
    l2_norms = []
    success = 0

    adv, batch_l2_norm = adversary(model, adv_detectors, data, target, to_numpy=False)

    for i in range(num_samples):
        if batch_l2_norm[i] != np.inf:    # attack succeeded for this sample
            success += 1
            adv_imgs.append(adv[i].reshape(28, 28).detach().cpu().numpy())
            l2_norms.append(float(batch_l2_norm[i]))

    print("Success Rate: ", success / num_samples)
    print("Avg l2 Norm: ", np.mean(l2_norms) if l2_norms else float("nan"))

    with open("adv_imgs/cw_KD_untargeted_mnist_adv_imgs_conf_"+str(conf)+"_test_loader_canny_sigma_1.8.pkl", "wb") as fp:   #Pickling
        pickle.dump(adv_imgs, fp)
    # The norms file receives the norms, not a second copy of the images.
    with open("adv_imgs/cw_KD_untargeted_mnist_l2_norms_conf_"+str(conf)+"_test_loader_canny_sigma_1.8.pkl", "wb") as fp:   #Pickling
        pickle.dump(l2_norms, fp)
