In [1]:
import pickle as pkl
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision
import torchvision.transforms as transforms
import sys
# Hyper-parameters 
input_size = 784
hidden_size = 500
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001
eps = 0.1



In [2]:
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)  

    def forward(self, x):
        x = self.fc1(x)
        #print("fc1:", x)
        x = self.relu(x)
        #print("relu:",x)
        x = self.fc2(x)
        #print("fc2:", x)
        return x



In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_dataset = torchvision.datasets.MNIST(root='./', train=True, transform=transforms.ToTensor(),download=True)

test_dataset = torchvision.datasets.MNIST(root='./', train=False, transform=transforms.ToTensor())

    # Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size,shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,batch_size=1,shuffle=False)


In [7]:
def train(device, train_loader):
    model = NeuralNet(input_size, hidden_size, num_classes).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

    # Train the model
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):  
            # Move tensors to the configured device
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()

            loss.backward()
            optimizer.step()

            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                             .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

        # Save the model checkpoint
        torch.save(model.state_dict(), 'model/model-{}.ckpt'.format(epoch))

    return model

model = train(device, train_loader)

Epoch [1/5], Step [100/600], Loss: 0.2830
Epoch [1/5], Step [200/600], Loss: 0.2568
Epoch [1/5], Step [300/600], Loss: 0.2234
Epoch [1/5], Step [400/600], Loss: 0.2253
Epoch [1/5], Step [500/600], Loss: 0.2290
Epoch [1/5], Step [600/600], Loss: 0.1826
Epoch [2/5], Step [100/600], Loss: 0.0532
Epoch [2/5], Step [200/600], Loss: 0.1856
Epoch [2/5], Step [300/600], Loss: 0.2002
Epoch [2/5], Step [400/600], Loss: 0.0796
Epoch [2/5], Step [500/600], Loss: 0.1680
Epoch [2/5], Step [600/600], Loss: 0.0860
Epoch [3/5], Step [100/600], Loss: 0.1022
Epoch [3/5], Step [200/600], Loss: 0.0439
Epoch [3/5], Step [300/600], Loss: 0.0458
Epoch [3/5], Step [400/600], Loss: 0.1025
Epoch [3/5], Step [500/600], Loss: 0.0505
Epoch [3/5], Step [600/600], Loss: 0.1071
Epoch [4/5], Step [100/600], Loss: 0.0625
Epoch [4/5], Step [200/600], Loss: 0.1229
Epoch [4/5], Step [300/600], Loss: 0.0241
Epoch [4/5], Step [400/600], Loss: 0.0532
Epoch [4/5], Step [500/600], Loss: 0.0230
Epoch [4/5], Step [600/600], Loss:

In [None]:
print(net.classifier[6].weight.grad)

In [13]:
criterion = nn.CrossEntropyLoss()

model = NeuralNet(input_size, hidden_size, num_classes).to(device)
model.load_state_dict(torch.load("model/model-4.ckpt"))
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

correct = 0
adv_correct = 0
misclassified = 0
total = 0
noises = []
y_preds = []
y_preds_adv = []
for images, labels in test_loader:
    images = Variable(images.reshape(-1, 28*28).to(device), requires_grad=True)
    labels = Variable(labels.to(device))

    outputs = model(images)
    loss = criterion(outputs, labels)
    print(loss)
    #optimizer.zero_grad()
    loss.backward()
    test1_grad = model.fc1.weight.grad
    

    #Add perturbation
    grad = torch.sign(images.grad.data)
    imgs_adv = torch.clamp(images.data + eps * grad, 0, 1)
    model.zero_grad()

    adv_outputs = model(Variable(imgs_adv))
    loss = criterion(adv_outputs, labels)
    print(loss)
    #optimizer.zero_grad()
    loss.backward()
    test2_grad = model.fc1.weight.grad
    

    _, predicted = torch.max(outputs.data, 1)
    ttt, adv_preds = torch.max(adv_outputs.data, 1)
    #print(adv_outputs, "ttt:", ttt, "pred:", adv_preds, len(images), len(predicted))


    total += labels.size(0)
    correct += (predicted == labels).sum().item()
    adv_correct += (adv_preds == labels).sum().item()
    misclassified += (predicted != adv_preds).sum().item()
    
    if (predicted == labels) and (predicted != adv_preds):
        break

    noises.extend((images - imgs_adv).data.numpy())
    y_preds.extend(predicted.data.numpy())
    y_preds_adv.extend(adv_preds.data.numpy())
    

print('Accuracy of the network w/o adversarial attack on the 10000 test images: {} %'.format(100 * correct / total))
print('Accuracy of the network with adversarial attack on the 10000 test images: {} %'.format(100 * adv_correct / total))
print('Number of misclassified examples (as compared to clean predictions): {}/{}'.format(misclassified, total))



tensor(8.1417e-05, grad_fn=<NllLossBackward>)
tensor(7.1390, grad_fn=<NllLossBackward>)
Accuracy of the network w/o adversarial attack on the 10000 test images: 100.0 %
Accuracy of the network with adversarial attack on the 10000 test images: 0.0 %
Number of misclassified examples (as compared to clean predictions): 1/1


In [7]:
test1_grad

tensor([[ 0.0000, -0.0089,  0.0000,  ..., -0.0089,  0.0000,  0.0000],
        [ 0.0000,  0.0033,  0.0000,  ...,  0.0033,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [ 0.0000,  0.0025,  0.0000,  ...,  0.0025,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0015,  0.0000,  ...,  0.0015,  0.0000,  0.0000]])

In [8]:
test2_grad

tensor([[ 0.0000, -0.0089,  0.0000,  ..., -0.0089,  0.0000,  0.0000],
        [ 0.0000,  0.0033,  0.0000,  ...,  0.0033,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [ 0.0000,  0.0025,  0.0000,  ...,  0.0025,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0015,  0.0000,  ...,  0.0015,  0.0000,  0.0000]])

In [9]:
outputs

tensor([[ -5.4141,  -7.0395,  -2.1516,  -0.0640, -12.8064,  -6.8077, -17.2413,
           9.7660,  -2.0573,  -1.4654]], grad_fn=<AddmmBackward>)

In [10]:
adv_outputs

tensor([[ -4.7388,  -7.3759,  -1.1161,   5.6317, -15.0777,   0.2700, -14.2866,
          -1.4578,   2.4740,  -0.8955]], grad_fn=<AddmmBackward>)

In [12]:
loss

tensor(7.1390, grad_fn=<NllLossBackward>)

In [None]:
with open("mnist_fgsm.pkl","wb") as f: 
        data_dict = {
                        "noises" : noises,
                        "y_preds" : y_preds,
                        "y_preds_adv" : y_preds_adv
                        }    
        pkl.dump(data_dict, f)