In [1]:
%load_ext autoreload
%autoreload 2


In [72]:
%pip install adversarial-robustness-toolbox

Note: you may need to restart the kernel to use updated packages.


In [2]:
import sys
sys.path.insert(1, "/home/oru2/project/project")

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time
# import matplotlib.pyplot as plt
import attacks
from privacy_accountant import PrivacyAccountant
from tqdm import tqdm
from torchvision import datasets, transforms

In [4]:
use_cuda = True
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = 100

np.random.seed(42)
torch.manual_seed(42)


## Dataloaders
train_dataset = datasets.MNIST('../mnist_data/', train=True, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))
test_dataset = datasets.MNIST('../mnist_data/', train=False, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
attack = attacks.PGD()


In [5]:
def train_model(model, train_loader, num_epochs):
    # TODO: implement this function that trains a given model on the MNIST dataset.
    # this is a general-purpose function for both standard training and adversarial training.
    # (toggle enable_defense parameter to switch between training schemes)
    model.train()
    # epsilons_clean = []
    lr = 1e-2
    losses = []
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    for epoch in tqdm(range(num_epochs)):
        for index, (images, labels) in enumerate(train_loader):
            logits = model(images)
            optimizer.zero_grad()

            loss = F.cross_entropy(logits, labels)
            loss.backward()

            optimizer.step()
            losses.append(loss.item())
                
                

        print(f'Epoch [{epoch}/{num_epochs}] Loss = {loss.item():.3f}')

In [6]:
from model import fcNet
fc_model = fcNet(784, 128, 10)
num_epochs = 20
fc_model


fcNet(
  (layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): RNNLinear(in_features=784, out_features=128, bias=True)
    (2): ReLU(inplace=True)
    (3): RNNLinear(in_features=128, out_features=128, bias=True)
    (4): ReLU(inplace=True)
    (5): RNNLinear(in_features=128, out_features=10, bias=True)
  )
)

In [7]:
train_model(fc_model, train_loader, num_epochs)

  5%|▌         | 1/20 [00:07<02:21,  7.45s/it]

Epoch [0/20] Loss = 0.100


 10%|█         | 2/20 [00:13<02:00,  6.67s/it]

Epoch [1/20] Loss = 0.083


 15%|█▌        | 3/20 [00:19<01:49,  6.44s/it]

Epoch [2/20] Loss = 0.175


 20%|██        | 4/20 [00:25<01:40,  6.29s/it]

Epoch [3/20] Loss = 0.078


 25%|██▌       | 5/20 [00:31<01:33,  6.24s/it]

Epoch [4/20] Loss = 0.021


 30%|███       | 6/20 [00:37<01:25,  6.13s/it]

Epoch [5/20] Loss = 0.016


 35%|███▌      | 7/20 [00:43<01:18,  6.02s/it]

Epoch [6/20] Loss = 0.038


 40%|████      | 8/20 [00:49<01:11,  6.00s/it]

Epoch [7/20] Loss = 0.011


 45%|████▌     | 9/20 [00:56<01:07,  6.14s/it]

Epoch [8/20] Loss = 0.014


 50%|█████     | 10/20 [01:02<01:01,  6.11s/it]

Epoch [9/20] Loss = 0.032


 55%|█████▌    | 11/20 [01:08<00:56,  6.23s/it]

Epoch [10/20] Loss = 0.007


 60%|██████    | 12/20 [01:15<00:50,  6.28s/it]

Epoch [11/20] Loss = 0.005


 65%|██████▌   | 13/20 [01:21<00:43,  6.27s/it]

Epoch [12/20] Loss = 0.003


 70%|███████   | 14/20 [01:27<00:37,  6.24s/it]

Epoch [13/20] Loss = 0.005


 75%|███████▌  | 15/20 [01:33<00:30,  6.14s/it]

Epoch [14/20] Loss = 0.010


 80%|████████  | 16/20 [01:39<00:24,  6.15s/it]

Epoch [15/20] Loss = 0.004


 85%|████████▌ | 17/20 [01:46<00:19,  6.37s/it]

Epoch [16/20] Loss = 0.002


 90%|█████████ | 18/20 [01:52<00:12,  6.37s/it]

Epoch [17/20] Loss = 0.000


 95%|█████████▌| 19/20 [01:58<00:06,  6.30s/it]

Epoch [18/20] Loss = 0.004


100%|██████████| 20/20 [02:05<00:00,  6.28s/it]

Epoch [19/20] Loss = 0.003





In [8]:
correct = 0
fc_model.eval()
for j, (images, labels) in enumerate(test_loader):
  images, labels = images, labels
  logits = fc_model(images)
  _, preds = torch.max(logits, 1)
  correct += (preds == labels).sum().item()
  # print('Batch [{}/{}]'.format(j+1, len(test_loader)))
fc_model.train()
print('Accuracy = {}%'.format(float(correct) * 100 / 10000))

Accuracy = 98.23%


In [9]:
correct = 0
eps = 0.1
fc_model.eval()
for j, (images, labels) in enumerate(test_loader):
  images, labels = images, labels
  adv_images = attack.pgd_untargeted(fc_model, images, labels, 20, eps, 0.01)  
  logits = fc_model(images)
  adv_logits = fc_model(adv_images)
  _, preds = torch.max(logits, 1)
  _, adv_preds = torch.max(adv_logits, 1)
  correct += (preds == labels).sum().item()
  correct += (adv_preds == labels).sum().item()
  # print('Batch [{}/{}]'.format(j+1, len(test_loader)))
fc_model.train()
print('Accuracy = {}%'.format(float(correct) * 100 / 20000))

Accuracy = 50.83%


In [10]:
from art.attacks.inference.membership_inference import MembershipInferenceBlackBox
from art.estimators.classification import PyTorchClassifier

In [11]:
optimizer = torch.optim.Adam(fc_model.parameters())
criterion = nn.CrossEntropyLoss()

# Wrap the PyTorch model in ART's PyTorchClassifier
art_classifier = PyTorchClassifier(
    model=fc_model,
    loss=criterion,
    optimizer=optimizer,
    input_shape=(28, 28),
    nb_classes=10
)
attack_train_size = 10000
attack_test_size = 5000

x_train = train_dataset.data
y_train = train_dataset.targets


x_test = test_dataset.data
y_test = test_dataset.targets

attack = MembershipInferenceBlackBox(estimator=art_classifier, attack_model_type="nn")
attack.fit(x_train[:attack_train_size], y_train[:attack_train_size], x_test[:attack_test_size], y_test[:attack_test_size])

mlp_inferred_train_bb = attack.infer(x_train[attack_train_size:], y_train[attack_train_size:])
mlp_inferred_test_bb = attack.infer(x_test[attack_test_size:], y_test[attack_test_size:])

# check accuracy
mlp_train_acc_bb = np.sum(mlp_inferred_train_bb) / len(mlp_inferred_train_bb)
mlp_test_acc_bb = 1 - (np.sum(mlp_inferred_test_bb) / len(mlp_inferred_test_bb))
mlp_acc_bb = (mlp_train_acc_bb * len(mlp_inferred_train_bb) + mlp_test_acc_bb * len(mlp_inferred_test_bb)) / (len(mlp_inferred_train_bb) + len(mlp_inferred_test_bb))


In [12]:
print(f"Members Accuracy: {mlp_train_acc_bb:.4f}")
print(f"Non Members Accuracy {mlp_test_acc_bb:.4f}")
print(f"Attack Accuracy {mlp_acc_bb:.4f}")

Members Accuracy: 0.8244
Non Members Accuracy 0.1194
Attack Accuracy 0.7603
