# Imports and Setup

In [2]:
from __future__ import print_function
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from torchvision import datasets, transforms, models
from torch import nn, optim
from torch.nn import functional as F
from torch.autograd import Variable
from scipy import ndimage
from IPython.display import HTML
import copy
import random
import time
import pickle

torch.set_printoptions(precision=3, sci_mode=True)
cuda = True if torch.cuda.is_available() else False

In [3]:
batch_size = 8
targetclass = 11

In [4]:
def normalize(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.detach().numpy()
    trans = np.transpose(npimg, (1,2,0))
    return np.squeeze(trans)

In [5]:
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# Data Entry and Processing

In [6]:
# Transform image to tensor and normalize features from [0,255] to [0,1]
transform = transforms.Compose([transforms.ToTensor(), 
                                transforms.Normalize((0.5,),(0.5,),(0.5)),
                                ])

In [7]:
# Using CIFAR100
traindata = datasets.CIFAR100('/data', download=True, train=True, transform=transform)
testdata = datasets.CIFAR100('/data', download=True, train=False, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [8]:
trainloader = torch.utils.data.DataLoader(traindata, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(testdata, batch_size=batch_size, shuffle=True)

In [56]:
# Create train loaders containing the sensitive data class
# and the non-sensitive data
target_index = []
nontarget_index = []
for i in range(0, len(in_data)):
  if in_data[i][1] == targetclass:
    target_index.append(i)
  else:
    nontarget_index.append(i)
# target_train_loader is a dataloader for the sensitive data that
# we are targeting for removal
target_train_loader = torch.utils.data.DataLoader(in_data, batch_size=64,
              sampler = torch.utils.data.SubsetRandomSampler(target_index))
# nontarget_train_loader contains all other data
nontarget_train_loader = torch.utils.data.DataLoader(in_data, batch_size=64,
              sampler = torch.utils.data.SubsetRandomSampler(nontarget_index))

In [68]:
# For the unlearning data removal method, we randomly
# relabel all sensitive examples
unlearningdata = copy.deepcopy(traindata)
unlearninglabels = list(range(100))
unlearninglabels.remove(targetclass)
for data in unlearningdata:
  if unlearningdata.targets == targetclass:
    unlearningdata.targets = random.choice(unlearninglabels)
unlearning_train_loader = torch.utils.data.DataLoader(unlearningdata, batch_size=64, shuffle=True)

# Target Model

In [10]:
# Hyperparameters
torch.backends.cudnn.enabled = True
criterion = nn.CrossEntropyLoss()

In [11]:
# Training method
def train(model, optimizer, epoch, loader, printable=True):
  model.train()
  batches = []
  steps = []
  for batch_idx, (data, target) in enumerate(loader):
    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % log_interval == 0 and printable:
      print("Epoch: {} [{:6d}]\tLoss: {:.6f}".format(
          epoch, batch_idx*len(data),  loss.item()
      ))
  return batches, steps

In [74]:
# Training method that returns recall and miss rates during training
def train2(model, optimizer, epoch, loader, printable=True):
  model.train()
  recall = []
  missrate = []
  for batch_idx, (data, target) in enumerate(loader):
    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
  r, m = testtargetmodel()
  recall.append(r)
  missrate.append(m)
  return recall, missrate

In [12]:
# Training method that keeps a list of parameter updates from
# batches containing sensitive data for amnesiac unlearning
def selectivetrain(model, optimizer, epoch, loader, returnable=False):
  model.train()
  delta = {}
  for param_tensor in model.state_dict():
        if "weight" in param_tensor or "bias" in param_tensor:
            delta[param_tensor] = 0
  for batch_idx, (data, target) in enumerate(loader):
    if targetclass in target:
      before = {}
      for param_tensor in model.state_dict():
        if "weight" in param_tensor or "bias" in param_tensor:
          before[param_tensor] = model.state_dict()[param_tensor].clone()
    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    if targetclass in target:
      after = {}
      for param_tensor in model.state_dict():
        if "weight" in param_tensor or "bias" in param_tensor:
          after[param_tensor] = model.state_dict()[param_tensor].clone()
      for key in before:
        delta[key] = delta[key] + after[key] - before[key]
    if batch_idx % log_interval == 0:
      print("\rEpoch: {} [{:6d}]\tLoss: {:.6f}".format(
          epoch, batch_idx*len(data),  loss.item()
      ), end="")
  if returnable:
    return delta

In [13]:
# Testing method
def test(model, loader, dname="Test set", printable=True):
  model.eval()
  test_loss = 0
  total = 0
  correct = 0
  with torch.no_grad():
    for data, target in loader:
      output = model(data)
      total += target.size()[0]
      test_loss += criterion(output, target).item()
      _, pred = torch.topk(output, 10, dim=1, largest=True, sorted=True)
      for i, t in enumerate(target):
        if t in pred[i]:
            correct += 1
  test_loss /= len(loader.dataset)
  if printable:
    print('{}: Mean loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        dname, test_loss, correct, total, 
        100. * correct / total
        ))
  return 1. * correct / total

In [14]:
def target_model_fn():
    # load resnet 18 and change to fit problem dimensionality
    resnet = models.resnet18()
    resnet.conv1 = nn.Conv2d(3, 64, kernel_size=(7,7), stride=(2,2), padding=(3,3), bias=False)
    resnet.fc = nn.Sequential(nn.Linear(512, 100))
    optimizer = optim.Adam(resnet.parameters())
    return resnet, optimizer

In [15]:
# FCNN attack model for membership inference attack
class AttackModel(nn.Module):
  def __init__(self):
    super(AttackModel, self).__init__()
    self.fc1 = nn.Linear(100, 256)
    self.fc2 = nn.Linear(256, 128)
    self.fc3 = nn.Linear(128, 1)

  def forward(self, x):
    x = F.relu(self.fc1(x))
    x = F.dropout(x, training=self.training)
    x = F.relu(self.fc2(x))
    x = F.dropout(x, training=self.training)
    x = self.fc3(x)
    return torch.sigmoid(x)

In [16]:
# Function to generate attack models
def attack_model_fn():
    
  model = AttackModel()
  optimizer = optim.Adam(model.parameters())
  return model, optimizer

In [17]:
# Training method for attack model
def trainattacker(model, optimizer, epoch, loader, printable=True):
  model.train()
  batches = []
  steps = []
  for batch_idx, (data, target) in enumerate(loader):
    optimizer.zero_grad()
    output = model(data)
    output = torch.flatten(output)
    loss = F.binary_cross_entropy(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % log_interval == 0 and printable:
      print("\rEpoch: {} [{:6d}]\tLoss: {:.6f}".format(
          epoch, batch_idx*len(data),  loss.item()/len(loader.dataset)
      ), end="")
  return batches, steps

In [18]:
# Testing method for attack model
def testattacker(model, loader, dname="Test set", printable=True):
  model.eval()
  test_loss = 0
  total = 0
  correct = 0
  with torch.no_grad():
    for data, target in loader:
      output = model(data)
      output = torch.flatten(output)
      total += target.size()[0]
      test_loss += F.binary_cross_entropy(output, target).item()
      pred = torch.round(output)
      correct += pred.eq(target.data.view_as(pred)).sum()
  test_loss /= len(loader.dataset)
  if printable:
    print('{}: Mean loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        dname, test_loss, correct, total, 
        100. * correct / total
        ))
  return 1. * correct / total

In [19]:
# Testing method for attack that returns full confusion matrix
def fulltestattacker(model, loader, dname="Test set", printable=True):
  model.eval()
  test_loss = 0
  total = 0
  correct = 0
  with torch.no_grad():
    tp = 0
    tn = 0
    fp = 0
    fn = 0
    for data, target in loader:
      output = model(data)
      output = torch.flatten(output)
      pred = torch.round(output)
#       correct += pred.eq(target.data.view_as(pred)).sum()
      for i in range(len(pred)):
        if pred[i] == target[i] == 1:
            tp += 1
        if pred[i] == target[i] == 0:
            tn += 1
        if pred[i] == 1 and target[i] == 0:
            fp += 1
        if pred[i] == 0 and target[i] == 1:
            fn += 1
  return tp, tn, fp, fn

# Training Shadow Models

In [None]:
num_shadow_models = 20
shadow_training_epochs = 10
log_interval = 64

In [None]:
# Create shadow models
shadow_models = []
for _ in range(num_shadow_models):
  shadow_models.append(target_model_fn())

In [None]:
# Create shadow datasets. Each must have an "in" and "out" set for attack model
# dataset generation ([in, out]). Each shadow model is trained only on the "in"
# data.
shadow_datasets = []
for i in range(num_shadow_models):
  shadow_datasets.append(torch.utils.data.random_split(traindata, [int(len(traindata)/2), int(len(traindata)/2)]))

In [None]:
# Pytorch can save any serialized object, which is very
# helpful in this instance
path = f"infattack/resnet_datasets.pt"
torch.save(shadow_datasets, path)

In [None]:
# We need to train each shadow model on the in_data for that model
for i, shadow_model_set in enumerate(shadow_models):
  starttime = time.process_time()
  shadow_model = shadow_model_set[0]
  shadow_optim = shadow_model_set[1]
  in_loader = torch.utils.data.DataLoader(shadow_datasets[i][0], batch_size=batch_size, shuffle=True)
  print(f"Training shadow model {i}")
  for epoch in range(1, shadow_training_epochs+1):
    print(f"\r\tEpoch {epoch}  "  , end="")
    train(shadow_model, shadow_optim, epoch, in_loader, printable=False)
    if epoch == shadow_training_epochs:
      test(shadow_model, testloader, dname="All data", printable=True)
  path = F"infattack/resnet-shadow_model_{i}.pt"
  torch.save({
            'model_state_dict': shadow_model.state_dict(),
            }, path)
  print(f"\tTime taken: {time.process_time() - starttime}")

Training shadow model 0
	Epoch 10  All data: Mean loss: 0.4166, Accuracy: 7452/10000 (75%)
	Time taken: 9696.398878836999
Training shadow model 1
	Epoch 10  All data: Mean loss: 0.4665, Accuracy: 7376/10000 (74%)
	Time taken: 9699.79302728
Training shadow model 2
	Epoch 10  All data: Mean loss: 0.4835, Accuracy: 7236/10000 (72%)
	Time taken: 9710.195670124001
Training shadow model 3
	Epoch 10  All data: Mean loss: 0.3711, Accuracy: 7435/10000 (74%)
	Time taken: 9711.715010443
Training shadow model 4
	Epoch 6  

# Generating Attack Training Sets

In [None]:
# Create 100 attack model training sets, one for each class
# These will be used to train 100 attack models, as per Shokri et al.

sm = nn.Softmax()
for c in range(100):
  starttime = time.process_time()
  attack_x = []
  attack_y = []
  # Generate attack training set for current class
  for i, shadow_model_set in enumerate(shadow_models):
    print(f"\rGenerating class {c} set from model {i}", end="")
    shadow_model = shadow_model_set[0]
    in_loader = torch.utils.data.DataLoader(shadow_datasets[i][0], batch_size=1)
    for data, target in in_loader:
      if target == c:
        pred = shadow_model(data).view(100)
        if torch.argmax(pred).item() == c:
            attack_x.append(sm(pred))
            attack_y.append(1)
    out_loader = torch.utils.data.DataLoader(shadow_datasets[i][1], batch_size=1)
    for data, target in out_loader:
      if target == c:
        pred = shadow_model(data).view(100)
        attack_x.append(sm(pred))
        attack_y.append(0)
              
  # Save datasets
  tensor_x = torch.stack(attack_x)
  tensor_y = torch.Tensor(attack_y)
  xpath = f"infattack/resnet_attack_x_{c}.pt"
  ypath = f"infattack/resnet_attack_y_{c}.pt"
  torch.save(tensor_x, xpath)
  torch.save(tensor_y, ypath)
  tensor_x = torch.load(f"infattack/resnet_attack_x_{c}.pt")
  tensor_y = torch.load(f"infattack/resnet_attack_y_{c}.pt")
  print(torch.unique(tensor_y, return_counts=True)[1])
  
  # Create test and train dataloaders for attack dataset
  attack_datasets = []
  attack_datasets.append(torch.utils.data.TensorDataset(tensor_x, tensor_y))
  attack_train, attack_test = torch.utils.data.random_split(
    attack_datasets[0], [int(0.9*len(attack_datasets[0])), 
    len(attack_datasets[0]) - int(0.9*len(attack_datasets[0]))])
  attackloader = torch.utils.data.DataLoader(attack_train, batch_size=batch_size, shuffle=True)
  attacktester = torch.utils.data.DataLoader(attack_test, batch_size=batch_size, shuffle=True)
  
  # Create and train an attack model
  attack_model, attack_optimizer = attack_model_fn()
  for epoch in range(10):
    trainattacker(attack_model, attack_optimizer, epoch, attackloader, printable=False)
  print(fulltestattacker(attack_model, attacktester, dname=f"Class {c}"))
  
  # Save attack model
  path = F"infattack/resnet_attack_model_{c}.pt"
  torch.save({
        'model_state_dict': attack_model.state_dict(),
        }, path)
  print(f"Time taken: {time.process_time() - starttime}")

Generating class 0 set from model 0



Generating class 0 set from model 19tensor([5004, 3082])
(281, 273, 233, 22)
Time taken: 525.9881777319824
Generating class 1 set from model 19tensor([4960, 2815])
(256, 332, 171, 19)
Time taken: 515.6521431499859
Generating class 2 set from model 19tensor([5005, 1701])
(146, 431, 69, 25)
Time taken: 498.80889649601886
Generating class 3 set from model 19tensor([4949, 1362])
(115, 443, 48, 26)
Time taken: 487.1815901249938
Generating class 4 set from model 19tensor([5014, 1481])
(114, 438, 72, 26)
Time taken: 495.0305571350036
Generating class 5 set from model 19tensor([4977, 3134])
(283, 337, 162, 30)
Time taken: 521.4092788709968
Generating class 6 set from model 19tensor([5098, 2084])
(193, 411, 108, 7)
Time taken: 502.26963512500515
Generating class 7 set from model 19tensor([5007, 2244])
(205, 363, 122, 36)
Time taken: 505.331816239981
Generating class 8 set from model 19tensor([5011, 2920])
(270, 327, 185, 12)
Time taken: 519.2818676700117
Generating class 9 set from model 19tens

# Individual Attacker

In [20]:
c = targetclass

In [21]:
#Load relevant datasets and create test dataloader
tensor_x = torch.load(f"infattack/resnet_attack_x_{c}.pt")
tensor_y = torch.load(f"infattack/resnet_attack_y_{c}.pt")
attack_datasets = []
attack_datasets.append(torch.utils.data.TensorDataset(tensor_x, tensor_y))
attacktester = torch.utils.data.DataLoader(attack_datasets[0], batch_size=batch_size, shuffle=True)

In [22]:
# Load relevant attack model
path = F"infattack/resnet_attack_model_{c}.pt"
checkpoint = torch.load(path)
attack_model.load_state_dict(checkpoint['model_state_dict'])

NameError: name 'attack_model' is not defined

In [33]:
print(fulltestattacker(attack_model, attacktester, dname=f"Class {c}"))

(1316, 4409, 601, 16)


# Train TargetModel

In [23]:
# The actual target model to attack, trained in the same
# way as the shadow models

targetmodel, targetoptim = target_model_fn()
trainingepochs = 10
log_interval = 64

In [24]:
in_data, out_data = torch.utils.data.random_split(traindata, [int(len(traindata)/2), int(len(traindata)/2)])

In [25]:
steps = []
in_loader = torch.utils.data.DataLoader(in_data, batch_size=batch_size, shuffle=True)
out_loader = torch.utils.data.DataLoader(out_data, batch_size=batch_size, shuffle=True)
for epoch in range(1, trainingepochs+1):
    print(f"\rEpoch {epoch}  "  , end="")
    starttime = time.process_time()
    steps.append(selectivetrain(targetmodel, targetoptim, epoch, in_loader, returnable=True))
    print(f"Time taken: {time.process_time() - starttime}")
test(targetmodel, testloader, dname="All data", printable=True)

Epoch: 1 [ 24576]	Loss: 3.238380Time taken: 1007.0192443899999
Epoch: 2 [ 24576]	Loss: 3.808367Time taken: 996.1704727689998
Epoch: 3 [ 24576]	Loss: 4.190974Time taken: 997.652983787
Epoch: 4 [ 24576]	Loss: 3.177373Time taken: 999.523929
Epoch: 5 [ 24576]	Loss: 2.540803Time taken: 1002.6273063850003
Epoch: 6 [ 24576]	Loss: 4.309068Time taken: 1004.036909247
Epoch: 7 [ 24576]	Loss: 2.032048Time taken: 1013.511792663
Epoch: 8 [ 24576]	Loss: 3.616669Time taken: 1015.2767908240003
Epoch: 9 [ 24576]	Loss: 3.130907Time taken: 1017.9769915679999
Epoch: 10 [ 24576]	Loss: 3.527303Time taken: 1019.2529550299987
All data: Mean loss: 0.3944, Accuracy: 7371/10000 (74%)


0.7371

In [26]:
path = F"infattack/cnn_target_trained.pt"
torch.save({
            'model_state_dict': targetmodel.state_dict(),
            'optimizer_state_dict': targetoptim.state_dict(),
            }, path)

In [27]:
f = open(f"infattack/cnn_batches.pkl", "wb")
pickle.dump(steps, f)
f.close()

In [28]:
print(f"Batches effected: {len(steps)}/{len(in_loader)*trainingepochs} = {100*len(steps)/(len(in_loader)*trainingepochs)}%")

Batches effected: 10/31250 = 0.032%


In [29]:
in_loader = torch.utils.data.DataLoader(in_data, batch_size=1, shuffle=False)
out_loader = torch.utils.data.DataLoader(out_data, batch_size=1, shuffle=False)

In [31]:
# Create 100 attack model training sets, one for each class
# These will be used to train 100 attack models, as per Shokri et al.
# Currently configured to only produce for the target class
attack_datasets = []
sm = nn.Softmax()
for c in range(targetclass, targetclass+1):
    targetmodel.eval()
    attackdata_x = []
    attackdata_y = []
    count = 0
    print(f"\rGenerating class {c} set from target model", end="")
    for data, target in in_loader:
      if target == c:
        pred = targetmodel(data).view(100)
        if torch.argmax(pred).item() == c:
            attackdata_x.append(data)
            attackdata_y.append(1)
            count += 1
    for data, target in out_loader:
      if target == c:
        attackdata_x.append(data)
        attackdata_y.append(0)
        count += 1
    attack_tensor_x = torch.stack(attackdata_x)
    attack_tensor_y = torch.Tensor(attackdata_y)

Generating class 11 set from target model

In [32]:
atk_data = torch.utils.data.TensorDataset(attack_tensor_x, attack_tensor_y)
atk_loader = torch.utils.data.DataLoader(atk_data, batch_size=1, shuffle=False)

In [69]:
def testtargetmodel():
    attack_datasets = []
    sm = nn.Softmax()
    for c in range(targetclass, targetclass+1):
        targetmodel.eval()
        attack_x = []
        attack_y = []
        for data, target in atk_loader:
            data = data.reshape(1,3,32,32)
            pred = targetmodel(data).view(100)
            attack_x.append(sm(pred))
            attack_y.append(target)
        tensor_x = torch.stack(attack_x)
        tensor_y = torch.Tensor(attack_y)
        path = F"infattack/resnet_attack_model_{c}.pt"
        checkpoint = torch.load(path)
        attack_model.load_state_dict(checkpoint['model_state_dict'])
        attack_datasets = []
        attack_datasets.append(torch.utils.data.TensorDataset(tensor_x, tensor_y))
        attacktester = torch.utils.data.DataLoader(attack_datasets[0], batch_size=batch_size, shuffle=True)
        tp, tn, fp, fn = fulltestattacker(attack_model, attacktester, dname=f"\rclass {c}")
        recall = tp / (tp + fn)
        print(f"\trecall: {recall}")
        missrate = fn / (fn + tp)
#         print(f"\tmissrate: {missrate}")
        return recall, missrate

In [37]:
attack_model, _ = attack_model_fn()

In [95]:
path = F"infattack/cnn_target_trained.pt"
checkpoint = torch.load(path)
targetmodel.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [78]:
# Test amnesiac unlearning by testing membership inference
# attack results on unprotected model, model after amnesiac
# step, and after each epoch of further training

recall = []
missrate = []
r, m = testtargetmodel()
recall.append(r)
missrate.append(m)
for step in steps:
    const = 1
    with torch.no_grad():
        state = targetmodel.state_dict()
        for param_tensor in state:
            if "weight" in param_tensor or "bias" in param_tensor:
              state[param_tensor] = state[param_tensor] - const*step[param_tensor]
    targetmodel.load_state_dict(state)
r, m = testtargetmodel()
recall.append(r)
missrate.append(m)
for epoch in range(5):
    print(f"\rEpoch {epoch}  "  , end="")
    starttime = time.process_time()
    r, m = train2(targetmodel, targetoptim, epoch, nontarget_train_loader, printable=False)
    recall = recall + r
#     print(f"Time taken: {time.process_time() - starttime}")



	recall: 0.9523809523809523
	recall: 0.12698412698412698
Epoch 0  	recall: 0.015873015873015872
Epoch 1  	recall: 0.0
Epoch 2  	recall: 0.0
Epoch 3  	recall: 0.0
Epoch 4  	recall: 0.0


In [79]:
print(recall)

[0.9523809523809523, 0.12698412698412698, 0.015873015873015872, 0.0, 0.0, 0.0, 0.0]


<All keys matched successfully>