In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split, ConcatDataset
from torch.utils.data import TensorDataset, DataLoader
import os


### Process Image Dataset to be trained by CNN

In [None]:
def process_image_dataset(dataset, image_batch_size):
    """Download an image dataset and split it into four DataLoaders.

    The official train and test partitions are concatenated, the result is
    randomly split in half into a shadow pool and a target pool, and each pool
    is randomly split in half again into a "train" part and an "out" part.
    Names follow the notation of the paper referenced by the original author.

    Args:
        dataset: 'CIFAR10' or 'MNIST'. Any other value selects FashionMNIST.
        image_batch_size: Batch size used for every returned loader.

    Returns:
        Tuple ``(Dtrainloader, Dnonmemberloader, Dtrainshadowloader,
        Doutshadowloader)`` of shuffling DataLoaders.
    """
    # Pick the torchvision dataset class, its download root and the
    # per-channel mean/std used for normalisation (3 channels vs. 1 channel).
    if dataset == 'CIFAR10':
        dataset_cls = torchvision.datasets.CIFAR10
        root = './data/CIFAR10'
        channel_stats = (0.5, 0.5, 0.5)
    elif dataset == 'MNIST':
        dataset_cls = torchvision.datasets.MNIST
        root = './data/MNIST'
        channel_stats = (0.5,)
    else:
        # Fallback kept from the original code: every other name means FashionMNIST.
        dataset_cls = torchvision.datasets.FashionMNIST
        root = './data'
        channel_stats = (0.5,)

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(channel_stats, channel_stats)])

    trainset = dataset_cls(root=root, train=True,
                           download=True, transform=transform)
    testset = dataset_cls(root=root, train=False,
                          download=True, transform=transform)

    # The official train/test partition is irrelevant here, so merge both parts.
    fulldataset = ConcatDataset([trainset, testset])

    def _split_in_half(pool):
        # The second part absorbs the odd sample, so the two lengths always
        # sum to len(pool) as random_split requires.
        first = len(pool) // 2
        return random_split(pool, (first, len(pool) - first))

    Dshadow, Dtarget = _split_in_half(fulldataset)
    Dtrainshadow, Doutshadow = _split_in_half(Dshadow)
    Dtrain, Dnonmember = _split_in_half(Dtarget)

    def _loader(subset):
        return torch.utils.data.DataLoader(subset, batch_size=image_batch_size,
                                           shuffle=True, num_workers=0)

    return (_loader(Dtrain), _loader(Dnonmember),
            _loader(Dtrainshadow), _loader(Doutshadow))

### Dataset class for the feature vectors of the MLP (attack model)

In [None]:

class Dataset(torch.utils.data.Dataset):
    """Pairs an indexable container of features with a parallel one of labels."""

    def __init__(self, features, labels):
        """Keep references to the two parallel containers."""
        self.features, self.labels = features, labels

    def __len__(self):
        """Number of samples, taken from the label container."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return the ``(feature, label)`` pair stored at ``index``."""
        return self.features[index], self.labels[index]

### Process Posterior Dataset to be Trained by MLP

In [None]:
def process_posterior_dataset(model, Dmemberloader, Dnonmemberloader, posterior_batch_size, image_batch_size, shuffle):
    """Build the attack-model dataset from a trained classifier's outputs.

    Every sample of both loaders is passed through ``model``; the three largest
    output values of each sample, ordered high to low, form its feature vector.
    Member samples are labelled 1 and non-member samples 0. Features and labels
    are stored member-first, in loader iteration order.

    Args:
        model: Trained classifier, called as ``model(images)``; it must return
            one row of at least three scores per sample.
        Dmemberloader: Iterable of ``(images, labels)`` batches used to train ``model``.
        Dnonmemberloader: Iterable of ``(images, labels)`` batches not seen by ``model``.
        posterior_batch_size: Batch size of the returned loader.
        image_batch_size: Unused; kept for backward compatibility. Sample
            counts are taken from the actual batches, so a short final batch
            is handled correctly.
        shuffle: Whether the returned loader shuffles (use True when the
            result is used to train the attack model).

    Returns:
        DataLoader yielding ``(features, labels)`` with float32 tensors of
        shape ``(batch, 3)`` and ``(batch, 1)``.
    """
    with torch.no_grad():
        member_X = _top3_posteriors(model, Dmemberloader)
        nonmember_X = _top3_posteriors(model, Dnonmemberloader)

    # mlp_X holds the feature vectors, mlp_Y the labels (1-member/0-nonmember).
    mlp_X = torch.cat((member_X, nonmember_X), 0)
    mlp_Y = torch.cat((torch.ones(member_X.size(0), 1, dtype=torch.float32),
                       torch.zeros(nonmember_X.size(0), 1, dtype=torch.float32)), 0)

    mlp_dataset = Dataset(mlp_X, mlp_Y)

    Dposteriorloader = torch.utils.data.DataLoader(mlp_dataset, posterior_batch_size,
                                                   shuffle=shuffle)

    return Dposteriorloader


def _top3_posteriors(model, loader):
    """Return an (N, 3) float32 tensor with the top-3 model outputs per sample, high to low."""
    # topk returns its values sorted in descending order, which matches
    # "sort ascending, take the last three, flip".
    rows = [torch.topk(model(images), 3, dim=1).values.to(torch.float32)
            for images, _ in loader]
    if not rows:
        return torch.empty(0, 3, dtype=torch.float32)
    return torch.cat(rows, 0)

### Attack Network for MLP

In [None]:
#Attack network
class attack_net(torch.nn.Module):
    """Binary classifier: Linear -> ReLU -> Linear(1) -> Sigmoid.

    Args:
        input_size: Number of features per input row.
        hidden_size: Width of the single hidden layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size, self.hidden_size = input_size, hidden_size
        self.fc1 = torch.nn.Linear(input_size, hidden_size)
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(hidden_size, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Map a batch of feature rows to one value in (0, 1) per row."""
        activated = self.relu(self.fc1(x))
        return self.sigmoid(self.fc2(activated))

### Train Attack Model

In [None]:
def train_attack_model(attack_data_loader, input_size, hidden_size, lr, epochs, batch_size):
    """Train the attack MLP and report its accuracy on its own training data.

    Args:
        attack_data_loader: Iterable of ``(features, labels)`` batches, where
            labels are float tensors shaped like the model output ``(batch, 1)``.
        input_size: Number of features per sample.
        hidden_size: Hidden-layer width of ``attack_net``.
        lr: Learning rate for the ASGD optimizer.
        epochs: Number of passes over ``attack_data_loader``.
        batch_size: Unused; kept for backward compatibility. The accuracy
            denominator is the real number of samples seen, so a short final
            batch no longer deflates the reported accuracy.

    Returns:
        The trained ``attack_net`` instance.
    """
    attack_model = attack_net(input_size, hidden_size)

    criterion = nn.MSELoss()
    optimizer_attack = torch.optim.ASGD(attack_model.parameters(), lr=lr)

    print('Training the Attack Model...')
    for epoch in range(epochs):
        running_loss = 0.0

        for i, (inputs, labels) in enumerate(attack_data_loader):
            optimizer_attack.zero_grad()
            loss = criterion(attack_model(inputs), labels)
            loss.backward()
            optimizer_attack.step()

            running_loss += loss.item()
            if i % 3000 == 2999:    # report the mean loss every 3000 mini-batches
                print('[Epoch %d, Batch %1d] loss: %.6f' %
                      (epoch + 1, i + 1, running_loss / 3000))
                running_loss = 0.0

    print('Finished Training Attack Model')
    #torch.save(attack_model.state_dict(), PATH) #you can also save the attack model if PATH is added as param

    correct = 0
    total = 0
    with torch.no_grad():
        for X, y in attack_data_loader:
            predictions = attack_model(X).round()
            total += y.size(0)  # count real samples, not nominal batch size
            correct += (predictions == y).sum().item()

    # An empty loader reports 0 % instead of raising ZeroDivisionError.
    accuracy = 100 * correct / total if total else 0.0
    print('Accuracy of the attack on the All Shadow(Dshadow) Data: %d %%' % (accuracy,))
    return attack_model

### Network for training CIFAR10 target model

In [None]:

class CIFAR_target_Net(nn.Module):
    """Small CNN with 3 input channels and 10 output scores.

    Two 5x5 convolutions (32 filters each, the first with padding 2), each
    followed by ReLU and 2x2 max-pooling, then two fully connected layers.
    ``fc1`` expects 32*6*6 features, which is what a 32x32 input produces.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, (5, 5), padding=2)
        self.conv2 = nn.Conv2d(32, 32, (5, 5))
        self.fc1 = nn.Linear(32 * 6 * 6, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch of images."""
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2, 2)
        flat = x.view(x.size(0), -1)
        return self.fc2(F.relu(self.fc1(flat)))
    
    
    

### Network for training MNIST/FashionMNIST target model

In [None]:
class target_Net(nn.Module):
    """Small CNN with 1 input channel and 10 output scores.

    Two 5x5 convolutions (6 and 16 filters), each followed by ReLU and 2x2
    max-pooling, then two fully connected layers. ``fc1`` expects 256
    features, which is what a 28x28 input produces (16 * 4 * 4).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch of images."""
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2, 2)
        flat = x.view(x.size(0), -1)
        return self.fc2(F.relu(self.fc1(flat)))


### Train Target/Victim Model

In [None]:
def train_target_model(dataset, Dtrainloader, Dnonmemberloader, lr, momentum, num_epochs):
    """Train the target (victim) classifier and print its accuracy.

    Args:
        dataset: 'CIFAR10' selects ``CIFAR_target_Net``; any other value
            selects ``target_Net``.
        Dtrainloader: Iterable of ``(images, labels)`` batches used for training.
        Dnonmemberloader: Iterable of ``(images, labels)`` batches used only
            for the held-out accuracy report.
        lr: SGD learning rate.
        momentum: SGD momentum.
        num_epochs: Number of passes over ``Dtrainloader``.

    Returns:
        The trained network.
    """
    # Build only the architecture that is needed, instead of always creating
    # a target_Net and discarding it for CIFAR10.
    net = CIFAR_target_Net() if dataset == 'CIFAR10' else target_Net()

    criterion = nn.CrossEntropyLoss()
    optimizer_target = optim.SGD(net.parameters(), lr=lr, momentum=momentum)

    print('Training the Target Model...')
    for epoch in range(num_epochs):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, (inputs, labels) in enumerate(Dtrainloader):
            # zero the parameter gradients
            optimizer_target.zero_grad()

            # forward + backward + optimize
            loss = criterion(net(inputs), labels)
            loss.backward()
            optimizer_target.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[Epoch %d, Batch %3d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

    print('Finished Training Target Model')

    def _accuracy_percent(loader):
        # Top-1 accuracy in percent; 0.0 for an empty loader instead of a
        # ZeroDivisionError.
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in loader:
                _, predicted = torch.max(net(images), 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        return 100 * correct / total if total else 0.0

    # Accuracy on the data the model was trained on
    print('Accuracy of the prediction of image classification in training(member) data: %d %%' % (
        _accuracy_percent(Dtrainloader),))

    # Accuracy on samples which were not seen during training
    print('Accuracy of the prediction of image classification in remaining nonmember data : %d %%' % (
        _accuracy_percent(Dnonmemberloader),))

    return net #Returns the trained target model

### Train Shadow Model

In [None]:
def train_shadow_model(dataset, Dtrainshadowloader, Doutshadowloader, lr, momentum, num_epochs):
    """Train the shadow classifier and print its accuracy.

    The shadow model uses the same architecture choice and optimizer settings
    as the target model, but is trained on the shadow split.

    Args:
        dataset: 'CIFAR10' selects ``CIFAR_target_Net``; any other value
            selects ``target_Net``.
        Dtrainshadowloader: Iterable of ``(images, labels)`` batches used for training.
        Doutshadowloader: Iterable of ``(images, labels)`` batches used only
            for the held-out accuracy report.
        lr: SGD learning rate.
        momentum: SGD momentum.
        num_epochs: Number of passes over ``Dtrainshadowloader``.

    Returns:
        The trained shadow network.
    """
    # Build only the architecture that is needed, instead of always creating
    # a target_Net and discarding it for CIFAR10.
    net_shadow = CIFAR_target_Net() if dataset == 'CIFAR10' else target_Net()

    criterion = nn.CrossEntropyLoss()
    optimizer_shadow = optim.SGD(net_shadow.parameters(), lr=lr, momentum=momentum)

    print('Training the Shadow Model...')
    for epoch in range(num_epochs):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, (inputs, labels) in enumerate(Dtrainshadowloader):
            # zero the parameter gradients
            optimizer_shadow.zero_grad()

            # forward + backward + optimize
            loss = criterion(net_shadow(inputs), labels)
            loss.backward()
            optimizer_shadow.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[Epoch %d, Batch %3d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

    print('Finished Training Shadow Model')

    def _accuracy_percent(loader):
        # Top-1 accuracy in percent; 0.0 for an empty loader instead of a
        # ZeroDivisionError.
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in loader:
                _, predicted = torch.max(net_shadow(images), 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        return 100 * correct / total if total else 0.0

    # Accuracy on the data the shadow model was trained on
    print('Accuracy of the prediction of image classification in training shadow(Dtrainshadow) Data): %d %%' % (
        _accuracy_percent(Dtrainshadowloader),))

    # Accuracy on samples which were not seen during training
    print('Accuracy of the prediction of image classification in out shadow(Doutshadow) Data : %d %%' % (
        _accuracy_percent(Doutshadowloader),))

    return net_shadow #Returns the trained shadow model
    
    

### Run Membership Inference 

In [None]:
selected_dataset = 'CIFAR10' # change it to MNIST or FashionMNIST to run on these datasets

# Preprocess dataset and split them
image_batch_size=4
Dtrainloader, Dnonmemberloader, Dtrainshadowloader, Doutshadowloader = process_image_dataset(selected_dataset,image_batch_size)

# Train the TARGET model (CNN)

lr = 0.001
momentum = 0.9
epochs = 10
target_model = train_target_model(selected_dataset, Dtrainloader, Dnonmemberloader, lr, momentum, epochs)

# Train the SHADOW model (CNN), with the same hyper-parameters as the target
shadow_model = train_shadow_model(selected_dataset, Dtrainshadowloader, Doutshadowloader, lr, momentum, epochs)

# Process posterior data for building the ATTACK model.
# The 3 highest posteriors, sorted from HIGH to LOW as described in the paper,
# form the feature vector of the multilayer perceptron (ATTACK model).
# Each feature vector is paired with its label (1-member/0-nonmember).
posterior_batch_size = 4
shuffle = True # Must be True: the samples are stored members first, then non-members
attack_data_loader = process_posterior_dataset(shadow_model, Dtrainshadowloader, Doutshadowloader, posterior_batch_size, image_batch_size, shuffle)

# Feed these posteriors to the ATTACK model, which is an MLP as per the paper.
# NOTE: lr and epochs are re-bound here for the attack model.
input_size = 3
hidden_size = 64
epochs = 25
lr = 1e-05
attack_model = train_attack_model(attack_data_loader, input_size, hidden_size, lr, epochs, posterior_batch_size)

# The attack model is now trained on the whole shadow data (Dtrainshadow and Doutshadow).
# Next, get the posteriors of every sample in the target data (members and
# non-members) from the target model, again paired with 1-member/0-nonmember labels.
posterior_batch_size = 4
shuffle = False  # No training happens on this loader, so shuffling is not needed
target_posteriors_loader = process_posterior_dataset(target_model, Dtrainloader, Dnonmemberloader, posterior_batch_size, image_batch_size, shuffle)

# Finally, run the membership inference on all target data.
# The confusion matrix is built from the ground-truth labels carried by the
# loader rather than from the batch position, so it stays correct whatever the
# batch sizes are and whether or not the last batch is full.
TP = 0
FP = 0
TN = 0
FN = 0

with torch.no_grad():
    for X, y in target_posteriors_loader:
        predicted_member = attack_model(X).round() == 1
        is_member = y == 1   # label 1 means member, 0 means non-member

        TP += (predicted_member & is_member).sum().item()
        FN += (~predicted_member & is_member).sum().item()
        FP += (predicted_member & ~is_member).sum().item()
        TN += (~predicted_member & ~is_member).sum().item()

correct = TP + TN
total = TP + TN + FP + FN

# Each ratio falls back to 0 when its denominator is empty (e.g. the attack
# never predicts "member"), instead of raising ZeroDivisionError.
print('Accuracy of the attack on the all Target Data: %d %%' % (
    100 * correct / total if total else 0.0,))

print('Precision of the attack on the all Target Data of class 1(member): %d %%' % (
    100 * TP / (TP + FP) if (TP + FP) else 0.0,))

print('Recall of the attack on the all Target Data of class 1(member): %d %%' % (
    100 * TP / (TP + FN) if (TP + FN) else 0.0,))
    

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/CIFAR10/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/CIFAR10/cifar-10-python.tar.gz to ./data/CIFAR10
Files already downloaded and verified
Training the Target Model...
[Epoch 1, Batch 2000] loss: 1.936
[Epoch 2, Batch 2000] loss: 1.422
[Epoch 3, Batch 2000] loss: 1.240
[Epoch 4, Batch 2000] loss: 1.073
[Epoch 5, Batch 2000] loss: 0.940
Finished Training Traget Model
Accuracy of the prediction of image classification in training(member) data: 69 %
Accuracy of the prediction of image classification in remaining nonmember data : 57 %
Training the Shadow Model...
[Epoch 1, Batch 2000] loss: 1.955
[Epoch 2, Batch 2000] loss: 1.469
[Epoch 3, Batch 2000] loss: 1.239
[Epoch 4, Batch 2000] loss: 1.065
[Epoch 5, Batch 2000] loss: 0.930
Finished Training Shadow Model
Accuracy of the prediction of image classification in training shadow(Dtrainshadow) Data): 74 %
Accuracy of the prediction of image classification in out shadow(Doutshadow) Data : 61 %
Training the Attack Model...
[Epoch 1, Batch 3000] loss: 0.253141
[Epoch 1, Batch 