In [None]:
#@title Downloading helper files...<p>(Run this cell first!)
import urllib.request

# Fetch the helper modules that later cells import (cifar10_model, binary_model)
# and store them next to the notebook under their original file names.
HELPER_BASE_URL = "https://raw.githubusercontent.com/priv-sec/twai/master/exercise5_data/"
for helper_file in ("binary_model.py", "cifar10_model.py"):
    urllib.request.urlretrieve(HELPER_BASE_URL + helper_file, helper_file)

## Membership Inference Attack (Salem et al.)

**1) Preprocess the Cifar10 dataset**

**2) Train target model**

**3) Train shadow model**

**4) Train attack model**

**5) Attack evaluation**



Task: Please fill out the #TODOs# in the code below.

Note:
  - The target and shadow model should reach an accuracy above 95%. If the accuracy is below 95%, consider re-training the respective model.
  - Your attack works if the accuracy calculated in the last cell is above 60%.

## Preprocess the Cifar10 dataset

Split the dataset into four subsets to train and test the target and shadow model.

In [None]:
import torch
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
from matplotlib import pyplot as plt

from cifar10_model import Cifar10Net, train_cifar10_model
from binary_model import BinaryNet, train_binary_model

In [None]:
# Batch size of every DataLoader built from train_kwargs.
TRAIN_BATCHSIZE = 32
# Batch size of every DataLoader built from test_kwargs.
TEST_BATCHSIZE = 1000

# Epoch count for the CIFAR-10 classifiers — presumably meant for the
# target/shadow training TODOs; it is not referenced in the code shown here.
EPOCHS_CIFAR10 = 30
# Epoch count handed to train_binary_model when training the attack model.
EPOCHS_BINARY = 10

In [None]:
def get_cifar10_dataset(root_dir="./data"):
    """
    Download (if necessary) and load the CIFAR-10 train and test splits.

    Every image is converted to a tensor and normalised per channel with
    mean 0.5 and standard deviation 0.5.

    Args:
        root_dir: directory in which the dataset is stored / downloaded to.

    Returns:
        tuple: (train_dataset, test_dataset) as torchvision CIFAR10 datasets.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # Honour the caller-supplied root_dir instead of a hard-coded path.
    train_dataset = datasets.CIFAR10(root=root_dir, train=True, download=True, transform=transform)
    test_dataset = datasets.CIFAR10(root=root_dir, train=False, download=True, transform=transform)

    return train_dataset, test_dataset


def split_dataset(dataset):
    """
    Randomly split the dataset into four equally sized, disjoint parts and
    wrap each part in a DataLoader.

    The parts are, in order: target train, target test, shadow train and
    shadow test. Each part holds len(dataset) // 4 samples; if the dataset
    size is not divisible by four, the left-over samples are not used.

    The loaders are configured from the module-level train_kwargs and
    test_kwargs dictionaries, which must be defined before this is called.

    Args:
        dataset: the dataset to split.

    Returns:
        tuple: ((train_target, test_target), (train_shadow, test_shadow))
    """
    quarter = len(dataset) // 4
    lengths = [quarter] * 4

    # random_split requires the lengths to sum up to len(dataset); park any
    # remainder in an additional split that is discarded below.
    leftover = len(dataset) - 4 * quarter
    if leftover:
        lengths.append(leftover)

    train_target_subset, test_target_subset, train_shadow_subset, test_shadow_subset = \
        torch.utils.data.random_split(dataset, lengths)[:4]

    train_target = torch.utils.data.DataLoader(train_target_subset, **train_kwargs)
    test_target = torch.utils.data.DataLoader(test_target_subset, **test_kwargs)
    train_shadow = torch.utils.data.DataLoader(train_shadow_subset, **train_kwargs)
    test_shadow = torch.utils.data.DataLoader(test_shadow_subset, **test_kwargs)

    return (train_target, test_target), (train_shadow, test_shadow)

In [None]:
# Run on the GPU whenever PyTorch can see one, otherwise fall back to the CPU.
cuda_available = torch.cuda.is_available()
device = torch.device("cuda") if cuda_available else torch.device("cpu")

# Keyword arguments forwarded to the training / test DataLoaders.
train_kwargs = dict(batch_size=TRAIN_BATCHSIZE, shuffle=True)
test_kwargs = dict(batch_size=TEST_BATCHSIZE, shuffle=True)

if cuda_available:
    # Additional loader settings that are only applied on GPU runs.
    cuda_kwargs = dict(num_workers=2, pin_memory=True, shuffle=True)
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)

In [None]:
# Load both official CIFAR-10 splits.
train_dataset, test_dataset = get_cifar10_dataset()

# Pool the two splits into a single dataset and cut the pool into the four
# loaders used by the target and the shadow model.
(train_target_loader, test_target_loader), (train_shadow_loader, test_shadow_loader) = split_dataset(
    torch.utils.data.ConcatDataset([train_dataset, test_dataset])
)

## Target Model

Here, we train the target model which we would usually get as input without additional background knowledge.

In [None]:
# Fresh, untrained instance of the helper network, placed on the selected device.
target_model = Cifar10Net().to(device)
# TODO: Train target model with target data # 
# Hint: train_cifar10_model is imported at the top; its signature is not shown in
# this notebook — presumably it mirrors train_binary_model(model, device, loader,
# epochs). Verify against cifar10_model.py.

## Shadow Model

Here, we train a shadow model which has the same architecture as the target model.

In [None]:
# Second, independent instance of the same network class as the target model.
shadow_model = Cifar10Net().to(device)
# TODO: Train shadow model with shadow data # 
# Hint: train_cifar10_model is imported at the top; its signature is not shown in
# this notebook — presumably it mirrors train_binary_model(model, device, loader,
# epochs). Verify against cifar10_model.py.

## Top-k classes

The advantage of the shadow model is that we know exactly which points were used for training and which for testing. Therefore, we can use this knowledge to create our own labelled IN/OUT dataset on which to train the attack model.

In [None]:
def getTopk(data, top):
    """
    Returns the top k maximum entries of the given data.

    Args:
        data: prediction vectors to reduce. Callers pass the result of
              np.concatenate over the collected predictions — presumably
              one row per sample; confirm once predict_ is implemented.
        top:  number of largest entries to keep (callers pass 3).

    Note: the body is still an exercise stub, so the function currently
          returns None.
    """
    return # TODO #


# Normalise over the last (class) axis: identical to dim=0 for a single
# prediction vector, and still correct when applied to a whole
# (batch, classes) output, where dim=0 would normalise across the batch.
softmax = torch.nn.Softmax(dim=-1)

def predict_(model, data_loader):
    """
    Iterates the given data_loader and collects the prediction vectors
    of the given model in a list.

    Note: It is essential for this attack to apply the softmax function
          DIRECTLY AFTER receiving the prediction vector.

    Args:
        model:       the model whose predictions are collected.
        data_loader: loader to iterate over.

    Returns:
        list: the collected prediction vectors. Callers feed the result to
        np.concatenate, so the entries presumably have to be array-like and
        on the CPU — confirm when implementing the TODO.
    """
    preds = list()
    # Gradient tracking is disabled while collecting predictions.
    with torch.no_grad():
        # TODO #
    return preds

In [None]:
# IN
# Predictions of the shadow model on its own training data; labelled 1.
shadow_preds_IN = predict_( shadow_model, train_shadow_loader )
shadow_labels_IN = # TODO: Create appropriate amount of ones #

# OUT
# Predictions of the shadow model on its held-out test data; labelled 0.
shadow_preds_OUT = predict_( shadow_model, test_shadow_loader )
shadow_labels_OUT = # TODO: Create appropriate amount of zeros #

# ALL = IN + OUT
# IN entries come first in both arrays, so predictions and labels stay aligned.
shadow_preds = np.concatenate([shadow_preds_IN, shadow_preds_OUT])
shadow_labels = np.concatenate([shadow_labels_IN, shadow_labels_OUT])

# Top 3 prediction values
# Note: this overwrites shadow_preds with its reduced version.
shadow_preds = getTopk(shadow_preds, 3)

In [None]:
# Pair each reduced shadow prediction (float features) with its IN/OUT label
# (integer class index) and serve the pairs with the training loader settings.
attack_dataset = torch.utils.data.TensorDataset(
    torch.Tensor(shadow_preds).float(),
    torch.Tensor(shadow_labels).long(),
)
train_attacker_loader = torch.utils.data.DataLoader(attack_dataset, **train_kwargs)

## Attack Model

Our attacker model is a binary classifier which tries to classify the top 3 maximum entries of a specific prediction vector into one of two classes: Inside the training set or inside the test set.

In [None]:
# Fresh instance of the helper binary classifier, placed on the selected device.
attack_model = BinaryNet().to(device)
# Train it on the shadow-derived IN/OUT dataset for EPOCHS_BINARY epochs
# (training details live in the downloaded binary_model.py helper).
train_binary_model(attack_model, device, train_attacker_loader, EPOCHS_BINARY)

## Evaluation

Now, we want to evaluate the accuracy of our attack model. Therefore, we iterate over every point of the target model's training (IN) and test (OUT) sets and check whether the predicted label is correct or not.

In [None]:
# IN
# Predictions of the target model on its own training data; ground truth 1.
target_preds_IN = predict_( target_model, train_target_loader )
target_labels_IN = # TODO: Create appropriate amount of ones #

# OUT
# Predictions of the target model on its held-out test data; ground truth 0.
target_preds_OUT = predict_( target_model, test_target_loader )
target_labels_OUT = # TODO: Create appropriate amount of zeros #

# ALL = IN + OUT
# IN entries come first in both arrays, so predictions and labels stay aligned.
target_preds = np.concatenate([target_preds_IN, target_preds_OUT])
target_labels = np.concatenate([target_labels_IN, target_labels_OUT])

# Top 3 prediction values
# Note: this overwrites target_preds with its reduced version.
target_preds = getTopk(target_preds, 3)

In [None]:
# Pair each reduced target prediction with its IN/OUT ground-truth label.
eval_dataset = torch.utils.data.TensorDataset(torch.Tensor(target_preds).type(torch.FloatTensor), torch.Tensor(target_labels).type(torch.LongTensor) )
# No batch size is given, so the loader yields one sample per iteration.
train_eval_loader = torch.utils.data.DataLoader(eval_dataset)

correct_preds = 0
with torch.no_grad():
    for data, target in train_eval_loader:
        x, y = data.to(device), target.to(device)
        # TODO: Use <attack_model> to make predictions #
        # TODO: For every correct prediction increment <correct_preds> #
        # Note: Maybe <.detach()> and <.cpu()> are required to access specific tensors #

# Divide by the actual number of evaluated samples rather than a hard-coded
# 30000, so the accuracy stays correct if the split sizes ever change.
print("Accuracy", correct_preds / len(eval_dataset))