In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import torch

from dataclass_csv import DataclassWriter
from dataclasses import dataclass
from itertools import product
from tqdm.notebook import tqdm
from torch.nn import Conv2d
from torch.nn import CrossEntropyLoss
from torch.nn import Flatten
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import Module
from torch.nn import ReLU
from torch.nn import Tanh
from torch.nn import Sequential
from torch.nn import Sigmoid
from torch.nn import Softmax
from torch.optim import Adam
from torch.utils.data import ConcatDataset
from torch.utils.data import DataLoader
from torch.utils.data import Subset
from torch.utils.data import Dataset
from torch.utils.data import TensorDataset
from torch.utils.data import random_split
from torchvision.datasets import CIFAR10
from torchvision.transforms import Compose
from torchvision.transforms import Normalize
from torchvision.transforms import ToTensor
from typing import List
from typing import Tuple
from torch import Tensor

About the CIFAR10 dataset...

In [None]:
# Directory where torchvision downloads and caches the CIFAR-10 dataset
DATA_PATH = './data'

# Number of classes in the dataset; also the width of the classifier's output vector
N_CLASSES = 10

I'm sharing the test set across the victim and shadow models. About how the rest of the training set should be split up...

In [None]:
# Fraction of the CIFAR-10 training data given to the victim model; the rest is for the adversary
VICTIM_TRAIN_FRAC = 0.2

# Fraction of the adversary's data used to train each shadow model
# (an equally sized, disjoint subset is drawn as that shadow model's non-member set)
SHADOW_TRAIN_FRAC = 0.2

# Number of shadow models trained to generate attack training data
N_SHADOW_MODELS = 20

About hyperparameters for training... Probably want the epochs and victim and shadow models to be the same? Well, not now that we're doing differential privacy...

In [None]:
# Epochs used to train each CIFAR-10 classifier (victim and shadow models alike)
N_CIFAR_EPOCHS = 100
# Epochs used to train each per-class attack model
N_ATTACK_EPOCHS = 10

Some configuration choices...

In [None]:
# Set random seeds for reproducibility. NumPy has to be seeded too: the
# shadow/victim subset sampling below uses np.random.choice, which does not
# draw from torch's generator.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Synchronous CUDA ops only. Set this before the first CUDA query so the
# variable is already in the environment when the CUDA runtime initializes.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Attack model will have two branches. Paper didn't talk about how they did this so I'm experimenting here... Sigmoid is allegedly good for binary classification.

In [None]:
def attack_model() -> Module:
    """Build a fresh, untrained membership-inference attack model.

    The network is a small MLP that maps a 10-dimensional confidence vector
    to a single value in (0, 1) via a final Sigmoid.

    Returns:
        A new ``Sequential`` module with randomly initialized weights.
    """
    hidden_width = 128
    layers = [
        Linear(10, hidden_width),
        ReLU(),
        Linear(hidden_width, hidden_width),
        ReLU(),
        Linear(hidden_width, 1),
        Sigmoid(),
    ]
    return Sequential(*layers)

Just some nice generic visualization of the training process. Works for the victim, shadow, and attack models alike (not my code!)

In [None]:
def plot_training_history(losses, accuracies):
    """Show per-epoch training loss and accuracy side by side.

    Args:
        losses: Sequence of loss values, one per epoch.
        accuracies: Sequence of accuracy values (in percent), one per epoch.
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))

    # (series, panel title, y-axis label) for the left and right panels
    panels = (
        (losses, 'Training Loss', 'Loss'),
        (accuracies, 'Training Accuracy', 'Accuracy (%)'),
    )
    for ax, (series, title, ylabel) in zip(axes, panels):
        ax.plot(series)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)

    plt.tight_layout()
    plt.show()

How to train and evaluate a victim or shadow model. Should the epochs and learning rate be different for the shadow models? (mostly not my code!)

In [None]:
def traincifar10(loader, epochs):
    """Train a fresh CIFAR-10 classifier and record per-epoch metrics.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches; it must support
            ``len()`` (number of batches), e.g. a ``DataLoader``.
        epochs: Number of passes over ``loader``.

    Returns:
        A tuple ``(model, losses, accuracies)``: the trained model moved back
        to the CPU, the mean batch loss per epoch, and the training accuracy
        (in percent) per epoch.

    NOTE(review): cifar_model() ends in Softmax while CrossEntropyLoss is
    documented to expect unnormalized logits — confirm this is intended.
    """
    net = cifar_model().to(device)
    loss_fn = CrossEntropyLoss()
    opt = Adam(net.parameters(), lr=0.001, weight_decay=1e-7)

    # Per-epoch metric histories
    loss_history = []
    accuracy_history = []

    for epoch_idx in range(epochs):
        net.train()
        loss_sum = 0.0
        n_correct = 0
        n_seen = 0

        # Progress bar over the training batches of this epoch
        bar = tqdm(loader, desc=f'Epoch {epoch_idx + 1}/{epochs}')

        for step, batch in enumerate(bar, start=1):
            inputs = batch[0].to(device)
            labels = batch[1].to(device)

            opt.zero_grad()
            outputs = net(inputs)
            batch_loss = loss_fn(outputs, labels)
            batch_loss.backward()
            opt.step()

            loss_sum += batch_loss.item()

            # Running accuracy: predicted class is the arg-max of the output vector
            _, predicted = torch.max(outputs.data, 1)
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

            bar.set_postfix({
                'loss': loss_sum / step,
                'acc': 100.0 * n_correct / n_seen
            })

            # Drop the batch references before the next batch is loaded
            del inputs, labels

        # Epoch-level metrics
        loss_history.append(loss_sum / len(loader))
        accuracy_history.append(100.0 * n_correct / n_seen)

    return net.cpu(), loss_history, accuracy_history

def evaluatecifar10(model, dataloader, batch_size=128):
    """Compute and print the classification accuracy of a CIFAR-10 model.

    Args:
        model: Classifier mapping an image batch to a per-class score vector.
            It is moved to ``device`` (in place) before evaluation.
        dataloader: A ``DataLoader`` yielding ``(images, labels)`` batches, or
            a bare ``Dataset``, which is wrapped in a ``DataLoader`` here.
            Iterating a bare dataset directly would yield unbatched samples
            with plain-int labels, which cannot be moved to a device.
        batch_size: Batch size used only when a bare ``Dataset`` is given.

    Returns:
        Accuracy in percent as a Python float.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    if not isinstance(dataloader, DataLoader):
        dataloader = DataLoader(dataloader, batch_size=batch_size)

    # The training helpers return models on the CPU; inputs are sent to
    # `device`, so the model has to live there as well.
    model = model.to(device)
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for data in tqdm(dataloader, desc='Evaluating'):
            images, labels = data[0].to(device), data[1].to(device)
            predvec = model(images)
            _, predicted = torch.max(predvec, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')
    return accuracy

How to train and evaluate an attack model...

In [None]:
def trainattack(dataloader):
    """Train a fresh attack model for ``N_ATTACK_EPOCHS`` epochs.

    Args:
        dataloader: ``DataLoader`` yielding ``(confidences, labels)`` batches,
            where ``labels`` holds 1 for members and 0 for non-members.

    Returns:
        A tuple ``(model, losses, accuracies)``: the trained model moved back
        to the CPU, the mean batch loss per epoch, and the training accuracy
        (in percent) per epoch.
    """
    model = attack_model().to(device)
    # attack_model() already ends in a Sigmoid, so its outputs are
    # probabilities. BCELoss is the matching criterion; BCEWithLogitsLoss
    # would apply a second sigmoid on top of them.
    criterion = torch.nn.BCELoss()
    optimizer = Adam(model.parameters(), lr=0.001, weight_decay=1e-7)

    # Lists to store metrics
    train_losses = []
    train_accuracies = []
    for epoch in range(N_ATTACK_EPOCHS):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        # Progress bar for training batches
        progress_bar = tqdm(dataloader, desc=f'Epoch {epoch + 1}/{N_ATTACK_EPOCHS}')

        for i, data in enumerate(progress_bar):
            traindata, labels = data[0].to(device), data[1].to(device)
            targets = labels.float()

            optimizer.zero_grad()
            # The model emits shape (batch, 1) while the targets are (batch,).
            # Squeeze only dim 1 so a final batch of size 1 keeps shape (1,)
            # instead of collapsing to a scalar.
            outputs = model(traindata).squeeze(1)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Calculate accuracy
            predicted = (outputs >= 0.5).float()
            total += labels.size(0)
            correct += (predicted == targets).sum().item()

            # Update progress bar
            progress_bar.set_postfix({
                'loss': running_loss / ( i + 1),
                'acc': 100.0 * correct / total
            })

        # Store epoch metrics
        epoch_loss = running_loss / len(dataloader)
        epoch_acc = 100.0 * correct / total
        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_acc)
    return model.cpu(), train_losses, train_accuracies

def evaluatecattack(model, dataloader, batch_size=128):
    """Compute and print the accuracy of an attack model.

    Args:
        model: Attack model producing one membership score per sample. It is
            moved to ``device`` (in place) before evaluation.
        dataloader: A ``DataLoader`` yielding ``(confidences, labels)``
            batches, or a bare ``Dataset``, which is wrapped in a
            ``DataLoader`` here so evaluation runs in batches rather than one
            sample at a time.
        batch_size: Batch size used only when a bare ``Dataset`` is given.

    Returns:
        Accuracy in percent as a Python float (not a tensor), so it can be
        stored and serialized directly.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    if not isinstance(dataloader, DataLoader):
        dataloader = DataLoader(dataloader, batch_size=batch_size)

    model.eval()
    model = model.to(device)
    correct = 0
    total = 0

    with torch.no_grad():
        for data in tqdm(dataloader, desc='Evaluating'):
            features, labels = data[0].to(device), data[1].to(device)
            # Match the model's (batch, 1) output shape
            labels = labels.view(-1, 1).float()
            istrain = model(features)
            predicted = (istrain > 0.5).float()
            total += labels.size(0)
            # .item() keeps the counter a Python int instead of a device tensor
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')
    return accuracy

Loading the CIFAR10 training and test sets...

In [None]:
def split_cifar_dataset(victim_frac: float) -> Tuple[Dataset, Dataset, Dataset]:
    """Load CIFAR-10 and split its training set between victim and adversary.

    Args:
        victim_frac: Fraction of the training set assigned to the victim; the
            remainder goes to the adversary.

    Returns:
        ``(victim_set, adversary_set, test_set)`` where the first two are a
        random partition of the CIFAR-10 training set and the third is the
        CIFAR-10 test set.
    """
    per_channel = (0.5, 0.5, 0.5)
    preprocess = Compose([
        ToTensor(),
        Normalize(per_channel, per_channel)
    ])

    def load(train: bool) -> Dataset:
        # Downloads into DATA_PATH on first use
        return CIFAR10(DATA_PATH, train=train, download=True, transform=preprocess)

    full_train = load(True)
    held_out = load(False)

    n_victim = int(victim_frac * len(full_train))
    n_adversary = len(full_train) - n_victim
    victim_set, adversary_set = random_split(full_train, [n_victim, n_adversary])

    return victim_set, adversary_set, held_out

Convert a `Dataset` into two tensors representing the features and labels.

In [None]:
def dataset_to_tensors(dataset: Dataset) -> List[Tensor]:
    """Collate an entire dataset into a single batch of tensors.

    Args:
        dataset: Dataset whose samples are tuples of collatable fields.

    Returns:
        One tensor per sample field, each stacking that field across every
        sample of ``dataset`` in dataset order.
    """
    n_samples = len(dataset)
    # A batch as large as the dataset means the first batch is the whole dataset
    whole_dataset_batches = iter(DataLoader(dataset, batch_size=n_samples))
    return next(whole_dataset_batches)

GPU memory management...

In [None]:
def run_on_gpu(model: Module, features: Tensor) -> Tensor:
    """Run one forward pass on ``device`` and return the output on the CPU.

    Args:
        model: Module to evaluate.
        features: Input batch for ``model``.

    Returns:
        ``model(features)`` computed on ``device`` and copied back to the CPU.

    NOTE(review): ``Module.to`` is documented to modify the module in place,
    so the ``del`` below only drops local references — the model presumably
    stays on ``device`` after this call. Confirm that is acceptable.
    """
    device_model = model.to(device)
    device_features = features.to(device)
    result = device_model(device_features)
    # Release the local references, then hand cached blocks back to the driver
    del device_model, device_features
    torch.cuda.empty_cache()
    return result.cpu()

Create two disjoint subsets of equal size from the adversarial `dataset` for the purpose of training and testing a shadow model. Each dataset uses `frac` of the total data available to the adversary.

In [None]:
def disjoint_subsets(dataset: Dataset, frac: float) -> Tuple[Dataset, Dataset]:
    """Draw two equally sized, non-overlapping random subsets of ``dataset``.

    Args:
        dataset: Dataset to sample from.
        frac: Fraction of ``dataset`` that each subset should contain.

    Returns:
        ``(train_set, test_set)``, each a ``Subset`` holding
        ``int(len(dataset) * frac)`` samples, with no index shared between them.
    """
    n_per_subset = int(len(dataset) * frac)
    # One draw without replacement covers both subsets, which guarantees disjointness
    picked = np.random.choice(len(dataset), 2 * n_per_subset, replace=False)
    first_half, second_half = picked[:n_per_subset], picked[n_per_subset:]
    return Subset(dataset, first_half), Subset(dataset, second_half)

Generate attack model data for training or testing by feeding the `dataset` through a victim or shadow `model`. The generated data will be the model's confidence vector. The `label` should be 1 if the model was trained on the dataset and 0 if it was not.

In [None]:
def attack_data(model: Module, dataset: Dataset, label: int) -> Dataset:
    """Turn a CIFAR dataset into attack-model samples using ``model``.

    Args:
        model: Victim or shadow model whose outputs become the attack features.
        dataset: Dataset yielding ``(image, cifar_label)`` samples.
        label: Attack label assigned to every sample (1 if ``model`` was
            trained on ``dataset``, 0 otherwise).

    Returns:
        A ``TensorDataset`` of ``(confidences, cifar_labels, attack_labels)``
        in the same order as ``dataset``.
    """
    confidences = []
    cifar_labels = []
    with torch.no_grad():
        for batch in DataLoader(dataset, batch_size=64):
            confidences.append(run_on_gpu(model, batch[0]))
            # Collect the class labels in the same pass rather than loading
            # the whole dataset a second time as one giant batch.
            cifar_labels.append(batch[1])

    confidences = torch.cat(confidences)
    cifar_labels = torch.cat(cifar_labels)
    attack_labels = torch.full([len(dataset)], label)
    return TensorDataset(confidences, cifar_labels, attack_labels)

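Generate attack model training or testing data from two datasets: one the `model` was trained on (labelled 1) and one it was not (labelled 0). The result is _balanced_ when the two datasets have the same size.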

In [None]:
def balanced_attack_data(model: Module, train_set: Dataset, test_set: Dataset) -> Dataset:
    """Build attack data from a member set and a non-member set.

    Args:
        model: Victim or shadow model used to produce confidence vectors.
        train_set: Data ``model`` was trained on; labelled 1.
        test_set: Data ``model`` was not trained on; labelled 0.

    Returns:
        A ``ConcatDataset`` with the member samples first, then the non-members.
    """
    labelled_sources = ((train_set, 1), (test_set, 0))
    parts = [attack_data(model, source, membership) for source, membership in labelled_sources]
    return ConcatDataset(parts)

Generate attack model training data from `n_shadows` shadow models, each trained on its own random `frac` of the adversarial `dataset`. For every shadow model, the result includes its confidence vectors for all the data it was trained on, and an equal number of confidence vectors for other adversarial data it was _not_ trained on.

In [None]:
def shadow_attack_data(n_shadows: int, dataset: Dataset, frac: float) -> Dataset:
    """Train shadow models and collect their attack training data.

    Args:
        n_shadows: Number of shadow models to train.
        dataset: Adversary's data, from which each shadow model's member and
            non-member subsets are drawn.
        frac: Fraction of ``dataset`` used for each of the two subsets.

    Returns:
        A ``ConcatDataset`` joining the attack data of every shadow model.
    """
    def one_shadow_round() -> Dataset:
        # Fresh member / non-member split and a fresh model for every round
        members, non_members = disjoint_subsets(dataset, frac)
        shadow = train_cifar_model(members, non_members)
        return balanced_attack_data(shadow, members, non_members)

    return ConcatDataset([one_shadow_round() for _ in range(n_shadows)])

Generate attack model testing data from a victim model. The dataset will include the victim model's confidence vectors on all its testing data, and an equal number of confidence vectors randomly selected from its training set. This ensures it will be _balanced_.

In [None]:
def victim_attack_data(model: Module, train_set: Dataset, test_set: Dataset) -> Dataset:
    """Build attack test data from the victim model.

    Args:
        model: The victim model.
        train_set: Data the victim was trained on; a random sample of
            ``len(test_set)`` items is used as the member half.
        test_set: Data the victim was not trained on; used in full.

    Returns:
        Attack dataset with as many member samples as non-member samples.
    """
    n_members = len(test_set)
    member_indexes = np.random.choice(len(train_set), n_members, replace=False)
    members = Subset(train_set, member_indexes)
    return balanced_attack_data(model, members, test_set)

Create a model for CIFAR-10. The model is based...

In [None]:
def cifar_model() -> Module:
    """Build a fresh, untrained CIFAR-10 classifier.

    Two Conv2d/Tanh/MaxPool2d stages are followed by a single linear layer
    whose output passes through Tanh and Softmax, so the model emits a
    probability vector of length ``N_CLASSES`` per image.

    Returns:
        A new ``Sequential`` module with randomly initialized weights.
    """
    feature_extractor = [
        Conv2d(3, 16, kernel_size=3, padding=1),
        Tanh(),
        MaxPool2d(kernel_size=2, stride=2),
        Conv2d(16, 16, kernel_size=3),
        Tanh(),
        MaxPool2d(kernel_size=2, stride=2),
    ]
    classifier_head = [
        Flatten(),
        # 784 = 16 channels * 7 * 7, assuming 32x32 input images
        Linear(784, N_CLASSES),
        Tanh(),
        Softmax(dim=1),
    ]
    return Sequential(*feature_extractor, *classifier_head)

In [None]:
def train_victim_model(dataset: Dataset, batch_size: int, epsilon: float, delta: float, max_grad_norm: float) -> Tuple[Module, List[float], List[float]]:
    """Train a victim model on ``dataset`` for ``N_CIFAR_EPOCHS`` epochs.

    The training loop is shared with the shadow models via ``traincifar10``
    rather than duplicated here.

    Args:
        dataset: Victim's training data, yielding ``(image, label)`` samples.
        batch_size: Mini-batch size for training.
        epsilon: Intended privacy budget. Currently unused.
        delta: Intended privacy parameter. Currently unused.
        max_grad_norm: Intended gradient clipping bound. Currently unused.

    Returns:
        A tuple ``(model, losses, accuracies)``: the trained model on the CPU,
        the mean batch loss per epoch, and the training accuracy (in percent)
        per epoch.
    """
    # TODO: epsilon, delta and max_grad_norm are accepted but not applied yet,
    # so training is not differentially private.
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    return traincifar10(train_loader, N_CIFAR_EPOCHS)

In [None]:
def train_cifar_model(train_set: Dataset, test_set: Dataset) -> Module:
    """Train a CIFAR-10 classifier on ``train_set`` and return only the model.

    Args:
        train_set: Training data, yielding ``(image, label)`` samples.
        test_set: Not used by this function.

    Returns:
        The trained model; the loss and accuracy histories are discarded.
    """
    shuffled_batches = DataLoader(train_set, batch_size=128, shuffle=True)
    # traincifar10 returns (model, losses, accuracies); only the model is needed
    return traincifar10(shuffled_batches, N_CIFAR_EPOCHS)[0]

In [None]:
def train_attack_model(train_set: Dataset) -> Module:
    """Train an attack model on ``train_set`` and return only the model.

    Args:
        train_set: Attack training data, yielding
            ``(confidences, attack_label)`` samples.

    Returns:
        The trained attack model; the loss and accuracy histories are discarded.
    """
    shuffled_batches = DataLoader(train_set, batch_size=128, shuffle=True)
    # trainattack returns (model, losses, accuracies); only the model is needed
    return trainattack(shuffled_batches)[0]

In [None]:
def filter_by_cifar_label(dataset: Dataset, label: int) -> Dataset:
    """Keep only the attack samples whose CIFAR class equals ``label``.

    Args:
        dataset: Attack dataset yielding
            ``(confidences, cifar_label, attack_label)`` samples.
        label: CIFAR class to select.

    Returns:
        A ``TensorDataset`` of ``(confidences, attack_label)`` for the
        selected class; the CIFAR label column is dropped.
    """
    confidences, cifar_labels, attack_labels = dataset_to_tensors(dataset)
    in_class = cifar_labels == label
    return TensorDataset(confidences[in_class], attack_labels[in_class])

In [None]:
def train_attack_models(n_labels: int, dataset: Dataset) -> List[Module]:
    """Train one attack model per CIFAR class.

    Args:
        n_labels: Number of classes; models are trained for labels
            ``0 .. n_labels - 1``.
        dataset: Attack training data covering all classes.

    Returns:
        List of attack models, where index ``i`` holds the model trained on
        the samples of class ``i``.
    """
    return [
        train_attack_model(filter_by_cifar_label(dataset, cifar_label))
        for cifar_label in range(n_labels)
    ]

Putting it all together...

In [None]:
# Split the CIFAR-10 training set between victim and adversary; the test set is shared
victim_set, adversary_set, test_set = split_cifar_dataset(VICTIM_TRAIN_FRAC)

In [None]:
# Train the (non-private) victim model on the victim's share of the training data
victim_model = train_cifar_model(victim_set, test_set)

In [None]:
# Attack test data: victim confidences on sampled training data (label 1) and on the test set (label 0)
attack_test_set = victim_attack_data(victim_model, victim_set, test_set)

In [None]:
# Attack training data: trains N_SHADOW_MODELS shadow models on the adversary's data (long-running)
complete_attack_train_set = shadow_attack_data(N_SHADOW_MODELS, adversary_set, SHADOW_TRAIN_FRAC)

In [None]:
# One attack model per CIFAR class; attack_models[i] is trained on the samples of class i
attack_models = train_attack_models(N_CLASSES, complete_attack_train_set)

In [None]:
labels = list(range(N_CLASSES))

# Evaluate each per-class attack model on the victim-derived test data of its class
for label in labels:
    filtered_dataset = filter_by_cifar_label(attack_test_set, label)
    model = attack_models[label]
    # Batch the samples: iterating the bare dataset would run the attack
    # model on one sample at a time.
    filtered_loader = DataLoader(filtered_dataset, batch_size=128)
    attack_accuracy = evaluatecattack(model, filtered_loader)

As an experimenter, I don't care too much about testing the attack model on a dataset other than what I get from the victim model. As an attacker, I might care though: I won't know if my membership inference predictions against the victim model are right, and I might want to know when I deploy my attack model against the victim, how likely is it that they are. Generating test data from the victim model...

In [None]:
# Hyperparameter grid for the experiments below. Presumably these are the
# differential-privacy settings (budget epsilon/delta and gradient clipping
# norm) — NOTE(review): the training code shown does not apply them yet.
epsilons = [0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]
deltas = [0.00001]
batch_sizes = [32, 64, 128]
max_grad_norms = [0.1, 2.0, 10.0]

# Full Cartesian product: one experiment per (epsilon, delta, batch_size, max_grad_norm)
param_combos = list(product(epsilons, deltas, batch_sizes, max_grad_norms))
print(f'{len(param_combos)} experiments')

54 experiments


In [18]:
# Rough total runtime; the factor 10 presumably means ~10 minutes per experiment — TODO confirm
print(f'{len(param_combos) * 10 / 60} hours')

9.0 hours


In [None]:
@dataclass
class Experiment:
    """One result row: a hyperparameter combination, a CIFAR class, and the measured accuracies."""
    # Hyperparameters of the grid point this row belongs to
    epsilon: float
    delta: float
    batch_size: int
    max_grad_norm: float
    # CIFAR class whose attack model was evaluated
    cifar_label: int
    # Accuracies in percent, as returned by the evaluate* helpers
    victim_accuracy: float
    attack_accuracy: float

In [None]:
results = []
for epsilon, delta, batch_size, max_grad_norm in param_combos:
    victim_model = train_cifar_model(victim_set, test_set) # TODO Make it private! Add hyperparams!

    # Rebuild the attack test data from THIS victim model. Reusing the global
    # attack_test_set would score every grid point against the confidence
    # vectors of the first victim model.
    combo_attack_test_set = victim_attack_data(victim_model, victim_set, test_set)

    # The victim accuracy does not depend on the class label, so compute it
    # once per grid point. The evaluator needs batched (images, labels)
    # tensors and a model on `device`, hence the DataLoader and the .to().
    victim_accuracy = evaluatecifar10(
        victim_model.to(device),
        DataLoader(test_set, batch_size=128))

    for label in labels:
        filtered_dataset = filter_by_cifar_label(combo_attack_test_set, label)
        model = attack_models[label]
        attack_accuracy = evaluatecattack(
            model,
            DataLoader(filtered_dataset, batch_size=128))
        results.append(Experiment(
            epsilon=epsilon,
            delta=delta,
            batch_size=batch_size,
            max_grad_norm=max_grad_norm,
            cifar_label=label,
            victim_accuracy=victim_accuracy,
            # float() guards against a 0-dim tensor ending up in the CSV
            attack_accuracy=float(attack_accuracy)
        ))

In [None]:
# Persist the grid results. newline='' leaves line endings to the csv writer,
# as the csv module documentation requires (avoids blank rows on Windows).
with open('dp-grid.csv', 'w', newline='') as file:
    DataclassWriter(file, results, Experiment).write()