In [1]:
import importlib
import numpy as np
import os
import pickle
import sys
import torch
import torch.nn as nn
from torch.utils.data import Subset

# Put the directory two levels above this notebook first on the import path
# (presumably where the `src` package imported below lives).
sys.path.insert(0, '../../')

from src.dataset import get_num_classes, load_dataset
from src.models import init_model
# Import the module object and reload it, so that edits made to
# src/train_meta_model.py while the kernel is running are picked up.
# MetaModelDataLoader is imported only after the reload so that the name is
# bound to the freshly loaded class.
import src.train_meta_model
importlib.reload(src.train_meta_model)
from src.train_meta_model import MetaModelDataLoader

In [2]:
# Configuration shared by every test cell below.
# NOTE(review): dataset_size is forwarded to load_dataset as-is; 0 presumably
# means "no size limit" -- confirm in src.dataset.
dataset_size = 0
dataset_name = 'cifar10'
architecture = 'cnn-large'
num_classes = get_num_classes(dataset_name)

# load_dataset returns a mapping; only its 'train_and_test' entry is used here.
dataset = load_dataset(dataset=dataset_name, transform='normalize', dataset_size=dataset_size, seed=42, path='../../../data')
# Keep only the records at indices 0..5, so the loader output in the cells
# below is small enough to check by eye.
subset = Subset(dataset['train_and_test'], np.arange(6))

Files already downloaded and verified


In [3]:
# Folder containing the pickled shadow-model checkpoints for the configured
# dataset and architecture.
models_dir = f'../../../experiments/{dataset_name}/attack/{architecture}/shadow_models/aa-shadow_dataset'
print(models_dir)


def _load_shadow_model(exp):
    """Rebuild shadow model number `exp` from its checkpoint and return it in eval mode."""
    checkpoint_path = os.path.join(models_dir, f'exp_{exp}_model.pickle')
    # NOTE(review): pickle.load can execute arbitrary code; only open
    # checkpoints that come from a trusted source.
    with open(checkpoint_path, 'rb') as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    shadow_model = init_model(architecture, num_classes, verbose=False)
    shadow_model.load_state_dict(checkpoint['model_state_dict'])
    shadow_model.eval()
    return shadow_model


# The first two shadow models (exp_0 and exp_1) are enough for the tests below.
models = [_load_shadow_model(exp) for exp in range(2)]
criterion = nn.CrossEntropyLoss()

../../../experiments/cifar10/attack/cnn-large/shadow_models/aa-shadow_dataset


## 1 - Testing the batch sampling

Test without shuffling (`shuffle=False`)

In [5]:
# Batch-sampling check without shuffling: one shadow model, six records and
# batch_size=4, so the loader should yield one batch of 4 followed by one of 2.
print('Subset labels', [label for _, label in subset])

meta_model_data_loader = MetaModelDataLoader(subset,
    models[:1],
    mia_labels=[[0, 1, 0, 1, 0, 1]],
    batch_size=4,
    shuffle=False,
    device='cpu',
    criterion=criterion,
    features='activations',
    target_layers='fc2',
    set_based=False)

meta_model_data_loader_iter = iter(meta_model_data_loader)
# record_idxs / model_idxs are inspected only after iter() has been called.
print('Shuffled record idxs', meta_model_data_loader.record_idxs, 'Random model idxs', meta_model_data_loader.model_idxs)

# batch[0][1] is printed as the image labels, batch[1] as the membership labels.
batch1 = next(meta_model_data_loader_iter)
print('First batch: image labels', batch1[0][1], 'membership labels', batch1[1])
batch2 = next(meta_model_data_loader_iter)
print('Second batch: image labels', batch2[0][1], 'membership labels', batch2[1])
try:
    batch3 = next(meta_model_data_loader_iter)
except StopIteration:
    print('No third batch, as expected')
else:
    # Without this branch an unexpected extra batch would go unnoticed.
    raise AssertionError('Unexpected third batch: 6 records with batch_size=4 should yield exactly 2 batches')

Subset labels [6, 9, 9, 4, 1, 1]
Shuffled record idxs [0 1 2 3 4 5] Random model idxs [0, 0]
First batch: image labels tensor([6, 9, 9, 4]) membership labels tensor([0, 1, 0, 1])
Second batch: image labels tensor([1, 1]) membership labels tensor([0, 1])
No third batch, as expected


Test with a different batch size.

In [6]:
# Same check as the batch_size=4 cell, but with batch_size=2: six records
# should now be split into exactly three batches of two.
print('Subset labels', [label for _, label in subset])

meta_model_data_loader = MetaModelDataLoader(subset,
    models[:1],
    mia_labels=[[0, 1, 0, 1, 0, 1]],
    batch_size=2,
    shuffle=False,
    device='cpu',
    criterion=criterion,
    features='activations',
    target_layers='fc2',
    set_based=False)

meta_model_data_loader_iter = iter(meta_model_data_loader)
print('Shuffled record idxs', meta_model_data_loader.record_idxs, 'Random model idxs', meta_model_data_loader.model_idxs)

# batch[0][1] is printed as the image labels, batch[1] as the membership labels.
batch1 = next(meta_model_data_loader_iter)
print('First batch: image labels', batch1[0][1], 'membership labels', batch1[1])
batch2 = next(meta_model_data_loader_iter)
print('Second batch: image labels', batch2[0][1], 'membership labels', batch2[1])
batch3 = next(meta_model_data_loader_iter)
# This line used to be labelled 'Second batch' (copy-paste slip).
print('Third batch: image labels', batch3[0][1], 'membership labels', batch3[1])
try:
    batch4 = next(meta_model_data_loader_iter)
except StopIteration:
    print('No fourth batch, as expected')
else:
    # Without this branch an unexpected extra batch would go unnoticed.
    raise AssertionError('Unexpected fourth batch: 6 records with batch_size=2 should yield exactly 3 batches')

Subset labels [6, 9, 9, 4, 1, 1]
Shuffled record idxs [0 1 2 3 4 5] Random model idxs [0, 0, 0]
First batch: image labels tensor([6, 9]) membership labels tensor([0, 1])
Second batch: image labels tensor([9, 4]) membership labels tensor([0, 1])
Second batch: image labels tensor([1, 1]) membership labels tensor([0, 1])
No fourth batch, as expected


Test with shuffling (`shuffle=True`)

In [7]:
# Batch-sampling check with shuffling: two shadow models, six records and
# batch_size=3, so every pass over the loader should yield exactly two batches.
print('Subset labels', [label for _, label in subset])

meta_model_data_loader = MetaModelDataLoader(subset,
    models,
    mia_labels=[[0, 1, 0, 1, 0, 1], [0, 0, 0, 1, 1, 1]],
    batch_size=3,
    shuffle=True,
    device='cpu',
    criterion=criterion,
    features='activations',
    target_layers='fc2',
    set_based=False)

# Seed NumPy's global RNG so the printed orders are repeatable across runs.
# NOTE(review): this presumes the loader draws its shuffles from np.random --
# confirm in src.train_meta_model.
np.random.seed(0)
# Three consecutive passes: each iter() call is followed by a fresh look at
# the record order and the per-batch model indices.
for _ in range(3):
    meta_model_data_loader_iter = iter(meta_model_data_loader)
    print('Shuffled record idxs', meta_model_data_loader.record_idxs, 'Random model idxs', meta_model_data_loader.model_idxs)

    batch1 = next(meta_model_data_loader_iter)
    print('First batch: image labels', batch1[0][1], 'membership labels', batch1[1])
    batch2 = next(meta_model_data_loader_iter)
    print('Second batch: image labels', batch2[0][1], 'membership labels', batch2[1])
    try:
        batch3 = next(meta_model_data_loader_iter)
    except StopIteration:
        print('No third batch, as expected')
    else:
        # Without this branch an unexpected extra batch would go unnoticed.
        raise AssertionError('Unexpected third batch: 6 records with batch_size=3 should yield exactly 2 batches')

Subset labels [6, 9, 9, 4, 1, 1]
Shuffled record idxs [5 2 1 3 0 4] Random model idxs [0, 1]
First batch: image labels tensor([1, 9, 9]) membership labels tensor([1, 0, 1])
Second batch: image labels tensor([4, 6, 1]) membership labels tensor([1, 0, 1])
No third batch, as expected
Shuffled record idxs [1 0 5 3 4 2] Random model idxs [1, 0]
First batch: image labels tensor([9, 6, 1]) membership labels tensor([0, 0, 1])
Second batch: image labels tensor([4, 1, 9]) membership labels tensor([1, 0, 0])
No third batch, as expected
Shuffled record idxs [0 3 5 1 2 4] Random model idxs [0, 1]
First batch: image labels tensor([6, 4, 1]) membership labels tensor([0, 1, 1])
Second batch: image labels tensor([9, 9, 1]) membership labels tensor([0, 0, 1])
No third batch, as expected


## 2 - Testing that the batch contents match the corresponding models

In [8]:
# Reference outputs: run both shadow models directly on the six records,
# stacked in the order the loader is expected to visit them. That order is the
# one the loader printed for its first pass after np.random.seed(0) in the
# recorded run of the shuffling test; it is verified against this cell's loader
# further down instead of being trusted blindly.
expected_record_idxs = [5, 2, 1, 3, 0, 4]
shuffled = torch.cat([subset[i][0].unsqueeze(0) for i in expected_record_idxs], dim=0)
print('Output of model 1', nn.functional.softmax(models[0](shuffled), dim=1))

print('Output of model 2', nn.functional.softmax(models[1](shuffled), dim=1))

print('Subset labels', [label for _, label in subset])

meta_model_data_loader = MetaModelDataLoader(subset,
    models,
    mia_labels=[[0, 1, 0, 1, 0, 1], [0, 0, 0, 1, 1, 1]],
    batch_size=3,
    shuffle=True,
    device='cpu',
    criterion=criterion,
    features='activations',
    target_layers='fc2',
    set_based=False)

# NOTE(review): presumes the loader draws its shuffles from np.random -- confirm.
np.random.seed(0)

meta_model_data_loader_iter = iter(meta_model_data_loader)
print('Shuffled record idxs', meta_model_data_loader.record_idxs, 'Random model idxs', meta_model_data_loader.model_idxs)

# The reference outputs above are only comparable with the batches below if
# the loader really visits the records in the assumed order.
observed_record_idxs = [int(i) for i in meta_model_data_loader.record_idxs]
if observed_record_idxs != expected_record_idxs:
    raise AssertionError(
        f'Loader record order {observed_record_idxs} differs from the order '
        f'{expected_record_idxs} used for the reference model outputs')

# batch[0][0] holds the features to compare by eye with the reference outputs.
batch1 = next(meta_model_data_loader_iter)
print('First batch: image labels', batch1[0][1], 'membership labels', batch1[1])
print(batch1[0][0])
batch2 = next(meta_model_data_loader_iter)
print('Second batch: image labels', batch2[0][1], 'membership labels', batch2[1])
print(batch2[0][0])
try:
    batch3 = next(meta_model_data_loader_iter)
except StopIteration:
    print('No third batch, as expected')
else:
    # Without this branch an unexpected extra batch would go unnoticed.
    raise AssertionError('Unexpected third batch: 6 records with batch_size=3 should yield exactly 2 batches')

Output of model 1 tensor([[8.8081e-04, 9.9280e-01, 7.3441e-08, 1.3857e-04, 3.3236e-07, 2.1265e-06,
         3.5972e-06, 6.9974e-07, 2.8901e-05, 6.1468e-03],
        [9.8960e-05, 3.1840e-05, 6.5025e-05, 1.5947e-05, 4.6320e-06, 2.2360e-06,
         8.8100e-08, 2.9202e-05, 7.6654e-05, 9.9968e-01],
        [1.6031e-07, 4.3761e-03, 4.4994e-09, 5.3153e-10, 2.8170e-09, 4.4277e-09,
         4.9412e-12, 8.5916e-06, 1.3888e-06, 9.9561e-01],
        [7.9239e-05, 1.4932e-08, 2.3268e-04, 2.2041e-05, 9.9869e-01, 4.7794e-04,
         4.5319e-04, 4.1945e-05, 1.1542e-08, 6.4197e-07],
        [5.8311e-07, 6.6596e-09, 5.1585e-02, 1.2733e-01, 2.1690e-03, 1.9294e-02,
         7.9929e-01, 3.3183e-04, 6.6206e-09, 3.9230e-08],
        [8.7029e-07, 1.0000e+00, 1.7451e-15, 1.9158e-15, 4.3275e-13, 1.1852e-16,
         3.8693e-16, 9.3926e-18, 3.7129e-12, 6.3222e-09]],
       grad_fn=<SoftmaxBackward0>)
Output of model 2 tensor([[1.0200e-10, 9.9999e-01, 2.9779e-11, 3.8782e-10, 7.6909e-11, 1.7333e-11,
         2.39

## 3 - Other tests

Using the GPU instead of the CPU.

In [9]:
# Same check as the first batch-sampling test, but with the loader placed on
# the first CUDA device. The whole test is skipped on machines without CUDA so
# that "Restart & Run All" does not crash there.
if not torch.cuda.is_available():
    print('CUDA is not available, skipping the GPU test')
else:
    print('Subset labels', [label for _, label in subset])

    meta_model_data_loader = MetaModelDataLoader(subset,
        models[:1],
        mia_labels=[[0, 1, 0, 1, 0, 1]],
        batch_size=4,
        shuffle=False,
        device='cuda:0',
        criterion=criterion,
        features='activations',
        target_layers='fc2',
        set_based=False)

    meta_model_data_loader_iter = iter(meta_model_data_loader)
    print('Shuffled record idxs', meta_model_data_loader.record_idxs, 'Random model idxs', meta_model_data_loader.model_idxs)

    # batch[0][1] is printed as the image labels, batch[1] as the membership labels.
    batch1 = next(meta_model_data_loader_iter)
    print('First batch: image labels', batch1[0][1], 'membership labels', batch1[1])
    batch2 = next(meta_model_data_loader_iter)
    print('Second batch: image labels', batch2[0][1], 'membership labels', batch2[1])
    try:
        batch3 = next(meta_model_data_loader_iter)
    except StopIteration:
        print('No third batch, as expected')
    else:
        # Without this branch an unexpected extra batch would go unnoticed.
        raise AssertionError('Unexpected third batch: 6 records with batch_size=4 should yield exactly 2 batches')

Subset labels [6, 9, 9, 4, 1, 1]
Shuffled record idxs [0 1 2 3 4 5] Random model idxs [0, 0]
First batch: image labels tensor([6, 9, 9, 4], device='cuda:0') membership labels tensor([0, 1, 0, 1], device='cuda:0')
Second batch: image labels tensor([1, 1], device='cuda:0') membership labels tensor([0, 1], device='cuda:0')
No third batch, as expected


Using the gradients.

In [10]:
# Check the loader when gradients are requested in addition to activations.
# With features='activations,gradients' the image labels are read from the last
# element of batch[0] (index -1) rather than from index 1.
print('Subset labels', [label for _, label in subset])

meta_model_data_loader = MetaModelDataLoader(subset,
    models[:1],
    mia_labels=[[0, 1, 0, 1, 0, 1]],
    batch_size=4,
    shuffle=False,
    # This test is about the gradient features, not the device: use the GPU
    # when there is one and fall back to the CPU otherwise.
    device='cuda:0' if torch.cuda.is_available() else 'cpu',
    criterion=criterion,
    features='activations,gradients',
    target_layers='fc2',
    set_based=False)

meta_model_data_loader_iter = iter(meta_model_data_loader)
print('Shuffled record idxs', meta_model_data_loader.record_idxs, 'Random model idxs', meta_model_data_loader.model_idxs)

batch1 = next(meta_model_data_loader_iter)
print('First batch: image labels', batch1[0][-1], 'membership labels', batch1[1])
# Show the size of every tensor in the feature part of the batch.
print([b.size() for b in batch1[0]])
batch2 = next(meta_model_data_loader_iter)
print('Second batch: image labels', batch2[0][-1], 'membership labels', batch2[1])
try:
    batch3 = next(meta_model_data_loader_iter)
except StopIteration:
    print('No third batch, as expected')
else:
    # Without this branch an unexpected extra batch would go unnoticed.
    raise AssertionError('Unexpected third batch: 6 records with batch_size=4 should yield exactly 2 batches')

Subset labels [6, 9, 9, 4, 1, 1]
Shuffled record idxs [0 1 2 3 4 5] Random model idxs [0, 0]
First batch: image labels tensor([6, 9, 9, 4], device='cuda:0') membership labels tensor([0, 1, 0, 1], device='cuda:0')
[torch.Size([4, 10]), torch.Size([4, 10, 501, 1]), torch.Size([4])]
Second batch: image labels tensor([1, 1], device='cuda:0') membership labels tensor([0, 1], device='cuda:0')
No third batch, as expected
