# Privacy Meter Demo

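## Setting up the multi-GPU environment

This notebook trains its models in parallel on multiple GPUs, so the import cell below also sets the `spawn` multiprocessing start method and enables cuDNN benchmark mode.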

## Importing

In [1]:

import math
import time

import numpy as np
import torch
import yaml
from torch.utils.data import Subset

from privacy_meter.audit import get_average_audit_results, audit_models, sample_auditing_dataset
from privacy_meter.get_signals import get_model_signals
from privacy_meter.models.utils import split_dataset_for_training
from privacy_meter.util import (
    check_configs,
    setup_log,
    initialize_seeds,
    create_directories,
    load_dataset,
)

from privacy_meter.trainers.parallel_trainer import parallel_prepare_models
import torch.multiprocessing as mp
if __name__ == '__main__':
    # Required for CUDA multiprocessing.
    # A bare set_start_method() raises "context has already been set" when this
    # cell is executed a second time in the same kernel, so only (re)set the
    # start method when it is not already 'spawn', and force it in that case.
    if mp.get_start_method(allow_none=True) != 'spawn':
        mp.set_start_method('spawn', force=True)

# Enable benchmark mode in cudnn to improve performance when input sizes are consistent
torch.backends.cudnn.benchmark = True

## Load config

In [2]:
# Path of the YAML run configuration. It has its own name so that `configs`
# only ever refers to the parsed configuration, never to the path string.
config_path = "configs/config.yaml"
with open(config_path, "rb") as f:
    # NOTE(review): yaml.Loader can construct arbitrary Python objects. That is
    # acceptable for a trusted local config file; consider yaml.safe_load if
    # the config may ever come from an untrusted source.
    configs = yaml.load(f, Loader=yaml.Loader)

# Validate configurations
check_configs(configs)

## Setting up

In [3]:
# The configuration is validated with check_configs() in the cell that loads it,
# so it is not validated a second time here.

# Initialize seeds for reproducibility
initialize_seeds(configs["run"]["random_seed"])

# Create necessary directories: reports and signals are placed under the run's
# log directory, the data directory is taken from the "data" config section.
log_dir = configs["run"]["log_dir"]
directories = {
    "log_dir": log_dir,
    "report_dir": f"{log_dir}/report",
    "signal_dir": f"{log_dir}/signals",
    "data_dir": configs["data"]["data_dir"],
}
create_directories(directories)

# Set up logger
logger = setup_log(
    directories["report_dir"], "time_analysis", configs["run"]["time_log"]
)

# Reference point for the "Total runtime" message logged by the audit cell.
start_time = time.time()

## Load dataset

In [4]:
# Load the dataset together with its population set and log how long it took.
baseline_time = time.time()
dataset, population = load_dataset(
    configs,
    directories["data_dir"],
    logger,
)
logger.info(
    "Loading dataset took %0.5f seconds",
    time.time() - baseline_time,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar10/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:01<00:00, 98125413.18it/s] 


Extracting data/cifar10/cifar-10-python.tar.gz to data/cifar10
Files already downloaded and verified


2025-02-08 16:59:02,420 INFO     Save data to data/cifar10.pkl
2025-02-08 16:59:02,634 INFO     Save population data to data/cifar10_population.pkl
2025-02-08 16:59:02,635 INFO     The whole dataset size: 50000
2025-02-08 16:59:02,635 INFO     Loading dataset took 6.16908 seconds


## Load or train models

In [5]:
# Experiment sizing, read from the "run" and "audit" sections of the config
num_experiments = configs["run"]["num_experiments"]
num_reference_models = configs["audit"]["num_ref_models"]

# The number of model pairs is at least half the number of experiments
# (rounded up) and at least one more than the number of reference models.
num_model_pairs = max(
    num_reference_models + 1,
    math.ceil(num_experiments / 2.0),
)

# Split dataset for training
data_splits, memberships = split_dataset_for_training(len(dataset), num_model_pairs)

In [6]:
# Now train models in parallel.
# The GPU count used to be hard-coded to 4, which fails on hosts with fewer
# devices. Use at most MAX_TRAINING_GPUS, capped by the number of visible GPUs.
# NOTE(review): falls back to 1 when no GPU is visible; confirm that
# parallel_prepare_models handles that case as intended.
MAX_TRAINING_GPUS = 4
num_gpus = max(1, min(MAX_TRAINING_GPUS, torch.cuda.device_count()))

baseline_time = time.time()
models_list = parallel_prepare_models(
    log_dir,
    dataset,
    data_splits,  # Using the generated data_splits
    memberships,  # Using the generated memberships
    configs,
    logger,
    num_gpus=num_gpus,
)
logger.info(
    "Model parallel training took %0.1f seconds",
    time.time() - baseline_time
)

2025-02-08 16:59:05,493 INFO     Training 4 models using 4 GPUs


GPU 0: 100/100 (0.0000|1.0000) | GPU 1: 100/100 (0.0000|1.0000) | GPU 2: 100/100 (0.0000|1.0000) | GPU 3: 100/100 (0.0000|1.0000) | 


  saved_data = torch.load(shared_dict[idx]['model_path'])
2025-02-08 17:08:56,723 INFO     Model parallel training took 591.2 seconds


## Prepare auditing dataset

In [7]:
# Sample the auditing dataset and the matching membership information
auditing_dataset, auditing_membership = sample_auditing_dataset(
    configs, dataset, logger, memberships
)

# Also downsample the population set size if specified in the config
# (without "population_size" in the audit config, the full size is used).
# NOTE: `population` is rebound here, so re-running this cell samples from the
# already reduced subset rather than from the originally loaded population.
population_size = configs["audit"].get("population_size", len(population))
population_indices = np.random.choice(len(population), population_size, replace=False)
population = Subset(population, population_indices)

## Compute signals

In [8]:
# Compute signals for the auditing dataset first, then for the population set,
# and log the combined time.
baseline_time = time.time()
signals = get_model_signals(models_list, auditing_dataset, configs, logger)
population_signals = get_model_signals(
    models_list,
    population,
    configs,
    logger,
    is_population=True,
)
signal_elapsed = time.time() - baseline_time
logger.info("Preparing signals took %0.5f seconds", signal_elapsed)

2025-02-08 17:09:17,964 INFO     Computing signals for all models.
Computing softmax: 100%|██████████| 10/10 [00:08<00:00,  1.12it/s]
Computing softmax: 100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
Computing softmax: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Computing softmax: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
2025-02-08 17:09:42,832 INFO     Signals saved to disk.
2025-02-08 17:09:57,643 INFO     Computing signals for all models.
Computing softmax: 100%|██████████| 2/2 [00:01<00:00,  1.89it/s]
Computing softmax: 100%|██████████| 2/2 [00:01<00:00,  1.89it/s]
Computing softmax: 100%|██████████| 2/2 [00:01<00:00,  1.89it/s]
Computing softmax: 100%|██████████| 2/2 [00:01<00:00,  1.89it/s]
2025-02-08 17:10:01,935 INFO     Signals saved to disk.
2025-02-08 17:10:01,939 INFO     Preparing signals took 65.20681 seconds


## Audit

In [9]:
# Perform the privacy audit on the first `num_experiments` models
baseline_time = time.time()
target_model_indices = list(range(num_experiments))
report_prefix = f"{directories['report_dir']}/exp"
mia_score_list, membership_list = audit_models(
    report_prefix,
    target_model_indices,
    signals,
    population_signals,
    auditing_membership,
    num_reference_models,
    logger,
    configs,
)

# With more than one target model, log the audit time and then get the average
# audit results across all experiments.
if len(target_model_indices) > 1:
    logger.info(
        "Auditing privacy risk took %0.1f seconds", time.time() - baseline_time
    )
    get_average_audit_results(
        directories["report_dir"], mia_score_list, membership_list, logger
    )

# start_time was recorded at the end of the setup cell
logger.info("Total runtime: %0.5f seconds", time.time() - start_time)

2025-02-08 17:10:01,945 INFO     Fine-tuning offline_a using paired model 1
2025-02-08 17:10:02,986 INFO     offline_a=0.00: AUC 0.7089
2025-02-08 17:10:04,023 INFO     offline_a=0.10: AUC 0.7075
2025-02-08 17:10:05,051 INFO     offline_a=0.20: AUC 0.7051
2025-02-08 17:10:06,078 INFO     offline_a=0.30: AUC 0.7040
2025-02-08 17:10:07,108 INFO     offline_a=0.40: AUC 0.7023
2025-02-08 17:10:08,137 INFO     offline_a=0.50: AUC 0.7007
2025-02-08 17:10:09,165 INFO     offline_a=0.60: AUC 0.6990
2025-02-08 17:10:10,194 INFO     offline_a=0.70: AUC 0.6973
2025-02-08 17:10:11,221 INFO     offline_a=0.80: AUC 0.6947
2025-02-08 17:10:12,250 INFO     offline_a=0.90: AUC 0.6904
  ratios = prob_ratio_x[:, np.newaxis] / prob_ratio_z
2025-02-08 17:10:13,289 INFO     offline_a=1.00: AUC 0.6604
2025-02-08 17:10:13,290 INFO     The best offline_a is 0.0
2025-02-08 17:10:14,319 INFO     Target Model 0: AUC 0.6954, TPR@0.1%FPR of 0.0000, TPR@0.0%FPR of 0.0000
2025-02-08 17:10:18,576 INFO     Auditing the

<Figure size 640x480 with 0 Axes>