# Privacy Meter Demo

## Importing

In [1]:
import argparse
import math
import time

import torch
import yaml

from audit import get_average_audit_results, audit_models, sample_auditing_dataset
from get_signals import get_model_signals
from models.utils import load_models, train_models, split_dataset_for_training
from util import (
    check_configs,
    setup_log,
    initialize_seeds,
    create_directories,
    load_dataset,
)

# Enable benchmark mode in cudnn to improve performance when input sizes are consistent.
# This sets a flag on the shared torch.backends.cudnn module, so it stays in
# effect for every later cell run in this kernel.
torch.backends.cudnn.benchmark = True

  from .autonotebook import tqdm as notebook_tqdm


## Load config

In [2]:
# Path of the YAML experiment configuration. It is kept under its own name so
# that `configs` only ever refers to the parsed configuration, never the path.
config_path = "configs/cifar10.yaml"
with open(config_path, "rb") as f:
    # NOTE(review): yaml.Loader can construct arbitrary Python objects, so only
    # load configuration files you trust. If the config uses plain YAML types
    # only, yaml.safe_load would be the safer choice -- confirm before switching.
    configs = yaml.load(f, Loader=yaml.Loader)

# Validate configurations
check_configs(configs)

## Setting up

In [3]:
# `configs` was already passed through check_configs() right after it was
# loaded in the "Load config" cell, so it is not validated a second time here.

# Initialize seeds for reproducibility
initialize_seeds(configs["run"]["random_seed"])

# Collect every directory the run uses; report and signal folders live under
# the configured log directory, the data folder comes from the data section.
log_dir = configs["run"]["log_dir"]
directories = {
    "log_dir": log_dir,
    "report_dir": f"{log_dir}/report",
    "signal_dir": f"{log_dir}/signals",
    "data_dir": configs["data"]["data_dir"],
}
create_directories(directories)

# Set up the logger used by all later cells (report directory, logger name,
# and the `time_log` setting from the run configuration are handed to setup_log).
logger = setup_log(
    directories["report_dir"], "time_analysis", configs["run"]["time_log"]
)

# Reference timestamp for the "Total runtime" message logged in the audit cell.
start_time = time.time()

## Load dataset

In [4]:
# Load the dataset and log how long the call took.
baseline_time = time.time()
dataset = load_dataset(configs, directories["data_dir"], logger)
dataset_load_seconds = time.time() - baseline_time
logger.info("Loading dataset took %0.5f seconds", dataset_load_seconds)

2024-12-02 18:45:14,888 INFO     Load data from data/cifar10.pkl
2024-12-02 18:45:14,890 INFO     The whole dataset size: 60000
2024-12-02 18:45:14,891 INFO     Loading dataset took 0.09349 seconds


## Load or train models

In [5]:
# Experiment sizing, read from the configuration.
num_experiments = configs["run"]["num_experiments"]
num_reference_models = configs["audit"]["num_ref_models"]

# The number of model pairs is the larger of: half the experiments (rounded
# up), and one more than the number of reference models.
pairs_for_experiments = math.ceil(num_experiments / 2.0)
pairs_for_references = num_reference_models + 1
num_model_pairs = max(pairs_for_experiments, pairs_for_references)

# Ask load_models for two models per pair; when it yields no models
# (models_list is None), split the dataset and train them instead.
baseline_time = time.time()
num_models = num_model_pairs * 2
models_list, memberships = load_models(log_dir, dataset, num_models, configs, logger)
if models_list is None:
    dataset_size = len(dataset)
    data_splits, memberships = split_dataset_for_training(dataset_size, num_model_pairs)
    models_list = train_models(
        log_dir, dataset, data_splits, memberships, configs, logger
    )
model_setup_seconds = time.time() - baseline_time
logger.info("Model loading/training took %0.1f seconds", model_setup_seconds)


2024-12-02 18:45:19,305 INFO     Loading model 0
2024-12-02 18:45:19,347 INFO     Loading model 1
2024-12-02 18:45:19,370 INFO     Loading model 2
2024-12-02 18:45:19,384 INFO     Loading model 3
2024-12-02 18:45:19,397 INFO     Loading model 4
2024-12-02 18:45:19,410 INFO     Loading model 5
2024-12-02 18:45:19,423 INFO     Model loading/training took 0.1 seconds


## Prepare auditing dataset

In [6]:
# Build the dataset used for auditing together with its membership
# information, derived from the full dataset and the models' memberships.
auditing_sample = sample_auditing_dataset(configs, dataset, logger, memberships)
auditing_dataset, auditing_membership = auditing_sample

2024-12-02 18:45:23,052 INFO     Downsampling the dataset for auditing to 10000 samples. The numbers of members and non-members are only guaranteed to be equal for the first target model, if more than one are used.


## Compute signals

In [7]:
# Obtain the signals for every model on the auditing dataset and log the time spent.
baseline_time = time.time()
signals = get_model_signals(models_list, auditing_dataset, configs, logger)
signal_seconds = time.time() - baseline_time
logger.info("Preparing signals took %0.5f seconds", signal_seconds)

2024-12-02 18:45:26,325 INFO     Signals loaded from disk.
2024-12-02 18:45:26,327 INFO     Preparing signals took 0.00326 seconds


## Audit

In [8]:
# Run the privacy audit against the first `num_experiments` models.
baseline_time = time.time()
target_model_indices = [*range(num_experiments)]
report_prefix = f"{directories['report_dir']}/exp"
mia_score_list, membership_list = audit_models(
    report_prefix,
    target_model_indices,
    signals,
    auditing_membership,
    num_reference_models,
    logger,
    configs,
)

# With more than one target model, log the overall audit time and then
# aggregate the per-model results into an average report.
if len(target_model_indices) > 1:
    audit_seconds = time.time() - baseline_time
    logger.info("Auditing privacy risk took %0.1f seconds", audit_seconds)
    get_average_audit_results(
        directories["report_dir"], mia_score_list, membership_list, logger
    )

# Total time elapsed since `start_time` was recorded in the setup cell.
total_seconds = time.time() - start_time
logger.info("Total runtime: %0.5f seconds", total_seconds)

2024-11-17 17:42:18,050 INFO     Fine-tuning offline_a using paired model 1
2024-11-17 17:42:18,073 INFO     offline_a=0.00: AUC 0.5263
2024-11-17 17:42:18,087 INFO     offline_a=0.10: AUC 0.5266
2024-11-17 17:42:18,101 INFO     offline_a=0.20: AUC 0.5268
2024-11-17 17:42:18,116 INFO     offline_a=0.30: AUC 0.5271
2024-11-17 17:42:18,130 INFO     offline_a=0.40: AUC 0.5274
2024-11-17 17:42:18,144 INFO     offline_a=0.50: AUC 0.5277
2024-11-17 17:42:18,158 INFO     offline_a=0.60: AUC 0.5280
2024-11-17 17:42:18,173 INFO     offline_a=0.70: AUC 0.5283
2024-11-17 17:42:18,187 INFO     offline_a=0.80: AUC 0.5285
2024-11-17 17:42:18,201 INFO     offline_a=0.90: AUC 0.5283
2024-11-17 17:42:18,216 INFO     offline_a=1.00: AUC 0.5266
2024-11-17 17:42:18,216 INFO     The best offline_a is 0.8
2024-11-17 17:42:18,231 INFO     Target Model 0: AUC 0.5298, TPR@0.1%FPR of 0.0015, TPR@0.0%FPR of 0.0000
2024-11-17 17:42:21,966 INFO     Auditing the privacy risks of target model 0 costs 3.9 seconds
202

<Figure size 640x480 with 0 Axes>