In [1]:
import numpy as np
import tensorflow as tf

In [2]:
import sys
!{sys.executable} -m pip install -e ../.
from privacy_meter.dataset import Dataset
from privacy_meter.model import TensorflowModel
from privacy_meter.information_source import InformationSource
from privacy_meter.audit import Audit, MetricEnum

Obtaining file:///Users/aadyaamaddi/Desktop/ML%20Privacy%20Meter/privacy_meter
Installing collected packages: privacy-meter
  Attempting uninstall: privacy-meter
    Found existing installation: privacy-meter 1.0
    Uninstalling privacy-meter-1.0:
      Successfully uninstalled privacy-meter-1.0
  Running setup.py develop for privacy-meter
Successfully installed privacy-meter-1.0


In [3]:
def preprocess_cifar10_dataset():
    input_shape, num_classes = (32, 32, 3), 10

    # Split the data between train and test sets
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

    # Scale images to the [0, 1] range
    x_train = x_train.astype("float32") / 255
    x_test = x_test.astype("float32") / 255

    # Convert labels into one hot vectors
    y_train = tf.keras.utils.to_categorical(y_train, num_classes)
    y_test = tf.keras.utils.to_categorical(y_test, num_classes)

    return x_train, y_train, x_test, y_test, input_shape, num_classes

# Generate target dataset (right now this also has the population data)
x_train_all, y_train_all, x_test_all, y_test_all, input_shape, num_classes = preprocess_cifar10_dataset()

num_train_points = 5000
num_test_points = 5000
num_population_points = 10000

x_train, y_train = x_train_all[:num_train_points], y_train_all[:num_train_points]
x_test, y_test = x_test_all[:num_test_points], y_test_all[:num_test_points]
x_population = x_train_all[num_train_points:(num_train_points + num_population_points)]
y_population = y_train_all[num_train_points:(num_train_points + num_population_points)]

In [4]:
# Create data dictionaries in a tfds-like style
train_ds = {'x': x_train, 'y': y_train}
test_ds = {'x': x_test, 'y': y_test}
target_dataset = Dataset(
    data_dict={'train': train_ds, 'test': test_ds},
    default_input='x', default_output='y'
)

population_ds = {'x': x_population, 'y': y_population}
reference_dataset = Dataset(
    data_dict={'train': population_ds},  # this is the default mapping
    default_input='x', default_output='y'
)

In [5]:
def get_tensorflow_cnn_classifier(input_shape, num_classes, regularizer):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu',
                                     input_shape=input_shape, kernel_regularizer=regularizer))
    model.add(tf.keras.layers.MaxPool2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu',
                                     kernel_regularizer=regularizer))
    model.add(tf.keras.layers.MaxPool2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
    return model

In [6]:
loss_fn = tf.keras.losses.CategoricalCrossentropy()
optim_fn = 'adam'
epochs = 5
batch_size = 64
regularizer_penalty = 0.01
regularizer = tf.keras.regularizers.l2(l=regularizer_penalty)

In [7]:
x = target_dataset.get_feature('train', '<default_input>')
y = target_dataset.get_feature('train', '<default_output>')
model = get_tensorflow_cnn_classifier(input_shape, num_classes, regularizer)
model.summary()
model.compile(optimizer=optim_fn, loss=loss_fn, metrics=['accuracy'])
model.fit(x, y, batch_size=batch_size, epochs=epochs, verbose=2)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 30, 30, 32)        896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 13, 13, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 2304)              0         
_________________________________________________________________
dropout (Dropout)            (None, 2304)              0         
_________________________________________________________________
dense (Dense)                (None, 10)                2

<tensorflow.python.keras.callbacks.History at 0x7f797f75b1d0>

In [8]:
per_sample_loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True,
                                                             reduction=tf.keras.losses.Reduction.NONE)
target_model = TensorflowModel(model_obj=model, loss_fn=loss_fn)

In [9]:
target_info_source = InformationSource(models=[target_model], datasets=[target_dataset])
reference_info_source = InformationSource(models=[target_model], datasets=[reference_dataset])

In [10]:
fpr_tolerance_list = [0.1, 0.3, 0.5]
audit_obj = Audit(
    metric=MetricEnum.POPULATION,
    target_info_source=target_info_source,
    reference_info_source=reference_info_source,
    fpr_tolerance_list=fpr_tolerance_list
)
audit_obj.run()

Accuracy          = 0.5
ROC AUC Score     = 0.5
FPR               = 1.0
TN, FP, FN, TP    = (0, 1, 0, 1)
Accuracy          = 0.5
ROC AUC Score     = 0.5
FPR               = 1.0
TN, FP, FN, TP    = (0, 1, 0, 1)
Accuracy          = 0.5
ROC AUC Score     = 0.5
FPR               = 1.0
TN, FP, FN, TP    = (0, 1, 0, 1)
