In [103]:
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tqdm.notebook import tqdm

%load_ext autoreload
%autoreload 2

assert tf.executing_eagerly()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
epochs = 20
batch_size = 500

DP-SGD has three privacy-specific hyperparameters and one existing hyperamater that you must tune:

1. `l2_norm_clip` (float) - The maximum Euclidean (L2) norm of each gradient that is applied to update model parameters. This hyperparameter is used to bound the optimizer's sensitivity to individual training points. 
2. `noise_multiplier` (float) - The amount of noise sampled and added to gradients during training. Generally, more noise results in better privacy (often, but not necessarily, at the expense of lower utility).
3.   `microbatches` (int) - Each batch of data is split in smaller units called microbatches. By default, each microbatch should contain a single training example. This allows us to clip gradients on a per-example basis rather than after they have been averaged across the minibatch. This in turn decreases the (negative) effect of clipping on signal found in the gradient and typically maximizes utility. However, computational overhead can be reduced by increasing the size of microbatches to include more than one training examples. The average gradient across these multiple training examples is then clipped. The total number of examples consumed in a batch, i.e., one step of gradient descent, remains the same. The number of microbatches should evenly divide the batch size. 

In [3]:
l2_norm_clip = 0.02
noise_multiplier = 1

Percentile at which to clip norm, based on public data gradients

In [4]:
norm_percentile = 0.8

In [5]:
train, test = tf.keras.datasets.cifar10.load_data()
train_data, train_labels = train
test_data, test_labels = test

train_data = np.array(train_data, dtype=np.float32) / 255
test_data = np.array(test_data, dtype=np.float32) / 255

train_labels = np.array(train_labels, dtype=np.int32)
test_labels = np.array(test_labels, dtype=np.int32)

train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10)
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10)

assert train_data.min() == 0.
assert train_data.max() == 1.
assert test_data.min() == 0.
assert test_data.max() == 1.

In [6]:
from sklearn.model_selection import train_test_split

# test_size refers to private data size
public_data, private_data, public_labels, private_labels = \
    train_test_split(train_data, train_labels, test_size=0.98)

train_dataset = tf.data.Dataset.from_tensor_slices((private_data, private_labels))
# Data is already shuffled
train_dataset = train_dataset.batch(batch_size)
num_batches = private_data.shape[0] // batch_size

public_dataset = tf.data.Dataset.from_tensor_slices((public_data, public_labels))

In [72]:
print(public_data.shape)
print(public_labels.shape)
print(private_data.shape)
print(private_labels.shape)
num_public = public_data.shape[0]
num_private = private_data.shape[0]

(1000, 32, 32, 3)
(1000, 10)
(49000, 32, 32, 3)
(49000, 10)


In [8]:
from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy

compute_dp_sgd_privacy.compute_dp_sgd_privacy(
    n=private_labels.shape[0], batch_size=batch_size, noise_multiplier=noise_multiplier, epochs=epochs, delta=1e-5)

DP-SGD with sampling rate = 1.02% and noise_multiplier = 1 iterated over 1960 steps satisfies differential privacy with eps = 3.38 and delta = 1e-05.
The optimal RDP order is 7.0.


(3.382117679728355, 7.0)

In [9]:
# CNN model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D

def cnn_model():
    model = tf.keras.models.Sequential()
    model.add(Conv2D(32, (3, 3), padding='same',
                     input_shape=train_data.shape[1:]))
    model.add(Activation('relu'))
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(train_labels.shape[1]))
    model.add(Activation('softmax'))
    
    return model

In [10]:
optimizer = tf.keras.optimizers.Adam()

loss_fn = tf.keras.losses.CategoricalCrossentropy(
    from_logits=True, reduction=tf.losses.Reduction.NONE)

In [141]:
def l2_norm(t):
    return tf.math.sqrt(tf.math.reduce_sum(tf.math.square(t)))

@tf.function
def get_grad_percentiles(loss_fn, model, norm_percentile):
    all_norms = [[0 for _ in range(num_public)] for _ in model.trainable_weights]
    for i, (x, y) in enumerate(public_dataset):
        with tf.GradientTape() as tape:
            x = tf.expand_dims(x, 0)
            loss_value = loss_fn(y, model(x))
            grad = tape.gradient(loss_value, model.trainable_weights)
            for j, layer in enumerate(grad):
                norm = l2_norm(layer)
                all_norms[j][i] = norm
    layer_percentiles = [tfp.stats.percentile(tf.convert_to_tensor(layer), norm_percentile) for 
                         layer in all_norms]
    return layer_percentiles

@tf.function
def evaluate_model(model, loss_fn, x, y):
    pred = model(x)
    loss = tf.math.reduce_mean(loss_fn(y, pred))
    acc = tf.math.reduce_mean(tf.keras.metrics.categorical_accuracy(y, pred))
    return (loss, acc)

In [144]:
clipped_model = cnn_model()
clipped_model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

In [145]:
# Iterate over epochs.
for epoch in tqdm(range(epochs), desc='Epoch'):
    
    # Evaluate
    loss, acc = evaluate_model(clipped_model, loss_fn, test_data, test_labels)
    print("Epoch %d - Loss: %.3f, Acc: %.4f" % (epoch, loss, acc))

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(tqdm(train_dataset, total=num_batches, desc='Batch')):

        grad_percentiles_by_layer = get_grad_percentiles(loss_fn, clipped_model, tf.constant(norm_percentile))

        with tf.GradientTape() as tape:
            logits = clipped_model(x_batch_train)  # Logits for this minibatch
            loss = loss_fn(y_batch_train, logits)
        grads = tape.gradient(loss, clipped_model.trainable_weights)

        
        optimizer.apply_gradients(zip(grads, clipped_model.trainable_weights))

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=20.0, style=ProgressStyle(description_width='…

Epoch 0 - Loss: 2.302, Acc: 0.0921


HBox(children=(FloatProgress(value=0.0, description='Batch', max=98.0, style=ProgressStyle(description_width='…

TypeError: in converted code:

    <ipython-input-141-60152c5bd0d0>:7 get_grad_percentiles  *
        for i, (x, y) in enumerate(public_dataset):
    /home/qinghao/tf2/lib/python3.5/site-packages/tensorflow_core/python/autograph/operators/control_flow.py:648 reduce_body
        body(iterate)
    /tmp/tmpxx4n0l5g.py:36 loop_body_1
        ag__.for_stmt(ag__.converted_call(enumerate, get_grad_percentiles_scope.callopts, (grad,), None, get_grad_percentiles_scope), None, loop_body, get_state, set_state, (), (), ())
    /home/qinghao/tf2/lib/python3.5/site-packages/tensorflow_core/python/autograph/operators/control_flow.py:339 for_stmt
        return _py_for_stmt(iter_, extra_test, body, get_state, set_state, init_vars)
    /home/qinghao/tf2/lib/python3.5/site-packages/tensorflow_core/python/autograph/operators/control_flow.py:350 _py_for_stmt
        state = body(target, *state)
    /tmp/tmpxx4n0l5g.py:34 loop_body
        layer_grads[i][j] = norm

    TypeError: list indices must be integers or slices, not Tensor


In [None]:
evaluate_model(clipped_model, loss_fn, test_data, test_labels)

In [None]:
dpsgd_model = cnn_model()
dpsgd_optimizer = DPAdamGaussianOptimizer(
                        l2_norm_clip=1e20,
                        noise_multiplier=0,
                        num_microbatches=num_microbatches)
dpsgd_model.compile(optimizer=dpsgd_optimizer, loss=loss_fn, metrics=['accuracy'])

In [None]:
# Iterate over epochs.
for epoch in tqdm(range(epochs), desc='Epoch'):
    
    # Evaluate
    loss, acc = evaluate_model(dpsgd_model, loss_fn, test_data, test_labels)
    print("Epoch %d - Loss: %.3f, Acc: %.4f" % (epoch, loss, acc))

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(tqdm(train_dataset, total=num_batches, desc='Batch')):

        public_grads = get_public_grads(public_data, public_labels, loss_fn, dpsgd_model)
        grad_percentiles_by_layer = get_grads_percentile(public_grads, norm_percentile)

        # Open a GradientTape to record the operations run
        # during the forward pass, which enables autodifferentiation.
        with tf.GradientTape(persistent=True) as tape:

            # Run the forward pass of the layer.
            # The operations that the layer applies
            # to its inputs are going to be recorded
            # on the GradientTape.
            logits = dpsgd_model(x_batch_train)  # Logits for this minibatch

            # Compute the loss value for this minibatch.
            loss = lambda: loss_fn(y_batch_train, logits)

            # Use the gradient tape to automatically retrieve
            # the gradients of the trainable variables with respect to the loss.
            grads = optimizer.compute_gradients(loss, dpsgd_model.trainable_weights, gradient_tape=tape)

        del tape

        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(grads)

In [None]:
evaluate_model(dpsgd_model, loss_fn, test_data, test_labels)

In [None]:
baseline_model = cnn_model()
baseline_model.compile(optimizer='adam',
                       loss=loss_fn, metrics=['accuracy'])

In [None]:
baseline_model.fit(private_data, private_labels,
              epochs=epochs,
              batch_size=batch_size)

In [None]:
evaluate_model(baseline_model, loss_fn, test_data, test_labels)