Copyright 2020 The TensorFlow Authors.


In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Assess privacy risks with TensorFlow Privacy Membership Inference Attacks

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/codelab.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/codelab.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>

## Overview
In this codelab we'll train a simple image classification model on the CIFAR10 dataset, and then run a "membership inference attack" against this model to assess whether an attacker is able to "guess" if a particular sample was present in the training set.

## Setup
First, set this notebook's runtime to use a GPU, under Runtime > Change runtime type > Hardware accelerator. Then, begin importing the necessary libraries.

In [1]:
# @title Import statements.
from typing import Text, Tuple
import numpy as np
from scipy import special
import tensorflow as tf
import tensorflow_datasets as tfds

# Keep the notebook output quiet: TensorFlow only logs errors, and
# ConvergenceWarning / FutureWarning messages are suppressed.
from sklearn.exceptions import ConvergenceWarning
from warnings import simplefilter

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

for _ignored_category in (ConvergenceWarning, FutureWarning):
  simplefilter(action="ignore", category=_ignored_category)

### Install TensorFlow Privacy.

In [None]:
!pip3 install git+https://github.com/tensorflow/privacy

from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia

## Train a model

In [None]:
# @markdown Train a simple model on CIFAR10 with Keras.

# Data: TFDS dataset name and the number of label classes.
dataset, num_classes = 'cifar10', 10

# Architecture: number of conv blocks and the activation they use.
num_conv, activation = 3, 'relu'

# Optimisation: SGD step size and momentum, plus the mini-batch size.
lr, momentum = 0.02, 0.9
batch_size = 250

# Training for many epochs makes the privacy risks especially visible.
epochs = 100


def small_cnn(
    input_shape: Tuple[int, ...],
    num_classes: int,
    num_conv: int,
    activation: Text = 'relu',
) -> tf.keras.models.Sequential:
  """Builds a small CNN for image classification.

  The network is `num_conv` blocks of (32-filter 3x3 convolution, max
  pooling), followed by a flatten, a 64-unit dense layer and a final dense
  layer with `num_classes` units. No activation is passed to the final layer,
  so the model is meant to be trained with a loss that expects logits.

  Args:
    input_shape: Integer tuple for the shape of the images, as accepted by
      `tf.keras.layers.Input(shape=...)`.
    num_classes: Number of prediction classes.
    num_conv: Number of convolution + pooling blocks.
    activation: The activation function to use for conv and dense layers.

  Returns:
    The Keras model. It is not compiled.
  """
  model = tf.keras.models.Sequential()
  model.add(tf.keras.layers.Input(shape=input_shape))

  # Feature extractor: repeated convolution + pooling blocks.
  for _ in range(num_conv):
    model.add(tf.keras.layers.Conv2D(32, (3, 3), activation=activation))
    model.add(tf.keras.layers.MaxPooling2D())

  # Classifier head: one hidden dense layer, then one unit per class.
  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(64, activation=activation))
  model.add(tf.keras.layers.Dense(num_classes))
  return model


print('Loading the dataset.')
# batch_size=-1 asks TFDS for the whole split at once; as_numpy converts the
# result so that the 'image' and 'label' entries can be used as NumPy arrays.
train_ds = tfds.as_numpy(
    tfds.load(dataset, split=tfds.Split.TRAIN, batch_size=-1)
)
test_ds = tfds.as_numpy(
    tfds.load(dataset, split=tfds.Split.TEST, batch_size=-1)
)

# Convert images to float32 and divide by 255; give labels a trailing axis.
x_train = train_ds['image'].astype('float32') / 255.0
y_train_indices = train_ds['label'][:, np.newaxis]
x_test = test_ds['image'].astype('float32') / 255.0
y_test_indices = test_ds['label'][:, np.newaxis]

# Convert class vectors to binary class matrices.
y_train = tf.keras.utils.to_categorical(y_train_indices, num_classes)
y_test = tf.keras.utils.to_categorical(y_test_indices, num_classes)

# Per-sample shape: everything except the leading (sample) axis.
input_shape = x_train.shape[1:]

model = small_cnn(
    input_shape, num_classes, num_conv=num_conv, activation=activation
)

# Apply the format string; passing lr as a second print argument would emit a
# literal '%f'.
print('learning rate %f' % lr)

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras versions reject.
optimizer = tf.keras.optimizers.SGD(learning_rate=lr, momentum=momentum)

# The model's last layer has no activation, so the loss works on logits.
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
model.summary()
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
    shuffle=True,
)
print('Finished training.')

## Calculate logits, probabilities and loss values for training and test sets.

We will use these values later in the membership inference attack to separate training and test samples.

In [None]:
# The model outputs are treated as logits; they are turned into probabilities
# and per-example losses below.
print('Predict on train...')
logits_train = model.predict(x_train, batch_size=batch_size)
print('Predict on test...')
logits_test = model.predict(x_test, batch_size=batch_size)

print('Apply softmax to get probabilities from logits...')
# Softmax over axis 1 of each logits array.
prob_train, prob_test = (
    special.softmax(split_logits, axis=1)
    for split_logits in (logits_train, logits_test)
)

print('Compute losses...')
cce = tf.keras.backend.categorical_crossentropy
constant = tf.keras.backend.constant

# Cross-entropy between the one-hot labels and the softmax probabilities
# (hence from_logits=False), converted back to NumPy.
loss_train, loss_test = (
    cce(constant(split_labels), constant(split_probs), from_logits=False).numpy()
    for split_labels, split_probs in (
        (y_train, prob_train),
        (y_test, prob_test),
    )
)

## Run membership inference attacks.

We will now execute a membership inference attack against the previously trained CIFAR10 model. This will generate a number of scores, most notably, attacker advantage and AUC for the membership inference classifier.

An AUC close to 0.5 means that the attack wasn't able to distinguish training samples from test samples, which means that the model doesn't show privacy issues according to this test. Higher values, on the contrary, indicate potential privacy issues.

In [None]:
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec
import tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.plotting as plotting

# Recover integer class ids from the one-hot label matrices.
labels_train, labels_test = (
    np.argmax(one_hot_labels, axis=1) for one_hot_labels in (y_train, y_test)
)

# Bundle the per-example signals the attacks work from.
attack_input = AttackInputData(
    logits_train=logits_train,
    loss_train=loss_train,
    labels_train=labels_train,
    logits_test=logits_test,
    loss_test=loss_test,
    labels_test=labels_test,
)

# Run several attacks for different data slices: the entire dataset, each
# class, and by classification correctness.
slicing_spec = SlicingSpec(
    entire_dataset=True,
    by_class=True,
    by_classification_correctness=True,
)
attack_types = [AttackType.THRESHOLD_ATTACK, AttackType.LOGISTIC_REGRESSION]
attacks_result = mia.run_attacks(
    attack_input=attack_input,
    slicing_spec=slicing_spec,
    attack_types=attack_types,
)

# Plot the ROC curve of the attack result with the highest AUC.
best_attack_result = attacks_result.get_result_with_max_auc()
fig = plotting.plot_roc_curve(best_attack_result.roc_curve)

# Print a user-friendly summary of the attacks, broken down by slice.
print(attacks_result.summary(by_slices=True))

This is the end of the codelab!
Feel free to change the parameters to see how the privacy risks change.

You can try playing with:
* the number of training epochs
* different attack_types