In [1]:
## basic functions has been put in to dpsgd_keras.py, won't touch this notebook, for it already works.

# Copyright 2019, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Training a CNN on MNIST with Keras and the DP SGD optimizer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import app
from absl import flags
from absl import logging

import numpy as np
import tensorflow as tf

from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

tf.config.threading.set_intra_op_parallelism_threads(3)
dpsgd = True
learning_rate = 0.015
noise_multiplier = 5
l2_norm_clip = 1.0
batch_size = 25
epochs = 10
microbatches = 25
num_parameters = 50
model_dir = None

In [2]:
tf.__version__

'2.4.1'

In [3]:
def random_choice_cond(x, size):
    tensor_size = tf.size(x)
    indices = tf.range(0, tensor_size, dtype=tf.int64)
    if size == 0:
        sample_flatten_index = tf.random.shuffle(indices)[:]
    else:     
        sample_flatten_index = tf.random.shuffle(indices)[:size]
    sample_index = tf.transpose(tf.unravel_index(tf.cast(sample_flatten_index,tf.int32), tf.shape(input=x))) #[[all 0-th dimension indexes], [all 1-th dimension indexes]]
    cond = tf.scatter_nd(sample_index, tf.ones(tf.shape(input=sample_index)[0],dtype=tf.bool), tf.shape(input=x))
    return cond

In [4]:
# we need noise on the same fixed number of model parameters for each layer.

from tensorflow_privacy.privacy.dp_query import gaussian_query
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops

def make_fixed_keras_optimizer_class(cls):
  """Constructs a DP Keras optimizer class from an existing one."""

  class FixedDPOptimizerClass(cls):
    """Differentially private subclass of given class cls.
    The class tf.keras.optimizers.Optimizer has two methods to compute
    gradients, `_compute_gradients` and `get_gradients`. The first works
    with eager execution, while the second runs in graph mode and is used
    by canned estimators.
    Internally, DPOptimizerClass stores hyperparameters both individually
    and encapsulated in a `GaussianSumQuery` object for these two use cases.
    However, this should be invisible to users of this class.
    
    
    btw. support negative num_parameters, used for all but num_parameters noises.
    """

    def __init__(
        self,
        l2_norm_clip,
        noise_multiplier,
        num_microbatches=None,
        num_parameters = 10,
        #noise_layer_type = "linear",
        *args,  # pylint: disable=keyword-arg-before-vararg, g-doc-args
        **kwargs):
      """Initialize the DPOptimizerClass.
      Args:
        l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients)
        noise_multiplier: Ratio of the standard deviation to the clipping norm
        num_microbatches: The number of microbatches into which each minibatch
          is split.
      """
      super(FixedDPOptimizerClass, self).__init__(*args, **kwargs)
      self._l2_norm_clip = l2_norm_clip
      self._noise_multiplier = noise_multiplier
      self._num_microbatches = num_microbatches
      self._dp_sum_query = gaussian_query.GaussianSumQuery(
          l2_norm_clip, l2_norm_clip * noise_multiplier)
      self._global_state = None
      self._num_parameters = num_parameters
      self._was_dp_gradients_called = False
      self.samples_cond = {}

    def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None):
      """DP version of superclass method."""
      print("compute the gradient")
      is_considered = [x.name.startswith("Considered") for x in var_list]
    
      print(is_considered)
      self._was_dp_gradients_called = True
      # Precompute the noise locations
      if len(self.samples_cond) == 0:
        self.samples_cond = [tf.Variable(random_choice_cond(x, self._num_parameters)) for x in var_list]
      # Compute loss.
      if not callable(loss) and tape is None:
        raise ValueError('`tape` is required when a `Tensor` loss is passed.')
      tape = tape if tape is not None else tf.GradientTape()
      self.is_considered = is_considered
      if callable(loss):
        with tape:
          if not callable(var_list):
            tape.watch(var_list)

          if callable(loss):
            loss = loss()
            microbatch_losses = tf.reduce_mean(
                tf.reshape(loss, [self._num_microbatches, -1]), axis=1)

          if callable(var_list):
            var_list = var_list()
      else:
        with tape:
          microbatch_losses = tf.reduce_mean(
              tf.reshape(loss, [self._num_microbatches, -1]), axis=1)

      var_list = tf.nest.flatten(var_list)

      # Compute the per-microbatch losses using helpful jacobian method.
      with tf.keras.backend.name_scope(self._name + '/gradients'):
        jacobian = tape.jacobian(microbatch_losses, var_list)

        # Clip gradients to given l2_norm_clip.
        def clip_gradients(g):
          return tf.clip_by_global_norm(g, self._l2_norm_clip)[0]

        clipped_gradients = tf.map_fn(clip_gradients, jacobian)
        
        def reduce_noise_normalize_batch(self, g, is_considered, layer_index):
          # Sum gradients over all microbatches.
          summed_gradient = tf.reduce_sum(g, axis=0)

          # Sample the indexes
          sampled_cond = self.samples_cond[layer_index]
          '''
          is_linear = tf.rank(input=g) > 2
          if self.noise_layer_type == 'linear':
            sampled_cond = tf.math.logical_or(sampled_cond, tf.math.logical_not(is_linear))
            #sampled_cond = tf.math.logical_and(sampled_cond, is_linear)
          elif self.noise_layer_type == 'bias':
            sampled_cond = tf.math.logical_or(sampled_cond, is_linear)
            #sampled_cond = tf.math.logical_and(sampled_cond, tf.math.logical_not(is_linear))
          else:
            assert(False)
          '''
          # Add noise to summed gradients.
          noise_stddev = self._l2_norm_clip * self._noise_multiplier
          noise = tf.random.normal(
              tf.shape(input=summed_gradient), stddev=noise_stddev)
          noised_gradient = tf.add(summed_gradient, noise)
          #tf.print(layer_index)
          #if layer_index == 2:
          #    tf.print(sampled_cond)
          #tf.print("num of noise:", tf.math.reduce_sum(tf.cast(tf.math.logical_or(sampled_cond, tf.math.logical_not(is_considered)), tf.int32)))
          fixed_gradient = tf.where(tf.math.logical_and(sampled_cond, is_considered), noised_gradient, summed_gradient)
          # Normalize by number of microbatches and return.
          return tf.truediv(fixed_gradient, self._num_microbatches)
            

        final_gradients = [reduce_noise_normalize_batch(self, clipped_gradients[i], is_considered[i], i) for i in range(len(clipped_gradients))]
        
        
        self.clipped_gradients = clipped_gradients
        self.final_gradients = final_gradients
      return list(zip(final_gradients, var_list))

    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
      assert self._was_dp_gradients_called, (
          'Neither _compute_gradients() or get_gradients() on the '
          'differentially private optimizer was called. This means the '
          'training is not differentially private. It may be the case that '
          'you need to upgrade to TF 2.4 or higher to use this particular '
          'optimizer.')
      return super(FixedDPOptimizerClass,
                   self).apply_gradients(grads_and_vars, global_step, name)

  return FixedDPOptimizerClass

FixedDPKerasSGDOptimizer = make_fixed_keras_optimizer_class(tf.keras.optimizers.SGD)

In [5]:
def compute_epsilon(steps, sampling_probability):
  """Computes epsilon value for given hyperparameters."""
  if noise_multiplier == 0.0:
    return float('inf')
  orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
  rdp = compute_rdp(q=sampling_probability,
                    noise_multiplier=noise_multiplier,
                    steps=steps,
                    orders=orders)
  # Delta is set to 1e-5 because MNIST has 60000 training points.
  return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]

In [6]:
def load_mnist():
  """Loads MNIST and preprocesses to combine training and validation data."""
  train, test = tf.keras.datasets.mnist.load_data()
  train_data, train_labels = train
  test_data, test_labels = test

  train_data = np.array(train_data, dtype=np.float32) / 255
  test_data = np.array(test_data, dtype=np.float32) / 255

  train_data = train_data.reshape((train_data.shape[0], 28, 28, 1))
  test_data = test_data.reshape((test_data.shape[0], 28, 28, 1))

  train_labels = np.array(train_labels, dtype=np.int32)
  test_labels = np.array(test_labels, dtype=np.int32)

  train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10)
  test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10)

  assert train_data.min() == 0.
  assert train_data.max() == 1.
  assert test_data.min() == 0.
  assert test_data.max() == 1.

  return train_data, train_labels, test_data, test_labels

In [7]:
class BiasLayer(tf.keras.layers.Layer):
    def __init__(self, *args, **kwargs):
        super(BiasLayer, self).__init__(*args, **kwargs)

    def build(self, input_shape):
        self.bias = self.add_weight('bias',
                                    shape=input_shape[1:],
                                    initializer='zeros',
                                    trainable=True)
    def call(self, x):
        return x + self.bias

In [8]:
#def main_simple(unused_argv):
logging.set_verbosity(logging.INFO)
if dpsgd and batch_size % microbatches != 0:
    raise ValueError('Number of microbatches should divide evenly batch_size')

# Load training and test data.
train_data, train_labels, test_data, test_labels = load_mnist()

# Define a sequential Keras model
'''
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(16, 8,
                         strides=2,
                         padding='same',
                         activation='relu',
                         input_shape=(28, 28, 1),
                          trainable = False),
    tf.keras.layers.MaxPool2D(2, 1),
    tf.keras.layers.Conv2D(32, 4,
                         strides=2,
                         padding='valid',
                         activation='relu'),
    tf.keras.layers.MaxPool2D(2, 1),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(10)
])
'''

"\nmodel = tf.keras.Sequential([\n    tf.keras.layers.Conv2D(16, 8,\n                         strides=2,\n                         padding='same',\n                         activation='relu',\n                         input_shape=(28, 28, 1),\n                          trainable = False),\n    tf.keras.layers.MaxPool2D(2, 1),\n    tf.keras.layers.Conv2D(32, 4,\n                         strides=2,\n                         padding='valid',\n                         activation='relu'),\n    tf.keras.layers.MaxPool2D(2, 1),\n    tf.keras.layers.Flatten(),\n    tf.keras.layers.Dense(32, activation='relu'),\n    tf.keras.layers.Dense(10)\n])\n"

In [9]:
# Models 
def build_models(noise_layer_name):
    if noise_layer_name=="conv_linear1":
        model = tf.keras.Sequential([
          tf.keras.layers.Conv2D(16, 8,
                                 strides=2,
                                 padding='same',
                                 activation='relu',
                                 input_shape=(28, 28, 1), use_bias=False, name="Considered"),
          tf.keras.layers.MaxPool2D(2, 1),
          BiasLayer(),
          tf.keras.layers.Conv2D(32, 4,
                                 strides=2,
                                 padding='valid',
                                 activation='relu', use_bias=False),
          tf.keras.layers.MaxPool2D(2, 1),
          tf.keras.layers.Flatten(),
          BiasLayer(),
          tf.keras.layers.Dense(32, activation='relu', use_bias=False),
          BiasLayer(),
          tf.keras.layers.Dense(10, use_bias=False)
        ])
    elif noise_layer_name=="conv_linear2":
        model = tf.keras.Sequential([
          tf.keras.layers.Conv2D(16, 8,
                                 strides=2,
                                 padding='same',
                                 activation='relu',
                                 input_shape=(28, 28, 1), use_bias=False),
          tf.keras.layers.MaxPool2D(2, 1),
          BiasLayer(),
          tf.keras.layers.Conv2D(32, 4,
                                 strides=2,
                                 padding='valid',
                                 activation='relu', use_bias=False, name="Considered"),
          tf.keras.layers.MaxPool2D(2, 1),
          tf.keras.layers.Flatten(),
          BiasLayer(),
          tf.keras.layers.Dense(32, activation='relu', use_bias=False),
          BiasLayer(),
          tf.keras.layers.Dense(10, use_bias=False)
        ]) 
    elif noise_layer_name=="conv_bias1":
        model = tf.keras.Sequential([
          tf.keras.layers.Conv2D(16, 8,
                                 strides=2,
                                 padding='same',
                                 activation='relu',
                                 input_shape=(28, 28, 1), use_bias=False),
          tf.keras.layers.MaxPool2D(2, 1),
          BiasLayer(name="Considered1"),
          tf.keras.layers.Conv2D(32, 4,
                                 strides=2,
                                 padding='valid',
                                 activation='relu', use_bias=False),
          tf.keras.layers.MaxPool2D(2, 1),
          tf.keras.layers.Flatten(),
          BiasLayer(),
          tf.keras.layers.Dense(32, activation='relu', use_bias=False),
          BiasLayer(),
          tf.keras.layers.Dense(10, use_bias=False)
        ])
    elif noise_layer_name=="conv_bias2":
        model = tf.keras.Sequential([
          tf.keras.layers.Conv2D(16, 8,
                                 strides=2,
                                 padding='same',
                                 activation='relu',
                                 input_shape=(28, 28, 1), use_bias=False),
          tf.keras.layers.MaxPool2D(2, 1),
          BiasLayer(),
          tf.keras.layers.Conv2D(32, 4,
                                 strides=2,
                                 padding='valid',
                                 activation='relu', use_bias=False),
          tf.keras.layers.MaxPool2D(2, 1),
          tf.keras.layers.Flatten(),
          BiasLayer(name="Considered2"),
          tf.keras.layers.Dense(32, activation='relu', use_bias=False),
          BiasLayer(),
          tf.keras.layers.Dense(10, use_bias=False)
        ])
    elif noise_layer_name=="fully_linear":
        model = tf.keras.Sequential([
          tf.keras.layers.Conv2D(16, 8,
                                 strides=2,
                                 padding='same',
                                 activation='relu',
                                 input_shape=(28, 28, 1), use_bias=False),
          tf.keras.layers.MaxPool2D(2, 1),
          BiasLayer(),
          tf.keras.layers.Conv2D(32, 4,
                                 strides=2,
                                 padding='valid',
                                 activation='relu', use_bias=False),
          tf.keras.layers.MaxPool2D(2, 1),
          tf.keras.layers.Flatten(),
          BiasLayer(),
          tf.keras.layers.Dense(32, activation='relu', use_bias=False, name="Considered"),
          BiasLayer(),
          tf.keras.layers.Dense(10, use_bias=False)
        ])
    elif noise_layer_name=="fully_bias":
        model = tf.keras.Sequential([
          tf.keras.layers.Conv2D(16, 8,
                                 strides=2,
                                 padding='same',
                                 activation='relu',
                                 input_shape=(28, 28, 1), use_bias=False),
          tf.keras.layers.MaxPool2D(2, 1),
          BiasLayer(),
          tf.keras.layers.Conv2D(32, 4,
                                 strides=2,
                                 padding='valid',
                                 activation='relu', use_bias=False),
          tf.keras.layers.MaxPool2D(2, 1),
          tf.keras.layers.Flatten(),
          BiasLayer(),
          tf.keras.layers.Dense(32, activation='relu', use_bias=False),
          BiasLayer(name="Considered"),
          tf.keras.layers.Dense(10, use_bias=False)
        ])
    else:
        assert(False)
    return model

In [10]:
build_models("fully_bias").summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 14, 14, 16)        1024      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 16)        0         
_________________________________________________________________
bias_layer (BiasLayer)       (None, 13, 13, 16)        2704      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 5, 5, 32)          8192      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 4, 4, 32)          0         
_________________________________________________________________
flatten (Flatten)            (None, 512)               0         
_________________________________________________________________
bias_layer_1 (BiasLayer)     (None, 512)               5

In [11]:
'''
#For bias first experiment.
model = tf.keras.Sequential([
  BiasLayer(),
  tf.keras.layers.Conv2D(16, 8,
                         strides=2,
                         padding='same',
                         activation='relu',
                         input_shape=(28, 28, 1), use_bias=False),
  tf.keras.layers.MaxPool2D(2, 1),
  BiasLayer(),
  tf.keras.layers.Conv2D(32, 4,
                         strides=2,
                         padding='valid',
                         activation='relu', use_bias=False),
  tf.keras.layers.MaxPool2D(2, 1),
  tf.keras.layers.Flatten(),
  BiasLayer(),
  tf.keras.layers.Dense(32, activation='relu', use_bias=False),
  BiasLayer(),
  tf.keras.layers.Dense(10, use_bias=False)
])
'''

parameter_list = {}
accuracies = {}
for num_parameters in [1,500,1000]:
    for noise_layer_type in ["conv_linear1","conv_linear2", "conv_bias1","conv_bias2", "fully_linear", "fully_bias"]:
        file_name = str(num_parameters) + "_" + noise_layer_type
        model = build_models(noise_layer_type)
        '''
        model = tf.keras.Sequential([
            tf.keras.layers.Flatten(),
            BiasLayer(name='bias1'),
            tf.keras.layers.Dense(32, use_bias=False, activation=None, name='linear1'),
            tf.keras.layers.Activation('relu'),
            BiasLayer(name='Consideredbias2'),
            tf.keras.layers.Dense(10, use_bias=False, name='Consideredlinear2'),
        ])'''
        if dpsgd:
            optimizer = FixedDPKerasSGDOptimizer(
                num_parameters = num_parameters,
                l2_norm_clip=l2_norm_clip,
                noise_multiplier=noise_multiplier,
                num_microbatches=microbatches,
                learning_rate=learning_rate)
            # Compute vector of per-example loss rather than its mean over a minibatch.
            loss = tf.keras.losses.CategoricalCrossentropy(
                from_logits=True, reduction=tf.losses.Reduction.NONE)
        else:
            optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
            loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

        # Compile model with Keras
        checkpoint_filepath = "./data/"+file_name
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
          filepath=checkpoint_filepath,
          save_weights_only=True,
          monitor='val_accuracy',
          mode='max',
          save_best_only=True)
        early_stopping_callback = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', min_delta=0, patience=1, verbose=0,
            mode='auto', baseline=None, restore_best_weights=True
        )
        model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
        # Train model with Keras
        model.fit(train_data, train_labels,
                    epochs=epochs,
                    validation_data=(test_data, test_labels),
                    batch_size=batch_size, 
                    callbacks = [model_checkpoint_callback, early_stopping_callback], workers=1)
        evaluated_result = model.evaluate(
            x=test_data, y=test_labels, batch_size=None, verbose=1, sample_weight=None, steps=None,
            callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False,
            return_dict=True)
        if noise_layer_type not in accuracies.keys():
            accuracies[noise_layer_type] = []
        if noise_layer_type not in parameter_list.keys():
            parameter_list[noise_layer_type] = []
        accuracies[noise_layer_type].append(evaluated_result["accuracy"])
        parameter_list[noise_layer_type].append(num_parameters)

# Compute the privacy budget expended.
#if dpsgd:
#    eps = compute_epsilon(epochs * len(train_data) // batch_size, len(train_data)
#    print('For delta=1e-5, the current epsilon is: %.2f' % eps)
#else:
#    print('Trained with vanilla non-private SGD optimizer')

Epoch 1/10
compute the gradient
[True, False, False, False, False, False, False]
compute the gradient
[True, False, False, False, False, False, False]


UnknownError:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node sequential_1/Considered/Conv2D (defined at <ipython-input-11-26faeb4ec1f5>:72) ]] [Op:__inference_train_function_3163]

Function call stack:
train_function


In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots()

# Using set_dashes() to modify dashing of an existing line
for key in parameter_list.keys():
    line1, = ax.plot(parameter_list[key],accuracies[key],  label=key)
    line1.set_dashes([2, 2, 10, 2])  # 2pt line, 2pt break, 10pt line, 2pt break

ax.legend()
plt.show()

In [None]:
ee
import pickle
result = open("result","wb")
pickle.dump(accuracies, result)
pickle.dump(parameter_list, result)
result.close()


In [None]:
import pickle dd

In [None]:
result = open("result", "rb")
accuracies = pickle.load(result)
parameter_list = pickle.load(result)

In [None]:
import matplotlib.pyplot as plt

In [None]:
len(num)

In [None]:
parameter_list

In [None]:

fig, ax = plt.subplots()

# Using set_dashes() to modify dashing of an existing line
line1, = ax.plot(parameter_list["linear"],accuracies["linear"],  label='Linear')
line1.set_dashes([2, 2, 10, 2])  # 2pt line, 2pt break, 10pt line, 2pt break

line2, = ax.plot(parameter_list["bias"],  accuracies["bias"], label='Bias')
line2.set_dashes([2, 2, 10, 2])  # 2pt line, 2pt break, 10pt line, 2pt break

ax.legend()
plt.show()