In [1]:

"""Training a CNN on MNIST with Keras and the DP SGD optimizer.
Slow implementation allowing large batch size: using input-size=num_microbatch outside, B-batch inside.
Usage: input batch size also, which is the real update frequency."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import app
from absl import flags
from absl import logging

import numpy as np
import tensorflow as tf

from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

tf.config.threading.set_intra_op_parallelism_threads(3)

def random_choice_cond(x, size):
    tensor_size = tf.size(x)
    indices = tf.range(0, tensor_size, dtype=tf.int64)
    if size == 0:
        sample_flatten_index = tf.random.shuffle(indices)[:]
    else:     
        sample_flatten_index = tf.random.shuffle(indices)[:size]
    sample_index = tf.transpose(tf.unravel_index(tf.cast(sample_flatten_index,tf.int32), tf.shape(input=x))) #[Result: [indexes for the first sample], [indexes for the second sample]...]
    cond = tf.scatter_nd(sample_index, tf.ones(tf.shape(input=sample_index)[0],dtype=tf.bool), tf.shape(input=x))
    return cond

    # we need noise on the same fixed number of model parameters for each layer.

from tensorflow_privacy.privacy.dp_query import gaussian_query
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops

def make_fixed_keras_optimizer_class(cls):
  """Constructs a DP Keras optimizer class from an existing one."""

  class FixedDPOptimizerClass(cls):
    """Differentially private subclass of given class cls.
    The class tf.keras.optimizers.Optimizer has two methods to compute
    gradients, `_compute_gradients` and `get_gradients`. The first works
    with eager execution, while the second runs in graph mode and is used
    by canned estimators.
    Internally, DPOptimizerClass stores hyperparameters both individually
    and encapsulated in a `GaussianSumQuery` object for these two use cases.
    However, this should be invisible to users of this class.
    
    
    btw. support negative num_parameters, used for all but num_parameters noises.
    """

    def __init__(
        self,
        l2_norm_clip,
        noise_multiplier,
        batch_size,
        var_list,
        microbatch_size= 1,
        num_parameters = 10,
        #noise_layer_type = "linear",
        *args,  # pylint: disable=keyword-arg-before-vararg, g-doc-args
        **kwargs):
      """Initialize the DPOptimizerClass.
      Args:
        l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients)
        noise_multiplier: Ratio of the standard deviation to the clipping norm
        num_microbatches: The number of microbatches into which each minibatch
          is split.
      """
      super(FixedDPOptimizerClass, self).__init__(*args, **kwargs)
      assert(batch_size % microbatch_size == 0)
      self._l2_norm_clip = l2_norm_clip
      self._noise_multiplier = noise_multiplier
      self._aggregate_gradients = [tf.Variable(tf.zeros_like(grad)) for grad in var_list]
      self._dp_sum_query = gaussian_query.GaussianSumQuery(
          l2_norm_clip, l2_norm_clip * noise_multiplier)
      self._global_state = None
      self._num_parameters = num_parameters
      self._was_dp_gradients_called = False
      self._batch_idx = tf.Variable(0)
      self._batch_size = tf.Variable(batch_size)
      self._microbatch_size = microbatch_size
      self.samples_cond = {}

    def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None):
      """DP version of superclass method."""
      print("compute the gradient")
      is_considered = [x.name.startswith("Considered") for x in var_list]
    
      print(is_considered)
      self._was_dp_gradients_called = True
      # Precompute the noise locations
      if len(self.samples_cond) == 0:
        self.samples_cond = [tf.Variable(random_choice_cond(x, self._num_parameters)) for x in var_list]
      # Compute loss.
      if not callable(loss) and tape is None:
        raise ValueError('`tape` is required when a `Tensor` loss is passed.')
      tape = tape if tape is not None else tf.GradientTape()
      self.is_considered = is_considered
      if callable(loss):
        with tape:
          if not callable(var_list):
            tape.watch(var_list)
          if callable(loss):
            loss = loss()
          if callable(var_list):
            var_list = var_list()
      var_list = tf.nest.flatten(var_list)

      # Compute the per-microbatch losses using helpful jacobian method.
      with tf.keras.backend.name_scope(self._name + '/gradients'):
        #tf.print(microbatch_losses.shape)
        jacobian = tape.jacobian(loss, var_list)
        # print(jacobian)
        # Clip gradients to given l2_norm_clip.
        def clip_gradients(g):
          #tf.print("g:", g)
          #tf.print(tf.linalg.global_norm(g))
          considered_g = [g[i] for i in range(len(g)) if self.is_considered[i]]
          div_scale = tf.linalg.global_norm(considered_g)/self._l2_norm_clip
          if div_scale > 1:
            return [grad/div_scale for grad in g]
          else:
            return g

        clipped_gradients = tf.map_fn(clip_gradients, jacobian)
        print("clipped_gradients:", clipped_gradients)
        
        final_gradients = [tf.reduce_sum(clipped_gradients[i], axis=0) for i in range(len(clipped_gradients))]
        print(final_gradients)
        
        #self.clipped_gradients = clipped_gradients
        #self.final_gradients = final_gradients
        _aggregate_gradients = self._aggregate_gradients.copy()
        #tf.print(_aggregate_gradients)
        _batch_idx = self._batch_idx
        _batch_size = self._batch_size
        
        for i in range(len(final_gradients)):
          _aggregate_gradients[i] = tf.cond(_batch_idx == tf.constant(0), lambda :final_gradients[i], lambda :_aggregate_gradients[i] + final_gradients[i])
        
        for i in range(len(_aggregate_gradients)):
          self._aggregate_gradients[i].assign(_aggregate_gradients[i])
        
        _batch_idx = self._microbatch_size + _batch_idx 
        #tf.print(_batch_idx)
        def noise_normalize_batch(self, g, is_considered, layer_index):
          # Sample the indexes
          sampled_cond = self.samples_cond[layer_index]
          # Add noise to summed gradients.
          noise_stddev = self._l2_norm_clip * self._noise_multiplier
          noise = tf.random.normal(
            tf.shape(input=g), stddev=noise_stddev)
          noised_gradient = tf.add(g, noise)
          fixed_gradient = tf.where(tf.math.logical_and(sampled_cond, is_considered), noised_gradient, g)
          # Normalize by number of microbatches and return.
          return tf.truediv(fixed_gradient, tf.cast(_batch_size, dtype=tf.float32))
        
        noise_normalized_gradients = [noise_normalize_batch(self, _aggregate_gradients[i], self.is_considered[i], i) for i in range(len(_aggregate_gradients))]
        for i in range(len(final_gradients)):
          final_gradients[i] = tf.cond(_batch_idx >= _batch_size, lambda :noise_normalized_gradients[i], lambda :tf.zeros_like(final_gradients[i]))
        _batch_idx = tf.cond(_batch_idx >= _batch_size, lambda : tf.constant(0), lambda :_batch_idx + 0)
      #tf.print(final_gradients)
      self._batch_idx.assign(_batch_idx)
      return list(zip(final_gradients, var_list))

    #_aggregate_gradients: aggregated gradients until now.
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
      assert self._was_dp_gradients_called, (
          'Neither _compute_gradients() or get_gradients() on the '
          'differentially private optimizer was called. This means the '
          'training is not differentially private. It may be the case that '
          'you need to upgrade to TF 2.4 or higher to use this particular '
          'optimizer.')
      return super(FixedDPOptimizerClass,
                   self).apply_gradients(grads_and_vars, global_step, name)
  return FixedDPOptimizerClass

FixedDPKerasSGDOptimizer = make_fixed_keras_optimizer_class(tf.keras.optimizers.SGD)

def compute_epsilon(steps, sampling_probability, noise_multiplier):
  """Computes epsilon value for given hyperparameters."""
  if noise_multiplier == 0.0:
    return float('inf')
  orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
  rdp = compute_rdp(q=sampling_probability,
                    noise_multiplier=noise_multiplier,
                    steps=steps,
                    orders=orders)
  # Delta is set to 1e-5 because MNIST has 60000 training points.
  return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]

class BiasLayer(tf.keras.layers.Layer):
    def __init__(self, *args, **kwargs):
        super(BiasLayer, self).__init__(*args, **kwargs)

    def build(self, input_shape):
        self.bias = self.add_weight('bias',
                                    shape=input_shape[1:],
                                    initializer='zeros',
                                    trainable=True)
    def call(self, x):
        return x + self.bias
    
    
class TiedBiasLayer(tf.keras.layers.Layer):
    def __init__(self, *args, **kwargs):
        super(TiedBiasLayer, self).__init__(*args, **kwargs)

    def build(self, input_shape):
        self.bias = self.add_weight('bias',
                                    shape=input_shape[-1:],
                                    initializer='zeros',
                                    trainable=True)
    def call(self, x):
        return x + self.bias

In [2]:
# Experiment: 2021/03/14
# Usage: Dp analysis of mnist neural network.
# Detail: Check the notion notes in HE-DP project.

dpsgd = False # add dp noise or not 
learning_rate = 0.1
noise_multiplier = 4
l2_norm_clip = 4
batch_size = 512
epochs = 120
microbatch_size = 64
num_parameters = 0
privacy_budget = []
delta = 1e-5  # it is recommended to use delta~=1/dataset_size
model_dir = None

In [3]:
def load_mnist():
  """Loads MNIST and preprocesses to combine training and validation data."""
  train, test = tf.keras.datasets.mnist.load_data()
  train_data, train_labels = train
  test_data, test_labels = test

  train_data = np.array(train_data, dtype=np.float32) / 255
  test_data = np.array(test_data, dtype=np.float32) / 255

  train_data = train_data.reshape((train_data.shape[0], 28, 28, 1))
  test_data = test_data.reshape((test_data.shape[0], 28, 28, 1))

  train_labels = np.array(train_labels, dtype=np.int32)
  test_labels = np.array(test_labels, dtype=np.int32)

  train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10)
  test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10)

  assert train_data.min() == 0.
  assert train_data.max() == 1.
  assert test_data.min() == 0.
  assert test_data.max() == 1.

  return train_data, train_labels, test_data, test_labels

In [4]:
# Perturnbing the input dataset, and collect the accuracy-step
class GaussianNoiseLayer(tf.keras.layers.Layer):
    def __init__(self, l2_norm_clip, noise_multiplier, *args, **kwargs):
        super(GaussianNoiseLayer, self).__init__(*args, **kwargs)
        self._l2_norm_clip = l2_norm_clip
        self._noise_multiplier = noise_multiplier

    def build(self, input_shape):
        pass
    
    def call(self, x):
        # Clip gradients to given l2_norm_clip.
        def clip_features(x):
            return tf.clip_by_global_norm([x], self._l2_norm_clip)[0][0]

        clipped_features = tf.map_fn(clip_features, x)
        
        # Add noise to summed gradients.
        noise_stddev = self._l2_norm_clip * self._noise_multiplier
        noise = tf.random.normal(tf.shape(input=clipped_features), stddev=noise_stddev)
        return clipped_features + noise

In [5]:
# Models 
def build_models(noise_layer_name):
    if noise_layer_name =="mnist+untied_bias+noise_input":
        model = tf.keras.Sequential([
            GaussianNoiseLayer(l2_norm_clip, noise_multiplier, input_shape=(28, 28, 1)),
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None, use_bias=False),
            BiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False),
            BiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(100, activation=None, use_bias=False),
            BiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.Dense(10, activation=None, use_bias=False),
            BiasLayer()
        ])        
    elif noise_layer_name == "mnist+tied_bias+noise_input":
        model = tf.keras.Sequential([
            GaussianNoiseLayer(l2_norm_clip, noise_multiplier, input_shape=(28, 28, 1)),
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None, use_bias=False),
            TiedBiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False),
            TiedBiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(100, activation=None, use_bias=False),
            BiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.Dense(10, activation=None, use_bias=False),
            BiasLayer()
        ])   
    elif noise_layer_name == "mnist+untied_bias":
        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False),
            BiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False),
            BiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(100, activation=None, use_bias=False),
            BiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.Dense(10, activation=None, use_bias=False),
            BiasLayer()
        ])
    elif noise_layer_name == "mnist+tied_bias":
        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False),
            TiedBiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False),
            TiedBiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(100, activation=None, use_bias=False),
            BiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.Dense(10, activation=None, use_bias=False),
            BiasLayer()
        ])
    elif noise_layer_name == "mnist":
        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(100, activation=None, use_bias=False),
            BiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.Dense(10, activation=None, use_bias=False),
            BiasLayer()
        ])  
    elif noise_layer_name == "sphinx+mnist+untied_bias":
        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False, name="Considered1"),
            BiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False, name="Considered2"),
            BiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(100, activation=None, use_bias=False, name="Considered3"),
            BiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.Dense(10, activation=None, use_bias=False, name="Considered4"),
            BiasLayer()
        ])
    elif noise_layer_name == "sphinx+mnist+tied_bias":
        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False, name="Considered1"),
            TiedBiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False, name="Considered2"),
            TiedBiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(100, activation=None, use_bias=False, name="Considered3"),
            BiasLayer(),
            tf.keras.layers.ReLU(),
            tf.keras.layers.Dense(10, activation=None, use_bias=False, name="Considered4"),
            BiasLayer()
        ])
    elif noise_layer_name == "ALLnoise+mnist+untied_bias":
        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False, name="Considered1"),
            BiasLayer(name="Considered2"),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False, name="Considered3"),
            BiasLayer(name="Considered4"),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(100, activation=None, use_bias=False, name="Considered5"),
            BiasLayer(name="Considered6"),
            tf.keras.layers.ReLU(),
            tf.keras.layers.Dense(10, activation=None, use_bias=False, name="Considered7"),
            BiasLayer(name="Considered8")
        ])
    elif noise_layer_name == "ALLnoise+mnist+tied_bias":
        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False, name="Considered1"),
            TiedBiasLayer(name="Considered7"),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Conv2D(16, 5,
                                 strides=1,
                                 padding='valid',
                                 activation=None,
                                 input_shape=(28, 28, 1), use_bias=False, name="Considered2"),
            TiedBiasLayer(name="Considered8"),
            tf.keras.layers.ReLU(),
            tf.keras.layers.MaxPool2D((2,2), 2),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(100, activation=None, use_bias=False, name="Considered3"),
            BiasLayer(name="Considered4"),
            tf.keras.layers.ReLU(),
            tf.keras.layers.Dense(10, activation=None, use_bias=False, name="Considered5"),
            BiasLayer(name="Considered6")
        ])
    elif noise_layer_name == "cifar10":
        pass
    else:
        model = None
    return model

In [6]:
#def main_simple(unused_argv):
logging.set_verbosity(logging.INFO)
if dpsgd and batch_size % microbatch_size != 0:
    raise ValueError('Number of microbatches should divide evenly batch_size')

# Load training and test data.
train_data, train_labels, test_data, test_labels = load_mnist()

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=learning_rate,
    decay_steps=(epochs * train_data.shape[0]) // microbatch_size // 1000,
    decay_rate=0.998)


lr_schedule_no_dp = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=learning_rate,
    decay_steps=(epochs * train_data.shape[0]) // batch_size // 1000,
    decay_rate=0.998)

In [7]:
# check the training accuracy
dpsgd = True
parameter_list = {}
accuracies = {}
#for model_type in ["mnist", "mnist+tied_bias", "mnist+untied_bias"]:
for model_type in ["sphinx+mnist+tied_bias", "ALLnoise+mnist+tied_bias", "mnist+tied_bias+noise_input"]:
    file_name = model_type + "_accuracy"
    if model_type == "mnist+untied_bias+noise_input":
        dpsgd = False
    else:
        dpsgd = True
    model = build_models(model_type)
    if dpsgd:
        optimizer = FixedDPKerasSGDOptimizer(
            batch_size = batch_size,
            num_parameters = num_parameters,
            l2_norm_clip=l2_norm_clip,
            noise_multiplier=noise_multiplier,
            var_list = model.trainable_variables,
            #num_microbatches=batch_size//microbatch_size,
            microbatch_size = microbatch_size,
            learning_rate=lr_schedule)
        # Compute vector of per-example loss rather than its mean over a minibatch.
        loss = tf.keras.losses.CategoricalCrossentropy(
            from_logits=True, reduction=tf.losses.Reduction.NONE)
    else:
        optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule_no_dp)
        loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

    # Compile model with Keras
    checkpoint_filepath = "./dp_data/"+file_name
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
      filepath=checkpoint_filepath,
      save_weights_only=True,
      monitor='val_accuracy',
      mode='max',
      save_best_only=True)
    #early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    ##    monitor='val_loss', min_delta=0, patience=5, verbose=0,
    #    mode='auto', baseline=None, restore_best_weights=True
    #)
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    # Train model with Keras
    if dpsgd:
        history = model.fit(train_data, train_labels,
                epochs=epochs,
                validation_data=(test_data, test_labels),
                batch_size=microbatch_size, 
                callbacks = [model_checkpoint_callback], workers=1) # , early_stopping_callback
    else:
        history = model.fit(train_data, train_labels,
                epochs=epochs,
                validation_data=(test_data, test_labels),
                batch_size=batch_size, 
                callbacks = [model_checkpoint_callback], workers=1) # , early_stopping_callback
    
    #evaluated_result = model.evaluate(
    #    x=test_data, y=test_labels, batch_size=None, verbose=1, sample_weight=None, steps=None,
    #    callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False,
    #    return_dict=True)
    if model_type not in accuracies.keys():
        accuracies[model_type] = []
    accuracies[model_type].append(history)
    #accuracies[model_type].append(evaluated_result["accuracy"])

# Compute the privacy budget expended.
#if dpsgd:
#    eps = compute_epsilon(epochs * 60000 // batch_size)
#    print('For delta=1e-5, the current epsilon is: %.2f' % eps)
#else:
#    print('Trained with vanilla non-private SGD optimizer')

Epoch 1/120
compute the gradient
[True, False, True, False, True, False, True, False]
clipped_gradients: [<tf.Tensor 'SGD/gradients/map/TensorArrayV2Stack/TensorListStack:0' shape=(None, 5, 5, 1, 16) dtype=float32>, <tf.Tensor 'SGD/gradients/map/TensorArrayV2Stack_1/TensorListStack:0' shape=(None, 16) dtype=float32>, <tf.Tensor 'SGD/gradients/map/TensorArrayV2Stack_2/TensorListStack:0' shape=(None, 5, 5, 16, 16) dtype=float32>, <tf.Tensor 'SGD/gradients/map/TensorArrayV2Stack_3/TensorListStack:0' shape=(None, 16) dtype=float32>, <tf.Tensor 'SGD/gradients/map/TensorArrayV2Stack_4/TensorListStack:0' shape=(None, 256, 100) dtype=float32>, <tf.Tensor 'SGD/gradients/map/TensorArrayV2Stack_5/TensorListStack:0' shape=(None, 100) dtype=float32>, <tf.Tensor 'SGD/gradients/map/TensorArrayV2Stack_6/TensorListStack:0' shape=(None, 100, 10) dtype=float32>, <tf.Tensor 'SGD/gradients/map/TensorArrayV2Stack_7/TensorListStack:0' shape=(None, 10) dtype=float32>]
[<tf.Tensor 'SGD/gradients/Sum:0' shape=(

UnknownError: 2 root error(s) found.
  (0) Unknown:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node sequential/Considered1/Conv2D (defined at <ipython-input-7-75e03114ae70>:49) ]]
	 [[SGD/gradients/GreaterEqual_3/_92]]
  (1) Unknown:  Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node sequential/Considered1/Conv2D (defined at <ipython-input-7-75e03114ae70>:49) ]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_4070]

Function call stack:
train_function -> train_function


In [None]:
accuracies

In [None]:
# Perturbed label
def clip_noise(x, clip, noise_multiplier):
    def clip_features(x):
        return tf.clip_by_global_norm([x], clip)[0][0]
    clipped_features = tf.map_fn(clip_features, x)
    # Add noise to summed gradients.
    noise_stddev = clip * noise_multiplier
    noise = tf.random.normal(tf.shape(input=clipped_features), stddev=noise_stddev)
    return clipped_features + noise

noisy_train_data = tf.map_fn(lambda x: clip_noise(x, l2_norm_clip, noise_multiplier), train_data)

In [None]:
# check the training accuracy

parameter_list = {}
model_type = "noisy_permanent_input"
dpsgd = False
file_name = model_type + "_accuracy"
model = build_models("mnist+untied_bias")
if dpsgd:
    optimizer = FixedDPKerasSGDOptimizer(
        batch_size = batch_size,
        num_parameters = num_parameters,
        l2_norm_clip=l2_norm_clip,
        noise_multiplier=noise_multiplier,
        var_list = model.trainable_variables,
        #num_microbatches=batch_size//microbatch_size,
        microbatch_size = microbatch_size,
        learning_rate=learning_rate)
    # Compute vector of per-example loss rather than its mean over a minibatch.
    loss = tf.keras.losses.CategoricalCrossentropy(
        from_logits=True, reduction=tf.losses.Reduction.NONE)
else:
    optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

# Compile model with Keras
checkpoint_filepath = "./dp_data/"+file_name
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
  filepath=checkpoint_filepath,
  save_weights_only=True,
  monitor='val_accuracy',
  mode='max',
  save_best_only=True)
#early_stopping_callback = tf.keras.callbacks.EarlyStopping(
##    monitor='val_loss', min_delta=0, patience=5, verbose=0,
#    mode='auto', baseline=None, restore_best_weights=True
#)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
# Train model with Keras
if dpsgd:
    history = model.fit(noisy_train_data, train_labels,
            epochs=epochs,
            validation_data=(test_data, test_labels),
            batch_size=microbatch_size, 
            callbacks = [model_checkpoint_callback], workers=1) # , early_stopping_callback
else:
    history = model.fit(noisy_train_data, train_labels,
            epochs=epochs,
            validation_data=(test_data, test_labels),
            batch_size=batch_size, 
            callbacks = [model_checkpoint_callback], workers=1) # , early_stopping_callback

#evaluated_result = model.evaluate(
#    x=test_data, y=test_labels, batch_size=None, verbose=1, sample_weight=None, steps=None,
#    callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False,
#    return_dict=True)
if model_type not in accuracies.keys():
    accuracies[model_type] = []
accuracies[model_type].append(history)
#accuracies[model_type].append(evaluated_result["accuracy"])

In [None]:
accuracies

In [None]:
processed_accuracies = dict()
for key in accuracies.keys():   
    processed_accuracies[key] = accuracies[key][0].history

In [None]:
import matplotlib.pyplot as plt
#plt.plot(processed_accuracies['sphinx+mnist+tied_bias']['accuracy'])
plt.plot(processed_accuracies['sphinx+mnist+tied_bias']['val_accuracy'])
#plt.plot(processed_accuracies['ALLnoise+mnist+tied_bias']['accuracy'])
plt.plot(processed_accuracies['ALLnoise+mnist+tied_bias']['val_accuracy'])
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
import pickle
file = open("dp_data/results/accuracies_%.1f_%.1f_%.1f_%d"%(learning_rate, noise_multiplier, l2_norm_clip, batch_size),"wb")
pickle.dump(processed_accuracies, file)
file.close()