#### Import packages

In [1]:
import keras
import pandas as pd
import numpy as np
from keras.datasets import mnist


from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

from keras.models import Sequential
from keras.utils import np_utils
from keras.layers import Dense, Dropout, GaussianNoise, Conv1D
from keras.preprocessing.image import ImageDataGenerator

import matplotlib.pyplot as plt

Using TensorFlow backend.


#### Data import and cleaning

In [2]:
# Read in the MNIST dataset (train/test images plus integer digit labels).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize pixel values by dividing by 255.
x_train = x_train / 255
x_test = x_test / 255

# Flat (n_samples, 28*28) views for the non-convolutional models. The sample
# count is taken from the arrays themselves rather than hard-coded, so the
# cell keeps working if the data is subsampled.
x_train_flat = x_train.reshape(len(x_train), 28 * 28)
x_test_flat = x_test.reshape(len(x_test), 28 * 28)

# One-hot encode the targets. The class count is fixed explicitly so the
# train and test encodings always have the same width (10 digit classes,
# matching the 10-unit softmax output layer used further down) instead of
# being inferred separately from whichever labels occur in each split.
y_train = keras.utils.to_categorical(y_train, num_classes=10, dtype='float32')
y_test = keras.utils.to_categorical(y_test, num_classes=10, dtype='float32')

#### Baseline model: PCA features + dense neural network

In [3]:
# Mini-batch ("lot") size; passed as `batch_size` to `model.fit` further down.
lot_size = 600

In [4]:
## PCA LAYER
# Number of principal components kept; also the input width of the dense net.
n_components = 60

# random_state is pinned so the projection is reproducible across kernel
# restarts: for a matrix this large scikit-learn's automatic solver choice
# may select the randomized SVD, which is otherwise seeded differently on
# every run.
pca = PCA(n_components=n_components, random_state=0)

# Fit the projection on the training set only, then apply the same
# projection to the test set.
x_pca_train = pca.fit_transform(x_train_flat)
x_pca_test = pca.transform(x_test_flat)

# Standard deviation over all entries of the projected training data
# (np.std without an axis reduces over the flattened array).
pca_std = np.std(x_pca_train)

In [5]:
# Dense classifier on top of the PCA features:
# n_components inputs -> `units` ReLU units -> 10-way softmax.
layers = 1  # not referenced anywhere else in the visible notebook
units = 1000

model = Sequential([
    Dense(units, activation='relu', input_dim=n_components),
    Dense(10, activation='softmax'),
])

# Targets are one-hot encoded, hence the categorical loss and metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

Instructions for updating:
Colocations handled automatically by placer.


In [6]:
# Print the per-layer output shapes and parameter counts of the model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 1000)              61000     
_________________________________________________________________
dense_2 (Dense)              (None, 10)                10010     
Total params: 71,010
Trainable params: 71,010
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Train on the PCA features. validation_split=0.2 holds out 20% of the
# training data for validation (48000 train / 12000 validation samples per
# the log below); verbose=2 prints one line per epoch.
# NOTE(review): in the captured log the training loss keeps falling to ~1e-7
# while val_loss rises from ~0.07 (epoch 7) to ~0.11 (epochs 66+), i.e. the
# model overfits long before epoch 100 -- consider fewer epochs or an
# early-stopping callback.
model.fit(x_pca_train, y_train, epochs=100, batch_size=lot_size, validation_split=0.2, verbose=2)

Instructions for updating:
Use tf.cast instead.
Train on 48000 samples, validate on 12000 samples
Epoch 1/100
 - 1s - loss: 0.4957 - categorical_accuracy: 0.8911 - val_loss: 0.2079 - val_categorical_accuracy: 0.9419
Epoch 2/100
 - 0s - loss: 0.1725 - categorical_accuracy: 0.9511 - val_loss: 0.1379 - val_categorical_accuracy: 0.9622
Epoch 3/100
 - 1s - loss: 0.1170 - categorical_accuracy: 0.9674 - val_loss: 0.1105 - val_categorical_accuracy: 0.9691
Epoch 4/100
 - 1s - loss: 0.0874 - categorical_accuracy: 0.9766 - val_loss: 0.0922 - val_categorical_accuracy: 0.9744
Epoch 5/100
 - 1s - loss: 0.0685 - categorical_accuracy: 0.9819 - val_loss: 0.0815 - val_categorical_accuracy: 0.9778
Epoch 6/100
 - 0s - loss: 0.0556 - categorical_accuracy: 0.9850 - val_loss: 0.0762 - val_categorical_accuracy: 0.9781
Epoch 7/100
 - 0s - loss: 0.0457 - categorical_accuracy: 0.9883 - val_loss: 0.0707 - val_categorical_accuracy: 0.9804
Epoch 8/100
 - 0s - loss: 0.0381 - categorical_accuracy: 0.9904 - val_loss: 

Epoch 66/100
 - 0s - loss: 6.6062e-07 - categorical_accuracy: 1.0000 - val_loss: 0.1111 - val_categorical_accuracy: 0.9837
Epoch 67/100
 - 0s - loss: 6.1164e-07 - categorical_accuracy: 1.0000 - val_loss: 0.1108 - val_categorical_accuracy: 0.9838
Epoch 68/100
 - 0s - loss: 5.1504e-07 - categorical_accuracy: 1.0000 - val_loss: 0.1119 - val_categorical_accuracy: 0.9831
Epoch 69/100
 - 0s - loss: 4.9787e-07 - categorical_accuracy: 1.0000 - val_loss: 0.1120 - val_categorical_accuracy: 0.9831
Epoch 70/100
 - 0s - loss: 4.5290e-07 - categorical_accuracy: 1.0000 - val_loss: 0.1124 - val_categorical_accuracy: 0.9833
Epoch 71/100
 - 0s - loss: 4.1279e-07 - categorical_accuracy: 1.0000 - val_loss: 0.1122 - val_categorical_accuracy: 0.9833
Epoch 72/100
 - 0s - loss: 3.9634e-07 - categorical_accuracy: 1.0000 - val_loss: 0.1133 - val_categorical_accuracy: 0.9833
Epoch 73/100
 - 0s - loss: 3.7482e-07 - categorical_accuracy: 1.0000 - val_loss: 0.1137 - val_categorical_accuracy: 0.9831
Epoch 74/100
 - 

<keras.callbacks.History at 0x104481f60>

In [8]:
# Score the trained model on the PCA-projected test set. The result is
# [loss, categorical_accuracy], following the loss/metrics given to compile().
model.evaluate(x_pca_test, y_test)



[0.10449246602895096, 0.985]

#### Import DP optimizers - straight from tutorial

In [7]:
# import os
# os.chdir('../../../privacy')
# os.getcwd()
from privacy import analysis
import sys


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



In [8]:
import tensorflow as tf
from privacy.analysis import privacy_ledger
from privacy.analysis.rdp_accountant import compute_rdp_from_ledger
from privacy.analysis.rdp_accountant import get_privacy_spent
from privacy.optimizers import dp_optimizer

In [9]:
# Show the installed TensorFlow version. `tf.__version__` is used instead of
# `tf.VERSION` because the latter only exists in the TF 1.x API.
tf.__version__

'1.13.1'

In [10]:
# Compatibility with tf 1 and 2 APIs: use the TF 1.x optimizer class when it
# exists, otherwise fall back to the TF 2.x SGD optimizer.
try:
    GradientDescentOptimizer = tf.train.GradientDescentOptimizer
except AttributeError:
    # Only a missing attribute means "this is not the TF 1.x API"; any other
    # error is a real problem and must not be swallowed.
    GradientDescentOptimizer = tf.optimizers.SGD  # pylint: disable=invalid-name


In [11]:
# Experiment configuration, declared as TensorFlow flags.
flags = tf.app.flags

# Training mode.
flags.DEFINE_boolean(
    'dpsgd', True,
    'If True, train with DP-SGD. If False, train with vanilla SGD.')

# Optimizer and differential-privacy hyperparameters.
flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training')
flags.DEFINE_float(
    'noise_multiplier', 1.1,
    'Ratio of the standard deviation to the clipping norm')
flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')

# Batching and training length.
flags.DEFINE_integer('batch_size', 256, 'Batch size')
flags.DEFINE_integer('epochs', 60, 'Number of epochs')
flags.DEFINE_integer(
    'microbatches', 256,
    'Number of microbatches (must evenly divide batch_size)')

# Output location.
flags.DEFINE_string('model_dir', None, 'Model directory')

# NOTE(review): presumably declared so that the `-f <connection file>`
# argument Jupyter passes to the kernel is accepted when the flags are
# parsed -- confirm before removing.
flags.DEFINE_string('f', '', 'kernel')

# Handle through which the flag values are read in the rest of the notebook.
FLAGS = flags.FLAGS

In [14]:
# Sanity check: display the configured noise multiplier (declared with default 1.1).
FLAGS.noise_multiplier

1.1

In [15]:
class EpsilonPrintingTrainingHook(tf.train.SessionRunHook):
    """Session hook that prints the privacy budget (epsilon) spent so far.

    The hook keeps the ledger's sample and query tensors and, when the
    session ends, evaluates them, converts the ledger to RDP values and
    prints the resulting epsilon for delta = 1e-5.
    """

    def __init__(self, ledger):
        """Initializes the EpsilonPrintingTrainingHook.

        Args:
          ledger: The privacy ledger; its unformatted (samples, queries)
            pair is stored for evaluation at the end of the session.
        """
        unformatted = ledger.get_unformatted_ledger()
        self._samples, self._queries = unformatted

    def end(self, session):
        """Computes and prints the current epsilon from the ledger contents.

        Args:
          session: The session used to evaluate the ledger tensors.
        """
        # Orders at which the RDP is evaluated: 1.1, 1.2, ..., 10.9 followed
        # by the integers 12..63.
        fractional_orders = [1 + x / 10.0 for x in range(1, 100)]
        integer_orders = list(range(12, 64))
        orders = fractional_orders + integer_orders

        # Materialize the ledger tensors (samples first, then queries).
        ledger_entries = privacy_ledger.format_ledger(
            session.run(self._samples),
            session.run(self._queries))

        rdp = compute_rdp_from_ledger(ledger_entries, orders)
        # Only the first element of the result (epsilon) is reported.
        epsilon = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
        print('For delta=1e-5, the current epsilon is: %.2f' % epsilon)

In [16]:
def cnn_model_fn(features, labels, mode):
    """Model function for a CNN, in the form expected by tf.estimator.Estimator.

    Args:
      features: dict whose 'x' entry holds the input images; it is reshaped
        to [-1, 28, 28, 1] before the first convolution.
      labels: integer class ids (the loss is the sparse softmax
        cross-entropy). Not used in PREDICT mode, where it may be None.
      mode: one of tf.estimator.ModeKeys.{TRAIN, EVAL, PREDICT}.

    Returns:
      A tf.estimator.EstimatorSpec for the requested mode:
        * PREDICT: predictions only ('classes' and 'probabilities').
        * TRAIN: scalar loss, train op and training hooks.
        * EVAL: scalar loss and an 'accuracy' metric.

    Raises:
      ValueError: if `mode` is not one of the three supported modes.
    """

    # Define CNN architecture using tf.keras.layers. Layers are called
    # directly (equivalent to the older `.apply(...)` method).
    input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
    y = tf.keras.layers.Conv2D(16, 8,
                               strides=2,
                               padding='same',
                               activation='relu')(input_layer)
    y = tf.keras.layers.MaxPool2D(2, 1)(y)
    y = tf.keras.layers.Conv2D(32, 4,
                               strides=2,
                               padding='valid',
                               activation='relu')(y)
    y = tf.keras.layers.MaxPool2D(2, 1)(y)
    y = tf.keras.layers.Flatten()(y)
    y = tf.keras.layers.Dense(32, activation='relu')(y)
    logits = tf.keras.layers.Dense(10)(y)

    # Prediction needs neither labels nor a loss, so return before either is
    # touched. Previously this mode fell through to the final return and
    # failed on the undefined `eval_metric_ops`.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'classes': tf.argmax(input=logits, axis=1),
            'probabilities': tf.nn.softmax(logits),
        }
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss as a vector (to support microbatches in DP-SGD).
    vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    # Define mean of loss across minibatch (for reporting through tf.Estimator).
    scalar_loss = tf.reduce_mean(vector_loss)

    # Configure the training op (for TRAIN mode).
    if mode == tf.estimator.ModeKeys.TRAIN:

        if FLAGS.dpsgd:
            # The ledger records the sampling/query history that the
            # epsilon-printing hook later turns into a privacy estimate.
            # 60000 is the hard-coded training-set size.
            ledger = privacy_ledger.PrivacyLedger(
                population_size=60000,
                selection_probability=(FLAGS.batch_size / 60000),
                max_samples=1e6,
                max_queries=1e6)

            # Use DP version of GradientDescentOptimizer. Other optimizers are
            # available in dp_optimizer. Most optimizers inheriting from
            # tf.train.Optimizer should be wrappable in differentially private
            # counterparts by calling dp_optimizer.optimizer_from_args().
            optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
                l2_norm_clip=FLAGS.l2_norm_clip,
                noise_multiplier=FLAGS.noise_multiplier,
                num_microbatches=FLAGS.microbatches,
                ledger=ledger,
                learning_rate=FLAGS.learning_rate)
            training_hooks = [
                EpsilonPrintingTrainingHook(ledger)
            ]
            # The DP optimizer is given the per-example loss vector.
            opt_loss = vector_loss
        else:
            optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
            training_hooks = []
            opt_loss = scalar_loss

        global_step = tf.train.get_global_step()
        train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
        # In the following, we pass the mean of the loss (scalar_loss) rather than
        # the vector_loss because tf.estimator requires a scalar loss. This is only
        # used for evaluation and debugging by tf.estimator. The actual loss being
        # minimized is opt_loss defined above and passed to optimizer.minimize().
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=scalar_loss,
                                          train_op=train_op,
                                          training_hooks=training_hooks)

    # Add evaluation metrics (for EVAL mode).
    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metric_ops = {
            'accuracy':
                tf.metrics.accuracy(
                    labels=labels,
                    predictions=tf.argmax(input=logits, axis=1))
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=scalar_loss,
                                          eval_metric_ops=eval_metric_ops)

    # Fail loudly instead of with an UnboundLocalError further down.
    raise ValueError('Unsupported estimator mode: %r' % (mode,))

In [17]:
def load_mnist():
    """Loads MNIST through tf.keras and converts it to estimator-ready arrays.

    Images are cast to float32 and divided by 255; labels are cast to int32.
    The value range of the images and the rank of the label arrays are
    checked with assertions before returning.

    Returns:
      Tuple (train_data, train_labels, test_data, test_labels).
    """
    (train_data, train_labels), (test_data, test_labels) = (
        tf.keras.datasets.mnist.load_data())

    # Images: float32, scaled by 1/255.
    train_data = np.asarray(train_data, dtype=np.float32) / 255
    test_data = np.asarray(test_data, dtype=np.float32) / 255

    # Labels: int32 class ids.
    train_labels = np.asarray(train_labels, dtype=np.int32)
    test_labels = np.asarray(test_labels, dtype=np.int32)

    # Sanity checks: pixel values span exactly [0, 1] ...
    for images in (train_data, test_data):
        assert images.min() == 0.
        assert images.max() == 1.
    # ... and labels are flat vectors.
    for label_array in (train_labels, test_labels):
        assert label_array.ndim == 1

    return train_data, train_labels, test_data, test_labels

In [18]:
# Sanity check: confirm whether DP-SGD training is enabled (flag default is True).
print(flags.FLAGS.dpsgd)

True


In [19]:
# Show the installed TensorFlow version. `tf.__version__` is used instead of
# `tf.VERSION` because the latter only exists in the TF 1.x API.
tf.__version__

'1.13.1'

In [20]:
# Load training and test data. These names were previously used without ever
# being defined in the notebook, so the cell failed on a fresh kernel.
train_data, train_labels, test_data, test_labels = load_mnist()

# Instantiate the tf.Estimator around the CNN model function.
mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
                                          model_dir=FLAGS.model_dir)

# Create tf.Estimator input functions for the training and test data.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': train_data},
    y=train_labels,
    batch_size=FLAGS.batch_size,
    num_epochs=FLAGS.epochs,
    shuffle=True)
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': test_data},
    y=test_labels,
    num_epochs=1,
    shuffle=False)

# Training loop. The number of steps per epoch is derived from the loaded
# data rather than a hard-coded sample count.
steps_per_epoch = len(train_data) // FLAGS.batch_size
for epoch in range(1, FLAGS.epochs + 1):
    # Train the model for one epoch. With DP-SGD enabled, the training hook
    # prints the current epsilon at the end of each of these calls.
    mnist_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch)

# Evaluate the model once after all epochs and print results. The epoch count
# is read from FLAGS rather than from the leaked loop variable, which would
# be undefined if FLAGS.epochs were 0.
eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
test_accuracy = eval_results['accuracy']
print('Test accuracy after %d epochs is: %.3f' % (FLAGS.epochs, test_accuracy))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/y_/1vlns4rd4cz4jg4szvwt_22h0000gn/T/tmprsxhv3xv', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a2a638860>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


'_ExecutionSignature' object has no attribute 'name'
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
'_ExecutionSignature' object has no attribute 'name'
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/y_/1vlns4rd4cz4jg4szvwt_22h0000gn/T/tmprsxhv3xv/model.ckpt.
'_ExecutionSignature' object has no attribute 'name'
INFO:tensorflow:loss = 2.3165293, step = 1


KeyboardInterrupt: 