In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import math

n_teachers = 3
n_instances = 5000

# load data and transform it
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()


x_train = x_train.astype( float ) / 255.
x_test = x_test.astype( float ) / 255.

y_train = keras.utils.to_categorical( y_train )
y_test = keras.utils.to_categorical( y_test )

# shuffle data
idx = np.arange( len( x_train ) )
np.random.shuffle( idx )
x_train = x_train[ idx ]
y_train = y_train[ idx ]

# gather the teacher data
teacher_data_x = [ x_train[ i * n_instances : ( i + 1 ) * n_instances ] for i in range( n_teachers ) ]
teacher_data_y = [ y_train[ i * n_instances : ( i + 1 ) * n_instances ] for i in range( n_teachers ) ]

# gather the student data
student_data_x = x_train[ n_teachers * n_instances : ( n_teachers + 1 ) * n_instances ]  
student_data_y = y_train[ n_teachers * n_instances : ( n_teachers + 1 ) * n_instances ]  

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz


In [2]:
# train the teacher models
def get_model():
  model = keras.models.Sequential()
  model.add( keras.layers.Conv2D( 32, 3, 2, activation='relu', input_shape=x_train.shape[ 1: ] ) )
  model.add( keras.layers.MaxPooling2D( ) )
  model.add( keras.layers.Conv2D( 16, 3, 2, activation='relu' ) )
  model.add( keras.layers.Flatten() )
  model.add( keras.layers.Dense(32, activation='relu') )
  model.add( keras.layers.Dense(100, activation='softmax') )

  model.compile( optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'] )

  return model

# list of teacher models
teacher_models = [ get_model() for _ in range( n_teachers ) ]

# train teacher models
for i, (model, x, y) in enumerate( zip( teacher_models, teacher_data_x, teacher_data_y ) ):
  print( 'teacher', i )
  model.fit( x, y, epochs=25, verbose=0 )
  print( 'test accuracy:', model.evaluate( x_test, y_test, verbose=0 )[ 1 ] )

teacher 0
test accuracy: 0.15080000460147858
teacher 1
test accuracy: 0.1526000052690506
teacher 2
test accuracy: 0.15719999372959137


In [3]:
# label the data
labels = [ teacher.predict( student_data_x ) for teacher in teacher_models ]

# preform the voting
votes = np.zeros( ( student_data_x.shape[ 0 ], 100 ), dtype=np.float )
for i in range( len( student_data_x ) ):
  for j in range( n_teachers ):
    label = np.argmax( labels[ j ][ i ] )
    votes[ i, label ] += 1
  # add the noise per class
  for j in range( 100 ):
    votes[ i, j ] += np.random.laplace(loc=0.0, scale=5 )

student_data_y = keras.utils.to_categorical( np.argmax( votes, axis=1 ) )

In [4]:
# train model
student_model = get_model()
print( 'training student model' )
student_model.fit( student_data_x, student_data_y, epochs=25, verbose=0 )
print( 'test accuracy:', student_model.evaluate( student_data_x, student_data_y, verbose=0 )[ 1 ] )

training student model
test accuracy: 0.09179999679327011


In [5]:
!pip install syft==0.2.9

Collecting syft==0.2.9
  Downloading syft-0.2.9-py3-none-any.whl (433 kB)
[K     |████████████████████████████████| 433 kB 12.5 MB/s 
[?25hCollecting requests~=2.22.0
  Downloading requests-2.22.0-py2.py3-none-any.whl (57 kB)
[K     |████████████████████████████████| 57 kB 5.7 MB/s 
[?25hCollecting websocket-client~=0.57.0
  Downloading websocket_client-0.57.0-py2.py3-none-any.whl (200 kB)
[K     |████████████████████████████████| 200 kB 48.0 MB/s 
[?25hCollecting numpy~=1.18.1
  Downloading numpy-1.18.5-cp37-cp37m-manylinux1_x86_64.whl (20.1 MB)
[K     |████████████████████████████████| 20.1 MB 1.4 MB/s 
[?25hCollecting notebook==5.7.8
  Downloading notebook-5.7.8-py2.py3-none-any.whl (9.0 MB)
[K     |████████████████████████████████| 9.0 MB 26.0 MB/s 
[?25hCollecting websockets~=8.1.0
  Downloading websockets-8.1-cp37-cp37m-manylinux2010_x86_64.whl (79 kB)
[K     |████████████████████████████████| 79 kB 7.1 MB/s 
[?25hCollecting aiortc==0.9.28
  Downloading aiortc-0.9.28-

In [6]:
# privacy analysis 
from syft.frameworks.torch.dp import pate


teacher_preds = np.argmax( np.array( labels ), axis=2 )
print( teacher_preds.shape )

data_dep_eps, data_indep_eps = pate.perform_analysis( teacher_preds=teacher_preds,
                                                      indices=np.argmax( votes, axis=1 ),
                                                      noise_eps=0.01,
                                                      delta=10/1500
                                                     )

print(data_dep_eps, data_indep_eps)

(3, 5000)
5.505317647047939 5.505317647048129
