In [1]:
import numpy as np

# Load public and private datasets

In [2]:
import keras
from keras.utils import to_categorical

Using TensorFlow backend.


In [3]:
def preprocess_data(dataset, num_classes=5, image_shape=(28, 28, 1)):
    """Turn raw image/label arrays into model-ready tensors.

    Images are reshaped to ``(samples,) + image_shape``, converted to
    ``float32`` and divided by 255. Labels are one-hot encoded with
    ``to_categorical``.

    Args:
        dataset: nested tuple ``((x_train, y_train), (x_test, y_test))``.
        num_classes: length of each one-hot label vector (default 5, the
            value that was previously hard-coded).
        image_shape: per-sample shape the images are reshaped to. The
            default ``(28, 28, 1)`` is the channels-last layout used by
            Tensorflow; other backends may differ.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` with the same nesting as
        the input, holding the converted arrays.
    """
    (x_train, y_train), (x_test, y_test) = dataset

    def prepare_images(images):
        # Keep the sample axis and impose the requested per-sample shape.
        images = images.reshape((images.shape[0],) + tuple(image_shape))
        # astype() returns a new array, so the in-place division below never
        # modifies the caller's data.
        images = images.astype('float32')
        images /= 255
        return images

    def prepare_labels(labels):
        return to_categorical(labels, num_classes)

    return (
        (prepare_images(x_train), prepare_labels(y_train)),
        (prepare_images(x_test), prepare_labels(y_test)),
    )

def load_data():
    """Load MNIST and split it by digit into a public and a private dataset.

    Samples labelled 0-4 form the public dataset. Samples labelled 5-9 form
    the private dataset, with their labels shifted down by 5 so that both
    datasets use labels in the range 0-4.

    Returns:
        A pair ``(public, private)``; each element is the result of passing
        ``((x_train, y_train), (x_test, y_test))`` through
        ``preprocess_data``.
    """
    (train_images, train_labels), (test_images, test_labels) = \
        keras.datasets.mnist.load_data()

    # Public part: digits below 5, labels kept as they are.
    train_is_public = train_labels < 5
    test_is_public = test_labels < 5
    public_dataset = (
        (train_images[train_is_public], train_labels[train_is_public]),
        (test_images[test_is_public], test_labels[test_is_public]),
    )

    # Private part: digits 5 and above, relabelled to start at 0.
    train_is_private = train_labels >= 5
    test_is_private = test_labels >= 5
    private_dataset = (
        (train_images[train_is_private], train_labels[train_is_private] - 5),
        (test_images[test_is_private], test_labels[test_is_private] - 5),
    )

    return preprocess_data(public_dataset), preprocess_data(private_dataset)

In [4]:
# Build both datasets in one call: MNIST digits 0-4 form the public dataset,
# digits 5-9 (relabelled 0-4) form the private one. Each is a preprocessed
# ((x_train, y_train), (x_test, y_test)) tuple.
public_dataset, private_dataset = load_data()

# Pre-train on public dataset

In [5]:
# Training and validation splits of the public dataset.
(x_train, y_train), (x_test, y_test) = public_dataset

# Convolutional stack ending in Flatten; its output is the feature vector.
feature_layers = [
    keras.layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        padding='same',
        input_shape=(28, 28, 1),
    ),
    keras.layers.Activation('sigmoid'),
    keras.layers.Conv2D(filters=32, kernel_size=(3, 3), padding='same'),
    keras.layers.Activation('sigmoid'),
    keras.layers.AveragePooling2D(pool_size=(2, 2)),
    keras.layers.Dropout(rate=0.25),
    keras.layers.Flatten(),
]

# Dense head mapping the flattened features to the 5 output classes.
classification_layers = [
    keras.layers.Dense(units=128),
    keras.layers.Activation('sigmoid'),
    keras.layers.Dropout(rate=0.50),
    keras.layers.Dense(units=5),
    keras.layers.Activation('softmax'),
]

model = keras.models.Sequential(feature_layers + classification_layers)

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the public training split, validating on the public test split
# after every epoch.
model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=5,
    verbose=1,
    validation_data=(x_test, y_test),
)

Train on 30596 samples, validate on 5139 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f4963edf4e0>

# Extract features from private data (unencrypted for now)

In [6]:
# Print the layer-by-layer summary (names, output shapes, parameter counts)
# to inspect the network; the output of the Flatten layer is what gets
# extracted as features further down.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
activation_1 (Activation)    (None, 28, 28, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 28, 28, 32)        0         
_________________________________________________________________
average_pooling2d_1 (Average (None, 14, 14, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 14, 14, 32)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 6272)              0         
__________

In [7]:
# Locate the Flatten layer by type rather than by a hard-coded index: the
# position returned by get_layer(index=...) depends on whether the installed
# Keras version counts an implicit input layer, so a fixed index can silently
# point at a different layer (or fail) after an upgrade.
flatten_layer = next(
    (layer for layer in model.layers if isinstance(layer, keras.layers.Flatten)),
    None,
)
assert flatten_layer is not None, 'model has no Flatten layer to extract features from'

# Sub-model sharing the trained weights: maps input images to the flattened
# feature vector produced just before the classification layers.
extractor = keras.models.Model(
    inputs=model.input,
    outputs=flatten_layer.output
)

In [8]:
# From here on y_train / y_test refer to the private labels.
(x_train_images, y_train), (x_test_images, y_test) = private_dataset

# Run the private images through the feature extractor, train split first.
x_train_features, x_test_features = (
    extractor.predict(images)
    for images in (x_train_images, x_test_images)
)

# Save extracted features for use in fine-tuning

In [9]:
# Write the extracted features and their one-hot labels to .npy files in the
# current working directory, one file per array.
np.save(file='x_train_features.npy', arr=x_train_features)
np.save(file='y_train.npy', arr=y_train)

np.save(file='x_test_features.npy', arr=x_test_features)
np.save(file='y_test.npy', arr=y_test)