In [7]:
import numpy as np

# Seed NumPy's global random number generator so NumPy-based randomness repeats across runs.
# NOTE(review): this presumably does not seed the Keras backend's own RNG — confirm if full reproducibility is needed.
np.random.seed(42)

In [8]:
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Activation, Conv2D, MaxPooling2D, AveragePooling2D, Dropout, Flatten, Dense
from keras.optimizers import SGD

# Data loading

In [9]:
# Number of label classes per dataset; used as the one-hot width in preprocess_data
# and as the size of the final Dense layer of every model below.
NUM_CLASSES = 5

In [10]:
def preprocess_data(dataset):
    """Reshape, rescale and one-hot encode a pair of (train, test) splits.

    Args:
        dataset: ``((x_train, y_train), (x_test, y_test))``.

    Returns:
        A pair with the same nested layout. Images are reshaped to
        ``(n, 28, 28, 1)``, cast to ``float32`` and divided by 255; labels
        are passed through ``to_categorical`` with ``NUM_CLASSES`` columns.
    """
    train_split, test_split = dataset

    def _prepare(split):
        # Apply the identical image/label transformation to a single split.
        images, labels = split
        # NOTE: this is the shape used by Tensorflow; other backends may differ
        images = images.reshape(images.shape[0], 28, 28, 1)
        images = images.astype('float32') / 255
        return images, to_categorical(labels, NUM_CLASSES)

    return _prepare(train_split), _prepare(test_split)

In [11]:
def load_data():
    """Load MNIST and split it by label into a public and a private dataset.

    Samples with label below 5 form the public dataset. Samples with label 5
    or above form the private dataset, with 5 subtracted from every label so
    that both datasets use labels 0-4. Each dataset is run through
    ``preprocess_data``.

    Returns:
        ``(public_dataset, private_dataset)``, each laid out as
        ``((x_train, y_train), (x_test, y_test))``.
    """
    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    # Masks selecting the public (label < 5) samples; the complement selects
    # the private (label >= 5) samples.
    train_is_public = train_labels < 5
    test_is_public = test_labels < 5

    public_dataset = (
        (train_images[train_is_public], train_labels[train_is_public]),
        (test_images[test_is_public], test_labels[test_is_public]),
    )
    private_dataset = (
        (train_images[~train_is_public], train_labels[~train_is_public] - 5),
        (test_images[~test_is_public], test_labels[~test_is_public] - 5),
    )

    return preprocess_data(public_dataset), preprocess_data(private_dataset)

# Full training

The network below is adapted from the Keras example at https://github.com/fchollet/keras/blob/master/examples/mnist_transfer_cnn.py 

See also the related Keras example at https://github.com/fchollet/keras/blob/master/examples/mnist_cnn.py

## Original

In [13]:
# Baseline: ReLU activations, max pooling, cross-entropy loss and the Adam
# optimizer, trained on the private dataset only.
_, private_dataset = load_data()
x_train, y_train = private_dataset[0]
x_test, y_test = private_dataset[1]

# Convolutional feature extractor.
feature_layers = [
    Conv2D(filters=32, kernel_size=(3, 3), padding='same', input_shape=(28, 28, 1)),
    Activation('relu'),
    Conv2D(filters=32, kernel_size=(3, 3), padding='same'),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    Flatten(),
]

# Dense classifier head with one softmax output per class.
classification_layers = [
    Dense(units=128),
    Activation('relu'),
    Dropout(rate=0.50),
    Dense(units=NUM_CLASSES),
    Activation('softmax'),
]

model = Sequential(layers=feature_layers + classification_layers)

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=1,
    verbose=1,
    validation_data=(x_test, y_test),
)

Train on 29404 samples, validate on 4861 samples
Epoch 1/1


<keras.callbacks.History at 0xb1906ab10>

## Simplifying the optimizer: switching to SGD

In [7]:
# Same architecture as the baseline (ReLU + max pooling), but optimised with
# plain SGD instead of Adam.
_, private_dataset = load_data()
x_train, y_train = private_dataset[0]
x_test, y_test = private_dataset[1]

# Convolutional feature extractor.
feature_layers = [
    Conv2D(filters=32, kernel_size=(3, 3), padding='same', input_shape=(28, 28, 1)),
    Activation('relu'),
    Conv2D(filters=32, kernel_size=(3, 3), padding='same'),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    Flatten(),
]

# Dense classifier head with one softmax output per class.
classification_layers = [
    Dense(units=128),
    Activation('relu'),
    Dropout(rate=0.50),
    Dense(units=NUM_CLASSES),
    Activation('softmax'),
]

model = Sequential(layers=feature_layers + classification_layers)

model.compile(
    # NOTE(review): both clipping thresholds are very large — presumably a
    # loose bound rather than active clipping; confirm intent.
    optimizer=SGD(
        clipnorm=10000,
        clipvalue=10000,
    ),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=1,
    verbose=1,
    validation_data=(x_test, y_test),
)

Train on 29404 samples, validate on 4861 samples
Epoch 1/1


<keras.callbacks.History at 0x11b78bf60>

## Getting rid of comparisons: sigmoid activations and average pooling

In [11]:
# Hidden activations are sigmoid instead of ReLU and pooling is average
# instead of max; trained with SGD (learning rate 0.1, momentum 0.9) for
# 25 epochs on the private dataset.
_, private_dataset = load_data()
x_train, y_train = private_dataset[0]
x_test, y_test = private_dataset[1]

# Convolutional feature extractor.
feature_layers = [
    Conv2D(filters=32, kernel_size=(3, 3), padding='same', input_shape=(28, 28, 1)),
    Activation('sigmoid'),
    Conv2D(filters=32, kernel_size=(3, 3), padding='same'),
    Activation('sigmoid'),
    AveragePooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    Flatten(),
]

# Dense classifier head; the output layer is still a softmax.
classification_layers = [
    Dense(units=128),
    Activation('sigmoid'),
    Dropout(rate=0.50),
    Dense(units=NUM_CLASSES),
    Activation('softmax'),
]

model = Sequential(layers=feature_layers + classification_layers)

model.compile(
    optimizer=SGD(
        clipnorm=10000,
        clipvalue=10000,
        lr=0.1,
        momentum=0.9,
    ),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=25,
    verbose=1,
    validation_data=(x_test, y_test),
)

Train on 29404 samples, validate on 4861 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x126c1f358>

## Getting rid of logarithms: MSE loss function

In [12]:
# Sigmoid / average-pooling network trained with a mean-squared-error loss
# instead of categorical cross-entropy; SGD with learning rate 0.1 and
# momentum 0.9 for 25 epochs on the private dataset.
_, private_dataset = load_data()
x_train, y_train = private_dataset[0]
x_test, y_test = private_dataset[1]

# Convolutional feature extractor.
feature_layers = [
    Conv2D(filters=32, kernel_size=(3, 3), padding='same', input_shape=(28, 28, 1)),
    Activation('sigmoid'),
    Conv2D(filters=32, kernel_size=(3, 3), padding='same'),
    Activation('sigmoid'),
    AveragePooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    Flatten(),
]

# Dense classifier head; the output layer is still a softmax.
classification_layers = [
    Dense(units=128),
    Activation('sigmoid'),
    Dropout(rate=0.50),
    Dense(units=NUM_CLASSES),
    Activation('softmax'),
]

model = Sequential(layers=feature_layers + classification_layers)

model.compile(
    optimizer=SGD(
        clipnorm=10000,
        clipvalue=10000,
        lr=0.1,
        momentum=0.9,
    ),
    loss='mean_squared_error',
    metrics=['accuracy'],
)

model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=25,
    verbose=1,
    validation_data=(x_test, y_test),
)

Train on 29404 samples, validate on 4861 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x126a25a90>

# Transfer learning

In [10]:
# Transfer learning: pre-train the whole network on the public dataset, then
# freeze the feature layers and train only the classifier head on the
# private dataset.
public_dataset, private_dataset = load_data()

# Convolutional feature extractor.
feature_layers = [
    Conv2D(filters=32, kernel_size=(3, 3), padding='same', input_shape=(28, 28, 1)),
    Activation('sigmoid'),
    Conv2D(filters=32, kernel_size=(3, 3), padding='same'),
    Activation('sigmoid'),
    AveragePooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    Flatten(),
]

# Dense classifier head with one softmax output per class.
classification_layers = [
    Dense(units=128),
    Activation('sigmoid'),
    Dropout(rate=0.50),
    Dense(units=NUM_CLASSES),
    Activation('softmax'),
]

model = Sequential(layers=feature_layers + classification_layers)

# Stage 1: pre-train every layer on the public dataset with Adam.
x_train, y_train = public_dataset[0]
x_test, y_test = public_dataset[1]

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=1,
    verbose=1,
    validation_data=(x_test, y_test),
)

# Stage 2: freeze the feature layers. The model is compiled again below,
# which is what makes the changed `trainable` flags take effect.
for layer in feature_layers:
    layer.trainable = False

# Stage 3: train on the private dataset with momentum-free SGD.
x_train, y_train = private_dataset[0]
x_test, y_test = private_dataset[1]

model.compile(
    optimizer=SGD(
        clipnorm=10000,
        clipvalue=10000,
        lr=0.1,
        momentum=0.0,
    ),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    verbose=1,
    validation_data=(x_test, y_test),
)

Train on 30596 samples, validate on 5139 samples
Epoch 1/1
Train on 29404 samples, validate on 4861 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x11c74a9b0>