In [None]:
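'''Train simple deep NNs (MLPs and autoencoders) on the MNIST dataset.
The MLP is reported to get to 98.40% test accuracy after 20 epochs
(there is *a lot* of margin for parameter tuning),
at about 2 seconds per epoch on a K520 GPU.
Note: `nb_epoch` below is set to 2, so a default run trains for far fewer
epochs than that accuracy figure assumes.
'''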
from __future__ import absolute_import
from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility
nb_classes = 10   # number of output classes (softmax width and label-indicator width)
nb_epoch = 2      # number of epochs passed to the fit() calls below
nb_params = 512   # hidden-unit budget for the dense layers (TODO: rename to nb_hidden)
batch_size = 128  # mini-batch size passed to fit()

from keras.datasets import mnist
# Load the MNIST train and test splits as (images, labels) pairs.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [None]:
# Sanity check: shapes of the raw training images and their labels.
print(X_train.shape, y_train.shape)

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

In [None]:
def plot_10_by_10_images(images, figsize=None):
    """Plot the first 100 images of ``images`` in a 10 by 10 grid.

    The images are drawn as-is (no cropping or other post-processing),
    row by row: grid cell ``k`` (0-based, row-major) shows ``images[k]``.

    Parameters
    ----------
    images : indexable collection of 2-D arrays
        Must contain at least 100 entries; fewer raises ``IndexError``.
    figsize : tuple or None
        Optional ``(width, height)`` forwarded to ``plt.figure``.
    """
    fig = plt.figure(figsize=figsize)
    for k in range(100):
        ax = fig.add_subplot(10, 10, k + 1)
        ax.matshow(images[k], cmap=matplotlib.cm.binary)
        # Hide the ticks on this axes explicitly instead of relying on
        # pyplot's "current axes" state.
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()
    
# Show the first 100 training digits in a 10x10 inch figure.
plot_10_by_10_images(X_train, figsize=(10,10))

In [None]:
def plot_mnist_digit(image, figsize=None):
    """Plot a single MNIST image.

    Parameters
    ----------
    image : 2-D array
        The image to display.
    figsize : tuple or None
        Optional ``(width, height)`` forwarded to ``plt.figure``.
    """
    # The figure size belongs to the Figure, not the Axes: Axes has no
    # ``set_figsize`` method, so it is passed at figure creation, as the
    # other plotting helpers in this notebook do.
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(image, cmap=matplotlib.cm.binary)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.show()

In [None]:
def plot_1_by_2_images(image, reconstruction, figsize=None):
    """Show ``image`` (left) and ``reconstruction`` (right) side by side.

    Both panels are drawn with the binary colormap and without ticks;
    ``figsize`` is forwarded to ``plt.figure``.
    """
    fig = plt.figure(figsize=figsize)
    no_ticks = np.array([])
    for position, picture in enumerate((image, reconstruction), start=1):
        panel = fig.add_subplot(1, 2, position)
        panel.matshow(picture, cmap=matplotlib.cm.binary)
        # The freshly added subplot is the current axes, so these calls
        # strip the ticks of the panel just drawn.
        plt.xticks(no_ticks)
        plt.yticks(no_ticks)
    plt.show()

In [None]:
# Pick a random training example, display it, and print its index and label.
i = np.random.randint(len(X_train))
plot_mnist_digit(X_train[i])
print(i, ':', y_train[i])

In [None]:
def to_features(X):
    """Flatten images into rows of 784 float32 values scaled by 1/255."""
    rows = X.reshape(-1, 784)
    as_float = rows.astype("float32")
    return as_float / 255.0

def to_images(X):
    """Convert feature rows in [0, 1] back to uint8 images of shape (-1, 28, 28).

    Values are clipped to [0, 1] and rounded to the nearest integer level
    before the uint8 cast. A plain cast truncates, so a value such as
    0.99999994 * 255 would lose a whole grey level, and values outside
    [0, 1] (e.g. unbounded network outputs) would wrap around.
    """
    levels = np.rint(np.clip(X, 0.0, 1.0) * 255.0)
    return levels.astype('uint8').reshape(-1, 28, 28)

# Round-trip sanity check: largest absolute pixel difference between the first
# image and its features -> image reconstruction (0 means a lossless round trip).
# Both sides are widened to int32 first, because subtracting uint8 arrays wraps
# around and would report a difference of -1 as 255.
print(np.abs(X_train[0].astype('int32')
             - to_images(to_features(X_train[0]))[0].astype('int32')).max())

In [None]:
# Convert the raw train/test images into flattened, scaled float32 feature rows
# (see to_features).
# NOTE(review): X_train / X_test are rebound to the converted arrays, so this
# cell is not idempotent -- running it twice divides by 255 a second time.
# Re-run the data-loading cell before executing this cell again.
X_train = to_features(X_train)
X_test = to_features(X_test)
print(X_train.shape, 'train samples')
print(X_test.shape, 'test samples')

In [None]:
# The labels need to be transformed into class indicators
# (presumably one indicator column per class, nb_classes columns in total --
# confirm against the Keras to_categorical documentation).
from keras.utils import np_utils
y_train_cat = np_utils.to_categorical(y_train, nb_classes=nb_classes)
y_test_cat = np_utils.to_categorical(y_test, nb_classes=nb_classes)
print(y_train_cat.shape, 'train labels')
print(y_test_cat.shape, 'test labels')

## Simple Multi-Layer Perceptron

In [None]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation
# Single-hidden-layer perceptron: 784 inputs -> nb_params sigmoid units
# -> nb_classes softmax outputs.
mlp = Sequential()
mlp.add(Dense(output_dim=nb_params, input_dim=784, init='uniform'))
mlp.add(Activation('sigmoid'))
mlp.add(Dense(output_dim=nb_classes, input_dim=nb_params, init='uniform'))
mlp.add(Activation('softmax'))

In [None]:
# Compile the simple MLP with categorical cross-entropy loss and the 'SGD' optimizer.
mlp.compile(loss='categorical_crossentropy', optimizer='SGD')

In [None]:
# Train the simple MLP on the flattened training features and indicator labels,
# using the notebook-wide batch_size and nb_epoch settings.
mlp.fit(X_train, y_train_cat, batch_size=batch_size, nb_epoch=nb_epoch, verbose=1, show_accuracy=True)

In [None]:
# Evaluate the simple MLP on the test split; the result is displayed as the cell output.
mlp.evaluate(X_test, y_test_cat, show_accuracy=True)

In [None]:
# Spot-check the simple MLP on one random test digit: show the digit, then
# print its index, the predicted class and the true label.
j = np.random.randint(len(X_test))
# Convert only the selected row back to an image instead of the whole test set.
plot_mnist_digit(to_images(X_test[j])[0])
prediction = mlp.predict_classes(X_test[j:j+1], verbose=False)[0]
print(j, ':', '\tpredict:', prediction, '\tactual:', y_test[j])

## A Deeper MLP

In [None]:
from keras.models import Sequential
# Deeper MLP: two sigmoid hidden layers, each with half of the nb_params
# budget, followed by a softmax over nb_classes outputs.
# `//` keeps the layer widths integers on Python 3 too, where plain `/`
# would produce a float (256.0) that is not a valid layer dimension.
mlp2 = Sequential()
mlp2.add(Dense(output_dim=nb_params // 2, input_dim=784, init='uniform'))
mlp2.add(Activation('sigmoid'))
mlp2.add(Dense(output_dim=nb_params // 2, input_dim=nb_params // 2, init='uniform'))
mlp2.add(Activation('sigmoid'))
mlp2.add(Dense(output_dim=nb_classes, input_dim=nb_params // 2, init='uniform'))
mlp2.add(Activation('softmax'))

In [None]:
# Compile the deeper MLP with the same loss and optimizer as the simple one.
mlp2.compile(loss='categorical_crossentropy', optimizer='SGD')

In [None]:
# Train the deeper MLP with the notebook-wide batch_size and nb_epoch settings.
mlp2.fit(X_train, y_train_cat, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=1)

In [None]:
# Evaluate the deeper MLP on the test split; the result is displayed as the cell output.
mlp2.evaluate(X_test, y_test_cat, show_accuracy=True)

## A More Modern MLP

In [None]:
from keras.layers.core import Dropout
# "Modern" MLP: nb_layers hidden blocks of Dense -> ReLU -> Dropout(0.2), with
# the nb_params budget split evenly between the blocks, then a softmax output.
# `//` keeps the layer widths integers on Python 3 too (plain `/` yields a float).
mlp3 = Sequential()
nb_layers = 2
for i in range(nb_layers):
    mlp3.add(Dense(output_dim=nb_params // nb_layers,
                   # only the first block reads the 784 raw pixel features
                   input_dim=nb_params // nb_layers if i > 0 else 784,
                   init='uniform'))
    mlp3.add(Activation('relu'))
    mlp3.add(Dropout(0.2))

mlp3.add(Dense(output_dim=nb_classes, input_dim=nb_params // nb_layers, init='uniform'))
mlp3.add(Activation('softmax'))

In [None]:
# Compile the ReLU/dropout MLP with the same loss and optimizer as the other MLPs.
mlp3.compile(loss='categorical_crossentropy', optimizer='SGD')

In [None]:
# Train the ReLU/dropout MLP. Uses the shared batch_size constant (128) rather
# than a hard-coded literal, so all three MLPs follow the same configuration.
mlp3.fit(X_train, y_train_cat, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=1)

In [None]:
# Evaluate the ReLU/dropout MLP on the test split; the result is displayed as the cell output.
mlp3.evaluate(X_test, y_test_cat, show_accuracy=True)

In [None]:
#open('mlp3_model.yaml', 'w').write(mlp3.to_yaml())
#mlp3.save_weights('mlp3_weights.hdf5')

## Simple Autoencoder

In [None]:
# Create the autoencoder: a 784 -> 512 sigmoid encoder and a 512 -> 784 sigmoid
# decoder wrapped in Keras' AutoEncoder layer.
from keras.models import Sequential
from keras.layers import containers
from keras.layers.core import Dense, Activation, AutoEncoder
ae = Sequential()
encoder = containers.Sequential()
encoder.add(Dense(input_dim=784, output_dim=512, init='uniform'))
encoder.add(Activation('sigmoid'))
decoder = containers.Sequential()
# Keyword arguments, like every other Dense call in this notebook: the
# positional form Dense(512, 784, ...) depends on the argument order of the
# installed Keras version, and its second positional collides with `init`
# when the signature is Dense(output_dim, init=..., ...).
decoder.add(Dense(input_dim=512, output_dim=784, init='uniform'))
decoder.add(Activation('sigmoid'))
# output_reconstruction=True is passed so the model output can be compared
# with the input images (it is plotted as a reconstruction further down).
ae.add(AutoEncoder(encoder=encoder, decoder=decoder,
                   output_reconstruction=True))

In [None]:
# Compile the autoencoder with mean squared error as the reconstruction loss.
ae.compile(loss='mean_squared_error', optimizer='SGD')

In [None]:
# Train the autoencoder to reproduce its own input (the target equals the input).
# Uses the shared batch_size constant (128) instead of a literal. show_accuracy
# is omitted, as in the mae.fit call: classification accuracy is not meaningful
# for an MSE reconstruction objective.
ae.fit(X_train, X_train, batch_size=batch_size, nb_epoch=2, verbose=1)

In [None]:
# Show a random training digit followed by the autoencoder's reconstruction of it.
X_plot = X_train
j = np.random.randint(len(X_plot))
# Convert only the selected row back to an image instead of the whole dataset.
plot_mnist_digit(to_images(X_plot[j])[0])
prediction = ae.predict(X_plot[j:j+1], verbose=False)
plot_mnist_digit(to_images(prediction)[0])

## Manual Autoencoder

In [None]:
from keras.layers.core import Dropout
# Manual autoencoder assembled from plain layers. Each iteration adds a
# Dropout -> Dense -> ReLU block to the encoder and the mirrored block to the
# decoder list; the decoder list is replayed in reverse so the stack narrows
# from 784 inputs and ends with the Dense layer that maps back to 784 outputs.
# `//` keeps the layer widths integers on Python 3 too (plain `/` yields a float).
mae = Sequential()
nb_layers = 2
encoder = []
decoder = []
for i in range(nb_layers):
    encoder.append(Dropout(0.2))
    encoder.append(Dense(output_dim=nb_params // nb_layers,
                         input_dim=784 if i == 0 else nb_params // nb_layers,
                         init='uniform'))
    encoder.append(Activation('relu'))

    # Appended in reverse order on purpose: after reversed(decoder) below each
    # block reads Dropout -> Dense -> ReLU again.
    decoder.append(Activation('relu'))
    decoder.append(Dense(output_dim=784 if i == 0 else nb_params // nb_layers,
                         input_dim=nb_params // nb_layers,
                         init='uniform'))
    decoder.append(Dropout(0.2))

for layer in encoder:
    mae.add(layer)
for layer in reversed(decoder):
    mae.add(layer)

In [None]:
from keras.optimizers import SGD
# Optimizer for the manual autoencoder: SGD with learning rate 0.1, a small
# learning-rate decay, momentum 0.9 and Nesterov momentum enabled.
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
# Plain-SGD alternative kept for experimentation:
#sgd = SGD(lr=0.1)

In [None]:
# Compile the manual autoencoder with MSE loss and the configured `sgd` optimizer instance.
mae.compile(loss='mse', optimizer=sgd)

In [None]:
# Train the manual autoencoder for a long run. The epoch count gets its own
# name instead of rebinding the notebook-wide `nb_epoch`: otherwise re-running
# any earlier fit cell after this one would silently train for 2000 epochs.
nb_epoch_mae = 2000
mae.fit(X_train, X_train, batch_size=batch_size, nb_epoch=nb_epoch_mae, verbose=1)

In [None]:
# Show a random training digit next to the manual autoencoder's reconstruction.
X_plot = X_train
j = np.random.randint(len(X_plot))
prediction = mae.predict(X_plot[j:j+1], verbose=False)
# Convert only the selected row back to an image instead of the whole dataset.
plot_1_by_2_images(to_images(X_plot[j])[0], to_images(prediction)[0])

In [None]:
# SAVE
# Persist the manual autoencoder: architecture as YAML, weights as HDF5.
import os

# The target directory may not exist yet; create it so open() does not fail.
if not os.path.isdir('models'):
    os.makedirs('models')
# `with` makes sure the YAML file is flushed and closed before moving on.
with open('models/autoencoder_model.yaml', 'w') as yaml_file:
    yaml_file.write(mae.to_yaml())
mae.save_weights('models/autoencoder_weights.hdf5', overwrite=True)

In [None]:
# LOAD
# Rebuild the manual autoencoder from the saved YAML architecture and restore
# its weights.
import keras.models
# Read the YAML text inside a `with` block so the file handle is closed, and
# hand model_from_yaml the string rather than an open file object.
with open('models/autoencoder_model.yaml') as yaml_file:
    mae = keras.models.model_from_yaml(yaml_file.read())
mae.load_weights('models/autoencoder_weights.hdf5')