# Deep Neural Networks

## Theano

  * Python library that provides efficient (low-level) tools for working with Neural Networks
  * In particular:
      * Automatic Differentiation (AD)
      * Compiled computation graphs
      * GPU accelerated computation

## Keras

   * High level library for specifying and training neural networks
   * Can use `Theano` or `TensorFlow` as backend

## The MNIST Dataset

  * 60,000 training images (plus 10,000 test images) of handwritten digits
  * As 28x28 pixel images

# TODO

  * Replace magic constants with variables
  * Differentiate between images_train and X_train
  * Make random examples interactive widgets

In [None]:
from __future__ import absolute_import
from __future__ import print_function
import numpy as np
# Seed NumPy's global RNG; the random sample picks in later cells draw from it.
np.random.seed(1337)  # for reproducibility
# Settings shared by the model cells below.
nb_classes = 10   # width of the one-hot labels and of the softmax output layer
nb_epoch = 1      # forwarded as nb_epoch to every fit() call
nb_params = 512   # rename to nb_hidden (hidden width; reassigned in the autoencoder cell)
batch_size = 128  # forwarded as batch_size to every fit() call

from keras.datasets import mnist
# Unpack the train and test splits into image arrays (X_*) and label arrays (y_*).
(X_train, y_train), (X_test, y_test) = mnist.load_data()
print(X_train.shape, y_train.shape)

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

In [None]:
def plot_10_by_10_images(images, figsize=None):
    """Plot up to 100 MNIST images in a 10 by 10 grid.

    Images are laid out row by row: ``images[k]`` is drawn in row ``k // 10``
    and column ``k % 10``.  When fewer than 100 images are given the
    remaining cells stay empty; images beyond the first 100 are ignored.

    Args:
        images: sliceable sequence (list or array) of 2-D arrays.
        figsize: optional ``(width, height)`` forwarded to ``plt.figure``.
    """
    fig = plt.figure(figsize=figsize)
    for index, image in enumerate(images[:100]):
        ax = fig.add_subplot(10, 10, index + 1)
        ax.matshow(image, cmap=matplotlib.cm.binary)
        # Hide the ticks: at this size they only add clutter.
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()

In [None]:
def plot_mnist_digit(image, figsize=None):
    """Plot a single MNIST image without axis ticks.

    Args:
        image: 2-D array to display.
        figsize: optional ``(width, height)`` forwarded to ``plt.figure``.
            (Figure size is a property of the figure, not of the axes, so it
            has to be given when the figure is created.)
    """
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(1, 1, 1)
    ax.matshow(image, cmap=matplotlib.cm.binary)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.show()

In [None]:
def plot_1_by_2_images(image, reconstruction, figsize=None):
    """Show ``image`` (left) and ``reconstruction`` (right) side by side.

    Both arguments are 2-D arrays; ``figsize`` is forwarded to ``plt.figure``.
    """
    fig = plt.figure(figsize=figsize)
    for position, picture in enumerate((image, reconstruction), 1):
        panel = fig.add_subplot(1, 2, position)
        panel.matshow(picture, cmap = matplotlib.cm.binary)
        # Empty tick arrays remove the ticks of the panel that was just added.
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
    plt.show()

In [None]:
# Pick one training example at random, show it, then print its index and label.
i = np.random.randint(0, len(X_train))
plot_mnist_digit(X_train[i])
print(i, ':', y_train[i])

In [None]:
def to_features(X):
    """Flatten 28x28 images into float32 rows of 784 values divided by 255.

    Args:
        X: array whose size is a multiple of 784 (one image or a batch).

    Returns:
        float32 array of shape ``(n, 784)``.
    """
    return X.reshape(-1, 784).astype("float32") / 255.0


def to_images(X):
    """Turn feature rows back into uint8 images of shape ``(n, 28, 28)``.

    Values are scaled by 255, rounded to the nearest integer and clipped to
    ``[0, 255]`` before the cast.  A bare ``astype('uint8')`` truncates
    (0.999 * 255 would become 254) and wraps values outside ``[0, 1]``, which
    matters for model outputs that are not exact multiples of 1/255.
    """
    pixels = np.clip(np.rint(X * 255.0), 0, 255)
    return pixels.astype('uint8').reshape(-1, 28, 28)

# Round-trip sanity check: prints 0 when to_images(to_features(x)) recovers x.
# The difference is taken in a signed dtype because uint8 subtraction wraps
# around (a reconstruction that is 1 too high would otherwise print 255).
print(np.abs(X_train[0].astype('int16')
             - to_images(to_features(X_train[0]))[0].astype('int16')).max())

In [None]:
# Replace the raw image arrays by their flattened float32 feature rows
# (see to_features) and report the resulting shapes.
X_train, X_test = to_features(X_train), to_features(X_test)
print(X_train.shape, 'train samples')
print(X_test.shape, 'test samples')

In [None]:
# The labels need to be transformed into class indicators
from keras.utils import np_utils
# One indicator row of width nb_classes per label, for both splits.
y_train_cat, y_test_cat = (
    np_utils.to_categorical(labels, nb_classes=nb_classes)
    for labels in (y_train, y_test)
)
print(y_train_cat.shape, 'train labels')
print(y_test_cat.shape, 'test labels')

## Simple Multi-Layer Perceptron

In [None]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation
# One hidden layer: 784 inputs -> nb_params sigmoid units -> nb_classes softmax.
mlp = Sequential()
for layer in (
        Dense(output_dim=nb_params, input_dim=784, init='uniform'),
        Activation('sigmoid'),
        Dense(output_dim=nb_classes, input_dim=nb_params, init='uniform'),
        Activation('softmax')):
    mlp.add(layer)

In [None]:
# Cross-entropy against the one-hot labels, default-configured SGD.
mlp.compile(optimizer='SGD', loss='categorical_crossentropy')

In [None]:
# Train on the feature rows against the one-hot labels.
mlp.fit(X_train, y_train_cat,
        nb_epoch=nb_epoch,
        batch_size=batch_size,
        show_accuracy=True,
        verbose=1)

In [None]:
# Score the trained MLP on the test split; as the cell's last expression the
# result is displayed by the notebook.
mlp.evaluate(X_test, y_test_cat, show_accuracy=True)

In [None]:
# Pick a random test example, show it and compare prediction with the label.
j = np.random.randint(len(X_test))
# Convert only the selected row back to an image; converting the whole test
# set just to display one digit is wasted work.
plot_mnist_digit(to_images(X_test[j:j+1])[0])
prediction = mlp.predict_classes(X_test[j:j+1], verbose=False)[0]
print(j, ':', '\tpredict:', prediction, '\tactual:', y_test[j])

## A Deeper MLP

In [None]:
from keras.models import Sequential
# Two hidden sigmoid layers of half the width used by the simple MLP.
# Floor division keeps the layer sizes integers on Python 3 as well; plain
# `/` yields a float there because `division` is not imported from __future__.
mlp2 = Sequential()
mlp2.add(Dense(output_dim=nb_params // 2, input_dim=784, init='uniform'))
mlp2.add(Activation('sigmoid'))
mlp2.add(Dense(output_dim=nb_params // 2, input_dim=nb_params // 2, init='uniform'))
mlp2.add(Activation('sigmoid'))
mlp2.add(Dense(output_dim=nb_classes, input_dim=nb_params // 2, init='uniform'))
mlp2.add(Activation('softmax'))

In [None]:
# Same loss and optimizer as the simple MLP so the two runs are comparable.
mlp2.compile(optimizer='SGD', loss='categorical_crossentropy')

In [None]:
# Train the deeper MLP with the shared batch size and epoch count.
mlp2.fit(X_train, y_train_cat,
         nb_epoch=nb_epoch,
         batch_size=batch_size,
         verbose=1,
         show_accuracy=True)

In [None]:
# Score the deeper MLP on the test split; the result is the cell's displayed value.
mlp2.evaluate(X_test, y_test_cat, show_accuracy=True)

## Manual Autoencoder

In [None]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
# Hand-assembled autoencoder: nb_layers encoder layers followed by the
# mirror-image decoder layers, mapping 784 inputs back to 784 outputs.
mae = Sequential()
nb_layers = 1
nb_params = 1000
encoder = []
decoder = []
for i in range(nb_layers):
    if i > 0:
        encoder.append(Dropout(0.4))
    # Floor division keeps the layer width an int on Python 3 as well
    # (plain `/` would hand Dense a float there).
    encoder.append(Dense(output_dim=nb_params // nb_layers,
                         input_dim=784 if i == 0 else nb_params // nb_layers,
                         init='glorot_uniform'))
    encoder.append(Activation('sigmoid'))

    # Note that these are in reverse order
    decoder.append(Activation('sigmoid'))
    decoder.append(Dense(output_dim=784 if i == 0 else nb_params // nb_layers,
                         input_dim=nb_params // nb_layers,
                         init='glorot_uniform'))
    #decoder.append(Dropout(0.2))

for layer in encoder:
    mae.add(layer)
# The decoder list was filled back to front, so add it reversed.
for layer in reversed(decoder):
    mae.add(layer)

In [None]:
from keras.optimizers import SGD
# Optimizer instance handed to the autoencoder's compile() call.
sgd = SGD(lr=0.1, momentum=0.9, nesterov=True, decay=1e-6)
#sgd = SGD(lr=0.1)

In [None]:
# Reconstruction is scored with mean squared error.
mae.compile(optimizer=sgd, loss='mse')

In [None]:
# Autoencoder training: the inputs are also the targets.
mae.fit(X_train, X_train,
        nb_epoch=nb_epoch,
        batch_size=batch_size,
        verbose=1)

In [None]:
# Show a random training digit next to the autoencoder's reconstruction of it.
j = np.random.randint(0, len(X_train))
X_plot = X_train[j:j+1]
prediction = mae.predict(X_plot, verbose=False)
plot_1_by_2_images(image=to_images(X_plot)[0],
                   reconstruction=to_images(prediction)[0])

In [None]:
# List the shape of every weight array of the manual autoencoder.
wgts = mae.get_weights()
print(len(wgts))
for i, w in enumerate(wgts):
    print(i, w.shape)
# Keep the first weight array in `w`; the next cell plots one of its columns.
w = wgts[0]

In [None]:
# Visualise one randomly chosen column of `w` as a 28x28 picture.
j = np.random.randint(w.shape[1])
X_plot = w[:,j:j+1]
# Plot the raw float values and let matshow scale the colours.  to_images()
# is meant for features in [0, 1]; weights outside that range do not survive
# its uint8 conversion.
plot_mnist_digit(X_plot.reshape(28, 28))

## Stacked Autoencoder

In [None]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout

class StackedAutoencoder(object):
    """Symmetric dense autoencoder described by a list of layer sizes.

    ``layers[0]`` is the input size; every further entry adds one encoder
    layer of that size plus its mirror-image decoder layer, so
    ``layers=[784, 256, 64]`` gives 784 -> 256 -> 64 -> 256 -> 784.  Each
    dense layer is followed by ``activation``, and ``Dropout(dropout)`` is
    placed in front of every encoder layer except the first.

    The model is built and compiled with MSE loss by the constructor.
    """

    def __init__(self, layers,
                 activation='sigmoid', init='uniform',
                 dropout=0.2, optimizer='SGD'):
        """Store the configuration, then build and compile the model.

        Args:
            layers: list of layer sizes, input size first.
            activation: activation name used after every dense layer.
            init: weight initialisation name passed to every dense layer.
            dropout: rate of the Dropout layers between encoder layers.
            optimizer: optimizer name or instance passed to ``compile``.
        """
        self.layers = layers
        self.activation = activation
        self.init = init
        self.dropout = dropout
        self.optimizer = optimizer

        self.build()
        self.compile()

    def build(self):
        """Create the layers and assemble ``self.autoencoder``.

        ``self.encoder`` holds the encoder layers in application order.
        ``self.decoder`` holds the decoder layers in the order they were
        created, which is the reverse of the order they are applied in.
        """
        autoencoder = Sequential()
        encoder = []
        decoder = []
        sizes = zip(self.layers[:-1], self.layers[1:])
        for i, (outer, inner) in enumerate(sizes):
            if i > 0:
                encoder.append(Dropout(self.dropout))
            encoder.append(Dense(output_dim=inner,
                                 input_dim=outer,
                                 init=self.init))
            encoder.append(Activation(self.activation))

            # Appended back to front: reversing the list below yields
            # Dense followed by Activation, innermost layer first.
            decoder.append(Activation(self.activation))
            decoder.append(Dense(output_dim=outer,
                                 input_dim=inner,
                                 init=self.init))
        for layer in encoder:
            autoencoder.add(layer)
        for layer in reversed(decoder):
            autoencoder.add(layer)

        self.encoder = encoder
        self.decoder = decoder
        self.autoencoder = autoencoder

    def compile(self):
        """Compile the model with MSE loss and the configured optimizer."""
        return self.autoencoder.compile(loss='mse', optimizer=self.optimizer)

    def fit(self, X_train, Y_train, batch_size, nb_epoch, verbose=1):
        """Train the model; all arguments are forwarded to the model's ``fit``."""
        return self.autoencoder.fit(X_train, Y_train,
                                    batch_size=batch_size, nb_epoch=nb_epoch,
                                    verbose=verbose)

    def evaluate(self, X_test, Y_test, show_accuracy=False):
        """Forward to the model's ``evaluate`` and return its result."""
        return self.autoencoder.evaluate(X_test, Y_test, show_accuracy=show_accuracy)

    def predict(self, X, verbose=False):
        """Return the model's output for the rows of ``X``."""
        return self.autoencoder.predict(X, verbose=verbose)

    def _get_paths(self, name):
        """Return ``(model_path, weights_path)`` under ``models/`` for ``name``."""
        model_path = "models/{}_model.yaml".format(name)
        weights_path = "models/{}_weights.hdf5".format(name)
        return model_path, weights_path

    def save(self, name='autoencoder'):
        """Write the architecture (YAML) and the weights (HDF5) to ``models/``.

        The target directory is created when it does not exist yet, and an
        existing weights file is overwritten.
        """
        import os

        model_path, weights_path = self._get_paths(name)
        model_dir = os.path.dirname(model_path)
        if model_dir and not os.path.isdir(model_dir):
            os.makedirs(model_dir)
        # `with` closes the file even if serialisation fails half-way.
        with open(model_path, 'w') as model_file:
            model_file.write(self.autoencoder.to_yaml())
        self.autoencoder.save_weights(weights_path, overwrite=True)

    def load(self, name='autoencoder'):
        """Replace ``self.autoencoder`` with the model saved under ``name``.

        ``self.encoder`` and ``self.decoder`` are not updated and keep
        referring to the layers of the previously built model.
        """
        # Imported here because this cell only imports Sequential from
        # keras.models; the bare name `keras` is never bound.
        from keras.models import model_from_yaml

        model_path, weights_path = self._get_paths(name)
        with open(model_path) as model_file:
            self.autoencoder = model_from_yaml(model_file.read())
        # NOTE(review): depending on the Keras version the restored model may
        # need self.compile() before fit/evaluate/predict -- confirm.
        self.autoencoder.load_weights(weights_path)

In [None]:
from keras.optimizers import SGD
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)

# Hand over the optimizer configured above.  Passing the string 'SGD' here
# left `sgd` unused, so its learning rate, decay and momentum had no effect.
sae = StackedAutoencoder(layers=[784, nb_params],
                         activation='sigmoid', init='uniform',
                         dropout=0.2, optimizer=sgd)

In [None]:
# Autoencoder training: the feature rows serve as both input and target.
sae.fit(X_train=X_train, Y_train=X_train,
        nb_epoch=nb_epoch,
        batch_size=batch_size,
        verbose=1)

In [None]:
# Show a random training digit next to the stacked autoencoder's reconstruction.
j = np.random.randint(0, len(X_train))
X_plot = X_train[j:j+1]
prediction = sae.predict(X_plot, verbose=False)
plot_1_by_2_images(image=to_images(X_plot)[0],
                   reconstruction=to_images(prediction)[0])

In [None]:
# Score the reconstruction of the test split (inputs are also the targets);
# the result is the cell's displayed value.
sae.evaluate(X_test, X_test, show_accuracy=True)

In [None]:
# Fetch the weight arrays of the stacked autoencoder (rebinds the global `wgts`).
wgts = sae.autoencoder.get_weights()

In [None]:
# Print the position and shape of every weight array fetched above.
for i, w in enumerate(wgts):
    print(i, w.shape)

# Questions that I have

  * How often are the Dropout neurons reset? Once an epoch or more often?
  * Is Dropout removed during the predict() call?
  * Should inputs be centered on zero for Dropout to be appropriate?