# Tricks of the trade TF/Keras [small dataset]

In this script we build a small multilayer perceptron with two hidden layers, having 500 and 50 neurons respectively, for classifying the MNIST database of handwritten digits using Keras. It uses the full data set, so it is best run on a GPU.

Below are several experiments.

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as imgplot
import numpy as np

import time
import tensorflow as tf
# Fix TensorFlow's random seed (value 1) before any Keras object is created.
tf.set_random_seed(1)

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
import keras
import sys
# Display the Keras, TensorFlow and Python versions this notebook runs with.
keras.__version__, tf.__version__, sys.version_info

In [None]:
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets

# Load MNIST from ../data/ (fetched if necessary and cached there).
#   one_hot=True         -> labels are one-hot encoded
#   reshape=True         -> every image is flattened to a vector of 784 values
#   validation_size=2000 -> size requested for the validation split
mnist = read_data_sets("../data/", one_hot=True, reshape=True, validation_size=2000)

# Training split
X_train = mnist.train.images
Y_train = mnist.train.labels

# Validation split
X_val = mnist.validation.images
Y_val = mnist.validation.labels

# Sanity check: show the shapes of all four arrays (inputs and labels of both splits).
X_train.shape, Y_train.shape, X_val.shape, Y_val.shape

### Suggestions for the experiment

Let each experiment run for 100 epochs. You may need to restart the kernel between experiments so that the layers get the same names each time.

* with init zero 
* with sigmoid activation 
* with ReLU activation
* with dropout (p=0.3)
* with batch-normalization and dropout

In [None]:
### Experiment 'sigmoid_init0': sigmoid MLP whose Dense layers all use init='zero'
name = 'sigmoid_init0'
model = Sequential([
    # Hidden layer 1: 784 inputs -> 500 units
    Dense(500, batch_input_shape=(None, 784), init='zero'),
    Activation('sigmoid'),
    # Hidden layer 2: 50 units
    Dense(50, init='zero'),
    Activation('sigmoid'),
    # Output layer: softmax over the 10 digit classes
    Dense(10, activation='softmax', init='zero'),
])
model.compile(
    optimizer='adadelta',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
### Experiment 'sigmoid': sigmoid MLP with Keras' default weight initialization
# Baseline network: no Dropout and no BatchNormalization layers are inserted.
name = 'sigmoid'
model = Sequential([
    # Hidden layer 1: 784 inputs -> 500 units
    Dense(500, batch_input_shape=(None, 784)),
    Activation('sigmoid'),
    # Hidden layer 2: 50 units
    Dense(50),
    Activation('sigmoid'),
    # Output layer: softmax over the 10 digit classes
    Dense(10, activation='softmax'),
])
model.compile(
    optimizer='adadelta',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
### Experiment 'relu': same MLP as the sigmoid baseline, with ReLU activations
# Default weight initialization; no Dropout and no BatchNormalization layers.
name = 'relu'
model = Sequential([
    # Hidden layer 1: 784 inputs -> 500 units
    Dense(500, batch_input_shape=(None, 784)),
    Activation('relu'),
    # Hidden layer 2: 50 units
    Dense(50),
    Activation('relu'),
    # Output layer: softmax over the 10 digit classes
    Dense(10, activation='softmax'),
])
model.compile(
    optimizer='adadelta',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
### Experiment 'dropout': ReLU MLP with Dropout(0.3) after each hidden Dense layer
# Dropout is applied to the Dense output, i.e. before the ReLU activation.
name = 'dropout'
model = Sequential([
    # Hidden layer 1: 784 inputs -> 500 units
    Dense(500, batch_input_shape=(None, 784)),
    Dropout(0.3),
    Activation('relu'),
    # Hidden layer 2: 50 units
    Dense(50),
    Dropout(0.3),
    Activation('relu'),
    # Output layer: softmax over the 10 digit classes
    Dense(10, activation='softmax'),
])
model.compile(
    optimizer='adadelta',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
### Experiment 'dropout_batch': ReLU MLP with Dropout(0.3) and batch-normalization
# Per hidden layer the order is Dense -> Dropout -> BatchNormalization -> ReLU.
name = 'dropout_batch'
model = Sequential([
    # Hidden layer 1: 784 inputs -> 500 units
    Dense(500, batch_input_shape=(None, 784)),
    Dropout(0.3),
    keras.layers.normalization.BatchNormalization(),
    Activation('relu'),
    # Hidden layer 2: 50 units
    Dense(50),
    Dropout(0.3),
    keras.layers.normalization.BatchNormalization(),
    Activation('relu'),
    # Output layer: softmax over the 10 digit classes
    Dense(10, activation='softmax'),
])
model.compile(
    optimizer='adadelta',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the current `model`
# (whichever experiment cell was executed last).
model.summary()

### Untrained model


In [None]:
# Reference loss for an untrained 10-class classifier: when every class is
# predicted with probability 0.1, the categorical cross-entropy is
# -log(0.1) ~= 2.30. The leading minus sign makes the value directly
# comparable with the (positive) loss reported by model.evaluate.
expected_untrained_loss = -np.log(0.1)
expected_untrained_loss

In [None]:
# Score the not-yet-trained model on the first 2000 training samples.
model.evaluate(X_train[:2000], Y_train[:2000])

## Training

In [None]:
# TensorBoard log directory of the current experiment. Kept identical to the
# path handed to the TensorBoard callback in the training cell
# (.../tb_full_mnist/<name>/) so that this variable points at the logs that
# are actually written.
log_dir = '/notebooks/tensorflow/path_to_fc_nets/tb_full_mnist/' + name + '/'

In [None]:
# TensorBoard callback: logs go to a per-experiment sub-directory named after
# `name`; the graph is written and histograms are recorded with histogram_freq=5.
tensorboard = keras.callbacks.TensorBoard(
    log_dir='/notebooks/tensorflow/path_to_fc_nets/tb_full_mnist/' + name + '/',
    histogram_freq=5,
    write_graph=True,
)

# Train on the training split and validate on the held-out split after each epoch.
# NOTE(review): the notebook text suggests 100 epochs, this cell trains for 1000 -
# confirm which one is intended.
history = model.fit(
    X_train,
    Y_train,
    batch_size=128,
    nb_epoch=1000,
    verbose=2,
    validation_data=(X_val, Y_val),
    callbacks=[tensorboard],
)

# Attention, if you run this in a docker container, setting verbose=1 sometimes kills the whole container 
# ERRO[0695] error getting events from daemon: EOF

# Sometimes also the TensorBoard callback kills the container