# Multilayer perceptron in TensorFlow (plain vanilla)

In this notebook we build a small multilayer perceptron with two hidden layers, having 500 and 50 neurons respectively, for classifying the MNIST database of handwritten digits.

For the data format, see the other notebooks.

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as imgplot
import numpy as np
import cPickle as pickle
import gzip
import time
import tensorflow as tf
tf.set_random_seed(1)

# Load the MNIST subset (images X, integer labels y) from the gzipped pickle.
# NOTE: pickle.load can execute arbitrary code -- only open files from a trusted source.
with gzip.open('../../lasagne/mnist_4000.pkl.gz', 'rb') as f:
    (X,y) = pickle.load(f)
PIXELS = len(X[0,0,0,:])  # length of the last image axis (28 in the output below)

print(X.shape, y.shape, PIXELS) #As read

# We need to reshape for the MLP: one flat row of 784 values per image.
# The number of samples is taken from the data instead of being hard-coded,
# while 784 stays explicit so that images of another size fail here rather
# than later when they are fed into the network.
X = X.reshape([X.shape[0], 784])
np.shape(X)

((4000, 1, 28, 28), (4000,), 28)


(4000, 784)

In [2]:
# Taken from http://stackoverflow.com/questions/29831489/numpy-1-hot-array
def convertToOneHot(vector, num_classes=None):
    """Convert a 1-D sequence of integer class labels into a one-hot matrix.

    Parameters
    ----------
    vector : array-like of int
        Class labels; entry ``i`` selects the column set to 1 in row ``i``.
    num_classes : int, optional
        Number of columns of the result. When omitted (``None``) it is
        inferred as ``max(vector) + 1`` (0 for an empty ``vector``).

    Returns
    -------
    numpy.ndarray
        ``int32`` array of shape ``(len(vector), num_classes)`` holding a
        single 1 per row.

    Raises
    ------
    IndexError
        If a label is not a valid column index for ``num_classes`` columns.
    """
    vector = np.asarray(vector)
    if num_classes is None:
        # Previously None was handed straight to np.zeros, which raised a TypeError.
        num_classes = int(vector.max()) + 1 if vector.size else 0
    result = np.zeros((len(vector), num_classes), dtype='int32')
    result[np.arange(len(vector)), vector] = 1
    return result

## Building the network
Build a network with the following architecture.

### Definition of the network (architecture)

* An input layer with the following two dimensions:
    * 0: Batch size, not yet known, hence `None`
    * 1: 784 = 28*28 pixels
* A hidden layer with 500 units
* A second hidden layer with 50 units
* An output layer with 10 units

### Hints
* The weights can be specified and initialized as
```{python}
    w_1 = tf.Variable(tf.random_normal([784, 500]))
```
* Use ```tf.nn.sigmoid``` activations for the hidden layers

In [3]:
# Build the computation graph of the MLP: 784 inputs -> 500 -> 50 -> 10 outputs.
# Start from an empty default graph and fix the graph-level seed before any
# random op is created.
# NOTE(review): the values drawn by the random initializers presumably depend
# on the order in which ops are added to the graph -- avoid reordering these
# statements if the forward-pass output shown further down must be reproduced
# (confirm against the tf.set_random_seed documentation).
tf.reset_default_graph()
tf.set_random_seed(1)
# Placeholders for a batch of flattened images and their one-hot labels;
# the first dimension is None because the batch size is not fixed.
x = tf.placeholder(tf.float32, shape=[None, 784], name='x_data')
y_true = tf.placeholder(tf.float32, shape=[None, 10], name='y_data')

# From Input to first hidden layer
# Weights are drawn with stddev=0.1; the biases pass no stddev, so they use
# the default of tf.random_normal.
# NOTE: the hidden layers use ReLU here although the hint in the exercise
# text suggests tf.nn.sigmoid.
w_1 = tf.Variable(tf.random_normal([784, 500], stddev=0.1))
b_1 = tf.Variable(tf.random_normal([500]))
h_1_in = tf.add(tf.matmul(x, w_1), b_1)
h_1_out = tf.nn.relu(h_1_in)

# From first hidden layer to second hidden layer
# <--- Your code here --->
w_2 = tf.Variable(tf.random_normal([500, 50], stddev=0.1))
b_2 = tf.Variable(tf.random_normal([50]))
h_2_in = tf.add(tf.matmul(h_1_out, w_2), b_2)
h_2_out = tf.nn.relu(h_2_in)
# <--- End of your code here --->

# From second hidden layer to output
# h_3_in holds the 10 pre-softmax activations, one per class.
w_3 = tf.Variable(tf.random_normal([50, 10], stddev=0.1))
b_3 = tf.Variable(tf.random_normal([10]))
h_3_in = tf.add(tf.matmul(h_2_out, w_3), b_3)

# Output is softmax
out = tf.nn.softmax(h_3_in)
# Op that initializes all variables defined above; it has to be run in a
# session before `out` can be evaluated.
init_op = tf.global_variables_initializer() 

### Store the graph and visualize it in tensorflow

In [4]:
# Store the current default graph in the log directory and close the writer again.
graph_log_dir = "/tmp/dumm/mlp_tensorflow_solution/" #<--- Where to store
graph_writer = tf.summary.FileWriter(graph_log_dir, tf.get_default_graph())
graph_writer.close()

### Doing a forward pass of the untrained network
Since we fixed the random seed, you should get a result like:

In [5]:
# Untrained forward pass: initialize the variables, then evaluate the softmax
# output for the first two images only.
with tf.Session() as session:
    session.run(init_op)
    first_two_images = X[0:2]
    res_val = session.run(out, feed_dict={x: first_two_images})
res_val

array([[  2.89549232e-01,   6.02805465e-02,   5.36485866e-04,
          3.27582695e-02,   3.45717650e-04,   2.12644130e-01,
          1.91141307e-01,   1.51388312e-03,   1.65578678e-01,
          4.56517488e-02],
       [  4.15045202e-01,   8.72126669e-02,   2.26489659e-02,
          7.13404343e-02,   6.83235144e-03,   5.74614033e-02,
          7.40540475e-02,   1.08420523e-02,   2.10118622e-01,
          4.44443673e-02]], dtype=float32)

### Train the model

In [6]:
# Cross-entropy is computed from the logits (h_3_in) instead of as
# -sum(y_true * log(out)): tf.log(out) becomes -inf as soon as a softmax
# output underflows to 0, and 0 * -inf then turns the loss (and the gradients)
# into NaN.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=h_3_in))
#train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
train_op = tf.train.AdagradOptimizer(0.1).minimize(loss)
# Build the initializer only now, after minimize(), so that any variables the
# optimizer added to the graph are initialized as well.
init_op = tf.global_variables_initializer()

# Split used in this cell: samples 0..2399 for training, 2400..2999 for validation.
X_val = X[2400:3000]
y_val_onehot = convertToOneHot(y[2400:3000], 10)  # constant, so encode it only once

np.random.seed(1)  # make the minibatch sampling reproducible across re-runs
with tf.Session() as sess:
    sess.run(init_op)
    for i in range(400):
        idx = np.random.permutation(2400)[0:128] #Easy minibatch of size 128
        loss_, _ = sess.run((loss, train_op,), feed_dict={x:X[idx], y_true:convertToOneHot(y[idx], 10)})
        if (i % 50 == 0):
            loss_v = sess.run(loss, feed_dict={x:X_val, y_true:y_val_onehot})
            print("{} loss training {} validation {}".format(i, loss_, loss_v))
    # Get the results for the validation results (from 2400:3000)
    # Your code here
    # Must run inside the session: the trained weights only live there.
    res_val = sess.run((out), feed_dict={x:X_val})

0 loss training 3.54543757439 validation 4.35267066956
50 loss training 0.20824238658 validation 0.429773360491
100 loss training 0.0884831100702 validation 0.465079754591
150 loss training 0.0224867779762 validation 0.452041685581
200 loss training 0.0356599390507 validation 0.488810360432
250 loss training 0.0114144384861 validation 0.497321069241
300 loss training 0.00527211278677 validation 0.508672058582
350 loss training 0.00781277753413 validation 0.529542922974


In [7]:
# and estimate the performance on the validation set
# Your code here
# Accuracy = fraction of validation samples whose most probable class equals
# the true label. np.mean divides by the actual number of samples, so no
# hard-coded count has to be kept in sync with the slice.
np.mean(np.argmax(res_val, axis = 1) == y[2400:3000])

0.89500000000000002