# Multilayer perceptron in TensorFlow (plain vanilla)

In this notebook we build a small multilayer perceptron with two hidden layers of 500 and 50 neurons, respectively, to classify the MNIST database of handwritten digits.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as imgplot
import numpy as np
import time
import tensorflow as tf
import pandas as pd
# Fix the TensorFlow and NumPy random seeds for reproducibility.
tf.set_random_seed(1)
np.random.seed(1)
import sys
# Last expression of the cell: display the TensorFlow and Python versions in use.
tf.__version__, sys.version_info

In [None]:
# To be compatible with python3 and python2
try:
    import cPickle as pickle
except ImportError:
    import pickle
import gzip

# NOTE: unpickling can execute arbitrary code -- only load this archive from a trusted source.
with gzip.open('../data/mnist_4000.pkl.gz', 'rb') as f:
    if sys.version_info.major > 2:
        # Presumably the archive was pickled under Python 2; 'latin1' lets
        # Python 3 decode its byte strings.
        (X, y) = pickle.load(f, encoding='latin1')
    else:
        (X, y) = pickle.load(f)
# Side length of one image, taken from the last axis of the loaded array.
PIXELS = len(X[0,0,0,:])

print(X.shape, y.shape, PIXELS) #As read
# The MLP needs one flat feature vector per image. Derive the number of samples
# and features from the data instead of hard-coding 4000 x 784.
X = X.reshape([X.shape[0], -1])
np.shape(X)

In [None]:
# Taken from http://stackoverflow.com/questions/29831489/numpy-1-hot-array
def convertToOneHot(vector, num_classes=None):
    """Convert a vector of integer class labels into a one-hot matrix.

    Args:
        vector: 1-D sequence or array of non-negative integer class labels.
        num_classes: Number of columns of the result. If None, it is inferred
            as ``max(vector) + 1`` (0 for an empty vector).

    Returns:
        An int32 array of shape ``(len(vector), num_classes)`` that holds a 1
        in column ``vector[i]`` of row ``i`` and 0 everywhere else.

    Raises:
        IndexError: if a label is >= num_classes (raised by NumPy indexing).
    """
    if num_classes is None:
        # The default used to be passed straight to np.zeros and crashed;
        # infer the width from the largest label instead.
        labels = np.asarray(vector)
        num_classes = int(labels.max()) + 1 if labels.size else 0
    result = np.zeros((len(vector), num_classes), dtype='int32')
    # Fancy indexing: for every row i set column vector[i] to 1.
    result[np.arange(len(vector)), vector] = 1
    return result

## Building the network
Build a network with the following architecture.

### Definition of the network (architecture)

* An Input Layer with the following two dimensions: 
    * 0: Batch size, not yet known, hence `None`
    * 1: 784 = 28*28 pixels
* A hidden layer with 500 units
* A second hidden layer with 50 units
* An output layer with 10 units

### Hints
* The weights can be specified and initialized as
```{python}
    w_1 = tf.Variable(tf.random_normal([784, 500]))
```
* Use ```tf.nn.sigmoid``` activations for the hidden layers

In [None]:
# Start from an empty default graph and re-seed it, since the reset discards
# the previous graph together with its seed.
tf.reset_default_graph()
tf.set_random_seed(1)
# Inputs: a batch of flattened 784-pixel images and the matching 10-column
# label matrix; the batch dimension is left open (None).
x = tf.placeholder(tf.float32, shape=[None, 784], name='x_data')
y_true = tf.placeholder(tf.float32, shape=[None, 10], name='y_data')

# From Input to first hidden layer
# Affine map 784 -> 500 followed by a sigmoid activation.
w_1 = tf.Variable(tf.random_normal([784, 500], stddev=0.1))
b_1 = tf.Variable(tf.random_normal([500]))
h_1_in = tf.add(tf.matmul(x, w_1), b_1)
h_1_out = tf.nn.sigmoid(h_1_in)

# From first hidden layer to second hidden layer
# Exercise: replace the `...` placeholders. h_1_out has 500 columns and w_3
# below expects 50, so w_2 should be [500, 50] and b_2 should be [50].
# <--- Your code here --->
w_2 = ...
b_2 = ...
h_2_in = ...
h_2_out = ...
# <--- End of your code here --->

# From second hidden layer to output
# Affine map 50 -> 10; h_3_in holds the unnormalised class scores (logits).
w_3 = tf.Variable(tf.random_normal([50, 10], stddev=0.1))
b_3 = tf.Variable(tf.random_normal([10]))
h_3_in = tf.add(tf.matmul(h_2_out, w_3), b_3)

# Output is softmax
out = tf.nn.softmax(h_3_in)
# Op that initialises all variables defined so far; run it once per session.
init_op = tf.global_variables_initializer() 

### Store the graph and visualize it in TensorBoard

In [None]:
# Write the default graph to an event file for TensorBoard, then close the
# writer so the file is flushed to disk immediately instead of at the
# writer's next periodic flush.
graph_writer = tf.summary.FileWriter("tmp/fcn_minst_tensorflow/", tf.get_default_graph()) #<--- Where to store
graph_writer.close()

### Doing a forward pass of the untrained network

In [None]:
# Push the first two images through the freshly initialised (untrained)
# network and show the resulting softmax outputs.
session_feed = {x: X[:2]}
with tf.Session() as sess:
    sess.run(init_op)
    res_val = sess.run(out, feed_dict=session_feed)
res_val

### Train the model

In [None]:
# Cross-entropy between the one-hot labels and the softmax output, summed over
# the classes and averaged over the batch.
# NOTE: tf.log(out) becomes -inf/NaN if a softmax output underflows to 0;
# tf.nn.softmax_cross_entropy_with_logits on h_3_in is the numerically stable
# alternative should the loss turn into NaN.
loss = tf.reduce_mean(-tf.reduce_sum(y_true * tf.log(out), reduction_indices=[1]))
# <---- Your code here (fix the optimizer)
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
# Re-create the init op after building the optimizer so that any variables the
# optimizer adds are initialised as well.
init_op = tf.global_variables_initializer()
vals = []  # one [tr_loss, tr_acc, val_loss, val_acc] row per 100 steps
with tf.Session() as sess:
    sess.run(init_op)
    for i in range(4000):
        # Samples 0:2400 form the training set.
        idx = np.random.permutation(2400)[0:128] #Easy minibatch of size 128
        loss_, _, res_ = sess.run((loss, train_op,out), feed_dict={x:X[idx], y_true:convertToOneHot(y[idx], 10)})
        if (i % 100 == 0):
            # Accuracy on the current minibatch only.
            acc = np.average(np.argmax(res_, axis = 1) == y[idx])
            # <---------   Your code here -----------------
            # Calculate the loss and the predictions for the validation set
            # (samples 2400:3000). Replace both `...` placeholders: `loss`
            # depends on y_true, so the one-hot labels must be fed as well.
            loss_v, res_val = sess.run([loss, out], feed_dict={x:..., y_true:...})
            acc_v = np.average(np.argmax(res_val, axis = 1) == y[2400:3000])
            # <---------  End of your code here
            vals.append([loss_, acc, loss_v, acc_v])
            print("{} Training: loss {} acc {} Validation: loss {} acc {}".format(i, loss_, acc, loss_v, acc_v))

In [None]:
# Collect the logged metrics (one row per logging step) in a DataFrame.
vals_df = pd.DataFrame(vals)
vals_df.columns = ['tr_loss', 'tr_acc', 'val_loss', 'val_acc']
# Rows are 100 steps apart, each step uses 128 samples, and 2400 training
# samples make up one epoch.
vals_df['epochs'] = np.arange(len(vals_df)) * 100.0 * 128. / 2400

# One figure per metric: training versus validation curve over the epochs.
for metric, axis_label, legend_pos in (("acc", "Acc", 'lower right'),
                                       ("loss", "Loss", 'upper right')):
    curves = ["tr_" + metric, "val_" + metric]
    plt.plot(vals_df['epochs'], vals_df[curves])
    plt.legend(curves, loc=legend_pos)
    plt.ylabel(axis_label)
    plt.xlabel("Epochs")
    plt.title("model " + metric)
    plt.show()

In [None]:
# Start TensorBoard on the directory the graph was written to.
# NOTE(review): as a foreground shell command this presumably keeps the cell
# busy until it is interrupted -- confirm.
!tensorboard --logdir=tmp/fcn_minst_tensorflow/