# Multi-Layer Perceptron
We introduce a 3-layer perceptron, i.e. a 3-layer fully connected neural network, once again using the MNIST data. We first write the model the way we have been doing so far. Later, we will show how to organize the functions cleanly into a Python `class`. It's time to get object oriented :)

In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
# Read the dataset from the 'MNIST_data' directory; one_hot=True requests
# one-hot label vectors (later cells take argmax over the label axis).
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
# Layer widths. The input and output sizes are read off the second axis of
# the training image and label arrays; both hidden layers are 256 units wide.
num_features, num_classes = mnist.train.images.shape[1], mnist.train.labels.shape[1]
num_hidden_1 = num_hidden_2 = 256

## Version 1

In [86]:
# Version 1 builds its graph from scratch: clear whatever earlier cells
# (or earlier runs of the cells below) added to the default graph.
tf.reset_default_graph()

In [87]:
with tf.name_scope('data'):
    # The leading dimension is None so that any batch size can be fed.
    image_shape = [None, num_features]
    label_shape = [None, num_classes]
    X = tf.placeholder(dtype=tf.float32, shape=image_shape, name='Input-Images')
    Y = tf.placeholder(dtype=tf.float32, shape=label_shape, name='Output-Labels')

In [88]:
def _fc_params(scope, n_in, n_out):
    """Create the weight matrix and bias vector of one fully connected layer.

    Both variables are created under the name scope `scope` and initialised
    from `tf.random_normal`. Returns the pair `(weights, bias)`.
    """
    with tf.name_scope(scope):
        weights = tf.Variable(tf.random_normal([n_in, n_out]), name='weights')
        bias = tf.Variable(tf.random_normal([n_out]), name='bias')
    return weights, bias


# Parameters of the two hidden layers and of the output layer.
W1, b1 = _fc_params('fc1', num_features, num_hidden_1)
W2, b2 = _fc_params('fc2', num_hidden_1, num_hidden_2)
Wout, bout = _fc_params('out', num_hidden_2, num_classes)

In [89]:
with tf.name_scope('multilayer_perceptron'):
    # Two ReLU hidden layers followed by a purely affine output layer.
    pre_activation_1 = X @ W1 + b1
    H1 = tf.nn.relu(pre_activation_1, name='H1')

    pre_activation_2 = H1 @ W2 + b2
    H2 = tf.nn.relu(pre_activation_2, name='H2')

    # No activation here: `logits` are the raw class scores.
    output_projection = H2 @ Wout
    logits = tf.add(output_projection, bout, name='out')

In [90]:
with tf.name_scope('loss'):
    # Softmax cross-entropy per example, averaged over the batch.
    per_example_xent = tf.nn.softmax_cross_entropy_with_logits(
        labels=Y, logits=logits)
    loss = tf.reduce_mean(per_example_xent, name='loss')

    # A prediction counts as correct when the arg-max of the logits
    # coincides with the arg-max of the label vector.
    predicted_class = tf.argmax(logits, 1)
    true_class = tf.argmax(Y, 1)
    correct_prediction = tf.equal(predicted_class, true_class)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.name_scope('optimizer'):
    # Plain gradient descent on the mean cross-entropy.
    learning_rate = 0.01
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    update = optimizer.minimize(loss)

with tf.name_scope('summaries'):
    # Log the loss both as a scalar and as a histogram, then merge every
    # summary registered so far into a single op.
    tf.summary.scalar('loss', loss)
    tf.summary.histogram('histogram-loss', loss)
    summary_op = tf.summary.merge_all()

In [91]:
# Train
num_epochs  = 25
batch_size  = 100

with tf.Session() as sess:
    writer = tf.summary.FileWriter('log/multilayer_perceptron1', sess.graph)
    # try/finally so the event file is flushed and closed even if training
    # is interrupted or raises part-way through.
    try:
        sess.run(tf.global_variables_initializer())
        # Number of full mini-batches per epoch (integer division).
        total_batch = mnist.train.num_examples // batch_size
        for epoch in range(num_epochs):
            average_cost = 0
            for batch in range(total_batch):
                batch_X, batch_Y = mnist.train.next_batch(batch_size)
                # Fetch the update, the loss and the summary in ONE run call:
                # a separate run for the summary would repeat the forward
                # pass and log a loss different from the `c` averaged below.
                _, c, summary = sess.run([update, loss, summary_op],
                                         feed_dict={X: batch_X,
                                                    Y: batch_Y})
                average_cost += c / total_batch
                # Running batch index across epochs, used as the x-axis
                # of the logged summaries.
                global_step = epoch*total_batch + batch
                writer.add_summary(summary, global_step=global_step)
            print("Epoch:",epoch,"Cost:",average_cost)

        # Evaluate on the held-out test split using the current session.
        print("Test Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels}))
    finally:
        writer.close()

Epoch: 0 Cost: 61.8643349510972
Epoch: 1 Cost: 12.187720913453521
Epoch: 2 Cost: 7.424538441219112
Epoch: 3 Cost: 5.4271530052884
Epoch: 4 Cost: 3.982053814406861
Epoch: 5 Cost: 3.151202641244398
Epoch: 6 Cost: 2.6753018941547957
Epoch: 7 Cost: 2.1888300399692033
Epoch: 8 Cost: 1.813328645066876
Epoch: 9 Cost: 1.4697528642732964
Epoch: 10 Cost: 1.3372768463187745
Epoch: 11 Cost: 1.2398637867008184
Epoch: 12 Cost: 0.9818570881490868
Epoch: 13 Cost: 0.8850345878592402
Epoch: 14 Cost: 0.7661460467218224
Epoch: 15 Cost: 0.7395797746256713
Epoch: 16 Cost: 0.5951993956270213
Epoch: 17 Cost: 0.5810083896863325
Epoch: 18 Cost: 0.4626998142226008
Epoch: 19 Cost: 0.44251203135753675
Epoch: 20 Cost: 0.4166008296509719
Epoch: 21 Cost: 0.35637803889022446
Epoch: 22 Cost: 0.30885852929234
Epoch: 23 Cost: 0.29982214903542986
Epoch: 24 Cost: 0.2562230518264089
Test Accuracy: 0.9276


## Version 2

In [92]:
# Version 2 rebuilds the model from scratch: clear whatever the previous
# cells added to the default graph before defining it again.
tf.reset_default_graph()

In [93]:
with tf.name_scope('data'):
    # The leading dimension is None so that any batch size can be fed.
    image_shape = [None, num_features]
    label_shape = [None, num_classes]
    X = tf.placeholder(dtype=tf.float32, shape=image_shape, name='Input-Images')
    Y = tf.placeholder(dtype=tf.float32, shape=label_shape, name='Output-Labels')

In [94]:
with tf.name_scope('multilayer_perceptron'):
    # First hidden layer: fully connected, ReLU, num_hidden_1 units.
    layer_1 = tf.layers.dense(
        inputs=X, units=num_hidden_1, activation=tf.nn.relu, name='fc1')
    # Second hidden layer: fully connected, ReLU, num_hidden_2 units.
    layer_2 = tf.layers.dense(
        inputs=layer_1, units=num_hidden_2, activation=tf.nn.relu, name='fc2')
    # Output layer: one unit per class and no activation (raw logits).
    logits = tf.layers.dense(inputs=layer_2, units=num_classes, name='out')

In [95]:
with tf.name_scope('loss'):
    # Softmax cross-entropy per example, averaged over the batch.
    per_example_xent = tf.nn.softmax_cross_entropy_with_logits(
        labels=Y, logits=logits)
    loss = tf.reduce_mean(per_example_xent, name='loss')

    # A prediction counts as correct when the arg-max of the logits
    # coincides with the arg-max of the label vector.
    predicted_class = tf.argmax(logits, 1)
    true_class = tf.argmax(Y, 1)
    correct_prediction = tf.equal(predicted_class, true_class)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.name_scope('optimizer'):
    # Plain gradient descent on the mean cross-entropy.
    learning_rate = 0.01
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    update = optimizer.minimize(loss)

with tf.name_scope('summaries'):
    # Log the loss both as a scalar and as a histogram, then merge every
    # summary registered so far into a single op.
    tf.summary.scalar('loss', loss)
    tf.summary.histogram('histogram-loss', loss)
    summary_op = tf.summary.merge_all()

In [72]:
# Train
num_epochs  = 25
batch_size  = 100

with tf.Session() as sess:
    writer = tf.summary.FileWriter('log/multilayer_perceptron2', sess.graph)
    # try/finally so the event file is flushed and closed even if training
    # is interrupted or raises part-way through.
    try:
        sess.run(tf.global_variables_initializer())
        # Number of full mini-batches per epoch (integer division).
        total_batch = mnist.train.num_examples // batch_size
        for epoch in range(num_epochs):
            average_cost = 0
            for batch in range(total_batch):
                batch_X, batch_Y = mnist.train.next_batch(batch_size)
                # Fetch the update, the loss and the summary in ONE run call:
                # a separate run for the summary would repeat the forward
                # pass and log a loss different from the `c` averaged below.
                _, c, summary = sess.run([update, loss, summary_op],
                                         feed_dict={X: batch_X,
                                                    Y: batch_Y})
                average_cost += c / total_batch
                # Running batch index across epochs, used as the x-axis
                # of the logged summaries.
                global_step = epoch*total_batch + batch
                writer.add_summary(summary, global_step=global_step)
            print("Epoch:",epoch,"Cost:",average_cost)

        # Evaluate on the held-out test split using the current session.
        print("Test Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels}))
    finally:
        writer.close()

Epoch: 0 Cost: 1.1429620449651363
Epoch: 1 Cost: 0.44573915083299925
Epoch: 2 Cost: 0.35664164621721617
Epoch: 3 Cost: 0.31739128164269714
Epoch: 4 Cost: 0.2910888133265754
Epoch: 5 Cost: 0.2713912793858482
Epoch: 6 Cost: 0.25498610958456974
Epoch: 7 Cost: 0.2407954240658066
Epoch: 8 Cost: 0.22825071882117884
Epoch: 9 Cost: 0.21688424812121837
Epoch: 10 Cost: 0.20677164849909882
Epoch: 11 Cost: 0.19733745005320408
Epoch: 12 Cost: 0.1889122671769422
Epoch: 13 Cost: 0.18079517665234468
Epoch: 14 Cost: 0.17358795196495266
Epoch: 15 Cost: 0.16686928619037983
Epoch: 16 Cost: 0.1604892960935832
Epoch: 17 Cost: 0.15470593801953564
Epoch: 18 Cost: 0.14925623104653593
Epoch: 19 Cost: 0.14406697281382297
Epoch: 20 Cost: 0.1391177260740237
Epoch: 21 Cost: 0.1345416298576378
Epoch: 22 Cost: 0.13013896662741917
Epoch: 23 Cost: 0.12604238837618725
Epoch: 24 Cost: 0.12211634944108385
Test Accuracy: 0.9623


## Version 3

In [3]:
# Clear whatever the previous versions left in the default graph before
# defining the Estimator-based model.
tf.reset_default_graph()

In [4]:
def multilayer_perceptron(X_dict):
    """Build the two-hidden-layer network and return its logits.

    Args:
        X_dict: feature dictionary as passed by a TF Estimator; the input
            batch is read from its 'images' entry.

    Returns:
        The output of the final dense layer (`num_classes` units, no
        activation).
    """
    hidden = X_dict['images']
    # Two fully connected ReLU layers, named 'fc1' and 'fc2'.
    for layer_name, width in (('fc1', num_hidden_1), ('fc2', num_hidden_2)):
        hidden = tf.layers.dense(
            inputs=hidden, units=width, activation=tf.nn.relu, name=layer_name)
    # Linear read-out with one unit per class.
    return tf.layers.dense(inputs=hidden, units=num_classes, name='out')

In [5]:
def model_fn(features, labels, mode):
    """Model function handed to `tf.estimator.Estimator`.

    Args:
        features: feature dictionary; forwarded to `multilayer_perceptron`,
            which reads its 'images' entry.
        labels: label tensor with one score per class for each example
            (arg-max over axis 1 gives the class id). Not used in PREDICT mode.
        mode: one of the `tf.estimator.ModeKeys` values.

    Returns:
        A `tf.estimator.EstimatorSpec`. In PREDICT mode it carries only the
        predicted class ids; otherwise it also carries the loss, the training
        op and an 'accuracy' evaluation metric.
    """
    logits = multilayer_perceptron(features)

    # Predicted class id = index of the largest logit.
    pred_classes = tf.argmax(logits, axis=1)

    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)

    with tf.name_scope('loss'):
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                                 logits=logits, labels=labels),name='loss')
        # tf.metrics.accuracy takes (labels, predictions); pass them by
        # keyword so the two cannot be swapped, and reuse pred_classes
        # instead of computing the arg-max of the logits a second time.
        accuracy = tf.metrics.accuracy(labels=tf.argmax(labels, 1),
                                       predictions=pred_classes)

    with tf.name_scope('optimizer'):
        learning_rate = 0.01
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        # Passing the global step lets minimize() increment it on every update.
        update = optimizer.minimize(loss,
                                    global_step=tf.train.get_global_step())

    with tf.name_scope('summaries'):
        # Only register the summaries here; no merged summary op is built
        # because nothing in this function consumes one.
        tf.summary.scalar('loss', loss)
        tf.summary.histogram('histogram-loss', loss)

    # TF Estimators require an EstimatorSpec that specifies the different
    # ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=pred_classes,
        loss=loss,
        train_op=update,
        eval_metric_ops={'accuracy': accuracy})

    return estim_specs

In [8]:
# Training configuration.
num_steps = 5000
batch_size = 128
display_step = 500

# Wrap the model function in an Estimator.
model = tf.estimator.Estimator(model_fn=model_fn)

# Training input: shuffled mini-batches drawn from the training split.
# num_epochs is None here; the run length is given by `steps` below.
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.train.images},
    y=mnist.train.labels,
    batch_size=batch_size,
    num_epochs=None,
    shuffle=True,
)
model.train(input_fn=input_fn, steps=num_steps)

# Evaluation input: the test split, in order, without shuffling.
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.test.images},
    y=mnist.test.labels,
    batch_size=batch_size,
    shuffle=False,
)
# `evaluate` returns a dict of metrics, including the 'accuracy' entry
# declared in model_fn's eval_metric_ops.
e = model.evaluate(input_fn=input_fn)

print("Testing Accuracy:", e['accuracy'])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/lj/p0jqksf54pldc98grzy8m6p00000gn/T/tmpv7e4ojg3', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x181c97af98>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/lj/p0jqksf54pldc98grzy8m6p00000gn/T/tmpv7e4ojg3/model.ckpt.
INFO:tensorflow:loss = 2.3240983, step = 1
INFO:tensorflow:global_step/sec: 239.482
INFO:tensorflow:loss = 1.7728766, step = 101 (0.419 sec)
INFO:tensorflow:global_step/sec: 243.212
INFO:tensorflow:loss = 1.2416974, 

## Exercise
Improve the above network, even if only by a little.
For example, you can try a CNN, dropout, a different activation function, tuning the learning rate, etc.