In [1]:
import tensorflow as tf

# Import MNIST data using the TensorFlow tutorial helper.
# The dataset files are read from /tmp/data/ and labels are requested
# in one-hot form (one_hot=True).
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [2]:
# --- optimisation settings ---
learning_rate = 0.001    # step size handed to the optimizer
training_epochs = 15     # number of passes over the training set
batch_size = 100         # examples per mini-batch
display_step = 1         # report the cost every `display_step` epochs

# --- network dimensions ---
n_hidden_1 = 256         # features in the 1st hidden layer
n_hidden_2 = 256         # features in the 2nd hidden layer
n_input = 28 * 28        # MNIST data input (28x28 image flattened to 784)
n_classes = 10           # MNIST total classes (digits 0-9)

# --- graph inputs ---
# The leading dimension is None so the number of rows fed in is not fixed.
x = tf.placeholder(tf.float32, shape=[None, n_input])
y = tf.placeholder(tf.float32, shape=[None, n_classes])

In [3]:
def multilayer_perceptron(x, weights, biases):
    """Build a perceptron with two ReLU hidden layers and a linear output.

    Args:
        x: input fed to the first hidden layer.
        weights: mapping with keys 'h1', 'h2' and 'out'.
        biases: mapping with keys 'b1', 'b2' and 'out'.

    Returns:
        The output layer (linear activation, i.e. no non-linearity applied).
    """
    activation = x

    # Each hidden layer is an affine transform followed by ReLU.
    for weight_key, bias_key in (('h1', 'b1'), ('h2', 'b2')):
        affine = tf.add(tf.matmul(activation, weights[weight_key]),
                        biases[bias_key])
        activation = tf.nn.relu(affine)

    # Output layer: affine transform only.
    return tf.matmul(activation, weights['out']) + biases['out']

In [4]:
# Trainable parameters of the network, all drawn from a standard normal
# distribution (tf.random_normal with its default arguments).
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# construct model: `pred` holds the un-normalised output-layer values (logits)
pred = multilayer_perceptron(x, weights, biases)

# define loss and optimizer
# The arguments are passed by keyword: TensorFlow >= 1.0 raises an error when
# softmax_cross_entropy_with_logits is called positionally, and keywords also
# remove any doubt about which tensor is the logits and which is the labels.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# initializing the variables
init = tf.global_variables_initializer()

## Session config

By default, TensorFlow takes all of the available GPU memory, which is a problem when you run it on a shared server. To avoid that, pass a `tf.ConfigProto` object as the `config` argument when you create the session.<br>
`tf.ConfigProto` accepts GPU options such as `allow_growth`, which makes TensorFlow allocate VRAM only as the program needs it.

In [5]:
# Session configuration: let the GPU memory allocation grow on demand.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(allow_growth=True))

sess = tf.Session(config=config)
sess.run(init)

# Number of mini-batches in one epoch; it does not change between epochs.
total_batch = mnist.train.num_examples // batch_size

for epoch in range(training_epochs):
    avg_cost = 0.

    for batch_idx in range(total_batch):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        feed = {x: batch_x, y: batch_y}
        # One optimisation step; also fetch the loss of this mini-batch.
        _, batch_cost = sess.run([optimizer, cost], feed_dict=feed)

        # Accumulate the mean loss over the epoch.
        avg_cost += batch_cost / total_batch
    if epoch % display_step == 0:
        print("Epoch: {0:04d} cost={1:.5f}".format(epoch+1, avg_cost))
print("Optimization Finished!")

# A prediction is correct when the arg-max of the model output matches the
# arg-max of the one-hot label.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# Accuracy is the mean of the 0/1 correctness indicators.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Report accuracy on the training split first, then on the test split.
for message, split in (("Train accuracy: {0}", mnist.train),
                       ("Test accuracy: {0}", mnist.test)):
    split_accuracy = sess.run(
        accuracy, feed_dict={x: split.images, y: split.labels})
    print(message.format(split_accuracy))


Epoch: 0001 cost=162.32304
Epoch: 0002 cost=43.73151
Epoch: 0003 cost=27.30590
Epoch: 0004 cost=18.99481
Epoch: 0005 cost=14.06471
Epoch: 0006 cost=10.33647
Epoch: 0007 cost=7.81675
Epoch: 0008 cost=5.79247
Epoch: 0009 cost=4.32427
Epoch: 0010 cost=3.30768
Epoch: 0011 cost=2.61041
Epoch: 0012 cost=1.83468
Epoch: 0013 cost=1.35407
Epoch: 0014 cost=1.32741
Epoch: 0015 cost=0.97335
Optimization Finished!
Train accuracy: 0.991163969039917
Test accuracy: 0.9474000930786133


## Result

The accuracy of logistic regression was 0.889.<br>
The test accuracy of the MLP is **0.947**.

With some hyper-parameter tuning (the learning rate, for example) and a deeper network, you can probably achieve a better result.<br>
Try it yourself!

In [6]:
# code in here....

## Overfit problem?

However, if you stack more and more layers, overfitting becomes a problem. That is one reason why it is hard to train a neural net.<br>
Fortunately, some techniques help to avoid overfitting, such as **dropout** or **good initialization**.
The code below shows how to use dropout and a different initializer in TensorFlow. You can apply the same methods to your own *deep MNIST* model if you like.

In [7]:
# tf.contrib holds contributed code (much of it may be merged into core
# TensorFlow some day).
from tensorflow.contrib.layers import variance_scaling_initializer, xavier_initializer

# Xavier and He weight initializers
xavier_init = xavier_initializer()
he_init = variance_scaling_initializer()

x = tf.placeholder(tf.float32, shape=[None, n_input])
y = tf.placeholder(tf.float32, shape=[None, n_classes])
# The dropout keep probability is a placeholder rather than a constant
# because it differs between phases: 0.5 while training, 1.0 while testing.
keep_prob = tf.placeholder(tf.float32, shape=None)

# tf.get_variable creates a Variable only when no Variable with that name
# (e.g. W1, W2, ...) exists yet; otherwise it raises an error, or returns the
# existing Variable when reuse=True is set.
# Because a notebook kernel keeps everything defined by earlier runs, running
# this cell a second time can therefore fail -- restart the kernel if it does.
weights = dict(
    h1=tf.get_variable("W1", shape=[n_input, n_hidden_1], initializer=he_init),
    h2=tf.get_variable("W2", shape=[n_hidden_1, n_hidden_2], initializer=he_init),
    out=tf.get_variable("W3", shape=[n_hidden_2, n_classes], initializer=he_init),
)
biases = dict(
    b1=tf.Variable(tf.random_normal(shape=[n_hidden_1])),
    b2=tf.Variable(tf.random_normal(shape=[n_hidden_2])),
    out=tf.Variable(tf.random_normal(shape=[n_classes])),
)

def multilayer_perceptron(x, weights, biases, dropout_keep_prob=None):
    """Build a two-hidden-layer perceptron with dropout after each hidden layer.

    Args:
        x: input fed to the first hidden layer.
        weights: mapping with keys 'h1', 'h2' and 'out'.
        biases: mapping with keys 'b1', 'b2' and 'out'.
        dropout_keep_prob: keep probability handed to tf.nn.dropout. When left
            as None, the notebook-level `keep_prob` placeholder is used, which
            is what the three-argument call has always relied on.

    Returns:
        The output layer (linear activation; dropout is NOT applied to it).
    """
    if dropout_keep_prob is None:
        # Backward-compatible default: fall back to the global placeholder.
        dropout_keep_prob = keep_prob

    # hidden layer with RELU activation and dropout
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    layer_1 = tf.nn.dropout(layer_1, dropout_keep_prob)

    # hidden layer with RELU activation and dropout
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    layer_2 = tf.nn.dropout(layer_2, dropout_keep_prob)

    # output layer with linear activation (no dropout on the output)
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer

In [8]:
# construct model (the dropout keep probability comes from the `keep_prob`
# placeholder, so it can be set per sess.run call)
pred = multilayer_perceptron(x, weights, biases)

# define loss and optimizer
# The arguments are passed by keyword: TensorFlow >= 1.0 raises an error when
# softmax_cross_entropy_with_logits is called positionally, and keywords also
# remove any doubt about which tensor is the logits and which is the labels.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# initializing the variables
init = tf.global_variables_initializer()

# session config: let the GPU memory allocation grow on demand
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))

sess = tf.Session(config=config)
sess.run(init)

# Number of mini-batches per epoch; constant, so computed once up front.
total_batch = int(mnist.train.num_examples/batch_size)

for epoch in range(training_epochs):
    avg_cost = 0.

    for i in range(total_batch):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Training step: half of the hidden activations are dropped.
        _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                      y: batch_y,
                                                      keep_prob: 0.5})

        # Accumulate the mean loss over the epoch.
        avg_cost += c / total_batch
    if epoch % display_step == 0:
        print("Epoch: {0:04d} cost={1:.5f}" .format(epoch+1, avg_cost))
print("Optimization Finished!")

# test model: a prediction is correct when the arg-max of the model output
# matches the arg-max of the one-hot label
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# calculate accuracy as the mean of the 0/1 correctness indicators
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Evaluation feeds keep_prob = 1.0 so that no units are dropped.
print("Train accuracy: {0}" 
      .format(sess.run(accuracy, feed_dict={x: mnist.train.images, y: mnist.train.labels, keep_prob: 1.0})))
print("Test accuracy: {0}" 
      .format(sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0})))


Epoch: 0001 cost=0.64609
Epoch: 0002 cost=0.27411
Epoch: 0003 cost=0.21550
Epoch: 0004 cost=0.18738
Epoch: 0005 cost=0.16653
Epoch: 0006 cost=0.14790
Epoch: 0007 cost=0.13530
Epoch: 0008 cost=0.12881
Epoch: 0009 cost=0.11582
Epoch: 0010 cost=0.11341
Epoch: 0011 cost=0.10958
Epoch: 0012 cost=0.10085
Epoch: 0013 cost=0.10075
Epoch: 0014 cost=0.09670
Epoch: 0015 cost=0.09417
Optimization Finished!
Train accuracy: 0.9910548329353333
Test accuracy: 0.9784001708030701


## Dropout works?

In [9]:
# code in here...