In [1]:
# Step 1: load the MNIST data set from the "MNIST_data" directory.
# one_hot=True requests one-hot encoded labels.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
# Start a TensorFlow InteractiveSession.
# TensorFlow relies on a highly efficient C++ backend to do its computation; the connection
# to this backend is called a session.
# The common usage for TensorFlow programs is to first create a graph and then launch it in a session.
import tensorflow as tf
sess = tf.InteractiveSession()

In [3]:
# Model inputs (placeholders are fed through feed_dict at run time).
# x:  batch of flattened images, 784 values each (reshaped to 28x28x1 further down).
# y_: batch of target labels as one-hot 10-dimensional vectors.
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10])

In [4]:
def weight_variable(shape):
    """Return a tf.Variable of the given shape for use as a weight tensor.

    The initial values are drawn with tf.truncated_normal using a standard
    deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a tf.Variable of the given shape for use as a bias tensor.

    Every element is initialised to the constant 0.1.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

In [7]:
# Convolution and pooling
# TF gives us flexibility in convolution and pooling operations, including how the boundaries
# are handled and what the stride size is.
# This notebook uses a stride of one and 'VALID' padding, i.e. NO zero padding is added,
# so the spatial size of the output is smaller than that of the input.
def conv2d(x, W):
    """Convolve `x` with the filter tensor `W` using stride 1 and 'VALID' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='VALID')

def max_pool_2x2(x):
    """Apply max pooling over 2x2 windows with a stride of 2 and 'VALID' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='VALID')

In [8]:
# First convolutional layer: convolution + ReLU, followed by max pooling.
# The filter tensor [5, 5, 1, 32] computes 32 features for each 5x5 patch of the
# single-channel input; there is one bias per output channel.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

# Turn each flat 784-value row of x back into a 28x28 image with one channel.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# Convolve, add the bias, apply the ReLU function, then pool.
conv1_pre_activation = conv2d(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_pre_activation)
h_pool1 = max_pool_2x2(h_conv1)

In [9]:
# Second convolutional layer: 64 features for each 5x5 patch,
# taking the 32 channels produced by the first layer as input.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

# Same pipeline as the first layer: convolve, add bias, ReLU, max pool.
conv2_pre_activation = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_pre_activation)
h_pool2 = max_pool_2x2(h_conv2)

In [22]:
# First densely connected layer (1024 units).
# With 'VALID' padding the spatial size goes 28 -> 24 -> 12 -> 8 -> 4 through the two
# conv/pool stages, so each image is flattened into a vector of 4*4*64 values.
W_fc1 = weight_variable([4*4*64, 1024])
b_fc1 = bias_variable([1024])

# Flatten the pooled feature maps into a batch of vectors, then apply the affine map + ReLU.
h_pool2_flat = tf.reshape(h_pool2, [-1, 4*4*64])
fc1_pre_activation = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_pre_activation)

In [23]:
# Dropout on the first dense layer, to reduce overfitting.
# keep_prob is a placeholder for the probability that a neuron's output is kept;
# it is fed at run time so dropout can be switched off (keep_prob = 1.0) for evaluation.
keep_prob = tf.placeholder(dtype=tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

In [24]:
# Second densely connected layer: maps the 1024 (dropped-out) activations of the
# first dense layer to 512 units, followed by ReLU.
W_fc2 = weight_variable([1024, 512])
b_fc2 = bias_variable([512])

fc2_pre_activation = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
h_fc2 = tf.nn.relu(fc2_pre_activation)

In [25]:
# Dropout on the second dense layer, applied before the readout layer to reduce overfitting.
# Reuses the keep_prob placeholder defined for the first dropout, so both dropout
# stages share the same keep probability.
h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob=keep_prob)

In [26]:
# Readout layer: a linear map from the 512 hidden units to 10 outputs,
# like the output layer of a softmax regression. No softmax is applied here;
# y_conv holds the raw scores, which are passed to the softmax cross-entropy loss.
W_fc3 = weight_variable([512, 10])
b_fc3 = bias_variable([10])

readout_product = tf.matmul(h_fc2_drop, W_fc3)
y_conv = readout_product + b_fc3

In [28]:
# Loss: mean softmax cross-entropy between the logits (y_conv) and the one-hot labels (y_).
# The arguments are passed by keyword on purpose: TensorFlow 1.0+ rejects positional
# arguments to softmax_cross_entropy_with_logits, and the positional order differs
# between releases, so keywords are the only form that is unambiguous.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))

# Plain gradient descent; an alternative is tf.train.AdamOptimizer(1e-4).minimize(cross_entropy).
train_step = tf.train.GradientDescentOptimizer(learning_rate=0.02).minimize(cross_entropy)

# A prediction is correct when the index of the largest logit matches the index
# of the 1 in the one-hot label; accuracy is the mean of those matches.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess.run(tf.global_variables_initializer())

for i in range(6000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        # Report accuracy on the current batch (before training on it) with dropout disabled.
        train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("Step %d, training accuracy %g" % (i, train_accuracy))
    # Training step: dropout keeps each activation with probability 0.5.
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# Final result: accuracy on the whole test set, with dropout disabled.
print("test accuracy %g" % accuracy.eval(feed_dict={x: mnist.test.images, 
                                                    y_: mnist.test.labels, 
                                                    keep_prob: 1.0}))

Step 0, training accuracy 0.18
Step 100, training accuracy 0.76
Step 200, training accuracy 0.96
Step 300, training accuracy 0.9
Step 400, training accuracy 0.94
Step 500, training accuracy 0.98
Step 600, training accuracy 0.94
Step 700, training accuracy 0.94
Step 800, training accuracy 0.96
Step 900, training accuracy 0.96
Step 1000, training accuracy 0.96
Step 1100, training accuracy 0.96
Step 1200, training accuracy 1
Step 1300, training accuracy 0.96
Step 1400, training accuracy 0.96
Step 1500, training accuracy 0.98
Step 1600, training accuracy 0.92
Step 1700, training accuracy 0.98
Step 1800, training accuracy 0.92
Step 1900, training accuracy 0.96
Step 2000, training accuracy 0.98
Step 2100, training accuracy 1
Step 2200, training accuracy 0.94
Step 2300, training accuracy 1
Step 2400, training accuracy 0.98
Step 2500, training accuracy 0.98
Step 2600, training accuracy 1
Step 2700, training accuracy 0.96
Step 2800, training accuracy 0.94
Step 2900, training accuracy 0.98
Step 