In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

In [2]:
# Load the MNIST splits from MNIST_data/ with one-hot encoded labels
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# The MNIST object exposes train, validation and test sets.
# Example counts are integers, so they are formatted with %d: %g keeps only
# six significant digits and would fall back to scientific notation for
# counts of one million or more.
print("Train:%d\nVal:%d\nTest:%d\n"%(mnist.train.num_examples,
  mnist.validation.num_examples, mnist.test.num_examples))

# Within the train object are the images and the one-hot labels, both 2-D:
# images is (num_train_images, num_pixels), labels is (num_train_images, num_labels)
num_train_images, num_pixels = mnist.train.images.shape
_, num_labels = mnist.train.labels.shape
print("The training set has %d images that each have %d pixels and can be sorted into %d categories"%(
  num_train_images, num_pixels, num_labels))

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Train:55000
Val:5000
Test:10000

The training set has 55000 images that each have 784 pixels and can be sorted into 10 categories


In [3]:
# Placeholders are used for your inputs or meta-parameters (independent variables)
# None indicates that we will specify that dimension later. It is for our batch size.
lr = tf.placeholder(tf.float32, shape=())  # Scalar learning rate, fed at each training step
x = tf.placeholder(tf.float32, [None, num_pixels])  # Input images
# Ground truth one-hot labels; sized from num_labels so it stays consistent
# with W and b instead of hard-coding the number of classes
y = tf.placeholder(tf.float32, [None, num_labels])

# Variables are persistent and can be learned
# We can write a simple regression (y = Wx + b) as:
W = tf.Variable(tf.zeros([num_pixels, num_labels]))
b = tf.Variable(tf.zeros([num_labels]))

# The rest of the graph can be described in terms of
# existing Variables and Placeholders
logits = tf.add(tf.matmul(x, W), b)
p_ = tf.nn.softmax(logits)  # Network output is a probability

# Loss function: mean cross-entropy over the batch.
# The loss is computed from the raw logits with the fused op rather than as
# -sum(y * log(softmax(logits))): the hand-written form evaluates log(0) = -inf
# (and 0 * -inf = NaN) as soon as a predicted probability underflows to zero.
# Keyword arguments are used because the positional order of labels/logits
# differs between TensorFlow releases.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

# The tf.train module has many types of optimization functions
optimizer = tf.train.GradientDescentOptimizer(lr).minimize(cross_entropy)

# correct_prediction will give us a list of True/False values for if the
# ground truth matched the network outputs
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(p_, 1))

# Then we cast the True/False values to floats and find the mean to get
# accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# You have to define a initializer operation to set up the graph in a session
# NOTE(review): newer TensorFlow releases spell this
# tf.global_variables_initializer(); kept as-is to match the TensorFlow
# version this notebook appears to target - confirm before upgrading.
init_op = tf.initialize_all_variables()

In [10]:
# Training hyper-parameters
num_epochs = 10
batch_size = 100
learning_rate = 0.01
# Floor division gives the number of whole batches per epoch directly,
# without a round trip through float division
num_batches = num_train_images // batch_size

# Feeds used for evaluation; accuracy does not depend on lr, so it is not fed
validation_feed = {x: mnist.validation.images, y: mnist.validation.labels}
test_feed = {x: mnist.test.images, y: mnist.test.labels}

## Once our graph is constructed, we can create a session to run the graph
with tf.Session() as sess:
    # Initialize the variables. No feed_dict is needed here: W and b are
    # initialized from tf.zeros and do not depend on any placeholder.
    sess.run(init_op)

    for epoch in range(num_epochs):
        for batch in range(num_batches):
            (images, labels) = mnist.train.next_batch(batch_size)
            # Feed dictionary fills in placeholders for our model
            feed_dict = {x: images, y: labels, lr: learning_rate}
            sess.run(optimizer, feed_dict)
        # Report accuracy on the validation set after every epoch
        val_accuracy = sess.run(accuracy, feed_dict=validation_feed)
        # epoch is an integer index, so it is formatted with %d
        print("Validation accuracy at epoch %d was %g%%"%(epoch, 100*val_accuracy))
    # Final test accuracy
    test_accuracy = sess.run(accuracy, feed_dict=test_feed)
    print("Test accuracy is %g%%"%(100*test_accuracy))

Validation accuracy at epoch 0 was 85.14%
Validation accuracy at epoch 1 was 87.1%
Validation accuracy at epoch 2 was 87.98%
Validation accuracy at epoch 3 was 88.52%
Validation accuracy at epoch 4 was 89%
Validation accuracy at epoch 5 was 89.34%
Validation accuracy at epoch 6 was 89.52%
Validation accuracy at epoch 7 was 89.7%
Validation accuracy at epoch 8 was 89.88%
Validation accuracy at epoch 9 was 90.16%
Test accuracy is 90.18%
