______________________________________________________________________________________________________
This notebook follows the "Deep MNIST for Experts" tutorial on the TensorFlow website; at the time of 
writing this is located at https://www.tensorflow.org/versions/r0.8/tutorials/mnist/pros/index.html. 
This notebook contains a simple deep convolutional MNIST classifier.
_______________________________________________________________________________________________________

# Setup

In [3]:
# Imports: TensorFlow itself, then the MNIST loader bundled with its tutorials.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST data sets (files are kept under 'MNIST_data'), with the
# labels returned as one-hot vectors.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Start a TensorFlow InteractiveSession, which connects to the C++ backend.
# The common usage for TensorFlow programs is to first create a graph and
# then launch it in a session. Here we instead use the convenient
# 'InteractiveSession' class, which makes TensorFlow more flexible about how
# the code is structured: operations that build the computation graph can be
# interleaved with operations that run it.
sess = tf.InteractiveSession()

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


# Build a Softmax Regression Model

In [3]:
# Placeholders for a batch of flattened images (784 values each) and their
# one-hot labels (10 classes). The 'shape' argument to placeholder is
# optional, but it allows for error-checking of the data that is fed in.
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

# Model parameters (weights and biases), both starting at zero.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# Model: a single linear layer followed by a softmax. The pre-softmax logits
# are kept as their own tensor so the loss can be computed from them directly.
logits = tf.matmul(x, W) + b
y = tf.nn.softmax(logits)

# Cost function: mean cross-entropy over the batch. It is computed from the
# logits rather than as -sum(y_ * log(y)), because the explicit form yields
# NaN (0 * log(0)) as soon as a predicted probability underflows to 0.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_))

# One step of gradient descent (learning rate 0.5); this adds new operations
# to the computation graph. The returned operation train_step, when run,
# applies the gradient descent updates to the parameters, so training the
# model is accomplished by repeatedly running train_step.
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# Initialize variables only once the whole graph has been built, so that any
# variable created while constructing it is covered as well.
sess.run(tf.initialize_all_variables())

# Train the model.
for _ in range(1000):
    # load 50 training examples for each training iteration
    batch_xs, batch_ys = mnist.train.next_batch(50)
    # use feed_dict to replace the placeholder tensors x and y_ with training examples
    train_step.run(feed_dict={x: batch_xs, y_: batch_ys})

# Evaluate the model: fraction of test images whose most probable predicted
# class matches the labelled class.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

0.9092


# Build a Multilayer Convolutional Network

## Helper Functions

In [4]:
# create functions to initialize weights and biases

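# initialize weights with a small amount of random noise (truncated normal, stddev 0.1);
# note that the values are centred on the distribution's default mean, so some are negative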
def weight_variable(shape):
    """Create a weight variable of the requested shape.

    Args:
        shape: shape of the variable, passed straight to tf.truncated_normal.

    Returns:
        A tf.Variable whose initial value is drawn from a truncated normal
        distribution with stddev=0.1 (the mean is left at the
        tf.truncated_normal default).
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

# biases start at a small positive constant (0.1), so ReLU units begin in their active region
def bias_variable(shape):
    """Create a bias variable of the requested shape.

    Args:
        shape: shape of the variable, passed to tf.constant.

    Returns:
        A tf.Variable whose initial value is the constant 0.1 everywhere.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

# create functions for convolution and pooling
def conv2d(x, W):
    """Convolve x with the filter W using a stride of 1 and 'SAME' padding.

    Args:
        x: input tensor handed to tf.nn.conv2d.
        W: filter tensor handed to tf.nn.conv2d.

    Returns:
        The result of tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME').
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool x over 2x2 windows with a stride of 2 and 'SAME' padding.

    Args:
        x: input tensor handed to tf.nn.max_pool.

    Returns:
        The result of tf.nn.max_pool with ksize=[1, 2, 2, 1],
        strides=[1, 2, 2, 1] and padding='SAME'.
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

## First Convolutional Layer

In [6]:
# Fresh input/label placeholders for the convolutional model: flattened images
# (784 values each) and one-hot labels (10 classes). The 'shape' argument to
# placeholder is optional, but it allows for error-checking.
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

# First-layer parameters: 32 features computed for each 5x5 patch of the single
# input channel. Filter layout: [filter_height, filter_width, in_channels, out_channels].
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

# Reshape each flat 784-vector into a 28x28 single-channel image, giving a
# tensor laid out as [batch, height, width, channels]; -1 lets the batch size
# be inferred.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# Convolve x_image with the weight tensor and add the bias, then apply the ReLU.
conv1_pre_activation = conv2d(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_pre_activation)

# 2x2 max pooling on the activations.
h_pool1 = max_pool_2x2(h_conv1)

## Second Convolutional Layer

In [7]:
# Second-layer parameters: 64 features computed for each 5x5 patch of the 32
# feature maps produced by the first layer.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

# Same pipeline as the first layer: convolution + bias, ReLU, 2x2 max pooling.
conv2_pre_activation = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_pre_activation)
h_pool2 = max_pool_2x2(h_conv2)

## Densely Connected Layer

In [8]:
# After two rounds of 2x2 pooling the image size is 7x7 (with 64 feature maps);
# add a fully-connected layer with 1024 neurons on top of it.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

# Flatten the output of the pooling layer into a batch of vectors, then apply
# the fully-connected layer followed by a ReLU.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
fc1_pre_activation = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_pre_activation)

# Dropout on the dense layer. The keep probability is a placeholder so that a
# different value can be fed for training and for evaluation.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

## Readout Layer

In [10]:
# Readout parameters: map the 1024 dense features to the 10 classes.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

# Linear layer on the dropped-out dense features, followed by a softmax that
# turns the class scores into probabilities.
readout_scores = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(readout_scores)

## Train and Evaluate the Model

In [15]:
# Training-loop settings.
num_steps = 2000    # number of optimization steps
batch_size = 50     # training examples per step
log_every = 100     # report training accuracy every this many steps

# Define the loss: mean cross-entropy between the one-hot labels and the
# predicted class probabilities. y_conv is a softmax output and can underflow
# to exactly 0, in which case 0 * log(0) evaluates to NaN and corrupts all
# subsequent updates; the probabilities are therefore clipped away from 0
# before taking the log.
safe_probabilities = tf.clip_by_value(y_conv, 1e-10, 1.0)
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(safe_probabilities), reduction_indices=[1]))

# define one step of the optimization routine
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Define the accuracy of the model: fraction of examples whose most probable
# predicted class matches the labelled class.
correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_conv, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initialize all variables. This runs after the optimizer has been added to
# the graph, so variables created by the optimizer are initialized as well.
sess.run(tf.initialize_all_variables())

# train
for i in range(num_steps):
    # get batch of data for this training step
    batch = mnist.train.next_batch(batch_size)

    # print training updates; dropout is switched off (keep_prob 1.0) when
    # measuring accuracy
    if i % log_every == 0:
        train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g"%(i, train_accuracy))

    # one step of the optimization routine, keeping each dense unit with
    # probability 0.5
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# print test accuracy (dropout switched off)
print("test accuracy %g"%accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))


step 0, training accuracy 0.08
step 100, training accuracy 0.88
step 200, training accuracy 0.92
step 300, training accuracy 0.92
step 400, training accuracy 1
step 500, training accuracy 0.96
step 600, training accuracy 1
step 700, training accuracy 0.96
step 800, training accuracy 0.98
step 900, training accuracy 0.92
step 1000, training accuracy 0.98
step 1100, training accuracy 1
step 1200, training accuracy 1
step 1300, training accuracy 0.94
step 1400, training accuracy 0.94
step 1500, training accuracy 0.98
step 1600, training accuracy 0.96
step 1700, training accuracy 0.96
step 1800, training accuracy 0.96
step 1900, training accuracy 0.98
test accuracy 0.9733
