In [89]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
# The downloaded data comes split into three parts:
#   mnist.train      - 55,000 training examples
#   mnist.test       - 10,000 test examples
#   mnist.validation -  5,000 validation examples
# one_hot=True requests the labels in one-hot encoded form.
mnist = input_data.read_data_sets(
    "MNIST_data/",
    one_hot=True,
)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [90]:
# Use the function-call form of print so the cell runs on both Python 2 and
# Python 3 and matches the print(...) calls used in the rest of the notebook.
# Training images are stored flat: 28*28 = 784 values per example.
print(mnist.train.images.shape)
# Training labels are one-hot encoded, hence one column per class.
print(mnist.train.labels.shape)

(55000, 784)
(55000, 10)


In [91]:
# `x` is a value we supply each time we ask TensorFlow to run a computation.
# It is a 2-D tensor of floating-point numbers with shape [None, 784].
# Resulting tensor: <tf.Tensor 'Placeholder:0' shape=(?, 784) dtype=float32>
x = tf.placeholder(tf.float32, shape=[None, 784])

In [92]:
# Both model parameters start out as tensors full of zeros.
W = tf.Variable(tf.zeros(shape=[784, 10]))  # weights, shape [784, 10]
b = tf.Variable(tf.zeros(shape=[10]))       # biases, shape [10]

In [93]:
# Model definition: y = softmax(x*W + b)
logits = tf.matmul(x, W) + b
y = tf.nn.softmax(logits)

In [94]:
# Cross-entropy cost function: H(y_) = -sum_i( y_[i] * log(y[i]) ),
# where y is the predicted distribution and y_ is the true distribution.
y_ = tf.placeholder(tf.float32, [None, 10])  # placeholder used to feed in the correct answers
# Clip the predicted probabilities away from 0 before taking the log: if the
# softmax output underflows to exactly 0, log(0) = -inf and 0 * -inf = NaN,
# which would turn the whole loss (and every later gradient) into NaN.
clipped_y = tf.clip_by_value(y, 1e-10, 1.0)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(clipped_y), reduction_indices=[1]))

In [95]:
# Optimization step: plain gradient descent with a learning rate of 0.5,
# adjusting the variables so as to reduce the cross-entropy cost.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)
train_step = optimizer.minimize(cross_entropy)

In [96]:
# Build the op that initializes the variables we created. Nothing is
# initialized yet at this point; the op is executed later via sess.run(init).
init = tf.initialize_all_variables()

In [97]:
# Launch the graph in a session.

#sess = tf.Session() #for beginners
# NOTE: later cells call train_step.run(...) and accuracy.eval(...) without
# passing a session explicitly, so they rely on the session created here.
sess = tf.InteractiveSession()
# Execute the variable-initialization op before any training step runs.
sess.run(init)

In [98]:
# Train for 1000 steps. Every step takes a "batch" of one hundred random data
# points from the training set and runs one update on it
# (stochastic gradient descent).
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    feed = {x: batch_xs, y_: batch_ys}
    sess.run(train_step, feed_dict=feed)

In [99]:
# A prediction is correct when the index of the largest predicted probability
# equals the index of the 1 in the one-hot label.
predicted_class = tf.argmax(y, 1)
true_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, true_class)
# Cast the booleans to floats and average them to get the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Evaluate on the test set.
test_feed = {x: mnist.test.images, y_: mnist.test.labels}
print(sess.run(accuracy, feed_dict=test_feed))

0.9182


In [100]:
# "Expert" variant of the training loop: call the op's own run() method
# instead of going through sess.run(...).
# No variables are re-initialized in this cell, so these 1000 steps continue
# training the model from its current state.
for i in range(1000):
    batch = mnist.train.next_batch(100)
    step_feed = {x: batch[0], y_: batch[1]}
    train_step.run(feed_dict=step_feed)

In [102]:
# Rebuild the accuracy computation and evaluate it on the test set, this time
# through the tensor's eval() method rather than sess.run(...).
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
test_accuracy = accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels})
print(test_accuracy)

0.9158


In [104]:
# initialize weights with a small amount of random noise, and biases with a
# slightly positive value to avoid "dead neurons"
def weight_variable(shape, stddev=0.1):
  """Create a weight variable initialized with truncated-normal noise.

  Args:
    shape: shape of the variable to create, e.g. [5, 5, 1, 32].
    stddev: standard deviation passed to tf.truncated_normal. Defaults to
      0.1, the value that used to be hard-coded, so existing calls behave
      exactly as before.

  Returns:
    A tf.Variable whose initial value is the sampled tensor.
  """
  initial = tf.truncated_normal(shape, stddev=stddev)
  return tf.Variable(initial)

def bias_variable(shape, value=0.1):
  """Create a bias variable filled with a constant value.

  Args:
    shape: shape of the variable to create, e.g. [32].
    value: constant every element starts at. Defaults to 0.1, the value
      that used to be hard-coded, so existing calls behave exactly as before.

  Returns:
    A tf.Variable whose initial value is the constant tensor.
  """
  initial = tf.constant(value, shape=shape)
  return tf.Variable(initial)

In [105]:

def conv2d(x, W):
  """Convolve `x` with filter `W`, using a stride of 1 in every dimension and 'SAME' padding."""
  unit_strides = [1, 1, 1, 1]
  return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
  """Max-pool `x` with ksize [1, 2, 2, 1], strides [1, 2, 2, 1] and 'SAME' padding."""
  window = [1, 2, 2, 1]
  step = [1, 2, 2, 1]
  return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

In [111]:
# First layer.
# 32 features for each 5x5 patch of the single input channel, so the weight
# tensor has shape [5, 5, 1, 32] and there is one bias per feature.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
# Reshape the flat input x into a 4-D tensor of 28x28 single-channel images.
x_image = tf.reshape(x, [-1, 28, 28, 1])
# Convolve, add the bias, apply ReLU, then max-pool.
conv1_out = conv2d(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_out)
h_pool1 = max_pool_2x2(h_conv1)

In [112]:
# Second layer.
# 64 features for each 5x5 patch, computed over the 32 feature maps produced
# by the first layer.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

# Same pipeline as the first layer: convolve, add the bias, ReLU, max-pool.
conv2_out = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_out)
h_pool2 = max_pool_2x2(h_conv2)

In [113]:
# Add a fully connected layer with 1024 neurons.
# Number of values per example once the pooled feature maps are flattened.
flat_size = 7 * 7 * 64
W_fc1 = weight_variable([flat_size, 1024])
b_fc1 = bias_variable([1024])
# Reshape the tensor from the pooling layer into a batch of vectors,
# multiply by the weight matrix, add the bias, and apply a ReLU.
h_pool2_flat = tf.reshape(h_pool2, [-1, flat_size])
fc1_out = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_out)

In [114]:
# Dropout.
# keep_prob is the probability that a neuron's output is kept during dropout;
# being a placeholder, it is supplied through feed_dict at run time.
keep_prob = tf.placeholder(dtype=tf.float32)
# tf.nn.dropout automatically handles scaling neuron outputs in addition to
# masking them.
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

In [115]:
# Readout layer: map the 1024 (dropout-regularised) features to 10 outputs
# and normalise them with softmax.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
logits_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits_conv)


In [116]:
# Train with the ADAM optimizer.
# Clip the predicted probabilities away from 0 before taking the log: if the
# softmax output underflows to exactly 0, log(0) = -inf and 0 * -inf = NaN,
# which would turn the loss (and every later weight update) into NaN.
clipped_y_conv = tf.clip_by_value(y_conv, 1e-10, 1.0)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(clipped_y_conv), reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# A prediction is correct when the most probable class matches the label.
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Initialize the variables before training starts.
# NOTE(review): this runs the initializer for all variables, so it presumably
# also resets W and b of the earlier softmax model - confirm that is intended.
sess.run(tf.initialize_all_variables())
# Run 20000 training steps on mini-batches of 50 examples.
for i in range(20000):
    batch = mnist.train.next_batch(50)
    # Every 100th step, report the accuracy on the current batch with all
    # neuron outputs kept (keep_prob = 1.0).
    if i % 100 == 0:
        eval_feed = {x: batch[0], y_: batch[1], keep_prob: 1.0}
        train_accuracy = accuracy.eval(feed_dict=eval_feed)
        print("step %d, training accuracy %g" % (i, train_accuracy))
    # The training update itself keeps each neuron output with probability 0.5.
    train_feed = {x: batch[0], y_: batch[1], keep_prob: 0.5}
    train_step.run(feed_dict=train_feed)

# Final evaluation on the test set, again with keep_prob = 1.0.
test_feed = {x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}
print("test accuracy %g" % accuracy.eval(feed_dict=test_feed))

step 0, training accuracy 0.12
step 100, training accuracy 0.88
step 200, training accuracy 0.88
step 300, training accuracy 0.92
step 400, training accuracy 0.96
step 500, training accuracy 0.94
step 600, training accuracy 0.94
step 700, training accuracy 0.92
step 800, training accuracy 0.94
step 900, training accuracy 0.98
step 1000, training accuracy 0.94
step 1100, training accuracy 0.98
step 1200, training accuracy 1
step 1300, training accuracy 0.96
step 1400, training accuracy 0.98
step 1500, training accuracy 1
step 1600, training accuracy 0.98
step 1700, training accuracy 0.92
step 1800, training accuracy 0.96
step 1900, training accuracy 0.94
step 2000, training accuracy 0.96
step 2100, training accuracy 0.96
step 2200, training accuracy 0.96
step 2300, training accuracy 1
step 2400, training accuracy 0.94
step 2500, training accuracy 0.96
step 2600, training accuracy 1
step 2700, training accuracy 0.98
step 2800, training accuracy 0.92
step 2900, training accuracy 0.98
step