# MNIST

## 0. Import Data

In [2]:
# Load the MNIST dataset from the local "MNIST_data/" directory.
# one_hot=True encodes every label as a length-10 indicator vector.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# Parenthesised single-argument print behaves identically on Python 2 and Python 3.
print(type(mnist))

<class 'tensorflow.contrib.learn.python.learn.datasets.base.Datasets'>


In [4]:
# Inspect the training split: example counts, container types, array shapes and one sample.
# Each line is formatted into a single string first so the output is the same
# whether `print` is the Python 2 statement or the Python 3 function.
print("%d %d" % (len(mnist.train.images), len(mnist.train.labels)))
print("%s %s" % (type(mnist.train.images[0]), type(mnist.train.labels[0])))
print("%s %s" % (mnist.train.images.shape, mnist.train.labels.shape))
print("%s %s" % (mnist.train.images[0].shape, mnist.train.labels[0]))

55000 55000
<type 'numpy.ndarray'> <type 'numpy.ndarray'>
(55000, 784) (55000, 10)
(784,) [ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]


## I. First Model

* Softmax Regression: $y = \texttt{softmax}(Wx+b)$

In [5]:
import tensorflow as tf
from tensorflow import placeholder, Variable

**Define Predictive Function**

* $y = \texttt{softmax}(Wx+b)$

In [6]:
# Inputs and trainable parameters of the softmax regression.
# Each input row is a flattened image of 784 values; there are 10 output classes.
x = placeholder(tf.float32, shape=[None, 784])  # None: the row (batch) dimension may be any size
W = Variable(tf.zeros(shape=[784, 10]))
b = Variable(tf.zeros(shape=[10]))

In [7]:
# Predicted class distribution: softmax applied to the affine scores xW + b.
y = tf.nn.softmax(tf.add(tf.matmul(x, W), b))

**Define Cost Function (Cross-Entropy)**

* $H_{y'}(y) = -\sum_i{y_i'} \texttt{log}(y_i)$, where $y'$ and $y$ are the true and predicted distributions, respectively.

In [8]:
y_ = placeholder(tf.float32, [None, 10]) # true distribution (one-hot labels).
# Mean cross-entropy over the batch.
# The loss is computed from the raw (pre-softmax) scores instead of tf.log(softmax(...)):
# when a predicted probability underflows to 0, tf.log returns -inf and the product
# 0 * -inf becomes NaN, which would poison the gradients. The fused op below evaluates
# the same quantity in a numerically stable way.
crossent = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=tf.matmul(x, W) + b))

**Training**

In [9]:
# One training step = one gradient-descent update (learning rate 0.5) on the cross-entropy.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)
train_step = optimizer.minimize(crossent)

In [12]:
# Open a session and run the initializer op for the variables of the graph.
sess = tf.Session()
# tf.global_variables_initializer() is used in place of tf.initialize_all_variables(),
# which was deprecated (2017-03-02).
init = tf.global_variables_initializer()
sess.run(init)

In [43]:
%%time
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100) # next_batch returns a tuple of (x,y), x=100x784, y=100x10 here.
    sess.run(train_step, feed_dict={x:batch_xs, y_:batch_ys})

CPU times: user 2.4 s, sys: 4.05 s, total: 6.46 s
Wall time: 3.9 s


In [60]:
# Display the image half (index 0) of a 10-example training batch.
# NOTE(review): this requests a fresh batch purely for display — confirm the cell is still wanted.
mnist.train.next_batch(10)[0]

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]], dtype=float32)

**Evaluation**

In [21]:
# tf.argmax(data, axis): index of the largest entry along axis 1, i.e. the class id per row.
predicted_class = tf.argmax(y, 1)
true_class = tf.argmax(y_, 1)
# Element-wise comparison; the result is a boolean tensor with one entry per example.
correct_prediction = tf.equal(predicted_class, true_class)

In [46]:
# Accuracy = mean of the correctness flags after casting booleans to 0.0 / 1.0.
hits = tf.cast(correct_prediction, dtype=tf.float32)
accuracy = tf.reduce_mean(hits)

In [51]:
# Both objects are symbolic graph tensors, not computed values.
# Parenthesised single-argument print works on Python 2 and Python 3 alike.
print(type(correct_prediction))
print(type(accuracy))

<class 'tensorflow.python.framework.ops.Tensor'>
<class 'tensorflow.python.framework.ops.Tensor'>


In [52]:
# Accuracy in percent on the training split, then on the test split.
# Parenthesised single-argument print works on Python 2 and Python 3 alike.
print(sess.run(accuracy, feed_dict={x: mnist.train.images, y_: mnist.train.labels})*100)
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels})*100)

91.8727278709
92.0799970627


## II. Second Model

* Convolutional Net

In [54]:
import tensorflow as tf
from tensorflow import placeholder, Variable

In [36]:
def weight_variable(shape):
    """Return a Variable of the given shape initialised from a truncated normal (stddev 0.1)."""
    return Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a Variable of the given shape with every entry initialised to 0.1."""
    return Variable(tf.constant(0.1, shape=shape))

In [37]:
def conv2d(x, W):
    """Convolve input `x` with filter `W` using a stride of 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows moved with a stride of 2, using 'SAME' padding."""
    # The window size and the stride are the same: 1 along the outer dimensions, 2 along the middle two.
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

In [38]:
# FIRST LAYER: 5x5 convolution from 1 input channel to 32 feature maps, ReLU, then 2x2 max-pooling.
W_conv1 = weight_variable([5, 5, 1, 32]) # [patch_height, patch_width, input_dim, output_dim]
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1,28,28,1]) # [?, image_height, image_width, num_col_channel]; -1 lets the batch size be inferred
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1) # image_dim: 28x28 -> 14x14

In [40]:
# SECOND LAYER: 5x5 convolution from 32 input channels to 64 feature maps, ReLU, then 2x2 max-pooling.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2) # image_dim: 14x14 -> 7x7

In [41]:
# DENSE LAYER: flatten the 7x7x64 pooled feature maps and map them to 1024 ReLU units.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64]) # one row of 7*7*64 values per example
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

In [42]:
# DROPOUT
# The keep probability is a placeholder so it can be fed a different value
# for training steps than for accuracy evaluation.
keep_prob = placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

In [43]:
# SOFTMAX (readout layer): map the 1024 dropout-regularised features to a distribution over 10 classes.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

In [54]:
# TRAIN
# NOTE: this cell rebinds `train_step`, `correct_prediction`, `accuracy` and `sess`,
# which were first bound for the softmax-regression model; after running it those
# names refer to the convolutional network.

# Compute the loss from the raw (pre-softmax) scores of the readout layer rather than
# from tf.log(y_conv): once a predicted probability underflows to 0, tf.log yields -inf
# and 0 * -inf turns the loss (and every gradient) into NaN. The fused op evaluates the
# same cross-entropy in a numerically stable way.
logits_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(20000):
    batch = mnist.train.next_batch(50)
    if i%100 == 0:
        # Report accuracy on the current mini-batch every 100 steps; keep_prob=1.0 keeps every unit.
        train_accuracy = accuracy.eval(session=sess,feed_dict={x:batch[0],y_:batch[1],keep_prob:1.0})
        # Parenthesised single-argument print works on Python 2 and Python 3 alike.
        print("step %d, train accuracy %g" % (i, train_accuracy))
    # Training step with dropout active: each unit is kept with probability 0.5.
    train_step.run(session=sess,feed_dict={x:batch[0],y_:batch[1],keep_prob:0.5})
print("test accuracy %g" % accuracy.eval(session=sess,feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

step 0, train accuracy 0.06
step 100, train accuracy 0.78
step 200, train accuracy 0.9
step 300, train accuracy 0.92
step 400, train accuracy 0.94
step 500, train accuracy 0.82
step 600, train accuracy 0.94
step 700, train accuracy 0.96
step 800, train accuracy 0.94
step 900, train accuracy 0.92
step 1000, train accuracy 0.96
step 1100, train accuracy 0.96
step 1200, train accuracy 0.96
step 1300, train accuracy 0.98
step 1400, train accuracy 1
step 1500, train accuracy 1
step 1600, train accuracy 0.98
step 1700, train accuracy 0.96
step 1800, train accuracy 0.9
step 1900, train accuracy 0.98
step 2000, train accuracy 0.98
step 2100, train accuracy 0.96
step 2200, train accuracy 1
step 2300, train accuracy 0.98
step 2400, train accuracy 0.96
step 2500, train accuracy 0.96
step 2600, train accuracy 1
step 2700, train accuracy 0.98
step 2800, train accuracy 0.96
step 2900, train accuracy 0.96
step 3000, train accuracy 0.98
step 3100, train accuracy 1
step 3200, train accuracy 0.92
step 3

### MORE

* MLP (https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/mnist/mnist.py)