In [1]:
# load tensorflow
import tensorflow as tf

In [2]:
# load MNIST dataset
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# some utility functions
def weight_variable(shape, name='weights'):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name=name)

def bias_variable(shape, name='biases'):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial, name=name)

def conv2d(x, W, name=None):
    return tf.nn.conv2d(x, W, 
                        strides=[1, 1, 1, 1], 
                        padding='SAME',
                        name=name)

def max_pool_2x2(x, name=None):
    return tf.nn.max_pool(x, 
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], 
                          padding='SAME',
                          name=name)

def variable_summaries(var):
  """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
  with tf.name_scope('summaries'):
    mean = tf.reduce_mean(var)
    tf.summary.scalar('mean', mean)
    with tf.name_scope('stddev'):
      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
    tf.summary.scalar('stddev', stddev)
    tf.summary.scalar('max', tf.reduce_max(var))
    tf.summary.scalar('min', tf.reduce_min(var))
    tf.summary.histogram('histogram', var)

In [4]:
# build inference graph

# input layer
with tf.name_scope('input'):
    x = tf.placeholder(tf.float32, shape=[None, 784], name='raw_x')
    y_ = tf.placeholder(tf.float32, shape=[None, 10], name='label')
    x_image = tf.reshape(x, [-1, 28, 28, 1], 'reshape_x')  # reshape to 2D image
    tf.summary.image("x_image", x_image, max_outputs=6)

# first layer: conv1
with tf.name_scope('conv1'):
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)
    variable_summaries(W_conv1)
    variable_summaries(b_conv1)
    variable_summaries(h_conv1)
    variable_summaries(h_pool1)

# second layer: conv2
with tf.name_scope('conv2'):
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

# third layer: fc1
with tf.name_scope('fc1'):
    W_fc1 = weight_variable([7 * 7 * 64, 1024])
    b_fc1 = bias_variable([1024])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# forth layer: dropout
with tf.name_scope('dropout'):
    keep_prob = tf.placeholder(tf.float32)
    tf.summary.scalar('dropout_keep_probability', keep_prob)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# output layer
with tf.name_scope('output'):
    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

In [5]:
# build train graph
with tf.name_scope('cross_entropy'):
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    tf.summary.scalar('cross_entropy', cross_entropy)
with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer(1E-4).minimize(cross_entropy)

In [6]:
# evaluation graph
with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
        correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

In [7]:
# create session and initialize model
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [None]:
# write summay to checkpoint files
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('MNIST_data/train', sess.graph)
test_writer = tf.summary.FileWriter('MNIST_data/test', sess.graph)

In [None]:
# training
for step in range(10000):
    batch = mnist.train.next_batch(50)
    if step % 100 == 0:
        acc, summary = sess.run([accuracy, merged], feed_dict={
                x:batch[0], y_: batch[1], keep_prob: 1.0})
        print('step %d, accuracy %.3f'%(step, acc))
        test_writer.add_summary(summary, step)
    _, summary = sess.run([train_step, merged], 
             feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
    train_writer.add_summary(summary, step)

step 0, accuracy 0.040
step 100, accuracy 0.840
step 200, accuracy 0.920
step 300, accuracy 0.920
step 400, accuracy 0.940
step 500, accuracy 1.000
step 600, accuracy 0.980
step 700, accuracy 0.960
step 800, accuracy 0.940
step 900, accuracy 0.980
step 1000, accuracy 0.960
step 1100, accuracy 0.920
step 1200, accuracy 0.940
step 1300, accuracy 0.940


Back to [2_mnist_softmax.ipynb](2_mnist_softmax.ipynb)