In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import os
import time

## Inference

In [2]:
# Layer widths of the fully connected network built by inference().
INPUT_SIZE = 28 * 28   # 784 features per input row (presumably a flattened 28x28 MNIST image)
OUTPUT_SIZE = 10       # width of the output layer and of each label row
LAYOUT_1_SIZE = 500    # number of units in the single hidden layer

In [3]:
def get_weight_variable(shape, regularizer):
    """Get the ``weights`` variable of the current variable scope.

    The variable is obtained through ``tf.get_variable`` and initialized from
    a truncated normal distribution with a standard deviation of 0.1.

    Args:
        shape: Shape of the weight tensor, forwarded to ``tf.get_variable``.
        regularizer: Callable that maps the weight tensor to a regularization
            loss tensor, or ``None`` to skip regularization.

    Returns:
        The ``weights`` variable.
    """
    weights = tf.get_variable(
        "weights", shape, initializer=tf.truncated_normal_initializer(stddev=0.1))
    # Identity test rather than `!= None`: it cannot be affected by a custom
    # __eq__/__ne__ on the regularizer object and is the idiomatic None check.
    if regularizer is not None:
        # The per-variable penalty is collected under 'losses' so the caller
        # can sum the whole collection into the training objective.
        tf.add_to_collection('losses', regularizer(weights))
    return weights

In [4]:
def inference(input_tensor, regularizer):
    """Build the forward pass of the two-layer fully connected network.

    Args:
        input_tensor: Tensor that is matrix-multiplied with the
            [INPUT_SIZE, LAYOUT_1_SIZE] weights of the first layer.
        regularizer: Forwarded to get_weight_variable() for both weight
            matrices; pass ``None`` to build the network without
            regularization losses.

    Returns:
        The output tensor of the second layer. No activation is applied to
        it, so callers receive raw scores.
    """
    # Hidden layer: affine transform followed by ReLU.
    with tf.variable_scope("layer_1"):
        hidden_weights = get_weight_variable([INPUT_SIZE, LAYOUT_1_SIZE], regularizer)
        hidden_biases = tf.get_variable(
            "biases", [LAYOUT_1_SIZE], initializer=tf.constant_initializer(0.0))
        hidden_pre_activation = tf.matmul(input_tensor, hidden_weights) + hidden_biases
        hidden_output = tf.nn.relu(hidden_pre_activation)

    # Output layer: affine transform only.
    with tf.variable_scope("layer_2"):
        output_weights = get_weight_variable([LAYOUT_1_SIZE, OUTPUT_SIZE], regularizer)
        output_biases = tf.get_variable(
            "biases", [OUTPUT_SIZE], initializer=tf.constant_initializer(0.0))
        logits = tf.matmul(hidden_output, output_weights) + output_biases

    return logits

## Train

In [5]:
# Optimisation schedule.
TRAINING_STEPS = 30000          # number of mini-batch updates performed by train()
BATCH_SIZE = 100                # examples drawn per update
LEARNING_RATE_BASE = 0.8        # initial learning rate fed to exponential_decay
LEARNING_RATE_DECAY = 0.99      # decay rate fed to exponential_decay

# Regularisation and weight averaging.
REGULARIZATION_RATE = 1e-4      # scale handed to the L2 regularizer
MOVING_AVERAGE_DECAY = 0.99     # decay of the ExponentialMovingAverage over trainable variables

# Checkpoints are written under MODEL_SAVE_PATH with MODEL_NAME as file prefix.
MODEL_NAME = "model"
MODEL_SAVE_PATH = "./model"

In [6]:
def train(mnist):
    """Train the network on ``mnist.train`` and checkpoint it periodically.

    The graph is built inside a fresh ``tf.Graph`` so that the function can be
    called repeatedly in the same kernel: building into the global default
    graph a second time would fail because ``layer_1/weights`` already exists
    and would keep appending to the ``losses`` collection.

    Every 1000 steps the current loss is printed and a checkpoint is written
    to ``MODEL_SAVE_PATH`` using ``MODEL_NAME`` as the file prefix and the
    global step as suffix.

    Args:
        mnist: Data set object providing ``train.next_batch(batch_size)`` and
            ``train.num_examples``; labels are expected as rows of width
            OUTPUT_SIZE (they are reduced with ``argmax`` for the loss).

    Returns:
        None.
    """
    # Make sure the checkpoint directory exists before the first save so a
    # long training run cannot fail at step 1000 because of a missing folder.
    if not os.path.isdir(MODEL_SAVE_PATH):
        os.makedirs(MODEL_SAVE_PATH)
    checkpoint_prefix = os.path.join(MODEL_SAVE_PATH, MODEL_NAME)

    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [None, INPUT_SIZE], name="x")
        y_ = tf.placeholder(tf.float32, [None, OUTPUT_SIZE], name="y_")

        regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
        y = inference(x, regularizer)

        # Not trainable: the step counter must not be touched by the optimizer's
        # gradient update nor be tracked by the moving average below.
        global_step = tf.Variable(0, trainable=False)

        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
        variable_averages_op = variable_averages.apply(tf.trainable_variables())

        # The sparse loss expects class indices, so the label rows are reduced
        # with argmax first.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=y, labels=tf.argmax(y_, 1))
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        # Total objective = data loss + the regularization terms that
        # get_weight_variable() collected under "losses".
        loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))

        learning_rate = tf.train.exponential_decay(
            LEARNING_RATE_BASE, global_step, mnist.train.num_examples/BATCH_SIZE, LEARNING_RATE_DECAY)
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            loss, global_step=global_step)

        # Running train_op triggers both the gradient step and the
        # moving-average update.
        with tf.control_dependencies([train_step, variable_averages_op]):
            train_op = tf.no_op(name="train")

        saver = tf.train.Saver()
        with tf.Session() as sess:
            tf.global_variables_initializer().run()
            for i in range(1, TRAINING_STEPS + 1):
                xs, ys = mnist.train.next_batch(BATCH_SIZE)
                _, loss_value, step = sess.run(
                    [train_op, loss, global_step], feed_dict={x: xs, y_: ys})
                if i % 1000 == 0:
                    print("Step %d, loss = %g." % (step, loss_value))
                    saver.save(sess, checkpoint_prefix, global_step=step)

In [7]:
# Read the MNIST data sets from ./data. one_hot=True requests label rows of
# width OUTPUT_SIZE, matching the label placeholders in train() and evaluate().
mnist = input_data.read_data_sets("./data", one_hot=True)

Extracting ./data/train-images-idx3-ubyte.gz
Extracting ./data/train-labels-idx1-ubyte.gz
Extracting ./data/t10k-images-idx3-ubyte.gz
Extracting ./data/t10k-labels-idx1-ubyte.gz


In [8]:
# Run the full training loop (TRAINING_STEPS updates); the loss is printed and
# a checkpoint is saved under MODEL_SAVE_PATH every 1000 steps.
train(mnist)

Step 1000, loss = 0.303477.
Step 2000, loss = 0.192179.
Step 3000, loss = 0.128198.
Step 4000, loss = 0.116234.
Step 5000, loss = 0.116696.
Step 6000, loss = 0.121661.
Step 7000, loss = 0.085938.
Step 8000, loss = 0.0794227.
Step 9000, loss = 0.0720801.
Step 10000, loss = 0.0679493.
Step 11000, loss = 0.0673448.
Step 12000, loss = 0.0601233.
Step 13000, loss = 0.06746.
Step 14000, loss = 0.0523719.
Step 15000, loss = 0.0527718.
Step 16000, loss = 0.0475236.
Step 17000, loss = 0.0503256.
Step 18000, loss = 0.0492024.
Step 19000, loss = 0.0469151.
Step 20000, loss = 0.0442539.
Step 21000, loss = 0.0428752.
Step 22000, loss = 0.0376176.
Step 23000, loss = 0.0423614.
Step 24000, loss = 0.037276.
Step 25000, loss = 0.0366846.
Step 26000, loss = 0.0345971.
Step 27000, loss = 0.0324602.
Step 28000, loss = 0.0353654.
Step 29000, loss = 0.0354306.
Step 30000, loss = 0.0320305.


## Evaluate

In [9]:
def evaluate(mnist):
    """Report the validation accuracy of the most recent checkpoint.

    The network is rebuilt in its own graph (without regularization), the
    latest checkpoint recorded in ``MODEL_SAVE_PATH`` is restored and the
    accuracy on ``mnist.validation`` is printed. If no checkpoint is found a
    message is printed instead.

    Args:
        mnist: Data set object providing ``validation.images`` and
            ``validation.labels``.

    Returns:
        None.
    """
    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [None, INPUT_SIZE], name="x-input")
        y_ = tf.placeholder(tf.float32, [None, OUTPUT_SIZE], name="y-input")
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}

        # No regularizer: only the forward pass is needed for evaluation.
        y = inference(x, None)
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Restore through the moving-average name mapping so the variables are
        # loaded from the averaged values stored in the checkpoint.
        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
        variable_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variable_to_restore)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
            if not (ckpt and ckpt.model_checkpoint_path):
                print("Checkpoint not found")
                return

            saver.restore(sess, ckpt.model_checkpoint_path)
            # Checkpoints are saved as "<prefix>-<global step>"; take the step
            # suffix from the file name, independent of the path separator.
            global_step = os.path.basename(ckpt.model_checkpoint_path).split('-')[-1]
            accuracy_score = sess.run(accuracy, feed_dict=validate_feed)
            print("Step %s, accuracy = %g" % (global_step, accuracy_score))

In [10]:
# Restore the latest checkpoint from MODEL_SAVE_PATH and print its accuracy on
# the validation split.
evaluate(mnist)

INFO:tensorflow:Restoring parameters from ./model/model-30000
Setp 30000, accuracy = 0.9864
