In [13]:
import tensorflow as tf
# TensorFlow provides a helper module for the MNIST dataset; it downloads the data and converts its format automatically.
from tensorflow.examples.tutorials.mnist import input_data

#### 1.设置输入和输出节点的个数,配置神经网络的参数。

In [14]:
# --- Network topology ---
INPUT_NODE = 28 * 28   # one input node per pixel of a 28x28 image (784)
OUTPUT_NODE = 10       # one output node per digit class, 0-9
LAYER1_NODE = 500      # number of nodes in the hidden layer

# --- Training hyper-parameters ---
BATCH_SIZE = 100              # number of samples packed into each training batch
LEARNING_RATE_BASE = 0.8      # initial learning rate
LEARNING_RATE_DECAY = 0.99    # learning-rate decay rate
REGULARAZTION_RATE = 1e-4     # weight-regularization coefficient (lambda)
TRAINING_STEPS = 10000        # number of training iterations
MOVING_AVERAGE_DECAY = 0.99   # decay rate of the moving average

#### 2. 定义辅助函数来计算前向传播结果，使用ReLU作为激活函数。
#### Three layers: one input layer, one hidden layer, and one output layer.

In [15]:
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Compute the forward pass: a ReLU hidden layer followed by a linear output layer.

    Args:
        input_tensor: input fed to the first matrix multiplication.
        avg_class: ``None`` to use the parameters exactly as given, or an
            object whose ``average(var)`` method returns the value to use in
            place of ``var`` (``train`` passes a
            ``tf.train.ExponentialMovingAverage``).
        weights1: hidden-layer weights.
        biases1: hidden-layer biases.
        weights2: output-layer weights.
        biases2: output-layer biases.

    Returns:
        The output-layer result ``matmul(layer1, weights2) + biases2``; no
        softmax is applied here.
    """
    # Identity check, not ``== None``: equality would dispatch to the object's
    # ``__eq__`` and could misclassify an averaging object as "no averaging".
    if avg_class is not None:
        # Swap every parameter for its moving-average counterpart.
        weights1 = avg_class.average(weights1)
        biases1 = avg_class.average(biases1)
        weights2 = avg_class.average(weights2)
        biases2 = avg_class.average(biases2)

    layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
    return tf.matmul(layer1, weights2) + biases2

#### 3. 定义训练过程。

In [16]:
def train(mnist):
    """Build the network, train it on ``mnist`` and print its accuracy.

    The whole model is built inside a private ``tf.Graph`` so that calling
    this function again in the same kernel starts from a clean graph instead
    of appending duplicate nodes and variables to the process-wide default
    graph.

    Args:
        mnist: dataset object with ``train``, ``validation`` and ``test``
            splits. ``train`` must provide ``num_examples`` and
            ``next_batch(batch_size)``; ``validation`` and ``test`` must
            provide ``images`` and ``labels``. Labels are fed into a
            ``[None, OUTPUT_NODE]`` placeholder and reduced with ``argmax``,
            i.e. they are expected to be one-hot encoded.

    Returns:
        None. Validation accuracy (every 1000 steps) and the final test
        accuracy are printed.
    """
    graph = tf.Graph()
    with graph.as_default():
        x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')    # [batch, INPUT_NODE]
        y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')  # [batch, OUTPUT_NODE]

        # Hidden-layer parameters; weights are drawn from a truncated normal distribution.
        weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
        biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
        # Output-layer parameters.
        weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
        biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
        model_variables = [weights1, biases1, weights2, biases2]

        # Forward pass using the raw (non-averaged) parameters; this drives the loss.
        y = inference(x, None, weights1, biases1, weights2, biases2)

        # Step counter; not trainable, so the optimizer never updates it via gradients.
        global_step = tf.Variable(0, trainable=False)
        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
        # Op that refreshes the moving average of exactly the four model
        # parameters that ``inference`` looks up through ``average()``.
        variables_averages_op = variable_averages.apply(model_variables)
        # Forward pass using the moving-average parameters; used for evaluation.
        average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)

        # Cross entropy and its mean over the batch. The sparse variant takes
        # class indices as labels, hence the argmax over the one-hot labels.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
        cross_entropy_mean = tf.reduce_mean(cross_entropy)

        # L2 regularization is applied to the weights only, not to the biases.
        regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
        regularaztion = regularizer(weights1) + regularizer(weights2)
        # Total loss.
        loss = cross_entropy_mean + regularaztion

        # Exponentially decaying learning rate.
        learning_rate = tf.train.exponential_decay(
            LEARNING_RATE_BASE,                     # starting learning rate
            global_step,
            mnist.train.num_examples / BATCH_SIZE,  # decay_steps: batches per pass over the training set
            LEARNING_RATE_DECAY,
            staircase=True)

        # Gradient-descent step; also increments global_step.
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

        # One op that both updates the parameters and refreshes their moving
        # averages. Equivalent to: train_op = tf.group(train_step, variables_averages_op)
        with tf.control_dependencies([train_step, variables_averages_op]):
            train_op = tf.no_op(name='train')

        # Accuracy of the moving-average model.
        # tf.argmax returns the index of the largest value along axis 1;
        # tf.equal compares element-wise, giving one boolean per sample: shape [batch].
        correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # scalar

        # Open a session on the private graph and run the training loop.
        with tf.Session(graph=graph) as sess:
            tf.global_variables_initializer().run()  # initialize all variables
            validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
            test_feed = {x: mnist.test.images, y_: mnist.test.labels}

            for i in range(TRAINING_STEPS):
                # Report validation accuracy every 1000 steps, before the step runs.
                if i % 1000 == 0:
                    validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                    print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))
                # Feed one batch of training data.
                xs, ys = mnist.train.next_batch(BATCH_SIZE)
                sess.run(train_op, feed_dict={x: xs, y_: ys})

            test_acc = sess.run(accuracy, feed_dict=test_feed)
            print("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc))
   

#### 4. 主程序入口，这里设定模型训练次数为10000次（由 TRAINING_STEPS 设定）。

In [17]:
def main(argv=None):
    """Load the MNIST data with one-hot labels and run ``train`` on it.

    Args:
        argv: accepted but not used.
    """
    data_dir = "../../0_datasets/MNIST_data/"
    train(input_data.read_data_sets(data_dir, one_hot=True))


if __name__ == '__main__':
    main()

Extracting ../../0_datasets/MNIST_data/train-images-idx3-ubyte.gz
Extracting ../../0_datasets/MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../../0_datasets/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../../0_datasets/MNIST_data/t10k-labels-idx1-ubyte.gz
correct_prediction.shape:  (?,)
accuracy.shape:  ()
After 0 training step(s), validation accuracy using average model is 0.116 
After 1000 training step(s), validation accuracy using average model is 0.977 
After 2000 training step(s), validation accuracy using average model is 0.982 
After 3000 training step(s), validation accuracy using average model is 0.9836 
After 4000 training step(s), validation accuracy using average model is 0.9834 
After 5000 training step(s), validation accuracy using average model is 0.9846 
After 6000 training step(s), validation accuracy using average model is 0.9828 
After 7000 training step(s), validation accuracy using average model is 0.9838 
After 8000 training step(s), validation accuracy usi