In [13]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Constants tied to the MNIST dataset: input vector length and number of classes.
INPUT_NODE = 784
OUTPUT_NODE = 10

# Network architecture: width of the single hidden layer.
LAYER1_NODE = 500

# Training schedule: examples per step and total number of steps.
BATCH_SIZE = 100
TRAINING_STEPS = 30000

# Optimisation hyper-parameters: initial learning rate and its decay rate,
# the decay of the parameter moving average, and the L2 penalty weight.
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
MOVING_AVERAGE_DECAY = 0.99
REGULARIZATION_RATE = 1e-4

# Helper: forward pass of the network.
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Compute the forward pass: input -> ReLU hidden layer -> linear output layer.

    Args:
        input_tensor: Batch of inputs multiplied by ``weights1``.
        avg_class: ``None``, or an object exposing ``average(variable)`` (for
            example a ``tf.train.ExponentialMovingAverage``). When it is not
            ``None``, every parameter is replaced by ``avg_class.average(param)``
            before it is used.
        weights1: Hidden-layer weights.
        biases1: Hidden-layer biases.
        weights2: Output-layer weights.
        biases2: Output-layer biases.

    Returns:
        The output-layer result ``matmul(hidden, weights2) + biases2``; no
        softmax is applied here.
    """
    # Identity test on purpose: `== None` would be routed through a custom
    # __eq__ on avg_class and could wrongly select the non-averaged branch.
    if avg_class is None:
        w1, b1, w2, b2 = weights1, biases1, weights2, biases2
    else:
        w1 = avg_class.average(weights1)
        b1 = avg_class.average(biases1)
        w2 = avg_class.average(weights2)
        b2 = avg_class.average(biases2)

    # ReLU removes the linearity between the two affine layers.
    hidden = tf.nn.relu(tf.matmul(input_tensor, w1) + b1)
    return tf.matmul(hidden, w2) + b2

# Train the model
def train(mnist):
    """Build the network, train it with gradient descent and print accuracies.

    The graph is built and run inside a private ``tf.Graph`` so that calling
    this function again in the same process (e.g. re-running a notebook cell)
    starts from a clean graph instead of adding to the default one, where
    ``tf.trainable_variables()`` would also return variables left over from
    earlier calls.

    Args:
        mnist: Dataset object providing ``train.num_examples``,
            ``train.next_batch(batch_size)``, ``validation.images``,
            ``validation.labels``, ``test.images`` and ``test.labels``.
            Labels are fed to a ``[None, OUTPUT_NODE]`` float placeholder and
            reduced with ``argmax``, i.e. they are expected to be one-hot.

    Returns:
        None. Validation accuracy is printed every 1000 steps and test
        accuracy once after the last step, both using the moving-average
        parameters.
    """
    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

        # ---- Weights and biases ----
        # Hidden-layer parameters
        weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
        biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
        # Output-layer parameters
        weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
        biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

        # ---- Forward pass ----
        # Forward pass with the raw (non-averaged) parameters; used for the loss.
        y = inference(x, None, weights1, biases1, weights2, biases2)
        # Step counter; not trainable, so it is excluded from the moving average.
        global_step = tf.Variable(0, trainable=False)
        # Moving-average tracker. Passing the step counter as num_updates lets
        # the averages follow the variables more quickly early in training.
        variable_averages = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY, num_updates=global_step)
        # Op that updates the moving average of every trainable variable in
        # this (private) graph.
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        # Forward pass with the moving-average parameters; used for evaluation.
        average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)

        # ---- Loss ----
        # labels: expected class index (argmax of the fed label rows);
        # logits: the un-normalised output of the last layer, before softmax.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(y_, 1), logits=y)
        # Mean cross entropy over the examples in the current batch
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        # L2 regularisation, applied to the weights only (not the biases)
        regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
        regularization = regularizer(weights1) + regularizer(weights2)
        # Total loss
        loss = cross_entropy_mean + regularization

        # ---- Backward pass ----
        # Exponentially decaying learning rate.
        # global_step: current training step.
        # mnist.train.num_examples/BATCH_SIZE: steps needed for one pass over
        # the training data, used as the decay interval.
        learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, mnist.train.num_examples/BATCH_SIZE, LEARNING_RATE_DECAY)
        # Minimising the loss also increments global_step.
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

        # Each training step must
        #   1. update the network parameters by back-propagation, and
        #   2. update the moving average of every parameter.
        # TensorFlow offers two mechanisms for this: tf.control_dependencies and tf.group.
        with tf.control_dependencies([train_step, variables_averages_op]):
            train_op = tf.no_op(name='train')
        # Equivalent to:
        # train_op = tf.group(train_step, variables_averages_op)

        # ---- Evaluation of the moving-average model ----
        # tf.argmax returns the index of the largest value; tf.equal compares
        # the predicted index with the expected one element-wise.
        correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
        # Cast booleans to float32 and average them: accuracy over the fed batch.
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Open a session on the private graph and train.
        with tf.Session() as sess:
            # Initialise all variables
            tf.global_variables_initializer().run()
            # Validation data
            validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
            # Test data
            test_feed = {x: mnist.test.images, y_: mnist.test.labels}

            # Training loop
            for i in range(TRAINING_STEPS):
                if i % 1000 == 0:
                    validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                    print("After %d training step(s), validation accuracy using average model is %g"%(i, validate_acc))

                # Fetch this step's batch and run one training step
                xs, ys = mnist.train.next_batch(BATCH_SIZE)
                sess.run(train_op, feed_dict={x: xs, y_: ys})

            # Training finished: report the final accuracy on the test set
            test_acc = sess.run(accuracy, feed_dict=test_feed)
            print("After %d training step(s), test accuracy using average model is %g"%(TRAINING_STEPS, test_acc))
    
# Program entry point
def main(argv=None):
    """Load MNIST with one-hot labels and train the network on it.

    ``argv`` is accepted so the function can be invoked by ``tf.app.run``;
    it is not used.
    """
    # read_data_sets downloads the data automatically when the given
    # directory does not contain it.
    train(input_data.read_data_sets("MNIST_DATA", one_hot=True))

# tf.app.run() is the program entry helper provided by TensorFlow; it calls
# main() defined above. main is passed explicitly here, which is the same
# function tf.app.run() would otherwise look up on the __main__ module.
# NOTE(review): tf.app.run() ends by calling sys.exit(); inside a notebook
# kernel this surfaces as SystemExit -- confirm whether calling main()
# directly is preferable here.
if __name__ == '__main__':
    tf.app.run(main=main)

Extracting MNIST_DATA/train-images-idx3-ubyte.gz
Extracting MNIST_DATA/train-labels-idx1-ubyte.gz
Extracting MNIST_DATA/t10k-images-idx3-ubyte.gz
Extracting MNIST_DATA/t10k-labels-idx1-ubyte.gz
After 0 training step(s), validation accuracy using average model is 0.0828
After 1000 training step(s), validation accuracy using average model is 0.9754
After 2000 training step(s), validation accuracy using average model is 0.9816
After 3000 training step(s), validation accuracy using average model is 0.9832
After 4000 training step(s), validation accuracy using average model is 0.983
After 5000 training step(s), validation accuracy using average model is 0.984
After 6000 training step(s), validation accuracy using average model is 0.983
After 7000 training step(s), validation accuracy using average model is 0.984
After 8000 training step(s), validation accuracy using average model is 0.9836
After 9000 training step(s), validation accuracy using average model is 0.9838
After 10000 training st

NameError: name 'TREANING_STEPS' is not defined