In [2]:
# -*- coding:utf-8 -*-
'''
This example uses TensorBoard.
part2: mnist_train.py
Mainly used to train the neural network, with the test and validation data kept separate.
'''
import os
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_inference

import time

# Neural-network training hyperparameters (all consumed by train() below).
# Number of examples requested from mnist.train.next_batch per iteration.
BATCH_SIZE = 100
# Initial learning rate handed to tf.train.exponential_decay.
LEARNING_RATE_BASE = 0.8
# Decay rate handed to tf.train.exponential_decay.
LEARNING_RATE_DECAY = 0.99
# Scale passed to tf.contrib.layers.l2_regularizer.
REGULARIZATION_RATE = 0.0001
# Number of iterations of the training loop.
TRAINING_STEPS = 30000
# Decay passed to tf.train.ExponentialMovingAverage.
MOVING_AVERAGE_DECAY = 0.99
# Directory and file name used when saving model checkpoints.
MODEL_SAVE_PATH = 'MNIST_MODEL'
MODEL_NAME = 'model.ckpt'

'''
Train the model
'''
def train(mnist):
    """Build the MNIST training graph, train it, and log it for TensorBoard.

    The graph comes from ``mnist_inference.inference`` with an L2
    regularizer, an exponential moving average over the trainable variables,
    and gradient descent with an exponentially decaying learning rate.
    Every 1000 iterations the step is run with full tracing, its run metadata
    is written to the TensorBoard log, the batch loss is printed, and a
    checkpoint is saved under ``MODEL_SAVE_PATH``.

    Args:
        mnist: Dataset object exposing ``train.num_examples`` and
            ``train.next_batch(batch_size)``; labels are expected to be
            one-hot encoded (they are reduced with ``tf.argmax``).

    Returns:
        None.
    """
    # Input placeholders live in the "input" name scope.
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input')

    # L2 regularizer handed to the inference graph.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    # Forward pass.
    y = mnist_inference.inference(x, None, regularizer)
    global_step = tf.Variable(0, trainable=False)

    # Moving-average bookkeeping lives in the "moving_average" name scope.
    with tf.name_scope('moving_average'):
        variable_averages = tf.train.ExponentialMovingAverage(
            decay=MOVING_AVERAGE_DECAY, num_updates=global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Loss computation lives in the "loss_function" name scope.
    with tf.name_scope('loss_function'):
        # Labels arrive one-hot; the sparse op wants class indices.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.argmax(y_, 1), logits=y)
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        # Add every term registered in the 'losses' collection.
        loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))

    # Learning rate, optimizer and the combined training op live in the
    # "train_step" name scope.
    with tf.name_scope('train_step'):
        # Exponentially decaying learning rate.
        learning_rate = tf.train.exponential_decay(
            LEARNING_RATE_BASE,
            global_step,
            mnist.train.num_examples/BATCH_SIZE,
            LEARNING_RATE_DECAY)
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            loss, global_step=global_step)

        # One op that runs both back-propagation and the moving-average
        # update. To train without moving averages, run train_step directly
        # in sess.run instead of train_op.
        with tf.control_dependencies([train_step, variables_averages_op]):
            train_op = tf.no_op(name='train')

    # Saver, writer and session are created outside the train_step scope so
    # that save/restore and init ops do not clutter it in the TensorBoard
    # graph view.
    saver = tf.train.Saver()

    # Make sure the checkpoint directory exists before the first save.
    if not os.path.isdir(MODEL_SAVE_PATH):
        os.makedirs(MODEL_SAVE_PATH)
    checkpoint_path = os.path.join(MODEL_SAVE_PATH, MODEL_NAME)

    # Write the current graph to a timestamped TensorBoard log directory.
    writer = tf.summary.FileWriter('log/'+str(int(time.time())), tf.get_default_graph())
    try:
        with tf.Session() as sess:
            tf.global_variables_initializer().run()

            for i in range(TRAINING_STEPS):
                # Fetch this iteration's training batch.
                xs, ys = mnist.train.next_batch(BATCH_SIZE)
                feed = {x: xs, y_: ys}

                if i % 1000 != 0:
                    # Ordinary step: nothing is logged, so only run the op.
                    sess.run(train_op, feed_dict=feed)
                    continue

                # Every 1000th iteration: trace the step, log it, report the
                # loss and checkpoint the model.
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                _, loss_value, step = sess.run(
                    [train_op, loss, global_step],
                    feed_dict=feed,
                    options=run_options,
                    run_metadata=run_metadata)
                # Record per-node runtime information for this step.
                writer.add_run_metadata(run_metadata, 'step%03d'%i)
                print("After %d training step(s), loss on training batch is %g"%(step, loss_value))
                # Passing global_step appends the step count to the
                # checkpoint file name.
                saver.save(sess, checkpoint_path, global_step=global_step)
    finally:
        # Close the writer even if training is interrupted or fails, so
        # buffered events are flushed and the log file is released.
        writer.close()
    
# 主程序入口
def main(argv=None):
    """Load the MNIST data set from ``MNIST_DATA`` and start training.

    Per the original note, the data is downloaded automatically when it is
    not already present in that directory. ``argv`` is accepted but unused.
    """
    train(input_data.read_data_sets("MNIST_DATA", one_hot=True))
# Program entry point provided by TensorFlow.
# NOTE(review): the recorded notebook output ends in SystemExit, presumably
# raised by tf.app.run() when it finishes — confirm before running in Jupyter.
if __name__ == '__main__':
    tf.app.run()

Extracting MNIST_DATA/train-images-idx3-ubyte.gz
Extracting MNIST_DATA/train-labels-idx1-ubyte.gz
Extracting MNIST_DATA/t10k-images-idx3-ubyte.gz
Extracting MNIST_DATA/t10k-labels-idx1-ubyte.gz
After 1 training step(s), loss on training batch is 3.17171
After 1001 training step(s), loss on training batch is 0.264185
After 2001 training step(s), loss on training batch is 0.167638
After 3001 training step(s), loss on training batch is 0.144205
After 4001 training step(s), loss on training batch is 0.141952
After 5001 training step(s), loss on training batch is 0.103511
After 6001 training step(s), loss on training batch is 0.107374
After 7001 training step(s), loss on training batch is 0.0870436
After 8001 training step(s), loss on training batch is 0.0855941
After 9001 training step(s), loss on training batch is 0.0721913
After 10001 training step(s), loss on training batch is 0.0671718
After 11001 training step(s), loss on training batch is 0.0691625
After 12001 training step(s), loss 

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
