In [1]:
import warnings
# Suppress every Python warning. This runs before TensorFlow is imported.
warnings.filterwarnings(action='ignore')

import tensorflow as tf

# Build the session with on-demand GPU memory growth enabled.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config=config)

import numpy as np

# Load MNIST through the TensorFlow tutorial helper, with one-hot labels.
from tensorflow.examples.tutorials.mnist import input_data
MNIST_DIR = '../../../../data/MNIST_data'
mnist = input_data.read_data_sets(MNIST_DIR, one_hot=True)

# Report the label-array shapes of the test split, then the train split.
for split in (mnist.test, mnist.train):
    print(split.labels.shape)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ../../../../data/MNIST_data\train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ../../../../data/MNIST_data\train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting ../../../../data/MNIST_data\t10k-images-idx3-ubyte.gz
Extracting ../../../../data/MNIST_data\t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
(10000, 10)
(55000, 10)


In [2]:
# Learning rate handed to the Adam optimizer.
lr = 0.001
# Features per time step: one image row of 28 pixels is fed at each step.
input_size = 28
# Sequence length: 28 rows are fed before a prediction is made.
timestep_size = 28
# Number of units in each GRU cell (passed as num_units).
hidden_size = 256
# Number of stacked recurrent layers in each direction.
layer_num = 2
# Number of output classes (a regression setup would use 1 instead).
class_num = 10
# Which cell implementation gru_cell builds: "gru" or "block_gru".
cell_type = "block_gru"

In [3]:
# Flattened input images: one row per example, timestep_size * input_size
# values each (28 * 28 = 784 with the configured values).
X_input = tf.placeholder(tf.float32, [None, timestep_size * input_size])
# One-hot labels, one column per class.
y_input = tf.placeholder(tf.float32, [None, class_num])
# Scalar batch size. It is fed by the training loop, but no op in the
# visible cells consumes it.
batch_size = tf.placeholder(tf.int32, [])
# Scalar dropout keep probability, passed to the cells' DropoutWrapper.
keep_prob = tf.placeholder(tf.float32, [])

In [4]:
# Turn each flat image into a sequence of timestep_size rows with input_size
# features per row; the leading -1 keeps the batch dimension dynamic.
X = tf.reshape(X_input, [-1, timestep_size, input_size])

In [5]:
def gru_cell(cell_type, num_nodes, keep_prob):
    """Build a single GRU cell wrapped with output dropout.

    Args:
        cell_type: 'gru' selects ``tf.contrib.rnn.GRUCell``; 'block_gru'
            selects ``tf.contrib.rnn.GRUBlockCellV2``.
        num_nodes: passed to the cell as ``num_units``.
        keep_prob: passed to ``DropoutWrapper`` as ``output_keep_prob``.

    Returns:
        The chosen cell wrapped in ``tf.contrib.rnn.DropoutWrapper``.

    Raises:
        ValueError: if ``cell_type`` is not 'gru' or 'block_gru'.
    """
    # Validate explicitly: `assert (cond, msg)` tests a non-empty tuple, which
    # is always truthy, so an unknown type would silently build a block GRU.
    if cell_type not in ('gru', 'block_gru'):
        raise ValueError('wrong cell type: {!r}'.format(cell_type))
    if cell_type == 'gru':
        cell = tf.contrib.rnn.GRUCell(num_units=num_nodes)
    else:
        cell = tf.contrib.rnn.GRUBlockCellV2(num_units=num_nodes)
    cell = tf.contrib.rnn.DropoutWrapper(cell=cell, output_keep_prob=keep_prob)
    return cell

In [6]:
# Both directions use the same cell settings; each list holds layer_num
# independently constructed cells (forward stack first, then backward).
cell_kwargs = dict(cell_type=cell_type, num_nodes=hidden_size, keep_prob=keep_prob)
cells_fw = [gru_cell(**cell_kwargs) for _ in range(layer_num)]
cells_bw = [gru_cell(**cell_kwargs) for _ in range(layer_num)]

In [7]:
# Run the stacked bidirectional RNN over the row sequence. Only the per-step
# outputs are kept; the two returned state values are discarded.
outputs, _, _ = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
    cells_fw=cells_fw,
    cells_bw=cells_bw,
    inputs=X,
    dtype=tf.float32,
)
print(outputs.shape)

(?, 28, 512)


In [8]:
# Average the RNN outputs over axis 1 (the time axis), dropping that axis.
h_state = tf.reduce_mean(outputs, axis=1)
print(h_state)

Tensor("Mean:0", shape=(?, 512), dtype=float32)


In [9]:
import time

In [10]:
# Output projection: the pooled features have hidden_size*2 columns
# (forward and backward outputs side by side), mapped to class_num scores.
W = tf.Variable(
    initial_value=tf.truncated_normal([hidden_size*2, class_num], stddev=0.1),
    dtype=tf.float32,
)
b = tf.Variable(initial_value=tf.constant(0.1, shape=[class_num]), dtype=tf.float32)
# Class probabilities from the linear scores.
logits = tf.matmul(h_state, W) + b
y_pre = tf.nn.softmax(logits)

In [11]:
# Cross-entropy between the one-hot labels and the predicted probabilities.
# The probabilities are clipped away from 0 before the log so that an
# underflowed softmax output cannot produce -inf/NaN in the loss.
# The mean runs over every element (batch x classes), not per example.
cross_entropy = -tf.reduce_mean(y_input * tf.log(tf.clip_by_value(y_pre, 1e-10, 1.0)))
# One Adam update step that minimizes the loss above.
train_op = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

In [12]:
# A prediction is correct when the arg-max class of the probabilities matches
# the arg-max of the one-hot label; accuracy is the mean of those matches.
true_class = tf.argmax(y_input, axis=1)
pred_class = tf.argmax(y_pre, axis=1)
correct_pred = tf.equal(true_class, pred_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [13]:
# Initialize every global variable in the graph.
init_op = tf.global_variables_initializer()
sess.run(init_op)
# Write the graph definition to ./log.
train_writer = tf.summary.FileWriter(logdir='./log', graph=sess.graph)

In [15]:
# Mini-batch size used for both the training step and the evaluation batches.
_batch_size = 100
# Number of test mini-batches averaged at each evaluation point.
N = 10

time0 = time.time()
for i in range(201):
    # One optimization step on a training mini-batch, with dropout at 0.5.
    X_batch, y_batch = mnist.train.next_batch(batch_size=_batch_size)
    train_feed = {X_input: X_batch, y_input: y_batch, keep_prob: 0.5, batch_size: _batch_size}
    cost, acc, _ = sess.run([cross_entropy, accuracy, train_op], feed_dict=train_feed)

    # Evaluate only on every 50th iteration.
    if i % 50 != 0:
        continue

    # Average loss and accuracy over N test mini-batches, with dropout off.
    test_acc = 0.0
    test_cost = 0.0
    for j in range(N):
        X_batch, y_batch = mnist.test.next_batch(batch_size=_batch_size)
        test_feed = {X_input: X_batch, y_input: y_batch, keep_prob: 1, batch_size: _batch_size}
        _cost, _acc = sess.run([cross_entropy, accuracy], feed_dict=test_feed)
        test_acc += _acc
        test_cost += _cost
    print("step {}, train cost={:.6f}, acc={:.6f}; test cost={:.6f}, acc={:.6f}; pass {}s".format(
        i+1, cost, acc, test_cost/N, test_acc/N, time.time() - time0))

step 1, train cost=0.031210, acc=0.890000; test cost=0.028272, acc=0.903000; pass 2.911722183227539s
step 51, train cost=0.011690, acc=0.960000; test cost=0.023203, acc=0.933000; pass 36.240747928619385s
step 101, train cost=0.007786, acc=0.970000; test cost=0.016109, acc=0.938000; pass 70.10521221160889s
step 151, train cost=0.026945, acc=0.920000; test cost=0.019548, acc=0.943000; pass 100.65363097190857s
step 201, train cost=0.010894, acc=0.960000; test cost=0.012655, acc=0.959000; pass 131.8401596546173s
