In [1]:
import warnings
warnings.filterwarnings("ignore")
import os, time, sys
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf

[TensorFlow中RNN实现的正确打开方式](https://zhuanlan.zhihu.com/p/28196873) (in English: "The right way to implement RNNs in TensorFlow")

In [3]:
# A basic (vanilla) RNN cell with 128 units; for this cell type the state size
# and the output size are both equal to the number of units.
cell = tf.nn.rnn_cell.BasicRNNCell(128)
print(cell.state_size, cell.output_size)

128 128


In [4]:
# Run the basic RNN cell for exactly one time step on a batch of 32 rows
# with 100 features each.
inputs = tf.placeholder(tf.float32, shape=(32, 100))
h0 = cell.zero_state(32, tf.float32)  # zero_state builds an all-zero initial state
# Invoke the cell as a callable instead of reaching for the __call__ dunder.
outputs, h1 = cell(inputs, h0)  # one call == one time step
print(outputs)
print(h1)

Tensor("basic_rnn_cell/Tanh:0", shape=(32, 128), dtype=float32)
Tensor("basic_rnn_cell/Tanh:0", shape=(32, 128), dtype=float32)


In [6]:
# Same single-step experiment with an LSTM cell. Its state is a tuple with
# two parts, the cell state `c` and the hidden state `h`.
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=128)
inputs = tf.placeholder(tf.float32, shape=(32, 100))
h0 = lstm_cell.zero_state(32, tf.float32)
# Invoke the cell as a callable instead of reaching for the __call__ dunder.
outputs, h1 = lstm_cell(inputs, h0)

print(h1.h)  # shape=(32, 128)
print(h1.c)  # shape=(32, 128)

Tensor("basic_lstm_cell_1/Mul_2:0", shape=(32, 128), dtype=float32)
Tensor("basic_lstm_cell_1/Add_1:0", shape=(32, 128), dtype=float32)


In [7]:
def get_a_cell(num_units=128):
    """Create a new LSTM cell.

    Args:
        num_units: Number of units in the cell. Defaults to 128, the width
            used throughout this notebook.

    Returns:
        A freshly constructed ``tf.nn.rnn_cell.LSTMCell``. Every call builds a
        separate object, so each layer of a stacked cell gets its own instance.
    """
    return tf.nn.rnn_cell.LSTMCell(num_units=num_units)


# Stack three LSTM layers into a single composite cell (MultiRNNCell).
cell = tf.nn.rnn_cell.MultiRNNCell([get_a_cell() for _ in range(3)])
print(cell.state_size, cell.output_size)

(LSTMStateTuple(c=128, h=128), LSTMStateTuple(c=128, h=128), LSTMStateTuple(c=128, h=128)) 128


In [8]:
# One time step through the stacked cell; the returned state holds one
# LSTMStateTuple(c, h) per layer.
inputs = tf.placeholder(tf.float32, shape=(32, 100))
h0 = cell.zero_state(32, tf.float32)
# Invoke the cell as a callable instead of reaching for the __call__ dunder.
outputs, h1 = cell(inputs, h0)
h1

(LSTMStateTuple(c=<tf.Tensor 'multi_rnn_cell/cell_0/lstm_cell/add_1:0' shape=(32, 128) dtype=float32>, h=<tf.Tensor 'multi_rnn_cell/cell_0/lstm_cell/mul_2:0' shape=(32, 128) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'multi_rnn_cell/cell_1/lstm_cell/add_1:0' shape=(32, 128) dtype=float32>, h=<tf.Tensor 'multi_rnn_cell/cell_1/lstm_cell/mul_2:0' shape=(32, 128) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'multi_rnn_cell/cell_2/lstm_cell/add_1:0' shape=(32, 128) dtype=float32>, h=<tf.Tensor 'multi_rnn_cell/cell_2/lstm_cell/mul_2:0' shape=(32, 128) dtype=float32>))

In [9]:
# Display the output tensor returned by the single-step call on the stacked cell.
outputs

<tf.Tensor 'multi_rnn_cell/cell_2/lstm_cell/mul_2:0' shape=(32, 128) dtype=float32>

In [10]:
# The hidden state `h` of the last layer is the very same tensor object as the
# cell output. `is` states that identity check explicitly instead of depending
# on how `==` happens to be defined for tensors.
h1[-1].h is outputs

True

In [11]:
# Clear the default graph before building the next example.
tf.reset_default_graph()

In [12]:
# get_a_cell() is already defined in an earlier cell. Resetting the graph does
# not remove Python functions, so it is reused here rather than redefined.
cell = tf.nn.rnn_cell.MultiRNNCell([get_a_cell() for _ in range(3)])

# Input of shape (32, 100, 50); dynamic_rnn unrolls along the middle axis.
inputs = tf.placeholder(tf.float32, shape=(32, 100, 50))
outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)  # dynamic RNN

In [13]:
# Display the dynamic_rnn output tensor together with the number of tensors
# obtained by unstacking it along axis 1.
outputs, len(tf.unstack(outputs, axis=1))

(<tf.Tensor 'rnn/transpose_1:0' shape=(32, 100, 128) dtype=float32>, 100)

In [14]:
# Display the final state returned by dynamic_rnn.
state

(LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_3:0' shape=(32, 128) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_4:0' shape=(32, 128) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_5:0' shape=(32, 128) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_6:0' shape=(32, 128) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_7:0' shape=(32, 128) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_8:0' shape=(32, 128) dtype=float32>))

In [15]:
tf.reset_default_graph()

# get_a_cell() is already defined in an earlier cell, so it is reused here
# rather than redefined.
cell = tf.nn.rnn_cell.MultiRNNCell([get_a_cell() for _ in range(3)])

# static_rnn takes a Python list of per-step tensors, so the 3-D placeholder is
# unstacked along axis 1. The placeholder keeps its own name so that `inputs`
# is never rebound from a tensor to a list; `inputs` still ends up as the list.
inputs_3d = tf.placeholder(tf.float32, shape=(32, 100, 50))
inputs = tf.unstack(inputs_3d, axis=1)
outputs, state = tf.nn.static_rnn(cell, inputs, dtype=tf.float32)  # static RNN

In [16]:
# Display the outputs returned by static_rnn.
outputs

[<tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_2/lstm_cell/mul_2:0' shape=(32, 128) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_2/lstm_cell/mul_5:0' shape=(32, 128) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_2/lstm_cell/mul_8:0' shape=(32, 128) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_2/lstm_cell/mul_11:0' shape=(32, 128) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_2/lstm_cell/mul_14:0' shape=(32, 128) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_2/lstm_cell/mul_17:0' shape=(32, 128) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_2/lstm_cell/mul_20:0' shape=(32, 128) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_2/lstm_cell/mul_23:0' shape=(32, 128) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_2/lstm_cell/mul_26:0' shape=(32, 128) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_2/lstm_cell/mul_29:0' shape=(32, 128) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_2/lstm_cell/

In [17]:
# Display the final state returned by static_rnn.
state

(LSTMStateTuple(c=<tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_0/lstm_cell/add_199:0' shape=(32, 128) dtype=float32>, h=<tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_0/lstm_cell/mul_299:0' shape=(32, 128) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_1/lstm_cell/add_199:0' shape=(32, 128) dtype=float32>, h=<tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_1/lstm_cell/mul_299:0' shape=(32, 128) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_2/lstm_cell/add_199:0' shape=(32, 128) dtype=float32>, h=<tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_2/lstm_cell/mul_299:0' shape=(32, 128) dtype=float32>))

In [18]:
# The last per-step output is the very same tensor object as the hidden state
# `h` of the top layer. `is` states that identity check explicitly instead of
# depending on how `==` happens to be defined for tensors.
outputs[-1] is state[-1].h

True

### LSTM example

[https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py)

In [2]:
# Load MNIST through Keras; the result is unpacked into a training pair and a
# test pair.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

In [5]:
# Scale pixel values by 1/255 and cast to float32. The rescale is applied only
# while the arrays are still integer-typed, so re-running this cell does not
# divide by 255 a second time.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train.astype(np.float32) / 255.0
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test.astype(np.float32) / 255.0

# The label placeholder is int32, so cast the labels to match (a no-op on re-run).
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

In [6]:
# Model and training hyperparameters used by the graph-building cells.
num_inputs = 28  # size of the last axis of the X placeholder (features per step)
time_steps = 28  # size of the middle axis of the X placeholder (steps per sequence)
num_hidden = 128  # number of units in the recurrent cell
num_classes = 10  # number of output logits
learning_rate = 0.001  # passed to the optimizer when the graph is built

In [7]:
# Clear the default graph before building the MNIST model.
tf.reset_default_graph()

In [8]:
# Graph inputs: a batch of sequences and one integer class label per sequence.
X = tf.placeholder(dtype=tf.float32, shape=(None, time_steps, num_inputs))
y = tf.placeholder(dtype=tf.int32, shape=(None,))

# Randomly initialised output projection, consumed by RNN().
weights = tf.Variable(tf.random_normal(shape=(num_hidden, num_classes)))
biases = tf.Variable(tf.random_normal(shape=(num_classes,)))

def RNN(x, weights, biases):
    """Build an LSTM classifier with static unrolling.

    Args:
        x: Input tensor; it is unstacked into ``time_steps`` tensors along axis 1.
        weights: Matrix multiplied with the last step's output.
        biases: Vector added to the projected output.

    Returns:
        The logits tensor ``last_output @ weights + biases``.
    """
    step_inputs = tf.unstack(x, num=time_steps, axis=1)
    cell = tf.nn.rnn_cell.LSTMCell(num_units=num_hidden, forget_bias=1.0)
    step_outputs, _ = tf.nn.static_rnn(cell, step_inputs, dtype=tf.float32)
    # Only the output of the final time step feeds the classifier.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights) + biases

def RNN2(x, weights, biases):
    """Build a GRU classifier with dynamic unrolling.

    Args:
        x: Input tensor passed straight to ``tf.nn.dynamic_rnn``.
        weights: Unused. Accepted so the signature matches ``RNN``; the output
            projection is created by ``tf.layers.dense`` instead.
        biases: Unused, for the same reason as ``weights``.

    Returns:
        The logits tensor with ``num_classes`` units, computed from the output
        of the last time step.
    """
    gru_cell = tf.nn.rnn_cell.GRUCell(num_units=num_hidden)
    outputs, _ = tf.nn.dynamic_rnn(gru_cell, x, dtype=tf.float32)
    # Taking index -1 on axis 1 already yields a 2-D tensor whose last axis is
    # num_hidden, so no extra reshape is needed before the dense layer.
    last_output = outputs[:, -1, :]
    return tf.layers.dense(last_output, num_classes)

# Model output: raw logits from the GRU model and their softmax probabilities.
logits = RNN2(X, weights, biases)
prediction = tf.nn.softmax(logits)

# Mean sparse softmax cross-entropy over the batch.
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
)

# Adam is used here; tf.train.GradientDescentOptimizer(learning_rate) is an
# alternative.
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)

# A prediction counts as correct when the arg-max class equals the label
# (tf.nn.in_top_k(logits, y, 1) is an alternative formulation).
correct = tf.equal(tf.argmax(prediction, axis=1), tf.cast(y, tf.int64))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

In [9]:
def generate_batch(X, y, batch_size):
    """Yield shuffled mini-batches that together cover every sample once.

    The samples are shuffled with a single random permutation and split into
    ``len(X) // batch_size`` nearly equal parts (at least one part), so a batch
    may hold slightly more than ``batch_size`` rows when the sizes do not
    divide evenly.

    Args:
        X: Array of samples, indexable by an integer index array.
        y: Array of labels aligned with ``X`` along the first axis.
        batch_size: Target number of samples per batch; must be positive.

    Yields:
        ``(X_batch, y_batch)`` tuples. Nothing is yielded for empty input.

    Raises:
        ValueError: If ``batch_size`` is not positive (raised on first iteration).
    """
    if batch_size < 1:
        raise ValueError(
            "batch_size must be a positive integer, got {!r}".format(batch_size)
        )
    n_samples = len(X)
    if n_samples == 0:
        return
    rnd_idx = np.random.permutation(n_samples)
    # With fewer samples than batch_size the floor division gives 0, which
    # np.array_split rejects; fall back to one batch holding everything.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

In [10]:
class mnist:
    """Shuffled copy of a sample/label pair that is consumed batch by batch."""

    def __init__(self, X, y):
        """Shuffle ``X`` and ``y`` with one shared random permutation."""
        shuffled = np.random.permutation(len(X))
        # Indexing both arrays with the same permutation keeps rows and labels aligned.
        self.X, self.y = X[shuffled], y[shuffled]
        self.i = 0  # number of batches handed out so far

    def generate_batch(self, batch_size):
        """Return the next ``(X, y)`` slice of up to ``batch_size`` rows.

        Each call advances the internal batch counter. Once the data is
        exhausted the returned slices are empty.
        """
        start = self.i * batch_size
        window = slice(start, start + batch_size)
        self.i += 1
        return self.X[window], self.y[window]

In [11]:
from datetime import datetime

# Each run logs into its own directory, named after the UTC timestamp at which
# this cell was executed.
root_logdir = "tf_logs"
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
logdir = "{}/run-{}".format(root_logdir, now)

# Scalar summaries. Both accuracy summaries wrap the same `accuracy` tensor and
# differ only in their tag.
loss_summary = tf.summary.scalar("Loss", loss)
acc_train_summary = tf.summary.scalar("Training_Accuracy", accuracy)
acc_test_summary = tf.summary.scalar("Test_Accuracy", accuracy)

# The writer also receives the current default graph.
file_writer = tf.summary.FileWriter(logdir, graph=tf.get_default_graph())

In [12]:
# Training configuration. The optimizer was already built from `learning_rate`
# when the graph was constructed, so re-assigning that variable here would have
# no effect on training; it is intentionally not set in this cell.
n_epochs = 10
batch_size = 128

# Loop invariants: number of full mini-batches per epoch and the test feed.
n_batch = len(X_train) // batch_size
test_feed = {X: X_test, y: y_test}

with tf.Session() as sess:  # tensorboard
    init.run()
    for epoch in range(n_epochs):
        start_time = time.time()
        data = mnist(X_train, y_train)  # fresh shuffle of the training set
        for i in range(n_batch):
            print("\r{}%".format(100 * i // n_batch), end="")
            sys.stdout.flush()
            X_batch, y_batch = data.generate_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # "Training" metrics are measured on the last mini-batch of the epoch
        # only. Values and their summaries are fetched in one run per feed so
        # the network is evaluated once per dataset instead of twice.
        loss_train, acc_train, summary_loss, summary_train_acc = sess.run(
            [loss, accuracy, loss_summary, acc_train_summary],
            feed_dict={X: X_batch, y: y_batch})
        acc_test, summary_test_acc = sess.run(
            [accuracy, acc_test_summary], feed_dict=test_feed)

        # Tag every summary with the 1-based epoch number; without a
        # global_step all epochs are recorded at the same step.
        for summary in (summary_loss, summary_train_acc, summary_test_acc):
            file_writer.add_summary(summary, global_step=epoch + 1)
        file_writer.flush()  # make the events visible to TensorBoard right away

        print("\rEpoch ", str(epoch + 1), 
              "\ttraining loss: {:.4f}".format(loss_train), 
              "\ttraining accuracy: {:.4f}".format(acc_train), 
              "\ttest accuracy: {:.4f}".format(acc_test), 
              "\ttime: {:.2f}".format(time.time() - start_time))

Epoch  1 	training loss: 0.14 	training accuracy: 0.97 	test accuracy: 0.96 	time: 39.84
Epoch  2 	training loss: 0.11 	training accuracy: 0.97 	test accuracy: 0.97 	time: 35.63
Epoch  3 	training loss: 0.04 	training accuracy: 0.99 	test accuracy: 0.98 	time: 35.07
Epoch  4 	training loss: 0.01 	training accuracy: 1.00 	test accuracy: 0.98 	time: 34.84
Epoch  5 	training loss: 0.03 	training accuracy: 0.99 	test accuracy: 0.98 	time: 35.43
Epoch  6 	training loss: 0.07 	training accuracy: 0.98 	test accuracy: 0.99 	time: 35.08
Epoch  7 	training loss: 0.03 	training accuracy: 0.98 	test accuracy: 0.98 	time: 35.07
Epoch  8 	training loss: 0.02 	training accuracy: 0.99 	test accuracy: 0.99 	time: 35.42
Epoch  9 	training loss: 0.01 	training accuracy: 1.00 	test accuracy: 0.99 	time: 35.27
Epoch  10 	training loss: 0.02 	training accuracy: 1.00 	test accuracy: 0.99 	time: 35.62
