# 循环神经网络

In [106]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import warnings

# NOTE(review): this silences every warning category for the whole notebook,
# including deprecation warnings emitted by the libraries used below.
warnings.simplefilter('ignore')

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Default font sizes for axis labels and tick labels.
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

In [107]:
import tensorflow as tf

# 基本RNN

## 手工 RNN

In [112]:
# Build a two-time-step recurrent layer by hand, without any RNN helper.
tf.reset_default_graph()

n_inputs = 3   # features per instance at each time step
n_neurons = 5  # units in the recurrent layer

# One placeholder per time step; the batch dimension is left open.
X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

# Wx multiplies the current input; Wy multiplies the previous step's output
# (see H1 below); b is a bias shared by both steps.
Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons],dtype=tf.float32))
Wy = tf.Variable(tf.random_normal(shape=[n_neurons,n_neurons],dtype=tf.float32))
b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))

# Step 0 has no previous output, so only the input term is present.
H0 = tf.tanh(tf.matmul(X0, Wx) + b)
# Step 1 combines the previous output H0 with the new input X1, reusing Wx and b.
H1 = tf.tanh(tf.matmul(H0, Wy) + tf.matmul(X1, Wx) + b)

init = tf.global_variables_initializer()

In [113]:
# Mini-batch of four instances, given as one array per time step.
X0_batch = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3]])  # t = 0
X1_batch = np.array([[4, 5, 6], [5, 6, 7], [6, 7, 8], [7, 8, 9]])  # t = 1
step_feed = {X0: X0_batch, X1: X1_batch}

with tf.Session() as sess:
    # Initialise Wx, Wy and b, then evaluate both time-step outputs together.
    sess.run(init)
    H0_val, H1_val = sess.run([H0, H1], feed_dict=step_feed)

In [114]:
# Both time steps yield one row per instance and one column per neuron,
# so the two output arrays have the same shape.
print(H0_val.shape==H1_val.shape)
print(H0_val)

True
[[ 0.          0.          0.          0.          0.        ]
 [-0.98710734 -0.987816    0.2626741  -0.44695795 -0.998247  ]
 [-0.9999159  -0.99992496  0.49143997 -0.7450718  -0.9999985 ]
 [-0.9999995  -0.9999997   0.6678964  -0.8942353  -1.        ]]


## 使用 `static_rnn()`

In [3]:
# Layer dimensions used by the static_rnn() example below.
n_inputs = 3   # features per instance at each time step
n_neurons = 5  # units in the recurrent cell

In [17]:
# Same two-step network as the manual version, built with static_rnn().
tf.reset_default_graph()

# One placeholder per time step; the batch dimension is left open.
X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)
# static_rnn() is given a Python list with one input tensor per time step and
# returns the per-step outputs plus the final state.
output_seqs, states = tf.nn.static_rnn(basic_cell, [X0, X1], dtype=tf.float32)

# Unpack the per-step outputs (two steps, because two inputs were passed).
H0, H1 = output_seqs

In [20]:
init = tf.global_variables_initializer()

# Mini-batch of four instances, given as one array per time step.
X0_batch = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3]])
X1_batch = np.array([[4, 5, 6], [5, 6, 7], [6, 7, 8], [7, 8, 9]])
step_feed = {X0: X0_batch, X1: X1_batch}

with tf.Session() as sess:
    init.run()
    # Fetch the final state and all per-step outputs in a single graph
    # execution instead of re-running the graph once per result.
    # Session.run() mirrors the nesting of the fetches, so seqs_val is a
    # list holding one array per time step.
    s_val, seqs_val = sess.run([states, output_seqs], feed_dict=step_feed)

# output_seqs is [H0, H1], so the per-step values come from the same fetch.
H0_val, H1_val = seqs_val
# Kept under their original names for any cell that still refers to them.
Y0_val, Y1_val = H0_val, H1_val
# Same structure as sess.run([output_seqs]): a one-element list that wraps
# the list of per-step outputs.
output_seqs_val = [seqs_val]

In [19]:
# Element-wise comparison of the final state with the last time step's output.
print(s_val == H1_val)

[[ True  True  True  True  True]
 [ True  True  True  True  True]
 [ True  True  True  True  True]
 [ True  True  True  True  True]]


In [33]:
# output_seqs_val wraps the fetched list; its first element holds one array per time step.
print(len(output_seqs_val[0]))

2


## 使用 `dynamic_rnn()`

In [24]:
# Dimensions for the dynamic_rnn() example.
n_steps = 2    # time steps per sequence
n_inputs = 3   # features per time step
n_neurons = 5  # units in the recurrent cell

In [25]:
# Build the same network with dynamic_rnn(), which takes the whole sequence
# as a single tensor instead of one placeholder per time step.
tf.reset_default_graph()

# Input layout: [batch, n_steps, n_inputs]; the batch dimension is left open.
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)
# `outputs` holds every time step's output; `states` is the final state.
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

In [34]:
# Each instance is a sequence of two time steps with three features each.
X_batch = np.array([
        [[0, 0, 0], [4, 5, 6]], # instance 1
        [[1, 1, 1], [5, 6, 7]], # instance 2
        [[2, 2, 2], [6, 7, 8]], # instance 3
        [[3, 3, 3], [7, 8, 9]], # instance 4
    ])
# Displayed as the cell output: (instances, time steps, features).
X_batch.shape

(4, 2, 3)

In [27]:
init = tf.global_variables_initializer()
# Each instance is a sequence of two time steps with three features each.
X_batch = np.array([
        [[0, 0, 0], [4, 5, 6]], # instance 1
        [[1, 1, 1], [5, 6, 7]], # instance 2
        [[2, 2, 2], [6, 7, 8]], # instance 3
        [[3, 3, 3], [7, 8, 9]], # instance 4
    ])

with tf.Session() as sess:
    init.run()
    # Fetch both tensors in one graph execution; two separate .eval() calls
    # would run the whole RNN twice on the same input.
    outputs_val, states_val = sess.run([outputs, states],
                                       feed_dict={X: X_batch})

In [28]:
# Final state returned by dynamic_rnn(): one row per instance.
print(states_val)

[[-0.9577261  -0.9951577  -0.99985605 -0.7528284  -0.7155064 ]
 [-0.9387486  -0.99966097 -0.99998814 -0.48122504 -0.93438   ]
 [-0.9542085  -0.9999398  -0.9999989  -0.30927062 -0.97431576]
 [-0.9739977  -0.9999842  -0.9999997  -0.25144023 -0.9872973 ]]


In [36]:
# Output at the last time step (index 1) for every instance; compare with the
# final state printed in the previous cell.
print(outputs_val[:,1,:])

[[-0.9577261  -0.9951577  -0.99985605 -0.7528284  -0.7155064 ]
 [-0.9387486  -0.99966097 -0.99998814 -0.48122504 -0.93438   ]
 [-0.9542085  -0.9999398  -0.9999989  -0.30927062 -0.97431576]
 [-0.9739977  -0.9999842  -0.9999997  -0.25144023 -0.9872973 ]]


## 处理变长输入

In [41]:
# Dimensions for the variable-length example.
n_steps = 2    # maximum number of time steps per sequence
n_inputs = 3   # features per time step
n_neurons = 5  # units in the recurrent cell

tf.reset_default_graph()

# Input layout: [batch, n_steps, n_inputs]; the batch dimension is left open.
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)

In [46]:
# Placeholder for the sequence lengths; it is fed with one integer per
# instance when the graph is run.
# NOTE(review): the shape is left completely unspecified (None); [None] would
# document the expected 1-D layout -- confirm before tightening.
seq_length = tf.placeholder(tf.int32, None)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,
                                    sequence_length=seq_length)

In [67]:
init = tf.global_variables_initializer()
X_batch = np.array([
        # time step 0   time step 1
        [[0, 0, 0], [4, 5, 6]], # instance 1
        [[1, 1, 1], [5, 6, 7]], # instance 2
        [[2, 2, 2], [6, 7, 8]], # instance 3 (length 1, so the second step is ignored)
        [[3, 3, 3], [7, 8, 9]], # instance 4
    ])
seq_length_batch = np.array([2, 2, 1, 2]) # instance 3 is given length 1

In [68]:
# Feed the padded batch together with the per-instance lengths.
varlen_feed = {X: X_batch, seq_length: seq_length_batch}

with tf.Session() as sess:
    sess.run(init)
    outputs_val, states_val = sess.run([outputs, states], feed_dict=varlen_feed)

In [69]:
# Per-step outputs; in the printed result the third instance (length 1) has an
# all-zero output at the second time step.
print(outputs_val)

[[[ 0.          0.          0.          0.          0.        ]
  [ 0.9999274   0.78506315 -0.99982464  0.45810056  0.9633542 ]]

 [[ 0.8261823   0.28237453 -0.6885522   0.02922334  0.45902106]
  [ 0.99999136  0.7682828  -0.9999783   0.8550243   0.9970976 ]]

 [[ 0.9820439   0.523044   -0.9341975   0.05839681  0.7582737 ]
  [ 0.          0.          0.          0.          0.        ]]

 [[ 0.9982769   0.701771   -0.9875282   0.08747088  0.90299493]
  [ 0.9999999   0.94575775 -0.9999997   0.9217818   0.99974984]]]


In [70]:
# Final states; in the printed result the third instance's row equals its
# output at the first time step, i.e. its last valid step.
print(states_val)

[[ 0.9999274   0.78506315 -0.99982464  0.45810056  0.9633542 ]
 [ 0.99999136  0.7682828  -0.9999783   0.8550243   0.9970976 ]
 [ 0.9820439   0.523044   -0.9341975   0.05839681  0.7582737 ]
 [ 0.9999999   0.94575775 -0.9999997   0.9217818   0.99974984]]


## TensorFlow 循环网络实战

In [78]:
# Build an RNN classifier for MNIST: each 28x28 image is read as a sequence of
# 28 time steps (rows) with 28 inputs (pixels) each.
tf.reset_default_graph()

n_steps = 28      # time steps per image
n_inputs = 28     # inputs per time step
n_neurons = 150   # units in the recurrent cell
n_outputs = 10    # number of classes

learning_rate = 0.001

with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
    # Integer class labels, one per instance.
    y = tf.placeholder(tf.int32, [None])

with tf.name_scope("rnn"):
    basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)
    outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

with tf.name_scope("fc"):
    # Only the final state is passed to the dense layer that produces the logits.
    logits = tf.layers.dense(states, n_outputs)

with tf.name_scope("train"):
    # Cross-entropy on integer labels, averaged over the batch.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                          logits=logits)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # A prediction is correct when the true label is the top-1 logit.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

In [79]:
# Load MNIST and reshape the test images to [instances, n_steps, n_inputs]
# so they match the X placeholder.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./tmp/data/")
# NOTE(review): X4 is not referenced by any other visible cell -- confirm it
# can be dropped.
X4 = mnist.test.images
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels

Extracting ./tmp/data/train-images-idx3-ubyte.gz
Extracting ./tmp/data/train-labels-idx1-ubyte.gz
Extracting ./tmp/data/t10k-images-idx3-ubyte.gz
Extracting ./tmp/data/t10k-labels-idx1-ubyte.gz


In [77]:
# Labels are a flat vector with one entry per test image.
y_test.shape

(10000,)

In [80]:
n_epochs = 100
batch_size = 100
# Number of mini-batches that make up one pass over the training set.
n_batches = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            # Flat image vectors -> [batch, n_steps, n_inputs] sequences.
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Train accuracy is measured on the last mini-batch of the epoch only;
        # test accuracy uses the full test set.
        acc_train = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_test = sess.run(accuracy, feed_dict={X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

0 Train accuracy: 0.95 Test accuracy: 0.9258
1 Train accuracy: 0.99 Test accuracy: 0.9384
2 Train accuracy: 0.99 Test accuracy: 0.9598
3 Train accuracy: 0.93 Test accuracy: 0.9653
4 Train accuracy: 0.98 Test accuracy: 0.9649
5 Train accuracy: 1.0 Test accuracy: 0.9685
6 Train accuracy: 0.98 Test accuracy: 0.9722
7 Train accuracy: 0.98 Test accuracy: 0.9662
8 Train accuracy: 0.99 Test accuracy: 0.9681
9 Train accuracy: 0.96 Test accuracy: 0.9646
10 Train accuracy: 1.0 Test accuracy: 0.9731
11 Train accuracy: 0.97 Test accuracy: 0.9645
12 Train accuracy: 0.98 Test accuracy: 0.977
13 Train accuracy: 0.95 Test accuracy: 0.9691
14 Train accuracy: 0.97 Test accuracy: 0.9758
15 Train accuracy: 1.0 Test accuracy: 0.9774
16 Train accuracy: 0.99 Test accuracy: 0.9789
17 Train accuracy: 0.95 Test accuracy: 0.9732
18 Train accuracy: 0.98 Test accuracy: 0.9714
19 Train accuracy: 1.0 Test accuracy: 0.978
20 Train accuracy: 0.99 Test accuracy: 0.9738
21 Train accuracy: 0.97 Test accuracy: 0.9737
22 T

## tf.estimator 循环网络实战

In [81]:
def rnn_model_fn(features, labels, mode):
    """Model function for a tf.estimator.Estimator: a basic RNN MNIST classifier.

    Every dimension is defined inside the function, so it does not depend on
    variables created by other notebook cells.

    Args:
        features: dict of input tensors; the images are read from key "x" and
            reshaped to [batch, 28, 28].
        labels: integer class labels, used for the loss and the accuracy
            metric (not used in PREDICT mode).
        mode: one of the tf.estimator.ModeKeys values.

    Returns:
        A tf.estimator.EstimatorSpec carrying predictions (PREDICT), loss and
        train op (TRAIN), or loss and eval metrics (any other mode).
    """
    n_steps = 28      # each image row is one time step
    n_inputs = 28     # pixels per row
    n_neurons = 150   # units in the recurrent cell
    n_classes = 10    # digit classes 0-9

    input_layer = tf.reshape(features["x"], [-1, n_steps, n_inputs])

    basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)
    outputs, states = tf.nn.dynamic_rnn(basic_cell, input_layer, dtype=tf.float32)

    # Only the final state feeds the classification layer.
    logits = tf.layers.dense(states, n_classes)

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        # Passing the global step lets the optimizer increment it on each update.
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())

        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Remaining mode: report accuracy alongside the loss.
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

In [83]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./tmp/data/")
train_data = mnist.train.images  # returned as an np.array
train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
eval_data = mnist.test.images  # returned as an np.array
eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

# Create the Estimator; rnn_model_fn builds the model and checkpoints are
# written to model_dir.
# NOTE(review): model_dir is an absolute /tmp path while the data lives under
# the relative ./tmp/data/ -- confirm this is intentional.
mnist_classifier = tf.estimator.Estimator(
    model_fn=rnn_model_fn, model_dir="/tmp/mnist_rnn_model")

# Train the model: shuffled batches of 100, repeated indefinitely
# (num_epochs=None), stopped after 20000 steps.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": train_data}, y=train_labels, batch_size=100,
    num_epochs=None, shuffle=True)
mnist_classifier.train( input_fn=train_input_fn, steps=20000)

# Evaluate the model with one unshuffled pass over the test set and print the results.
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": eval_data}, y=eval_labels, num_epochs=1, shuffle=False)
eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)

Extracting ./tmp/data/train-images-idx3-ubyte.gz
Extracting ./tmp/data/train-labels-idx1-ubyte.gz
Extracting ./tmp/data/t10k-images-idx3-ubyte.gz
Extracting ./tmp/data/t10k-labels-idx1-ubyte.gz
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_task_id': 0, '_save_summary_steps': 100, '_master': '', '_log_step_count_steps': 100, '_session_config': None, '_service': None, '_is_chief': True, '_task_type': 'worker', '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/tmp/mnist_rnn_model', '_num_worker_replicas': 1, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fc308251eb8>}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/mnist_rnn_model/model.ckpt.
INFO:tensorflow:step = 1, loss = 2.3687696
INFO:tensorflow:global_step/sec: 24.9666
INFO:tensorflow:

INFO:tensorflow:step = 7401, loss = 0.29265976 (3.976 sec)
INFO:tensorflow:global_step/sec: 25.1972
INFO:tensorflow:step = 7501, loss = 0.33486122 (3.969 sec)
INFO:tensorflow:global_step/sec: 25.3579
INFO:tensorflow:step = 7601, loss = 0.27511376 (3.944 sec)
INFO:tensorflow:global_step/sec: 23.6245
INFO:tensorflow:step = 7701, loss = 0.47377288 (4.233 sec)
INFO:tensorflow:global_step/sec: 24.8572
INFO:tensorflow:step = 7801, loss = 0.30049488 (4.023 sec)
INFO:tensorflow:global_step/sec: 25.1435
INFO:tensorflow:step = 7901, loss = 0.3639042 (3.977 sec)
INFO:tensorflow:global_step/sec: 25.3183
INFO:tensorflow:step = 8001, loss = 0.2357262 (3.950 sec)
INFO:tensorflow:global_step/sec: 25.3166
INFO:tensorflow:step = 8101, loss = 0.2941084 (3.950 sec)
INFO:tensorflow:global_step/sec: 25.3183
INFO:tensorflow:step = 8201, loss = 0.3178193 (3.950 sec)
INFO:tensorflow:global_step/sec: 25.1629
INFO:tensorflow:step = 8301, loss = 0.41584778 (3.974 sec)
INFO:tensorflow:global_step/sec: 25.1618
INFO

INFO:tensorflow:step = 15501, loss = 0.0618818 (4.006 sec)
INFO:tensorflow:global_step/sec: 25.2271
INFO:tensorflow:step = 15601, loss = 0.19846804 (3.962 sec)
INFO:tensorflow:global_step/sec: 25.2996
INFO:tensorflow:step = 15701, loss = 0.14752124 (3.953 sec)
INFO:tensorflow:global_step/sec: 25.0879
INFO:tensorflow:step = 15801, loss = 0.16215582 (3.985 sec)
INFO:tensorflow:global_step/sec: 25.1228
INFO:tensorflow:step = 15901, loss = 0.168251 (3.981 sec)
INFO:tensorflow:global_step/sec: 24.5264
INFO:tensorflow:step = 16001, loss = 0.22507143 (4.077 sec)
INFO:tensorflow:global_step/sec: 24.7463
INFO:tensorflow:step = 16101, loss = 0.2095325 (4.042 sec)
INFO:tensorflow:global_step/sec: 24.1559
INFO:tensorflow:step = 16201, loss = 0.22724357 (4.139 sec)
INFO:tensorflow:global_step/sec: 25.1971
INFO:tensorflow:step = 16301, loss = 0.25522813 (3.969 sec)
INFO:tensorflow:global_step/sec: 24.8618
INFO:tensorflow:step = 16401, loss = 0.14047818 (4.022 sec)
INFO:tensorflow:global_step/sec: 25

## tf.keras 循环网络实战

In [108]:
from tensorflow.python.keras.datasets import mnist
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, SimpleRNN, LSTM, GRU
from tensorflow.python.keras.utils import to_categorical
from tensorflow.python.keras.losses import categorical_crossentropy
from tensorflow.python.keras.optimizers import Adadelta

In [109]:
# Training hyper-parameters and sequence layout (28 rows of 28 pixels).
batch_size = 128
num_classes = 10
epochs = 12
steps = 28
input_size = 28

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Arrange each image as [steps, input_size], convert to float32 and scale the
# pixel values by 1/255.
x_train = x_train.reshape(x_train.shape[0], steps, input_size).astype('float32') / 255
x_test = x_test.reshape(x_test.shape[0], steps, input_size).astype('float32') / 255

# One-hot encode the labels, as required by categorical cross-entropy.
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

In [110]:
# SimpleRNN layer followed by a softmax classifier over the digit classes.
model = Sequential([
    SimpleRNN(units=150, batch_input_shape=(None, steps, input_size)),
    Dense(num_classes, activation='softmax'),
])

model.compile(
    optimizer=Adadelta(),
    loss=categorical_crossentropy,
    metrics=['accuracy'],
)

In [111]:
# Train, validating on the test split after every epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

# evaluate() returns the loss followed by the compiled metrics (accuracy here).
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Train on 60000 samples, validate on 10000 samples
Epoch 1/12

Epoch 2/12

Epoch 3/12

Epoch 4/12

Epoch 5/12

Epoch 6/12

Epoch 7/12

Epoch 8/12

Epoch 9/12

Epoch 10/12

Epoch 11/12

Epoch 12/12

Test loss: 0.08116694699320942
Test accuracy: 0.9756


## LSTM 网络

In [None]:
# LSTM layer followed by a softmax classifier over the digit classes.
model = Sequential([
    LSTM(units=150, batch_input_shape=(None, steps, input_size)),
    Dense(num_classes, activation='softmax'),
])

model.compile(
    optimizer=Adadelta(),
    loss=categorical_crossentropy,
    metrics=['accuracy'],
)

In [102]:
# Train, validating on the test split after every epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

# evaluate() returns the loss followed by the compiled metrics (accuracy here).
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Train on 60000 samples, validate on 10000 samples
Epoch 1/12

Epoch 2/12

Epoch 3/12

Epoch 4/12

Epoch 5/12

Epoch 6/12

Epoch 7/12

Epoch 8/12

Epoch 9/12

Epoch 10/12

Epoch 11/12

Epoch 12/12

Test loss: 0.05405718424441293
Test accuracy: 0.9834


## GRU 网络

In [104]:
# GRU layer followed by a softmax classifier over the digit classes.
model = Sequential([
    GRU(units=150, batch_input_shape=(None, steps, input_size)),
    Dense(num_classes, activation='softmax'),
])

model.compile(
    optimizer=Adadelta(),
    loss=categorical_crossentropy,
    metrics=['accuracy'],
)

In [105]:
# Train, validating on the test split after every epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

# evaluate() returns the loss followed by the compiled metrics (accuracy here).
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Train on 60000 samples, validate on 10000 samples
Epoch 1/12

Epoch 2/12

Epoch 3/12

Epoch 4/12

Epoch 5/12

Epoch 6/12

Epoch 7/12

Epoch 8/12

Epoch 9/12

Epoch 10/12

Epoch 11/12

Epoch 12/12

Test loss: 0.06714096867516636
Test accuracy: 0.9787
