In [1]:
import tensorflow as tf
import numpy as np

In [2]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# TensorFlow中的基本RNN

In [3]:
n_inputs = 3
n_neurons = 5

X0 = tf.placeholder(tf.float32, shape=(None, n_inputs))
X1 = tf.placeholder(tf.float32, shape=(None, n_inputs))

Wx = tf.Variable(tf.random_normal(shape=(n_inputs, n_neurons), dtype=tf.float32))
Wy = tf.Variable(tf.random_normal(shape=(n_neurons, n_neurons), dtype=tf.float32))
b = tf.Variable(tf.zeros(shape=(1, n_neurons), dtype=tf.float32))

Y0 = tf.tanh(tf.matmul(X0, Wx) + b)
Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)

init = tf.global_variables_initializer()

In [4]:
X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])

with tf.Session() as sess:
    init.run()
    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})
    
print(Y0_val)
print(Y1_val)

[[ 0.8960613  -0.2378181  -0.13065062 -0.7704926   0.83936375]
 [ 0.9996792  -0.69378126 -0.9424555   0.9999672   0.02875524]
 [ 0.99999905 -0.8991875  -0.99771976  1.         -0.82152474]
 [ 0.99748886  0.99777275 -0.9999996   1.         -1.        ]]
[[ 0.99999654 -0.98873156 -0.99986076  1.         -0.99999905]
 [ 0.8602821  -0.6243707   0.80740273 -0.9987659  -0.99889207]
 [ 0.99999183 -0.94118714 -0.99101806  1.         -0.99998206]
 [ 0.9997068  -0.9877578  -0.34135965  0.9993494  -0.99935824]]


# 通过时间静态展开

In [5]:
reset_graph()

X0 = tf.placeholder(tf.float32, shape=(None, n_inputs))
X1 = tf.placeholder(tf.float32, shape=(None, n_inputs))

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1], dtype=tf.float32)

Y0, Y1 = output_seqs

init = tf.global_variables_initializer()

X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])

with tf.Session() as sess:
    init.run()
    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})
    
print(Y0_val)
print(Y1_val)

[[ 0.30741334 -0.32884315 -0.6542847  -0.9385059   0.52089024]
 [ 0.99122757 -0.9542542  -0.7518079  -0.9995208   0.9820235 ]
 [ 0.9999268  -0.99783254 -0.8247353  -0.9999963   0.99947774]
 [ 0.996771   -0.68750614  0.8419969   0.9303911   0.8120684 ]]
[[ 0.99998885 -0.9997605  -0.06679298 -0.9999804   0.99982214]
 [-0.6524944  -0.51520866 -0.37968954 -0.59225935 -0.08968385]
 [ 0.998624   -0.997152   -0.03308626 -0.9991565   0.9932902 ]
 [ 0.99681675 -0.9598194   0.39660636 -0.8307605   0.7967197 ]]


In [6]:
reset_graph()

n_steps = 2
n_inputs = 3
n_neurons = 5

X = tf.placeholder(tf.float32, shape=(None, n_steps, n_inputs))
X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs, dtype=tf.float32)
outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])

init = tf.global_variables_initializer()

X_batch = np.array([
    [[0, 1, 2], [9, 8, 7]],
    [[3, 4, 5], [0, 0, 0]],
    [[6, 7, 8], [6, 5, 4]],
    [[9, 0, 1], [3, 2, 1]],
])
print(X_batch.shape)

with tf.Session() as sess:
    init.run()
    outputs_val = outputs.eval(feed_dict={X: X_batch})
    
print(outputs_val)

(4, 2, 3)
[[[-0.45652324 -0.68064123  0.40938237  0.63104504 -0.45732826]
  [-0.94288003 -0.9998869   0.94055814  0.9999985  -0.9999997 ]]

 [[-0.8001535  -0.9921827   0.7817797   0.9971031  -0.9964609 ]
  [-0.637116    0.11300932  0.5798437   0.43105593 -0.63716984]]

 [[-0.93605185 -0.9998379   0.9308867   0.9999815  -0.99998295]
  [-0.9165386  -0.9945604   0.89605415  0.99987197 -0.9999751 ]]

 [[ 0.9927369  -0.9981933  -0.55543643  0.9989031  -0.9953323 ]
  [-0.02746334 -0.73191994  0.7827872   0.9525682  -0.97817713]]]


# 通过时间动态展开

In [8]:
reset_graph()

n_steps = 2
n_inputs = 3
n_neurons = 5

X = tf.placeholder(tf.float32, shape=(None, n_steps, n_inputs))

basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

init = tf.global_variables_initializer()

X_batch = np.array([
    [[0, 1, 2], [9, 8, 7]],
    [[3, 4, 5], [0, 0, 0]],
    [[6, 7, 8], [6, 5, 4]],
    [[9, 0, 1], [3, 2, 1]],
])

with tf.Session() as sess:
    init.run()
    outputs_val = outputs.eval(feed_dict={X: X_batch})
    
print(outputs_val)

[[[-0.85115266  0.87358344  0.5802911   0.8954789  -0.0557505 ]
  [-0.9999959   0.9999958   0.9981815   1.          0.37679598]]

 [[-0.9983293   0.9992038   0.98071456  0.999985    0.2519265 ]
  [-0.70818055 -0.07723375 -0.8522789   0.5845348  -0.7878095 ]]

 [[-0.9999827   0.99999535  0.9992863   1.          0.5159071 ]
  [-0.9993956   0.9984095   0.83422637  0.9999999  -0.47325212]]

 [[ 0.87888587  0.07356028  0.97216916  0.9998546  -0.7351168 ]
  [-0.91345143  0.36009577  0.7624866   0.99817705  0.80142   ]]]


# 输入序列长度可变

In [9]:
reset_graph()

n_steps = 2
n_inputs = 3
n_neurons = 5

X = tf.placeholder(tf.float32, shape=(None, n_steps, n_inputs))

basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)

seq_length = tf.placeholder(tf.int32, shape=(None))

outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,
                                    sequence_length=seq_length)

init = tf.global_variables_initializer()

X_batch = np.array([
    [[0, 1, 2], [9, 8, 7]],
    [[3, 4, 5], [0, 0, 0]],
    [[6, 7, 8], [6, 5, 4]],
    [[9, 0, 1], [3, 2, 1]],
])
seq_length_batch = np.array([2, 1, 2, 2])

with tf.Session() as sess:
    init.run()
    outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})
    
print(outputs_val)

[[[ 0.89885193  0.8505537   0.56778944 -0.8374146  -0.2971686 ]
  [ 0.3383331   0.99999976  0.9999229  -0.81598383 -0.9884344 ]]

 [[ 0.9687243   0.9997934   0.9865295  -0.97417724 -0.7423587 ]
  [ 0.          0.          0.          0.          0.        ]]

 [[ 0.9905691   0.99999976  0.9996664  -0.99614036 -0.92241997]
  [ 0.03740451  0.9997974   0.9984369   0.17223889 -0.9581747 ]]

 [[-0.99994785  0.9370876   0.9552368   0.9998339  -0.9967721 ]
  [ 0.06362314  0.9876109   0.9854438   0.29195604  0.7661818 ]]]


In [10]:
print(states_val)

[[ 0.3383331   0.99999976  0.9999229  -0.81598383 -0.9884344 ]
 [ 0.9687243   0.9997934   0.9865295  -0.97417724 -0.7423587 ]
 [ 0.03740451  0.9997974   0.9984369   0.17223889 -0.9581747 ]
 [ 0.06362314  0.9876109   0.9854438   0.29195604  0.7661818 ]]


# 训练RNN

In [3]:
reset_graph()

n_steps = 28
n_inputs = 28
n_neurons = 150
n_outputs = 10

learning_rate = 0.001

X = tf.placeholder(tf.float32, shape=(None, n_steps, n_inputs))
y = tf.placeholder(tf.int32, shape=(None))

basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

logits = tf.layers.dense(states, n_outputs)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)

loss = tf.reduce_mean(xentropy)

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

training_op = optimizer.minimize(loss)

correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

In [4]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [5]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]
print(X_train.shape)
# X_train = X_train.reshape((-1, n_steps, n_inputs))
# X_valid = X_valid.reshape((-1, n_steps, n_inputs))
# X_test = X_test.reshape((-1, n_steps, n_inputs))
# print(X_train.shape)

(55000, 28, 28)


In [6]:
n_epochs = 100
batch_size = 150

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
#         for iteration in range(mnist.train.num_examples // batch_size):
#             X_batch, y_batch = mnist.train.next_batch(batch_size)
#             X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_train, y: y_train})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Training acc:", acc_train, "Validation acc:", acc_val)

0 Training acc: 0.514491 Validation acc: 0.515
1 Training acc: 0.534909 Validation acc: 0.5348
2 Training acc: 0.534782 Validation acc: 0.532
3 Training acc: 0.544873 Validation acc: 0.5388
4 Training acc: 0.549491 Validation acc: 0.5464
5 Training acc: 0.553927 Validation acc: 0.5448
6 Training acc: 0.557455 Validation acc: 0.555
7 Training acc: 0.558927 Validation acc: 0.5516
8 Training acc: 0.560018 Validation acc: 0.5524
9 Training acc: 0.567691 Validation acc: 0.5642
10 Training acc: 0.5698 Validation acc: 0.5726
11 Training acc: 0.572036 Validation acc: 0.5598
12 Training acc: 0.564964 Validation acc: 0.5606
13 Training acc: 0.572018 Validation acc: 0.5674
14 Training acc: 0.567873 Validation acc: 0.5656
15 Training acc: 0.572945 Validation acc: 0.5626
16 Training acc: 0.573382 Validation acc: 0.5696
17 Training acc: 0.584055 Validation acc: 0.5772
18 Training acc: 0.579618 Validation acc: 0.571
19 Training acc: 0.574255 Validation acc: 0.567
20 Training acc: 0.580291 Validation 

In [8]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("./mnist/")
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels

Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./mnist/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./mnist/train-labels-idx1-ubyte.gz
Extracting ./mnist/t10k-images-idx3-ubyte.gz
Extracting ./mnist/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [9]:
n_epochs = 100
batch_size = 150

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

0 Train accuracy: 0.933333 Test accuracy: 0.9311
1 Train accuracy: 0.966667 Test accuracy: 0.9522
2 Train accuracy: 0.973333 Test accuracy: 0.9584
3 Train accuracy: 0.96 Test accuracy: 0.9613
4 Train accuracy: 0.966667 Test accuracy: 0.9659
5 Train accuracy: 0.966667 Test accuracy: 0.9694
6 Train accuracy: 0.973333 Test accuracy: 0.9692
7 Train accuracy: 0.973333 Test accuracy: 0.9741
8 Train accuracy: 0.953333 Test accuracy: 0.972
9 Train accuracy: 0.98 Test accuracy: 0.973
10 Train accuracy: 0.98 Test accuracy: 0.972
11 Train accuracy: 0.973333 Test accuracy: 0.9675
12 Train accuracy: 0.98 Test accuracy: 0.9707
13 Train accuracy: 0.973333 Test accuracy: 0.9732
14 Train accuracy: 0.973333 Test accuracy: 0.9734
15 Train accuracy: 0.986667 Test accuracy: 0.9729
16 Train accuracy: 1.0 Test accuracy: 0.9717
17 Train accuracy: 0.986667 Test accuracy: 0.9732
18 Train accuracy: 0.98 Test accuracy: 0.9746
19 Train accuracy: 0.986667 Test accuracy: 0.9751
20 Train accuracy: 0.98 Test accuracy:

# 时间序列

In [37]:
t_min, t_max = 0, 30
resolution = 0.1

def time_series(t):
    return t * np.sin(t) / 3 + 2 * np.sin(t*5)

def next_batch(batch_size, n_steps):
    t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)
    Ts = t0 + np.arange(0., n_steps + 1) * resolution
    ys = time_series(Ts)
    return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)

In [38]:
reset_graph()

n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1

X = tf.placeholder(tf.float32, shape=(None, n_steps, n_inputs))
y = tf.placeholder(tf.float32, shape=(None, n_steps, n_outputs))

#cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)
cell = tf.contrib.rnn.OutputProjectionWrapper(
    tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu), output_size=n_outputs)

outputs, ststes = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

learning_rate = 0.001

loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

saver = tf.train.Saver()

In [39]:
n_iterations = 1500
batch_size = 50

with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if iteration % 100 == 0:
            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
            print(iteration, "\tMSE:", mse)
            
    saver.save(sess, "./model/rnn")

0 	MSE: 11.9672575
100 	MSE: 0.52584445
200 	MSE: 0.14959829
300 	MSE: 0.07381975
400 	MSE: 0.06171744
500 	MSE: 0.05968452
600 	MSE: 0.055536177
700 	MSE: 0.047983035
800 	MSE: 0.050073363
900 	MSE: 0.04728166
1000 	MSE: 0.047391903
1100 	MSE: 0.048076287
1200 	MSE: 0.040791243
1300 	MSE: 0.0479767
1400 	MSE: 0.04211445


### 预测

In [44]:
n_steps = 20
resolution = 0.1
t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1), n_steps + 1)

In [48]:
with tf.Session() as sess:
    saver.restore(sess, "./model/rnn")
    X_new = time_series(np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
    y_pred = sess.run(outputs, feed_dict={X: X_new})

INFO:tensorflow:Restoring parameters from ./model/rnn


In [49]:
print(y_pred)

[[[-3.406515 ]
  [-2.4501145]
  [-1.1305888]
  [ 0.7817146]
  [ 2.2008858]
  [ 3.138787 ]
  [ 3.399897 ]
  [ 3.3627138]
  [ 2.8842826]
  [ 2.2652197]
  [ 1.650614 ]
  [ 1.5299088]
  [ 1.8965788]
  [ 2.731001 ]
  [ 3.9058359]
  [ 5.14548  ]
  [ 6.144004 ]
  [ 6.6781516]
  [ 6.6523943]
  [ 6.0744486]]]


In [51]:
reset_graph()

n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1

X = tf.placeholder(tf.float32, shape=(None, n_steps, n_inputs))
y = tf.placeholder(tf.float32, shape=(None, n_steps, n_outputs))

cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)
rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

rnn_outputs, ststes = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

stacked_rnn_outputs = tf.reshape(rnn_outputs, shape=(-1, n_neurons))
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, shape=(-1, n_steps, n_outputs))

learning_rate = 0.001

loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

In [None]:
n_iterations = 1500
batch_size = 50

with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if iteration % 100 == 0:
            mse = loss.eval(feed_dict={X: X_batch, y: y_batch})
            print(iteration, "\tMSE:", mse)
        