In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

In [2]:
import numpy as np
import tensorflow as tf

In [3]:
def reset_graph(seed = 42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

Basic RNN in TensorFlow

We will create an RNN composed of a layer of five recurrent neurons, using the tanh activation function. We will assume that the RNN runs over only two time steps, taking input vectors of size 3 at each time step.

In [4]:
reset_graph()

n_inputs = 3
n_neurons = 5

X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

Wx = tf.Variable(tf.random_normal(shape = [n_inputs, n_neurons], dtype = tf.float32))
Wy = tf.Variable(tf.random_normal(shape = [n_neurons,n_neurons], dtype = tf.float32))
b = tf.Variable(tf.zeros([1, n_neurons], dtype = tf.float32))

Y0 = tf.tanh(tf.matmul(X0, Wx) + b)
Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)

init = tf.global_variables_initializer()

Instructions for updating:
Colocations handled automatically by placer.


In [5]:
#Mini-batch:        Instance 0,     1,         2,         3   
X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])     #t = 0
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])     #t = 1

with tf.Session() as sess:
    init.run()
    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict = {X0: X0_batch, X1: X1_batch})

This mini-batch contains four instances, each with an input sequence composed of exactly two inputs. At the end, Y0_val and Y1_val contain the outputs of the network at both time steps for all neurons and all instances in the mini-batch.

In [6]:
print(Y0_val)    #output at t = 0

[[-0.0664006   0.9625767   0.68105793  0.7091854  -0.898216  ]
 [ 0.9977755  -0.719789   -0.9965761   0.9673924  -0.9998972 ]
 [ 0.99999774 -0.99898803 -0.9999989   0.9967762  -0.9999999 ]
 [ 1.         -1.         -1.         -0.99818915  0.9995087 ]]


In [7]:
print(Y1_val)    #output at t = 1

[[ 1.         -1.         -1.          0.4020025  -0.9999998 ]
 [-0.12210419  0.62805265  0.9671843  -0.9937122  -0.2583937 ]
 [ 0.9999983  -0.9999994  -0.9999975  -0.85943305 -0.9999881 ]
 [ 0.99928284 -0.99999815 -0.9999058   0.9857963  -0.92205757]]


Using TensorFlow's RNN operations

Using static_rnn()

The static_rnn() function creates an unrolled RNN network by chaining cells.

In [8]:
n_inputs = 3
n_neurons = 5

X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units = n_neurons)
output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1], dtype = tf.float32)

Y0, Y1 = output_seqs


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.SimpleRNNCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell, unroll=True)`, which is equivalent to this API


In [9]:
init = tf.global_variables_initializer()

In [10]:
X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])

with tf.Session() as sess:
    init.run()
    Y0_val, Y1_val = sess.run([Y0, Y1], feed_dict = {X0: X0_batch, X1: X1_batch})

In [11]:
Y0_val

array([[ 0.43304   ,  0.84509647, -0.49599633,  0.70050293, -0.59927976],
       [ 0.03501973,  0.9954992 , -0.99537265,  0.9963196 , -0.95661724],
       [-0.3744288 ,  0.9998789 , -0.9999681 ,  0.9999614 , -0.9960836 ],
       [-0.9999052 ,  0.10390692, -0.9956875 ,  0.7440089 , -0.61860865]],
      dtype=float32)

In [12]:
Y1_val

array([[-9.6329939e-01,  9.9918967e-01, -9.9999893e-01,  9.9997318e-01,
        -9.9773598e-01],
       [-1.4756016e-01, -5.8684641e-01, -3.7418893e-01, -7.4381530e-01,
        -3.5059705e-01],
       [-8.8282663e-01,  9.1975665e-01, -9.9985534e-01,  9.8879361e-01,
        -9.5658535e-01],
       [-6.0985601e-01, -2.0184807e-01, -9.7303903e-01, -3.0905005e-05,
        -1.1064763e-01]], dtype=float32)

Packing sequences

In [13]:
n_steps = 2
n_inputs = 3
n_neurons = 5

In [14]:
reset_graph()

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
X_seqs = tf.unstack(tf.transpose(X, perm = [1, 0, 2]))

basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units = n_neurons)
output_seqs, states = tf.nn.static_rnn(basic_cell, X_seqs, dtype = tf.float32)
outputs = tf.transpose(tf.stack(output_seqs), perm = [1, 0, 2])

In [15]:
init = tf.global_variables_initializer()

Now, we can run the network by feeding it a single tensor that contains all the mini-batch sequences.

In [16]:
                    #  t = 0   ,   t = 1
X_batch = np.array([[[0, 1, 2], [9, 8, 7]], # instance 1
                    [[3, 4, 5], [0, 0, 0]], # instance 2
                    [[6, 7, 8], [6, 5, 4]], # instance 3
                    [[9, 0, 1], [3, 2, 1]], # instance 4
                    ])

with tf.Session() as sess:
    init.run()
    outputs_val = outputs.eval(feed_dict = {X: X_batch})

In [17]:
print(outputs_val)

[[[-0.45652324 -0.68064123  0.40938237  0.63104504 -0.45732826]
  [-0.9428799  -0.9998869   0.94055814  0.9999985  -0.9999997 ]]

 [[-0.8001535  -0.9921827   0.7817797   0.9971032  -0.9964609 ]
  [-0.637116    0.11300927  0.5798437   0.4310559  -0.6371699 ]]

 [[-0.93605185 -0.9998379   0.9308867   0.9999815  -0.99998295]
  [-0.9165386  -0.9945604   0.896054    0.99987197 -0.9999751 ]]

 [[ 0.9927369  -0.9981933  -0.55543643  0.9989031  -0.9953323 ]
  [-0.02746338 -0.73191994  0.7827872   0.9525682  -0.9781773 ]]]


Using dynamic_rnn()

The dynamic_rnn() function uses a while_loop() operation to run over the cell the appropriate number of times, and you can set "swap_memory = True" if you want it to swap the GPU’s memory to the CPU’s memory during backpropagation to avoid OOM errors.

Conveniently, it also accepts a single tensor for all inputs at every time step (shape of [None, n_steps, n_inputs]) and it outputs a single tensor for all outputs at every time step (shape of [None, n_steps, n_neurons]); there is no need to stack, unstack, or transpose.

In [18]:
n_steps = 2
n_inputs = 3
n_neurons = 5

In [19]:
reset_graph()

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units = n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype = tf.float32)

Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API


In [20]:
init = tf.global_variables_initializer()

In [21]:
X_batch = np.array([
                    [[0, 1, 2], [9, 8, 7]], # instance 1
                    [[3, 4, 5], [0, 0, 0]], # instance 2
                    [[6, 7, 8], [6, 5, 4]], # instance 3
                    [[9, 0, 1], [3, 2, 1]], # instance 4
                    ])

with tf.Session() as sess:
    init.run()
    outputs_val = outputs.eval(feed_dict = {X: X_batch})

In [22]:
print(outputs_val)

[[[-0.85115266  0.87358344  0.5802911   0.8954789  -0.0557505 ]
  [-0.999996    0.99999577  0.9981815   1.          0.37679607]]

 [[-0.9983293   0.9992038   0.98071456  0.999985    0.25192663]
  [-0.7081804  -0.0772338  -0.85227895  0.5845349  -0.78780943]]

 [[-0.9999827   0.99999535  0.9992863   1.          0.5159072 ]
  [-0.9993956   0.9984095   0.83422637  0.99999976 -0.47325212]]

 [[ 0.87888587  0.07356028  0.97216916  0.9998546  -0.7351168 ]
  [-0.9134514   0.3600957   0.7624866   0.99817705  0.80142   ]]]


Handling Variable-Length Input Sequences

In [23]:
n_steps = 2
n_inputs = 3
n_neurons = 5

In [24]:
reset_graph()

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units = n_neurons)

seq_length = tf.placeholder(tf.int32, [None])
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype = tf.float32, sequence_length = seq_length)

Instructions for updating:
Use tf.cast instead.


In [25]:
init = tf.global_variables_initializer()

In [26]:
                   #  step 0  ,  step 1 
X_batch = np.array([
                    [[0, 1, 2], [9, 8, 7]], # instance 1
                    [[3, 4, 5], [0, 0, 0]], # instance 2 (padded with zero vectors)
                    [[6, 7, 8], [6, 5, 4]], # instance 3
                    [[9, 0, 1], [3, 2, 1]], # instance 4
                    ])
seq_length_batch = np.array([2, 1, 2, 2])

In [27]:
with tf.Session() as sess:
    init.run()
    outputs_val, states_val = sess.run([outputs, states], feed_dict = {X: X_batch, seq_length: seq_length_batch})

In [28]:
print(outputs_val)

[[[-0.9123188   0.16516446  0.5548655  -0.39159346  0.20846416]
  [-1.          0.956726    0.99831694  0.99970174  0.96518576]]

 [[-0.9998612   0.6702289   0.9723653   0.6631046   0.74457586]
  [ 0.          0.          0.          0.          0.        ]]

 [[-0.99999976  0.8967997   0.9986295   0.9647514   0.93662   ]
  [-0.9999526   0.9681953   0.96002865  0.98706263  0.85459226]]

 [[-0.96435434  0.99501586 -0.36150697  0.9983378   0.999497  ]
  [-0.9613586   0.9568762   0.7132288   0.97729224 -0.0958299 ]]]


In [29]:
print(states_val)

[[-1.          0.956726    0.99831694  0.99970174  0.96518576]
 [-0.9998612   0.6702289   0.9723653   0.6631046   0.74457586]
 [-0.9999526   0.9681953   0.96002865  0.98706263  0.85459226]
 [-0.9613586   0.9568762   0.7132288   0.97729224 -0.0958299 ]]


Training a Sequence Classifier

Now, we will train an RNN to classify MNIST images.

In [30]:
reset_graph()

n_steps = 28
n_inputs = 28
n_neurons = 150
n_outputs = 10

learning_rate = 0.001

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])

basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units = n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype = tf.float32)

logits = tf.layers.dense(states, n_outputs)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.cast(tf.nn.in_top_k(logits, y, 1), tf.float32)
accuracy = tf.reduce_mean(correct)

init = tf.global_variables_initializer()

Instructions for updating:
Use keras.layers.dense instead.


In [31]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [32]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [33]:
X_test = X_test.reshape((-1, n_steps, n_inputs))

In [34]:
n_epochs = 100
batch_size = 150

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict = {X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict = {X: X_test, y: y_test})
        print(epoch, "Last batch accuracy:", acc_train, "Test accuracy:", acc_test)

0 Last batch accuracy: 0.9533333 Test accuracy: 0.9288
1 Last batch accuracy: 0.96 Test accuracy: 0.9471
2 Last batch accuracy: 0.96 Test accuracy: 0.9499
3 Last batch accuracy: 0.96 Test accuracy: 0.9563
4 Last batch accuracy: 0.98 Test accuracy: 0.9677
5 Last batch accuracy: 0.93333334 Test accuracy: 0.9651
6 Last batch accuracy: 0.98 Test accuracy: 0.9685
7 Last batch accuracy: 0.96666664 Test accuracy: 0.9678
8 Last batch accuracy: 0.97333336 Test accuracy: 0.9693
9 Last batch accuracy: 0.99333334 Test accuracy: 0.9714
10 Last batch accuracy: 0.98 Test accuracy: 0.9752
11 Last batch accuracy: 0.9866667 Test accuracy: 0.9743
12 Last batch accuracy: 0.94666666 Test accuracy: 0.9716
13 Last batch accuracy: 0.97333336 Test accuracy: 0.9658
14 Last batch accuracy: 1.0 Test accuracy: 0.9772
15 Last batch accuracy: 0.98 Test accuracy: 0.974
16 Last batch accuracy: 0.99333334 Test accuracy: 0.9779
17 Last batch accuracy: 0.9866667 Test accuracy: 0.9775
18 Last batch accuracy: 0.9866667 Tes

 Training to predict Time Series

Now, we will create the RNN. It will contain 100 recurrent neurons and we will unroll it over 20 time steps since each training instance will be 20 inputs long. Each input will contain only one feature (the value at that time). The targets are also sequences of 20 inputs, each containing a single value.

In [43]:
t_min, t_max = 0, 30
resolution = 0.1

def time_series(t):
    return t * np.sin(t) / 3 + 2 * np.sin(t*5)

def next_batch(batch_size, n_steps):
    t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)
    Ts = t0 + np.arange(0., n_steps + 1) * resolution
    ys = time_series(Ts)
    return ys[:, :-1].reshape(-1, n_steps, 1), ys[:, 1:].reshape(-1, n_steps, 1)

In [44]:
reset_graph()

n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
cell = tf.contrib.rnn.BasicRNNCell(num_units = n_neurons, activation = tf.nn.relu)
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype = tf.float32)

Using OutputProjectionWrapper

At each time step we now have an output vector of size 100. But what we actually want is a single output value at each time step. The simplest solution is to wrap the cell in an OutputProjectionWrapper.

A cell wrapper acts like a normal cell, proxying every method call to an underlying cell, but it also adds some functionality. The OutputProjectionWrapper adds a fully connected layer of linear neurons (i.e., without any activation function) on top of each output (but it does not affect the cell state).

In [57]:
reset_graph()

n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])

In [58]:
cell = tf.contrib.rnn.OutputProjectionWrapper(tf.contrib.rnn.BasicRNNCell(num_units = n_neurons, activation = tf.nn.relu), 
                                              output_size = n_outputs)

In [59]:
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype = tf.float32)

In [60]:
learning_rate = 0.001

loss = tf.reduce_mean(tf.square(outputs - y))     # MSE, insted of cross-entropy
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

In [61]:
saver = tf.train.Saver()

In [62]:
n_iterations = 1500
batch_size = 50

with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
        if iteration % 100 == 0:
            mse = loss.eval(feed_dict = {X: X_batch, y: y_batch})
            print(iteration, "\tMSE:", mse)
    
    saver.save(sess, "./my_time_series_model")

0 	MSE: 10.261381
100 	MSE: 0.38792896
200 	MSE: 0.10900874
300 	MSE: 0.061354414
400 	MSE: 0.059336416
500 	MSE: 0.058288667
600 	MSE: 0.052280974
700 	MSE: 0.047044784
800 	MSE: 0.049216457
900 	MSE: 0.0473833
1000 	MSE: 0.047798716
1100 	MSE: 0.047832422
1200 	MSE: 0.041717164
1300 	MSE: 0.046195257
1400 	MSE: 0.04128444


Without using OutputProjectionWrapper

In [65]:
reset_graph()

n_steps = 20
n_inputs = 1
n_neurons = 100

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])

In [66]:
cell = tf.nn.rnn_cell.BasicRNNCell(num_units = n_neurons, activation = tf.nn.relu)
rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype = tf.float32)

In [67]:
n_outputs = 1
learning_rate = 0.001

Then, we stack all the outputs using the reshape() operation, apply the fully connected linear layer (without using any activation function; this is just a projection), and finally unstack all the outputs, again using reshape().

In [68]:
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])

In [69]:
loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [70]:
n_iterations = 1500
batch_size = 50

with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
        if iteration % 100 == 0:
            mse = loss.eval(feed_dict = {X: X_batch, y: y_batch})
            print(iteration, "\tMSE:", mse)
    
    saver.save(sess, "./my_time_series_model")

0 	MSE: 13.907029
100 	MSE: 0.5056698
200 	MSE: 0.19735886
300 	MSE: 0.101214476
400 	MSE: 0.06850145
500 	MSE: 0.06291986
600 	MSE: 0.055129297
700 	MSE: 0.049436502
800 	MSE: 0.050434686
900 	MSE: 0.0482007
1000 	MSE: 0.04809868
1100 	MSE: 0.04982501
1200 	MSE: 0.041912545
1300 	MSE: 0.049292978
1400 	MSE: 0.043140374


Creative RNN

Now, that we have a model that can predict the future, we can use it to generate some creative sequences.

In [71]:
with tf.Session() as sess:
    saver.restore(sess, "./my_time_series_model")

    sequence = [0.] * n_steps
    for iteration in range(300):
        X_batch = np.array(sequence[-n_steps:]).reshape(1, n_steps, 1)
        y_pred = sess.run(outputs, feed_dict = {X: X_batch})
        sequence.append(y_pred[0, -1, 0])

INFO:tensorflow:Restoring parameters from ./my_time_series_model


Deep RNN

Stacking multiple layers of cells gives a deep RNN.

To implement a deep RNN in TensorFlow, we can create several cells and stack them into a MultiRNNCell.

In [78]:
reset_graph()

n_inputs = 2
n_steps = 5

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

In [79]:
n_neurons = 100
n_layers = 3      #stack three identical cells 

layers = [tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons) for layer in range(n_layers)]
multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(layers)
outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)

In [80]:
init = tf.global_variables_initializer()

In [81]:
X_batch = np.random.rand(2, n_steps, n_inputs)

In [82]:
with tf.Session() as sess:
    init.run()
    outputs_val, states_val = sess.run([outputs, states], feed_dict = {X: X_batch})

In [83]:
outputs_val.shape

(2, 5, 100)

Distributing a Deep RNN Across Multiple GPUs

We can efficiently distribute deep RNNs across multiple GPUs by pinning each layer to a different GPU. However, if we try to create each cell in a different device() block, it will not work.

So, we have to create our own cell wrapper.

In [87]:
import tensorflow as tf

class DeviceCellWrapper(tf.nn.rnn_cell.RNNCell):
  def __init__(self, device, cell):
    self._cell = cell
    self._device = device

  @property
  def state_size(self):
    return self._cell.state_size

  @property
  def output_size(self):
    return self._cell.output_size

  def __call__(self, inputs, state, scope=None):
    with tf.device(self._device):
        return self._cell(inputs, state, scope)

This wrapper simply proxies every method call to another cell, except it wraps the __call__() function within a device block.

In [88]:
reset_graph()

n_inputs = 5
n_steps = 20
n_neurons = 100

X = tf.placeholder(tf.float32, shape = [None, n_steps, n_inputs])

Now, we can distribute each layer on a different GPU.

In [89]:
devices = ["/gpu:0", "/gpu:1", "/gpu:2"]
cells = [DeviceCellWrapper(dev,tf.contrib.rnn.BasicRNNCell(num_units = n_neurons)) for dev in devices]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)
outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype = tf.float32)

In [90]:
init = tf.global_variables_initializer()

Applying Dropout

If we build a very deep RNN, it may end up overfitting the training set. To prevent that, a common technique is to apply dropout.

In [92]:
reset_graph()

n_inputs = 1
n_neurons = 100
n_layers = 3
n_steps = 20
n_outputs = 1

In [93]:
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])

In [94]:
keep_prob = tf.placeholder_with_default(1.0, shape = ())

cells = [tf.nn.rnn_cell.BasicRNNCell(num_units = n_neurons) for layer in range(n_layers)]
cells_drop = [tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob = keep_prob) for cell in cells]
multi_layer_cell = tf.nn.rnn_cell.MultiRNNCell(cells_drop)
rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype = tf.float32)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [95]:
learning_rate = 0.01

stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])

loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [96]:
n_iterations = 1500
batch_size = 50
train_keep_prob = 0.5

with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        X_batch, y_batch = next_batch(batch_size, n_steps)
        _, mse = sess.run([training_op, loss], feed_dict = {X: X_batch, y: y_batch, keep_prob: train_keep_prob})
        if iteration % 100 == 0:
            print(iteration, "Training MSE:", mse)
    
    saver.save(sess, "./my_dropout_time_series_model")

0 Training MSE: 15.988591
100 Training MSE: 4.8960195
200 Training MSE: 3.974632
300 Training MSE: 3.2563307
400 Training MSE: 2.876809
500 Training MSE: 3.024914
600 Training MSE: 3.0242488
700 Training MSE: 3.1380572
800 Training MSE: 3.9774392
900 Training MSE: 4.0770907
1000 Training MSE: 3.221789
1100 Training MSE: 3.440483
1200 Training MSE: 2.7763686
1300 Training MSE: 3.4578915
1400 Training MSE: 4.674328


LSTM (Long Short-Term Memory) Cell

In [97]:
reset_graph()

lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units = n_neurons)

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.


In [98]:
n_steps = 28
n_inputs = 28
n_neurons = 150
n_outputs = 10
n_layers = 3

learning_rate = 0.001

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])

lstm_cells = [tf.nn.rnn_cell.BasicLSTMCell(num_units = n_neurons) for layer in range(n_layers)]
multi_cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cells)
outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype = tf.float32)
top_layer_h_state = states[-1][1]
logits = tf.layers.dense(top_layer_h_state, n_outputs, name = "softmax")
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
loss = tf.reduce_mean(xentropy, name = "loss")
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.cast(tf.nn.in_top_k(logits, y, 1), tf.float32)
accuracy = tf.reduce_mean(correct)

init = tf.global_variables_initializer()

In [99]:
states

(LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 150) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_4:0' shape=(?, 150) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_5:0' shape=(?, 150) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_6:0' shape=(?, 150) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_7:0' shape=(?, 150) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_8:0' shape=(?, 150) dtype=float32>))

In [100]:
top_layer_h_state

<tf.Tensor 'rnn/while/Exit_8:0' shape=(?, 150) dtype=float32>

In [101]:
n_epochs = 10
batch_size = 150

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
        acc_batch = accuracy.eval(feed_dict = {X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict = {X: X_test, y: y_test})
        print(epoch, "Last batch accuracy:", acc_batch, "Test accuracy:", acc_test)

0 Last batch accuracy: 0.97333336 Test accuracy: 0.9561
1 Last batch accuracy: 0.94666666 Test accuracy: 0.9702
2 Last batch accuracy: 0.9533333 Test accuracy: 0.9672
3 Last batch accuracy: 1.0 Test accuracy: 0.9797
4 Last batch accuracy: 0.96666664 Test accuracy: 0.9849
5 Last batch accuracy: 0.99333334 Test accuracy: 0.9855
6 Last batch accuracy: 0.9866667 Test accuracy: 0.9859
7 Last batch accuracy: 0.99333334 Test accuracy: 0.9868
8 Last batch accuracy: 1.0 Test accuracy: 0.9868
9 Last batch accuracy: 0.99333334 Test accuracy: 0.9879


GRU (Gated Recurrent Unit) Cell

The GRU cell is a simplified version of the LSTM cell.

In [102]:
gru_cell = tf.contrib.rnn.GRUCell(num_units = n_neurons)

Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
