In [1]:
import numpy as np
import tensorflow as tf

In [2]:
sess = tf.Session()

We want to create a network that has only one LSTM cell. We have to pass 2 elemnts to LSTM, the prv_output and prv_state, so called, h and c. Therefore, we initialize a state vector, state. Here, state is a tuple with 2 elements, each one is of size [1 x 4], one for passing prv_output to next time step, and another for passing the prv_state to next time stamp.

In [3]:
LSTM_CELL_SIZE = 4 # output size (dimension) which is same as hidden size in the cell
lstm_cell = tf.contrib.rnn.BasicLSTMCell(LSTM_CELL_SIZE, state_is_tuple = True)
state = (tf.zeros([2, LSTM_CELL_SIZE]),)* 2
state

(<tf.Tensor 'zeros:0' shape=(2, 4) dtype=float32>,
 <tf.Tensor 'zeros:0' shape=(2, 4) dtype=float32>)

Let define a sample input. In this example, batch_size =2,and seq_len= 6:

In [4]:
sample_input = tf.constant([[1,2,3,4,3,2], [3,2,2,2,2,2]], dtype = tf.float32)
print(sess.run(sample_input))

[[ 1.  2.  3.  4.  3.  2.]
 [ 3.  2.  2.  2.  2.  2.]]


Now, we can pass the input to lstm_cell, and check the new state:

In [5]:
with tf.variable_scope("LSTM_sample1"):
    output, state_new = lstm_cell(sample_input, state)
    sess.run(tf.global_variables_initializer())
    print(sess.run(state_new))

LSTMStateTuple(c=array([[-0.27207065, -0.14977001, -0.50924844, -0.04473792],
       [-0.51826453,  0.13118115, -0.20965381, -0.06719033]], dtype=float32), h=array([[-0.00298158, -0.06040691, -0.2331679 , -0.00418494],
       [-0.01149935,  0.09326159, -0.11418024, -0.01398612]], dtype=float32))


As we can see, the states has 2 parts, the new state, c, and also the output, h. Lets check the output again:

In [6]:
print(sess.run(output))

[[-0.00298158 -0.06040691 -0.2331679  -0.00418494]
 [-0.01149935  0.09326159 -0.11418024 -0.01398612]]


# Stacked LSTM basecs

What about if we want to have a RNN with stacked LSTM? For example, a 2-layer LSTM. In this case, the output of the first layer will become the input of the second.

In [7]:
## let start the new session 
sess = tf.Session()

In [8]:
LSTM_CELL_SIZE = 4 # 4 hidden nodes = state_dim  = the output_dim
input_dim = 6
num_layers = 2

In [9]:
cells = []
for _ in range(num_layers):
    cell = tf.contrib.rnn.LSTMCell(LSTM_CELL_SIZE)
    cells.append(cell)
    stacked_lstm = tf.contrib.rnn.MultiRNNCell(cells)

In [10]:
data = tf.placeholder(tf.float32, [None, None, input_dim])
output, state = tf.nn.dynamic_rnn(cell, data, dtype = tf.float32)
print(data, output, state)

Tensor("Placeholder:0", shape=(?, ?, 6), dtype=float32) Tensor("rnn/transpose:0", shape=(?, ?, 4), dtype=float32) LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_2:0' shape=(?, 4) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_3:0' shape=(?, 4) dtype=float32>)


Lets say the input sequence length is 3, and the dimensionality of the inputs is 6. The input should be a Tensor of shape: [batch_size, max_time, dimension], in our case it would be (2, 3, 6)

In [11]:
#Batch size x time steps x features.
sample_input = [[[1,2,3,4,3,2], [1,2,1,1,1,2],[1,2,2,2,2,2]],[[1,2,3,4,3,2],[3,2,2,1,1,2],[0,0,0,0,3,2]]]
sample_input

[[[1, 2, 3, 4, 3, 2], [1, 2, 1, 1, 1, 2], [1, 2, 2, 2, 2, 2]],
 [[1, 2, 3, 4, 3, 2], [3, 2, 2, 1, 1, 2], [0, 0, 0, 0, 3, 2]]]

In [12]:
sess.run(tf.global_variables_initializer())
sess.run(output, feed_dict={data: sample_input})

array([[[-0.05511501,  0.01852267, -0.00904508, -0.19907627],
        [-0.01939808,  0.02371921, -0.01738074, -0.03937931],
        [-0.06129809,  0.02666961, -0.02447925, -0.04834963]],

       [[-0.05511501,  0.01852267, -0.00904508, -0.19907627],
        [-0.05639194,  0.00520131, -0.00815779,  0.09399685],
        [-0.1300499 ,  0.12428377, -0.03200072,  0.08908086]]], dtype=float32)