# A simple example of RNN
In this problem, we want to predict the next steps of a walking path on a plane using __RNN__. 
## Generating data

In [20]:
import numpy as np

size = 100000
seq_len = 20
batch_size = 128
state_size = 128

In [21]:
# randomly generate a walking (on a plance) path, each point has two coordinates
# stack sequences of length seq_len into Xs and Ys, where Ys is one step forward from Xs.
def gen_data(size = 10000, seq_len = 20):
    path = np.array(np.random.normal(0.5,.01,size=(size + seq_len,2)))
    Xs = np.zeros((size,seq_len,2))
    Ys = np.zeros((size,seq_len,2))
    for i in xrange(size):
        Xs[i,:,:] = path[i : i+seq_len , :]
        Ys[i,:,:] = path[i+1 : i+1+seq_len , :]
    return (Xs, Ys, path)

In [22]:
(Xs, Ys, path) = gen_data(size, seq_len)
Xs.shape, Ys.shape, path.shape

((100000, 20, 2), (100000, 20, 2), (100020, 2))

## Using Keras
### Building the model

In [23]:
from keras.models import Sequential
from keras.layers import LSTM, SimpleRNN
from keras.layers import Dense, Activation, Dropout
from keras.layers.wrappers import TimeDistributed

In [24]:
print('Build model...')
model = Sequential()
model.add(LSTM(state_size, return_sequences=True, input_shape=(seq_len, 2)))  
model.add(LSTM(state_size, return_sequences=True))
model.add(Dropout(0.2))
model.add(TimeDistributed(Dense(2)))
model.compile(loss='mse', optimizer='rmsprop')
print ('model is made')

Build model...
model is made


In [25]:
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 20, 128)           67072     
_________________________________________________________________
lstm_4 (LSTM)                (None, 20, 128)           131584    
_________________________________________________________________
dropout_2 (Dropout)          (None, 20, 128)           0         
_________________________________________________________________
time_distributed_2 (TimeDist (None, 20, 2)             258       
Total params: 198,914
Trainable params: 198,914
Non-trainable params: 0
_________________________________________________________________
None


### Print a sample before training

In [26]:
def print_array(a):
    # A helper function for printing an array
    # The shape of the array is 1 x None x 2, print with 3 decimal places and with groups of 2 numbers
    a = np.around(a,3)
    for item in a[0,:,:]:
        print item,
    
def print_sample():
    # A helper function for printing a sample with predicted future steps
    start_index = 0
    seq_len = 20
    print('--------------- Generating with seed: ----------------')
    x = np.zeros((1,seq_len,2))
    x[0,:,:] = path[start_index:start_index+seq_len]
    print_array(x)
    print()
    print('--------------------- Prediting ----------------------')
    
    for i in range(20):
        preds = model.predict(x, verbose=0)
        next_step = np.zeros((1, 1, 2))
        next_step[0,:,:] = preds[:,-1,:]
        print_array(next_step)
        past_steps = x[:,1:,:]
        x = np.concatenate(( past_steps, next_step ), axis = 1)
        
    print()
        

In [27]:
print("---------- Before Training ----------")
print_sample()

---------- Before Training ----------
--------------- Generating with seed: ----------------
[ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] ()
--------------------- Prediting ----------------------
[-0.037 -0.057] [-0.035 -0.052] [-0.031 -0.046] [-0.027 -0.038] [-0.023 -0.031] [-0.019 -0.025] [-0.015 -0.02 ] [-0.012 -0.015] [-0.01  -0.012] [-0.007 -0.009] [-0.006 -0.007] [-0.004 -0.005] [-0.003 -0.003] [-0.002 -0.002] [-0.002 -0.002] [-0.001 -0.001] [-0.001 -0.   ] [-0. -0.] [-0.  0.] [ 0.  0.] ()


### Training the rnn with demonstration at each iteration

In [28]:
for iteration in range(1,3):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    print_sample()
    history = model.fit(Xs, Ys, batch_size = batch_size, epochs = 1, verbose = 1)
    if iteration==1 or iteration==3 or iteration==5 or iteration==10 or iteration==20 or iteration==30 or iteration==40 or iteration==50:
        model.save_weights('rnn_rw_' + str(iteration) + '.h5', overwrite=True)
    print('loss is')
    print(history.history['loss'][0])
    print(history)
    print()
    model.save('rnn_rw')

()
--------------------------------------------------
('Iteration', 1)
--------------- Generating with seed: ----------------
[ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] ()
--------------------- Prediting ----------------------
[-0.037 -0.057] [-0.035 -0.052] [-0.031 -0.046] [-0.027 -0.038] [-0.023 -0.031] [-0.019 -0.025] [-0.015 -0.02 ] [-0.012 -0.015] [-0.01  -0.012] [-0.007 -0.009] [-0.006 -0.007] [-0.004 -0.005] [-0.003 -0.003] [-0.002 -0.002] [-0.002 -0.002] [-0.001 -0.001] [-0.001 -0.   ] [-0. -0.] [-0.  0.] [ 0.  0.] ()
Epoch 1/1
loss is
0.00329963634294
<keras.callbacks.History object at 0x10600ba90>
()
()
--------------------------------------------------
('Iteration', 2)
--------------- Generating with seed: ----------------
[ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5] [ 0.5  0.5]

## Using TensorFlow API

In [44]:
def get_batch(Xs, Ys, batch_size = 128):
    s = int(np.random.uniform(0, len(Xs) - batch_size))
    data_x = Xs[s : s+batch_size]
    data_y = Ys[s : s+batch_size]
    return data_x, data_y

In [45]:
data_x, data_y = get_batch(Xs, Ys)
data_x.shape, data_y.shape

((128, 20, 2), (128, 20, 2))

In [78]:
import tensorflow as tf
state_size = 128
learning_rate = 0.01
num_layers = 2

### Definition of the LSTM cells

In [88]:
tf.reset_default_graph()
tf.set_random_seed(1)

# Placeholders
x = tf.placeholder(tf.float32, [batch_size, seq_len, 2], name = 'data_x')
y = tf.placeholder(tf.float32, [batch_size, seq_len, 2], name = 'data_y')
dropout = tf.placeholder(tf.float32)

print('building the model')

# create 2 LSTM cells with dropout between layers
rnn_layers = []
for _ in range(num_layers):
    cell = tf.contrib.rnn.BasicLSTMCell(state_size)
    cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob = 1.0 - dropout)
    rnn_layers.append(cell)
    
# create a RNN cell composed sequentially of a number of RNNCells
multi_rnn_cell = tf.contrib.rnn.MultiRNNCell(rnn_layers)


"""
# create 2 LSTM cells with dropout between layers
rnn_layers = [tf.nn.rnn_cell.LSTMCell(state_size) for _ in range(num_layers)]
# create a RNN cell composed sequentially of a number of RNNCells
multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)
"""

# outputs is a tensor of shape [batch_size, seq_len, state_size]
# state is a N-tuple where N is the number of LSTMCells containing a tf.contrib.rnn.LSTMStateTuple for each cell
init_state = multi_rnn_cell.zero_state(batch_size, tf.float32)
print('compiling the model')
outputs, state = tf.nn.dynamic_rnn(cell = multi_rnn_cell,
                                  inputs = x,
                                  initial_state = init_state,
                                  dtype = tf.float32)
print('model is made')

building the model
compiling the model
model is made


In [89]:
outputs, state

(<tf.Tensor 'rnn/transpose:0' shape=(128, 20, 128) dtype=float32>,
 (LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_2:0' shape=(128, 128) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_3:0' shape=(128, 128) dtype=float32>),
  LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_4:0' shape=(128, 128) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_5:0' shape=(128, 128) dtype=float32>)))

### Computation of the MSE loss

In [90]:
# reshape outputs and y so that we can compute the prediction y_ and loss easily
outputs_reshaped = tf.reshape(outputs, [-1, state_size])
y_reshaped = tf.reshape(y,[-1, 2])
outputs_reshaped, y_reshaped

(<tf.Tensor 'Reshape:0' shape=(2560, 128) dtype=float32>,
 <tf.Tensor 'Reshape_1:0' shape=(2560, 2) dtype=float32>)

In [93]:
with tf.variable_scope('output'):
    V = tf.get_variable('V', [state_size, 2])
    bv = tf.get_variable('bv', [2], initializer=tf.constant_initializer(0.0))
y_ = tf.matmul(outputs_reshaped, V) + bv
y_, y_reshaped

(<tf.Tensor 'add_1:0' shape=(2560, 2) dtype=float32>,
 <tf.Tensor 'Reshape_1:0' shape=(2560, 2) dtype=float32>)

In [94]:
total_loss = tf.reduce_mean(tf.square(tf.subtract(y_, y_reshaped)))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

### Training

In [95]:
X , Y = None, None
count = 0
sum_tr_losses = 0
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(2000):
        X , Y = get_batch(Xs, Ys, batch_size = batch_size)
        tr_losses, _ = sess.run([total_loss, train_step], feed_dict = {x:X, y:Y, dropout:0.2})
        count += 1
        sum_tr_losses += tr_losses
        if (i < 10) or ( i % 200 == 0 ):
            print("{} {}".format(i, sum_tr_losses / count))
            count = 0
            sum_tr_losses = 0

0 0.246577113867
1 0.435334533453
2 0.294419586658
3 0.240839630365
4 0.1694983989
5 0.0970379263163
6 0.0473904311657
7 0.08383063972
8 0.0276837144047
9 0.0355570539832
200 0.00263611524194
400 0.000402510143467
600 0.000245328727615
800 0.00017049994276
1000 0.000120039147987
1200 8.46549955531e-05
1400 5.95513816006e-05
1600 4.19430745933e-05
1800 2.9245350388e-05
