# Recurrent Neural Networks

## Seoul AI Meetup, August 5

Martin Kersner, <m.kersner@gmail.com>

In [1]:
import numpy as np
import tensorflow as tf

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Start from an empty default graph with reproducible random state.

    Seeds NumPy's global generator, discards the current default TensorFlow
    graph and sets the graph-level TensorFlow seed, all with the same value.

    Args:
        seed: integer used for both the NumPy and the TensorFlow seed.
    """
    np.random.seed(seed)
    # The TensorFlow seed is set after the reset so that it applies to the
    # new default graph rather than the one being discarded.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

In [2]:
# source https://github.com/ageron/handson-ml
from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=32):
    """Return a copy of graph_def with large constant payloads replaced.

    Every node is copied into a fresh GraphDef. For 'Const' nodes whose
    serialized tensor content is longer than max_const_size bytes, the
    content is replaced by a short placeholder naming the original size.

    Args:
        graph_def: object exposing a `node` sequence of graph nodes.
        max_const_size: largest constant payload, in bytes, that is kept.

    Returns:
        A new tf.GraphDef; the input graph_def is not modified.
    """
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                # tensor_content is a bytes field, so the placeholder must be
                # bytes as well; assigning a str raises TypeError on Python 3.
                placeholder = "<stripped %d bytes>" % size
                tensor.tensor_content = placeholder.encode("ascii")
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Display a TensorFlow graph inline using the TensorBoard graph widget.

    Args:
        graph_def: a GraphDef, or any object with an `as_graph_def()` method
            (in which case that method's result is used).
        max_const_size: forwarded to strip_consts(); constants with larger
            payloads are replaced by a placeholder before rendering.

    Note:
        The element id is built from np.random.rand(), so each call draws one
        value from NumPy's global random generator.
    """
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    stripped = strip_consts(graph_def, max_const_size=max_const_size)

    # Page that loads the graph widget and hands it the text-format graph.
    page_template = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """
    graph_text = repr(str(stripped))
    element_id = 'graph' + str(np.random.rand())
    page = page_template.format(data=graph_text, id=element_id)

    # The page is embedded through srcdoc, so its double quotes are escaped.
    frame_template = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """
    escaped_page = page.replace('"', '&quot;')
    display(HTML(frame_template.format(escaped_page)))

### Recurrent Neurons
### Memory Cells
### Input and Output Sequences

### Basic RNN

In [3]:
n_features = 3  # size of the input vector fed at each time step
n_neurons  = 5  # number of units in the recurrent layer
n_steps    = 2  # number of time steps per sequence

In [4]:
# Sample mini-batch: 4 instances with 3 features each, one array per time step.
X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1

In [5]:
# source https://github.com/ageron/handson-ml
# RNN unrolled by hand over two time steps, using only basic TensorFlow ops.
reset_graph()

# One placeholder per time step; the batch dimension is left unspecified.
X0 = tf.placeholder(tf.float32, [None, n_features])
X1 = tf.placeholder(tf.float32, [None, n_features])

# weights for the input connection (used at both time steps)
Wx = tf.Variable(tf.random_normal(shape=[n_features, n_neurons], dtype=tf.float32))

# weights for the recurrent connection (previous output -> current step)
Wy = tf.Variable(tf.random_normal(shape=[n_neurons, n_neurons], dtype=tf.float32))
# bias, initialised to zeros and used at both time steps
b  = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))

# t = 0: there is no previous output, so only the input term contributes.
# tf.matmul(X0, Wx) : [None,  n_features] * [n_features, n_neurons] = [None, n_neurons]
Y0 = tf.tanh(tf.matmul(X0, Wx) + b)

# t = 1: the previous output Y0 is fed back through Wy.
# tf.matmul(Y0, Wy) : [None,   n_neurons] * [n_neurons,  n_neurons] = [None, n_neurons]
# tf.matmul(X1, Wx) : [None,  n_features] * [n_features, n_neurons] = [None, n_neurons]
# b : [1, n_neurons]
Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)

In [6]:
# Render the current default graph (the manually unrolled RNN) inline.
show_graph(tf.get_default_graph())

In [7]:
def train_rnn(X0_batch, X1_batch):
    """Evaluate the two-step RNN on one batch and print both outputs.

    Despite the name, no training is performed: the variables are freshly
    initialised and Y0 and Y1 are evaluated once. The function reads the
    module-level placeholders X0, X1 and tensors Y0, Y1, so it runs whichever
    graph those names currently refer to.

    Args:
        X0_batch: values fed to X0 (inputs at t = 0).
        X1_batch: values fed to X1 (inputs at t = 1).
    """
    initializer = tf.global_variables_initializer()
    feeds = {X0: X0_batch, X1: X1_batch}

    with tf.Session() as session:
        session.run(initializer)
        step0_out, step1_out = session.run([Y0, Y1], feed_dict=feeds)

    print("Y0\n",   step0_out)
    print("\nY1\n", step1_out)

In [8]:
# Evaluate Y0 and Y1 of the manually unrolled RNN on the sample batch.
train_rnn(X0_batch, X1_batch)

Y0
 [[-0.0664006   0.96257669  0.68105787  0.70918542 -0.89821595]
 [ 0.9977755  -0.71978885 -0.99657625  0.9673925  -0.99989718]
 [ 0.99999774 -0.99898815 -0.99999893  0.99677622 -0.99999988]
 [ 1.         -1.         -1.         -0.99818915  0.99950868]]

Y1
 [[ 1.         -1.         -1.          0.40200216 -1.        ]
 [-0.12210433  0.62805319  0.96718419 -0.99371207 -0.25839335]
 [ 0.99999827 -0.9999994  -0.9999975  -0.85943311 -0.9999879 ]
 [ 0.99928284 -0.99999815 -0.99990582  0.98579615 -0.92205751]]


## `static_rnn()`

* [tf.contrib.rnn.BasicRNNCell](https://www.tensorflow.org/api_docs/python/tf/contrib/rnn/BasicRNNCell)
* [tf.nn.static_rnn](https://www.tensorflow.org/api_docs/python/tf/nn/static_rnn) creates one cell per time step.
* Each input placeholder (`X0`, `X1`) has to be defined manually.

In [9]:
# source https://github.com/ageron/handson-ml
reset_graph()

# One placeholder per time step, as in the manual version.
X0 = tf.placeholder(tf.float32, [None, n_features])
X1 = tf.placeholder(tf.float32, [None, n_features])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
# static_rnn receives the inputs as a Python list with one tensor per time step.
output_seqs, states = tf.nn.static_rnn(basic_cell, [X0, X1],
                                                dtype=tf.float32)
# Rebind Y0/Y1 so that train_rnn(), which reads these globals, evaluates this graph.
Y0, Y1 = output_seqs

In [10]:
# Render the graph built by static_rnn() with two placeholders.
show_graph(tf.get_default_graph())

## `static_rnn()` output

In [11]:
# X0, X1, Y0 and Y1 now refer to the static_rnn() graph, so this evaluates it.
train_rnn(X0_batch, X1_batch)

Y0
 [[ 0.30741334 -0.32884315 -0.65428472 -0.93850589  0.52089024]
 [ 0.99122757 -0.95425421 -0.75180793 -0.99952078  0.98202348]
 [ 0.99992681 -0.99783254 -0.82473528 -0.9999963   0.99947774]
 [ 0.99677098 -0.68750614  0.84199691  0.93039107  0.8120684 ]]

Y1
 [[ 0.99998885 -0.99976051 -0.06679298 -0.99998039  0.99982214]
 [-0.65249437 -0.51520866 -0.37968954 -0.59225935 -0.08968385]
 [ 0.99862403 -0.99715197 -0.03308626 -0.99915648  0.99329019]
 [ 0.99681675 -0.95981938  0.39660636 -0.83076048  0.79671967]]


### `static_rnn()` with single input placeholder

In [12]:
# source https://github.com/ageron/handson-ml
reset_graph()

# A single placeholder holds the whole sequence: [batch, n_steps, n_features].
X = tf.placeholder(tf.float32, [None, n_steps, n_features])
# Move the time axis to the front ([n_steps, batch, n_features]) and unstack it
# into a list with one [batch, n_features] tensor per time step.
X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
output_seqs, states = tf.nn.static_rnn(basic_cell, X_seqs,
                                                dtype=tf.float32)
# Stack the per-step outputs and move the batch axis back to the front:
# [n_steps, batch, n_neurons] -> [batch, n_steps, n_neurons].
outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])

In [13]:
def train_rnn2(X0_batch, X1_batch):
    """Evaluate the single-placeholder RNN on one batch and print its outputs.

    The two per-step batches are combined into one array with the time axis in
    position 1 and fed to the module-level placeholder X; the module-level
    tensor `outputs` is then evaluated once with freshly initialised
    variables. No training is performed.

    Args:
        X0_batch: inputs at t = 0, one row per instance.
        X1_batch: inputs at t = 1, same shape as X0_batch.
    """
    # Two [batch, n_features] arrays -> one [batch, 2, n_features] array.
    sequence_batch = np.stack((X0_batch, X1_batch), axis=1)

    initializer = tf.global_variables_initializer()

    with tf.Session() as session:
        session.run(initializer)
        result = session.run(outputs, feed_dict={X: sequence_batch})

    # Put the time axis first so each time step can be picked by index.
    per_step = np.transpose(result, axes=[1, 0, 2])
    print("Y0\n", per_step[0])
    print("\nY1\n", per_step[1])

In [14]:
# Y0: output at t = 0
# Y1: output at t = 1
train_rnn2(X0_batch, X1_batch)

Y0
 [[-0.45652324 -0.68064123  0.40938237  0.63104504 -0.45732826]
 [-0.80015349 -0.99218267  0.78177971  0.9971031  -0.99646091]
 [-0.93605185 -0.99983788  0.93088669  0.99998152 -0.99998295]
 [ 0.99273688 -0.99819332 -0.55543643  0.9989031  -0.9953323 ]]

Y1
 [[-0.94288003 -0.99988687  0.94055814  0.99999851 -0.9999997 ]
 [-0.63711601  0.11300932  0.5798437   0.43105593 -0.63716984]
 [-0.9165386  -0.99456042  0.89605415  0.99987197 -0.99997509]
 [-0.02746334 -0.73191994  0.7827872   0.95256817 -0.97817713]]


In [15]:
# Render the graph of the single-placeholder static_rnn() variant.
show_graph(tf.get_default_graph())

## `dynamic_rnn()` 

* [tf.nn.dynamic_rnn](https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn)
* No need to unstack, stack and transpose.
* Input `[None, n_steps, n_features]`.
* Output `[None, n_steps, n_neurons]`.

In [18]:
reset_graph()

# Whole sequence in one placeholder: [batch, n_steps, n_features].
X = tf.placeholder(tf.float32, [None, n_steps, n_features])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
# dynamic_rnn takes the 3-D tensor directly; no unstack/stack/transpose is needed.
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

# train_rnn2() reads the globals X and outputs defined just above.
train_rnn2(X0_batch, X1_batch)

Y0
 [[ 0.80872238 -0.52312446 -0.6716494  -0.69762248 -0.54384488]
 [ 0.99547106 -0.02155113 -0.99482894  0.17964774 -0.83173698]
 [ 0.99990267  0.49111056 -0.9999314   0.8413834  -0.9444679 ]
 [-0.80632919  0.93928123 -0.97309881  0.99996096  0.97433066]]

Y1
 [[ 0.9995454   0.99339807 -0.99998379  0.99919224 -0.98379493]
 [-0.06013332  0.4030143   0.02884481 -0.29437575 -0.85681593]
 [ 0.99406189  0.95815992 -0.99768937  0.98646194 -0.91752487]
 [ 0.95047355 -0.51205158 -0.27763969  0.83108062  0.81631833]]


In [19]:
# Render the graph built by dynamic_rnn().
show_graph(tf.get_default_graph())

### Variable Length Input Sequences
### Variable Length Output Sequences