In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.python.util import nest # for `nest.is_sequence`
from tensorflow.contrib.rnn import *

### `BasicRNNCell` is the most basic RNN cell class.

https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn_cell_impl.py

- All other RNNCells have slightly different `__init__` and `__call__` methods.

- Most RNN functions work as below.

```
outputs = []
cell = RNNCell(hidden_size)
for i in range(rnn_steps):
    output, state = cell(input, state) 
    outputs.append(output)
return  outputs, state
```


    1) Initialize an empty list 
    2) Create an RNNCell class
    3) Run the RNNCell for rnn_steps times
    4) Collect all the intermediate outputs in list
    5) Return list of all outputs and the last state

In [2]:
class BasicRNNCell(RNNCell):
  """The most basic RNN cell.

  A single step computes
  `output = new_state = activation(W * input + U * state + B)`,
  where the linear part is delegated to `_linear`.
  """

  def __init__(self, num_units, input_size=None, activation=tf.nn.tanh, reuse=None):
    """Store the cell configuration; no variables are created here.

    Args:
      num_units: int, size of both the state and the output.
      input_size: deprecated and unused; a warning is logged if it is given.
      activation: callable applied to the linear combination of input and
        state.
      reuse: passed as `reuse` to the variable scope opened in `__call__`.
    """
    if input_size is not None:
      # `logging` is not imported in this notebook; use TensorFlow's logger.
      tf.logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._activation = activation
    self._reuse = reuse

  @property
  def state_size(self):
    """Size of the state, equal to `num_units`."""
    return self._num_units

  @property
  def output_size(self):
    """Size of the output, equal to `num_units`."""
    return self._num_units

  def __call__(self, inputs, state, scope=None):
    """Most basic RNN: output = new_state = act(W * input + U * state + B).

    Args:
      inputs: input for this step, passed to `_linear` together with `state`.
      state: state from the previous step.
      scope: optional variable scope (or scope name); defaults to
        "basic_rnn_cell".

    Returns:
      A pair `(output, new_state)`; both elements are the same tensor.
    """
    # Open the cell's variable scope directly: the private `_checked_scope`
    # helper is neither defined nor imported in this notebook, so calling it
    # would raise NameError.
    with tf.variable_scope(scope or "basic_rnn_cell", reuse=self._reuse):
      output = self._activation(
          _linear([inputs, state], self._num_units, True))
    return output, output

### What is `_linear`?
- `_linear` is Tensorflow RNNCells' core 'linear mapping function'.
- It internally creates weight variables (+ bias if `bias` argument is set as True)
- It projects the input(s) so that the second dimension of the result is `output_size`
$$
\begin{equation}
\_linear(X, W) = \sum_{i} X[i] * W[i]
\end{equation}
$$

### Then let's look through `_linear`
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py#L1020

In [3]:
# Names under which `_linear` creates (or looks up) its bias and weight
# variables in the current variable scope.
_BIAS_VARIABLE_NAME = "biases"
_WEIGHTS_VARIABLE_NAME = "weights"

def _linear(args, output_size, bias, bias_start=0.0):
  """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

  The inputs are concatenated along dimension 1 and multiplied by a single
  weight matrix of shape [total_arg_size, output_size]; this equals the sum
  of the per-input products. Variables are fetched with `tf.get_variable` in
  the variable scope that is current when the function is called, so they are
  created or reused according to that scope's settings.

  Args:
    args: a 2D Tensor or a list of 2D, batch x n, Tensors.
    output_size: int, second dimension of W[i].
    bias: boolean, whether to add a bias term or not.
    bias_start: starting value to initialize the bias; 0 by default.
  Returns:
    A 2D Tensor with shape [batch x output_size] equal to
    sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
  Raises:
    ValueError: if some of the arguments has unspecified or wrong shape.
  """
  if args is None or (nest.is_sequence(args) and not args):
    raise ValueError("`args` must be specified")
  if not nest.is_sequence(args):
    args = [args]

  # Calculate the total size of arguments on dimension 1.
  total_arg_size = 0
  shapes = [a.get_shape() for a in args]
  for shape in shapes:
    if shape.ndims != 2:
      raise ValueError("linear is expecting 2D arguments: %s" % shapes)
    if shape[1].value is None:
      raise ValueError("linear expects shape[1] to be provided for shape %s, "
                       "but saw %s" % (shape, shape[1]))
    else:
      total_arg_size += shape[1].value

  # The variables take the dtype of the first input.
  dtype = args[0].dtype

  # Now the computation.
  scope = tf.get_variable_scope()
  with tf.variable_scope(scope) as outer_scope:
    weights = tf.get_variable(
        _WEIGHTS_VARIABLE_NAME, [total_arg_size, output_size], dtype=dtype)
    if len(args) == 1:
      res = tf.matmul(args[0], weights)
    else:
      res = tf.matmul(tf.concat(args, 1), weights)
    if not bias:
      return res
    with tf.variable_scope(outer_scope) as inner_scope:
      # The bias vector is fetched with the scope's partitioner cleared.
      inner_scope.set_partitioner(None)
      biases = tf.get_variable(
          _BIAS_VARIABLE_NAME, [output_size],
          dtype=dtype,
          initializer=tf.constant_initializer(bias_start, dtype=dtype))
    # `bias_add` lives in `tf.nn`; there is no top-level `tf.bias_add`.
    return tf.nn.bias_add(res, biases)

### Example 1 - Basic API

In [4]:
# Start from an empty default graph, then define a 3 x 5 float64 input.
tf.reset_default_graph()
x = tf.constant(
    [[1, 2, 3, 4, 5],
     [2, 4, 6, 8, 10],
     [3, 4, 5, 6, 7]],
    dtype=tf.float64)

In [5]:
# Map the single input tensor to 4 output columns; no bias term is added.
x_mapped = _linear(x, output_size=4, bias=False)

In [6]:
# Open an interactive session (it becomes the default session), initialize
# the variables created by `_linear`, and evaluate the projection.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
sess.run(x_mapped)

array([[-0.51489446,  3.94604097,  0.93969094, -4.92853462],
       [-1.02978893,  7.89208194,  1.87938187, -9.85706924],
       [-0.71129123,  5.21109571,  3.1175112 , -8.20703003]])

In [7]:
# Show which trainable variables the `_linear` call added to the graph.
trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
trainable_vars

[<tf.Variable 'weights:0' shape=(5, 4) dtype=float64_ref>]

- the name `weights` came from `_WEIGHTS_VARIABLE_NAME` above

### Example 2 - List of tensors

In [8]:
# Close the session left open by the previous example before discarding its
# graph: resetting the default graph while a session is still active is
# documented as undefined behaviour, and the old session would otherwise leak.
sess.close()
tf.reset_default_graph()
x = tf.constant([[1,2,3,4,5],
             [2,4,6,8,10],
             [3,4,5,6,7]], dtype=tf.float64)
# Pass the same tensor three times to exercise the list-input path of `_linear`.
x = [x, x, x]
x

[<tf.Tensor 'Const:0' shape=(3, 5) dtype=float64>,
 <tf.Tensor 'Const:0' shape=(3, 5) dtype=float64>,
 <tf.Tensor 'Const:0' shape=(3, 5) dtype=float64>]

In [9]:
# The list of inputs is concatenated along axis 1 inside `_linear` before
# being mapped to 4 output columns; no bias term is added.
x_mapped = _linear(x, output_size=4, bias=False)

In [10]:
# Fresh interactive session for the rebuilt graph: initialize the variables,
# then evaluate the projection.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
sess.run(x_mapped)



array([[-2.30220387, -4.29702178, -2.01037625,  3.16663845],
       [-4.60440774, -8.59404357, -4.0207525 ,  6.33327691],
       [-4.91712829, -6.13587478, -4.28061929,  3.8996834 ]])

### Example 3 - Custom Weights

In [11]:
# Close the session left open by the previous example before discarding its
# graph: resetting the default graph while a session is still active is
# documented as undefined behaviour, and the old session would otherwise leak.
sess.close()
tf.reset_default_graph()
x = tf.constant([[1,2],
             [2,4],
             [3,4]], dtype=tf.float32)
x = [x, x, x]
# Pre-create the variable named 'weights' (all ones, 6 x 4) and switch the
# current scope to reuse mode so that `_linear` looks it up instead of
# creating a new, randomly initialized one.
w = tf.get_variable('weights',
                    initializer=tf.ones(shape=[6, 4]),
                    dtype=tf.float32)
scope = tf.get_variable_scope()
scope.reuse_variables()

In [12]:
# With reuse enabled on the current scope, `_linear` fetches the existing
# `weights` variable rather than creating its own; no bias term is added.
x_mapped = _linear(x, output_size=4, bias=False)

In [13]:
# Fresh interactive session for the rebuilt graph: initialize the variables,
# then evaluate the projection.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
sess.run(x_mapped)



array([[ 9.,  9.,  9.,  9.],
       [18., 18., 18., 18.],
       [21., 21., 21., 21.]], dtype=float32)

##### `_linear` works as below.

    1) Concatenate all the input by axis=1
    
    2) Matrix-multiply the concatenated input by the weight matrix (a plain `matmul`, no broadcasting)

In [14]:
# Evaluate each input tensor in the list to show its values.
[tensor.eval() for tensor in x]

[array([[1., 2.],
        [2., 4.],
        [3., 4.]], dtype=float32), array([[1., 2.],
        [2., 4.],
        [3., 4.]], dtype=float32), array([[1., 2.],
        [2., 4.],
        [3., 4.]], dtype=float32)]

In [15]:
# Step 1 of `_linear`: join the inputs side by side along axis 1.
tf.concat(x, axis=1).eval()

array([[1., 2., 1., 2., 1., 2.],
       [2., 4., 2., 4., 2., 4.],
       [3., 4., 3., 4., 3., 4.]], dtype=float32)

In [16]:
# Step 2 of `_linear`: multiply the concatenated input by the weight matrix.
concatenated = tf.concat(x, 1)
tf.matmul(concatenated, w).eval()

array([[ 9.,  9.,  9.,  9.],
       [18., 18., 18., 18.],
       [21., 21., 21., 21.]], dtype=float32)