In [17]:
import time

import tensorflow as tf
from tensorflow.contrib.rnn import RNNCell
from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl
from tensorflow.python.layers import base as base_layer
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops.math_ops import sigmoid
from tensorflow.python.ops.math_ops import tanh
from tensorflow.python.util import nest

# Leaf names of the variables created by GRUCell.build(); they are combined
# with a "gates/" or "candidate/" prefix there.
_BIAS_VARIABLE_NAME = "biases"
_WEIGHTS_VARIABLE_NAME = "weights"


In [18]:
# Subclasses the public `RNNCell` (imported from `tf.contrib.rnn` at the top of
# the notebook) rather than reaching into the private `core_rnn_cell_impl`
# module, so the class only depends on the public cell base class.
class GRUCell(RNNCell):
  """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).

  One step of the cell computes, with `[a, b]` denoting concatenation along
  axis 1:

    r, u  = split(sigmoid([inputs, state] . W_gates + b_gates))
    c     = activation([inputs, r * state] . W_candidate + b_candidate)
    new_h = u * state + (1 - u) * c

  The implementation relies on the Layer-style cell API (`build`, `call`,
  `add_variable`, `input_spec`, `built`), so the base `RNNCell` has to provide
  it.
  NOTE(review): the `TypeError: object.__init__() takes no parameters`
  recorded in this notebook is presumably what happens when the installed
  TensorFlow's `RNNCell` is a plain `object` subclass -- confirm the
  TensorFlow version before running.

  Args:
    num_units: int, The number of units in the GRU cell.
    activation: Nonlinearity to use.  Default: `tanh`.
    reuse: (optional) Python boolean describing whether to reuse variables
     in an existing scope.  If not `True`, and the existing scope already has
     the given variables, an error is raised.
    kernel_initializer: (optional) The initializer to use for the weight and
     projection matrices.
    bias_initializer: (optional) The initializer to use for the bias.  When
     omitted, the gate bias is initialised to 1.0 and the candidate bias
     to 0.
    name: String, the name of the layer. Layers with the same name will
      share weights, but to avoid mistakes we require reuse=True in such
      cases.
  """

  def __init__(self,
               num_units,
               activation=None,
               reuse=None,
               kernel_initializer=None,
               bias_initializer=None,
               name=None):
    super(GRUCell, self).__init__(_reuse=reuse, name=name)

    # Inputs must be 2-dimensional.
    # `base_layer` is tensorflow.python.layers.base, imported at the top of
    # the notebook (the original code used it without importing it).
    self.input_spec = base_layer.InputSpec(ndim=2)

    self._num_units = num_units
    self._activation = activation or math_ops.tanh
    self._kernel_initializer = kernel_initializer
    self._bias_initializer = bias_initializer

  @property
  def state_size(self):
    """Size of the recurrent state: `num_units`."""
    return self._num_units

  @property
  def output_size(self):
    """Size of the per-step output: `num_units`."""
    return self._num_units

  def build(self, inputs_shape):
    """Creates the gate and candidate kernels and biases.

    Args:
      inputs_shape: shape of the step input; its second dimension (the input
        depth) must be statically known.

    Raises:
      ValueError: if the input depth is unknown.
    """
    input_depth = inputs_shape[1].value
    if input_depth is None:
      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                       % inputs_shape)

    # Both kernels act on the concatenation [inputs, state].
    concat_depth = input_depth + self._num_units

    # A single kernel/bias pair produces both gates (reset and update), hence
    # the factor 2 on the output dimension.
    self._gate_kernel = self.add_variable(
        "gates/%s" % _WEIGHTS_VARIABLE_NAME,
        shape=[concat_depth, 2 * self._num_units],
        initializer=self._kernel_initializer)
    # Default gate bias of 1.0 makes sigmoid(.) start above 0.5, i.e. the
    # cell initially leans towards keeping the previous state.
    self._gate_bias = self.add_variable(
        "gates/%s" % _BIAS_VARIABLE_NAME,
        shape=[2 * self._num_units],
        initializer=(
            self._bias_initializer
            if self._bias_initializer is not None
            else init_ops.constant_initializer(1.0, dtype=self.dtype)))
    self._candidate_kernel = self.add_variable(
        "candidate/%s" % _WEIGHTS_VARIABLE_NAME,
        shape=[concat_depth, self._num_units],
        initializer=self._kernel_initializer)
    self._candidate_bias = self.add_variable(
        "candidate/%s" % _BIAS_VARIABLE_NAME,
        shape=[self._num_units],
        initializer=(
            self._bias_initializer
            if self._bias_initializer is not None
            else init_ops.zeros_initializer(dtype=self.dtype)))

    self.built = True

  def call(self, inputs, state):
    """Gated recurrent unit (GRU) with nunits cells.

    Args:
      inputs: 2-D step input (see `input_spec`).
      state: previous hidden state; it is concatenated with `inputs` along
        axis 1.

    Returns:
      A pair `(output, new_state)`; both are the same tensor `new_h`.
    """
    # Reset gate r and update gate u come from one fused affine map.
    gate_inputs = math_ops.matmul(
        array_ops.concat([inputs, state], 1), self._gate_kernel)
    gate_inputs = nn_ops.bias_add(gate_inputs, self._gate_bias)

    value = math_ops.sigmoid(gate_inputs)
    r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)

    # The reset gate scales how much of the old state feeds the candidate.
    r_state = r * state

    candidate = math_ops.matmul(
        array_ops.concat([inputs, r_state], 1), self._candidate_kernel)
    candidate = nn_ops.bias_add(candidate, self._candidate_bias)

    c = self._activation(candidate)
    # The update gate interpolates between the old state and the candidate.
    new_h = u * state + (1 - u) * c
    return new_h, new_h



In [19]:
def reset_graph():
    """Close the notebook-level `sess`, if one exists, and start a fresh graph.

    Looks up a global named `sess`; when it is present and truthy its
    `close()` method is called.  The default TensorFlow graph is then reset
    unconditionally.
    """
    open_session = globals().get('sess')
    if open_session:
        open_session.close()
    tf.reset_default_graph()
    
    
# Problem dimensions read by build_gru_single_layer().
# NOTE(review): batch_sz is not referenced in the visible cells -- presumably
# the batch size used when feeding the placeholders; confirm against the
# training code.
batch_sz = 8
# Length of the time axis (axis 1) of the x / y placeholders.
n_timesteps = 12
# Number of GRU units; also the width of the init_state placeholder.
n_hidden = 512
n_inputs = 513 # size of the last axis of x (the "f" in the shape comments)
# Targets have the same last-axis size as the inputs.
n_outputs = n_inputs

def build_gru_single_layer(learning_rate=0.1):
    """Build a one-layer GRU sequence-regression graph in a fresh default graph.

    The previous default graph is discarded via `reset_graph()`.  The network
    is: GRUCell(n_hidden) unrolled over `n_timesteps` steps, followed by a
    shared affine layer and a sigmoid at every step.  The loss is the mean of
    the squared differences between `y` and the predictions.

    Args:
        learning_rate: learning rate passed to `tf.train.AdagradOptimizer`.
            Defaults to 0.1, the value that used to be hard-coded.

    Returns:
        dict with the graph handles:
            x: float32 placeholder, shape [None, n_timesteps, n_inputs].
            y: float32 placeholder, shape [None, n_timesteps, n_outputs].
            init_state: float32 placeholder, shape [None, n_hidden].
            predictions: sigmoid outputs stacked to [None, n_timesteps,
                n_outputs].
            total_loss: scalar loss.  Despite the key name this is a mean
                (`tf.reduce_mean`), not a sum.
            optimizer: the op returned by `minimize(...)`, i.e. the training
                step rather than the optimizer object.
    """
    reset_graph()

    x = tf.placeholder(tf.float32, [None, n_timesteps, n_inputs])
    y = tf.placeholder(tf.float32, [None, n_timesteps, n_outputs])
    init_state = tf.placeholder(tf.float32, [None, n_hidden])

    # static_rnn expects a Python list with one tensor per time step:
    # unstacking along axis 1 yields n_timesteps tensors of shape
    # [None, n_inputs].
    rnn_inputs = tf.unstack(x, axis=1)

    gru_cell = GRUCell(n_hidden)
    # The final state is not needed here; only the per-step outputs are used.
    hidden_l1_outputs, _ = tf.contrib.rnn.static_rnn(
        gru_cell, rnn_inputs, initial_state=init_state)

    with tf.variable_scope('output_layer'):
        W_hy = tf.get_variable('W_hy', [n_hidden, n_outputs])
        b_y = tf.get_variable('b_y', [n_outputs], initializer=tf.constant_initializer(0.0))

    # The same output projection is applied at every time step.
    pre_activation_outputs = [tf.matmul(hidden_l1_output, W_hy) + b_y
                              for hidden_l1_output in hidden_l1_outputs]
    post_activation_outputs = [tf.nn.sigmoid(pre_activation_output)
                               for pre_activation_output in pre_activation_outputs]
    # Re-assemble the per-step list into [None, n_timesteps, n_outputs].
    predictions = tf.stack(post_activation_outputs, axis=1)

    squared_errors = tf.pow(tf.subtract(y, predictions), 2)  # same shape as y
    mean_squared_error = tf.reduce_mean(squared_errors)
    train_op = tf.train.AdagradOptimizer(learning_rate).minimize(mean_squared_error)

    return dict(
        x = x,
        y = y,
        init_state = init_state,
        predictions = predictions,
        total_loss = mean_squared_error,
        optimizer = train_op
    )

def test_weight_compressed_gru():
    # Build Computational Graph

    t = time.time()
    g = build_gru_single_layer()
    print("It took", time.time() - t, "seconds to build the graph.")
    for item in tf.trainable_variables():
        print(item)

In [20]:
# Run the smoke test defined above.  The original call used `test_gru`, a name
# that is not defined anywhere in this notebook and only resolved through
# leftover kernel state.
test_weight_compressed_gru()

TypeError: object.__init__() takes no parameters