In [17]:
import tensorflow as tf
from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops.math_ops import sigmoid
from tensorflow.python.ops.math_ops import tanh
from tensorflow.python.ops import array_ops
from tensorflow.python.util import nest
from tensorflow.contrib.rnn import RNNCell
import time

_BIAS_VARIABLE_NAME = "biases"
_WEIGHTS_VARIABLE_NAME = "weights"


In [18]:
class BasicLSTMCell(RNNCell):
  """Basic LSTM recurrent network cell.
  The implementation is based on: http://arxiv.org/abs/1409.2329.
  We add forget_bias (default: 1) to the biases of the forget gate in order to
  reduce the scale of forgetting in the beginning of the training.
  It does not allow cell clipping, a projection layer, and does not
  use peep-hole connections: it is the basic baseline.
  For advanced models, please use the full @{tf.nn.rnn_cell.LSTMCell}
  that follows.
  """

  def __init__(self, num_units, forget_bias=1.0,
               state_is_tuple=True, activation=None, reuse=None):
    """Initialize the basic LSTM cell.
    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  The latter behavior will soon be deprecated.
      activation: Activation function of the inner states.  Default: `tanh`.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
    """
    super(BasicLSTMCell, self).__init__(_reuse=reuse)
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)
    self._num_units = num_units
    self._state_is_tuple = state_is_tuple
    self._activation = activation or math_ops.tanh

  @property
  def state_size(self):
    return (LSTMStateTuple(self._num_units, self._num_units)
            if self._state_is_tuple else 2 * self._num_units)

  @property
  def output_size(self):
    return self._num_units

  def call(self, inputs, state):
    """Peephole Long short-term memory cell (LSTM)."""
    sigmoid = math_ops.sigmoid
    # Parameters of gates are concatenated into one multiply for efficiency.
    if self._state_is_tuple:
      c, h = state
    else:
      c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

    concat = _linear([inputs, h], 4 * self._num_units, True)
    
    with vs.variable_scope('peephole_conn'):
        w_f_diag = vs.get_variable(
              "w_f_diag", shape=[self._num_units], dtype=dtype)
        w_i_diag = vs.get_variable(
              "w_i_diag", shape=[self._num_units], dtype=dtype)
        w_o_diag = vs.get_variable(
              "w_o_diag", shape=[self._num_units], dtype=dtype)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)

    new_c = (
        c * sigmoid(f + w_f_diag * c) + sigmoid(i + w_i_diag * c) * self._activation(j))
    new_h = self._activation(new_c) * sigmoid(o + w_o_diag * c)

    if self._state_is_tuple:
      new_state = LSTMStateTuple(new_c, new_h)
    else:
      new_state = array_ops.concat([new_c, new_h], 1)
    return new_h, new_state

In [19]:
def reset_graph():
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()
    
    
batch_sz = 8
n_timesteps = 12
n_hidden = 512
n_inputs = 513 #f
n_outputs = n_inputs

def build_gru_single_layer():
    reset_graph()

    x = tf.placeholder(tf.float32, [None, n_timesteps, n_inputs])
    y = tf.placeholder(tf.float32, [None, n_timesteps, n_outputs])
    init_state = tf.placeholder(tf.float32, [None, n_hidden])

    # Unpack columns
    rnn_inputs = tf.unstack(x, axis=1) # <b arrays of shape (t,f)>. Converted like this for iteration
    rnn_target_outputs = tf.unstack(y, axis=1) # <b arrays of shape (t,o)>


    gru_cell = GRUCell(n_hidden)
    hidden_l1_outputs, hidden_l1_final_state = tf.contrib.rnn.static_rnn(gru_cell, rnn_inputs, 
                                                                         initial_state=init_state)
    

    with tf.variable_scope('output_layer'):
        W_hy = tf.get_variable('W_hy', [n_hidden, n_outputs])
        b_y = tf.get_variable('b_y', [n_outputs], initializer=tf.constant_initializer(0.0))

    pre_activation_outputs = [tf.matmul(hidden_l1_output, W_hy) + b_y for hidden_l1_output in hidden_l1_outputs]
    post_activation_outputs = [tf.nn.sigmoid(pre_activation_output) for pre_activation_output in pre_activation_outputs]
    post_activation_outputs = tf.stack(post_activation_outputs, axis=1) # <n, t, o>

    squared_losses = tf.pow(tf.subtract(y, post_activation_outputs), 2) # <n,t,o>
    sum_of_squared_losses = tf.reduce_mean(squared_losses)
    optimizer = tf.train.AdagradOptimizer(0.1).minimize(sum_of_squared_losses)

    return dict(
        x = x,
        y = y,
        init_state = init_state,
        predictions = post_activation_outputs,
        total_loss = sum_of_squared_losses,
        optimizer = optimizer
    )

def test_weight_compressed_gru():
    # Build Computational Graph

    t = time.time()
    g = build_gru_single_layer()
    print("It took", time.time() - t, "seconds to build the graph.")
    for item in tf.trainable_variables():
        print(item)

In [20]:
test_gru()

TypeError: object.__init__() takes no parameters