In [1]:
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


1. W = 0, U = 1 and b = 0. Assuming each numeric value is input at time t, these weights ensure that, no matter what the history may be, the input value is always considered. But each input is a sequence, so this doesn't answer the question.

Input: [[0],
       [1],
       [0,0],
       [1,0],
       [0,0,0],
       [1,0,0]]

The correct answer is: U = 1, W = 1 and b = 0. The intent is to remember the state of the history from the first input and not change it later. This RNN weight scheme gives equal importance to the input and the history.

2. GRU weight scheme to remember the history from the first input and not reset it with the subsequent inputs: U_z = 0, W_z = 1 and U_h = 1, given W_r = U_r = 0 and all biases equal to 0. It doesn't matter what value W_h takes, because it gets multiplied by the all-zero r_t.

2.. Switching state when input = 1: The reason it may not be modeled with a 1-d input is that h_t depends on both x_t and h_{t-1}.
This indicates that the same importance must be assigned to the input and the hidden state. However, this always creates either a positive or a negative input irrespective of the bias value, the previous state and the current input.

In [2]:
from __future__ import absolute_import
from __future__ import division

import argparse
import logging
import sys

import tensorflow as tf
import numpy as np

In [3]:
# Configure the root handler first so records are printed as "LEVEL:message",
# then create this notebook's named logger and let it emit DEBUG and above.
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
logger = logging.getLogger("hw3.q3.1")
logger.setLevel(logging.DEBUG)


In [20]:
class GRUCell(tf.nn.rnn_cell.RNNCell):
    """Wrapper around our GRU cell implementation that allows us to play
    nicely with TensorFlow.

    The cell's output and its hidden state are the same tensor, so
    `state_size` and `output_size` both report the configured state size.
    """

    def __init__(self, input_size, state_size):
        """Record the cell dimensions.

        Args:
            input_size: width of each input vector x_t.
            state_size: width of the hidden state h_t.
        """
        self.input_size = input_size
        self._state_size = state_size

    @property
    def state_size(self):
        """Width of the hidden state vector."""
        return self._state_size

    @property
    def output_size(self):
        """Width of the output vector (identical to the state width)."""
        return self._state_size

    def _gate_variables(self, gate):
        """Create (or fetch, under reuse) U_<gate>, W_<gate>, b_<gate> in the current variable scope."""
        input_weights = tf.get_variable(
            name="U_" + gate,
            shape=(self.input_size, self._state_size),
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())
        state_weights = tf.get_variable(
            name="W_" + gate,
            shape=(self._state_size, self._state_size),
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())
        bias = tf.get_variable(
            name="b_" + gate,
            shape=(self._state_size,),
            dtype=tf.float32,
            initializer=tf.initializers.zeros())
        return input_weights, state_weights, bias

    def __call__(self, inputs, state, scope=None):
        """Updates the state using the previous @state and @inputs.

        The GRU equations implemented here are:

        z_t = sigmoid(x_t U_z + h_{t-1} W_z + b_z)
        r_t = sigmoid(x_t U_r + h_{t-1} W_r + b_r)
        o_t = tanh(x_t U_o + r_t * (h_{t-1} W_o) + b_o)
        h_t = z_t * h_{t-1} + (1 - z_t) * o_t

        where `*` is element-wise multiplication. The weight matrices use
        xavier initialization and the biases start at zero.

        Args:
            inputs: is the input vector of size [None, self.input_size]
            state: is the previous state vector of size [None, self.state_size]
            scope: is the name of the scope to be used when defining the
                variables inside; defaults to the class name.
        Returns:
            a pair of the output vector and the new state vector.
        """
        scope = scope or type(self).__name__

        # It's always a good idea to scope variables in functions lest they
        # be defined elsewhere!
        with tf.variable_scope(scope):
            # Creation order (z, r, o) is kept stable so that variable
            # naming and initialization order do not change.
            U_z, W_z, b_z = self._gate_variables("z")  # update gate
            U_r, W_r, b_r = self._gate_variables("r")  # reset gate
            U_o, W_o, b_o = self._gate_variables("o")  # candidate state

            z_t = tf.nn.sigmoid(tf.matmul(inputs, U_z) + tf.matmul(state, W_z) + b_z)
            r_t = tf.nn.sigmoid(tf.matmul(inputs, U_r) + tf.matmul(state, W_r) + b_r)

            # The candidate state must use its own parameters (U_o, W_o, b_o);
            # reusing the reset gate's parameters here would leave the
            # candidate weights unused and untrained.
            o_t = tf.nn.tanh(tf.matmul(inputs, U_o) + r_t * tf.matmul(state, W_o) + b_o)

            # Interpolate between the previous state and the candidate;
            # the scalar 1.0 broadcasts against z_t.
            new_state = z_t * state + (1.0 - z_t) * o_t

        # For a GRU, the output and state are the same (N.B. this isn't true
        # for an LSTM, though we aren't using one of those in our
        # assignment)
        output = new_state
        return output, new_state
    

In [21]:
def test_gru_cell():
    """Sanity-check one GRUCell step on a fixed two-example batch.

    All three gates get the same pinned parameters (identity-shaped weight
    matrices, all-ones biases) under the "gru" scope. The cell is then built
    with variable reuse switched on so it picks those variables up, and the
    result of a single step is compared against hard-coded expected values.
    """
    input_dim, hidden_dim = 3, 2

    with tf.Graph().as_default(), tf.variable_scope("test_gru_cell"):
        x_placeholder = tf.placeholder(tf.float32, shape=(None, input_dim))
        h_placeholder = tf.placeholder(tf.float32, shape=(None, hidden_dim))

        # Pre-create every parameter the cell will look up, gate by gate.
        with tf.variable_scope("gru"):
            for gate in ("r", "z", "o"):
                tf.get_variable("U_" + gate, initializer=np.eye(input_dim, hidden_dim, dtype=np.float32))
                tf.get_variable("W_" + gate, initializer=np.eye(hidden_dim, hidden_dim, dtype=np.float32))
                tf.get_variable("b_" + gate, initializer=np.ones(hidden_dim, dtype=np.float32))

        # From here on get_variable returns the variables defined above.
        tf.get_variable_scope().reuse_variables()
        cell = GRUCell(input_dim, hidden_dim)
        output_op, state_op = cell(x_placeholder, h_placeholder, scope="gru")

        init_op = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init_op)

            batch_inputs = np.array(
                [[0.4, 0.5, 0.6],
                 [0.3, -0.2, -0.1]], dtype=np.float32)
            batch_state = np.array(
                [[0.2, 0.5],
                 [-0.3, -0.3]], dtype=np.float32)
            expected_output = np.array(
                [[0.320, 0.555],
                 [-0.006, 0.020]], dtype=np.float32)
            # Output and state coincide for a GRU, so one expectation serves both.
            expected_state = expected_output

            feed = {x_placeholder: batch_inputs, h_placeholder: batch_state}
            actual_output, actual_state = session.run([output_op, state_op], feed_dict=feed)
            print("y_ = " + str(actual_output))
            print("ht_ = " + str(actual_state))

            assert np.allclose(actual_output, actual_state), "output and state should be equal."
            assert np.allclose(expected_state, actual_state, atol=1e-2), "new state vector does not seem to be correct."


In [22]:
def do_test(_):
    """Run the GRU cell test, logging before it starts and after it passes.

    The single positional argument is accepted but ignored.
    """
    logger.info("Testing gru_cell")
    test_gru_cell()
    logger.info("Passed!")


In [23]:
# Run the GRU cell check; do_test ignores its argument.
do_test(" ")

INFO:Testing gru_cell
INFO:Passed!


y_ = [[ 0.32035077  0.55478156]
 [-0.00592546  0.0195577 ]]
ht_ = [[ 0.32035077  0.55478156]
 [-0.00592546  0.0195577 ]]
