Commit

BasicLSTMCell implementation for now.

philipperemy committed Dec 15, 2016
1 parent 0388bc5 commit 3f1f355
Showing 6 changed files with 196 additions and 0 deletions.
16 changes: 16 additions & 0 deletions data_reader.py
@@ -0,0 +1,16 @@
import numpy as np
from numpy.random import uniform


def next_batch(bs):
    """
    Modify this function to ingest your data and return it.
    :return: (inputs, targets). Could be a Python generator.
    """
    x = np.array(uniform(size=(bs, 16, 1)), dtype='float32')
    y = np.mean(x, axis=1)
    return np.array(x, dtype='float32'), np.array(np.reshape(y, (bs, 1, 1)), dtype='float32')


if __name__ == '__main__':
    print(next_batch(4))
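
As a quick sanity check of the shapes next_batch produces, a minimal sketch (assuming data_reader.py above is importable as-is):

# Minimal shape check; reuses next_batch exactly as defined above.
import numpy as np
from data_reader import next_batch

x, y = next_batch(4)
assert x.shape == (4, 16, 1)   # 4 sequences of 16 scalar time steps
assert y.shape == (4, 1, 1)    # one target per sequence: the mean of its 16 steps
assert np.allclose(y[:, 0, 0], x.mean(axis=1)[:, 0])
print(x.dtype, y.dtype)        # float32 float32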
1 change: 1 addition & 0 deletions helpers/__init__.py
@@ -0,0 +1 @@
from .file_logger import FileLogger
17 changes: 17 additions & 0 deletions helpers/file_logger.py
@@ -0,0 +1,17 @@
class FileLogger(object):
    """Writes space-separated rows to a file: one header row, then one row per write() call."""

    def __init__(self, full_filename, headers):
        self._headers = headers
        self._out_fp = open(full_filename, 'w')
        self._write(headers)

    def write(self, line):
        assert len(line) == len(self._headers)
        self._write(line)

    def close(self):
        self._out_fp.close()

    def _write(self, arr):
        arr = [str(e) for e in arr]
        self._out_fp.write(' '.join(arr) + '\n')
        self._out_fp.flush()
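
A minimal usage sketch of FileLogger; the file name metrics.tsv and the two-column header are arbitrary examples, not part of the repository:

# FileLogger usage sketch; 'metrics.tsv' is an arbitrary example name.
from helpers import FileLogger

logger = FileLogger('metrics.tsv', ['step', 'loss'])
logger.write([1, 0.25])   # each row must match the number of header columns
logger.write([2, 0.19])
logger.close()
# metrics.tsv now contains space-separated rows: "step loss", "1 0.25", "2 0.19".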
58 changes: 58 additions & 0 deletions main.py
@@ -0,0 +1,58 @@
import collections

import numpy as np
import tensorflow as tf
from tensorflow.python.ops.rnn import dynamic_rnn

from data_reader import next_batch
from helpers import FileLogger
from ml_utils import create_adam_optimizer
from phased_lstm import PhasedLSTMCell


def main():
    batch_size = 4
    hidden_size = 32
    learning_rate = 1e-3
    momentum = 0.9

    file_logger = FileLogger('log.tsv', ['step', 'training_loss', 'benchmark_loss'])

    x = tf.placeholder(tf.float32, (None, 16, 1))
    y = tf.placeholder(tf.float32, (None, 1, 1))

    lstm = PhasedLSTMCell(hidden_size)

    initial_state = (tf.random_normal([batch_size, hidden_size], stddev=0.1),
                     tf.random_normal([batch_size, hidden_size], stddev=0.1))

    outputs, state = dynamic_rnn(lstm, x, initial_state=initial_state, dtype=tf.float32)
    _, final_hidden = state

    loss = tf.reduce_mean(tf.square(tf.sub(outputs, y)))
    optimizer = create_adam_optimizer(learning_rate, momentum)
    trainable = tf.trainable_variables()
    grad_update = optimizer.minimize(loss, var_list=trainable)

    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    d = collections.deque(maxlen=10)
    benchmark_d = collections.deque(maxlen=10)
    for step in range(1, int(1e9)):
        x_s, y_s = next_batch(batch_size)
        loss_value, _, pred_value = sess.run([loss, grad_update, outputs],
                                             feed_dict={x: x_s, y: y_s})

        # The mean converges to 0.5 for IID U(0,1) random variables. Good benchmark.
        benchmark_d.append(np.mean(np.square(0.5 - y_s)))
        d.append(loss_value)
        mean_loss = np.mean(d)
        benchmark_mean_loss = np.mean(benchmark_d)
        file_logger.write([step, mean_loss, benchmark_mean_loss])
    file_logger.close()


if __name__ == '__main__':
    main()
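
For context on the benchmark comment above: y_s holds means of 16 IID U(0,1) samples, so always predicting 0.5 gives an expected squared error of Var(U)/16 = (1/12)/16 ≈ 0.0052. Training is doing something useful once training_loss drops below that value. A quick numerical check (a sketch, reusing next_batch from data_reader.py):

# benchmark_loss should hover around Var(U(0,1)) / 16 = 1 / 192.
import numpy as np
from data_reader import next_batch

_, y_s = next_batch(100000)
print(np.mean(np.square(0.5 - y_s)))   # ~0.0052
print(1.0 / (12 * 16))                 # 0.005208...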
16 changes: 16 additions & 0 deletions ml_utils.py
@@ -0,0 +1,16 @@
import tensorflow as tf


def create_convolution_variable(name, shape):
    initializer = tf.contrib.layers.xavier_initializer_conv2d()
    variable = tf.Variable(initializer(shape=shape), name=name)
    return variable


def create_bias_variable(name, shape):
    initializer = tf.constant_initializer(value=0.0, dtype=tf.float32)
    return tf.Variable(initializer(shape=shape), name=name)


def create_adam_optimizer(learning_rate, momentum):
    # Adam only uses the learning rate here; the momentum argument is accepted but unused.
    return tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=1e-4)
88 changes: 88 additions & 0 deletions phased_lstm.py
@@ -0,0 +1,88 @@
from tensorflow.python.ops import array_ops, math_ops, init_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops.math_ops import sigmoid
from tensorflow.python.ops.math_ops import tanh
from tensorflow.python.ops.rnn_cell import RNNCell
from tensorflow.python.util import nest


# this is going to change with v0.13

def _linear(args, output_size, bias, bias_start=0.0, scope=None):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.
    Args:
      args: a 2D Tensor or a list of 2D, batch x n, Tensors.
      output_size: int, second dimension of W[i].
      bias: boolean, whether to add a bias term or not.
      bias_start: starting value to initialize the bias; 0 by default.
      scope: VariableScope for the created subgraph; defaults to "Linear".
    Returns:
      A 2D Tensor with shape [batch x output_size] equal to
      sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
    Raises:
      ValueError: if some of the arguments have an unspecified or wrong shape.
    """
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError("Linear is expecting 2D arguments: %s" % str(shapes))
        if not shape[1]:
            raise ValueError("Linear expects shape[1] of arguments: %s" % str(shapes))
        else:
            total_arg_size += shape[1]

    dtype = [a.dtype for a in args][0]

    # Now the computation.
    with vs.variable_scope(scope or "Linear"):
        matrix = vs.get_variable(
            "Matrix", [total_arg_size, output_size], dtype=dtype)
        if len(args) == 1:
            res = math_ops.matmul(args[0], matrix)
        else:
            res = math_ops.matmul(array_ops.concat(1, args), matrix)
        if not bias:
            return res
        bias_term = vs.get_variable(
            "Bias", [output_size],
            dtype=dtype,
            initializer=init_ops.constant_initializer(
                bias_start, dtype=dtype))
    return res + bias_term


class PhasedLSTMCell(RNNCell):
    def __init__(self, num_units, activation=tanh):
        self._num_units = num_units
        self._activation = activation

    @property
    def state_size(self):
        return self._num_units, self._num_units

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM)."""
        with vs.variable_scope(scope or type(self).__name__):
            # Parameters of gates are concatenated into one multiply for efficiency.
            c, h = state
            concat = _linear([inputs, h], 4 * self._num_units, True)
            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(1, 4, concat)
            new_c = (c * sigmoid(f) + sigmoid(i) * self._activation(j))
            new_h = self._activation(new_c) * sigmoid(o)
            new_state = (new_c, new_h)
            return new_h, new_state
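
For clarity, the gate arithmetic in __call__ above is the standard BasicLSTMCell update (per the commit message, a BasicLSTMCell implementation for now). A NumPy sketch of a single step follows; W and b are hypothetical stand-ins for the "Matrix" and "Bias" variables that _linear creates:

# NumPy sketch of one cell step. W has shape (input_size + num_units, 4 * num_units),
# b has shape (4 * num_units,); both stand in for the variables created by _linear.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x_t, c, h, W, b):
    # concat = _linear([inputs, h], 4 * num_units, True)
    concat = np.dot(np.concatenate([x_t, h], axis=1), W) + b
    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = np.split(concat, 4, axis=1)
    new_c = c * sigmoid(f) + sigmoid(i) * np.tanh(j)
    new_h = np.tanh(new_c) * sigmoid(o)
    return new_h, (new_c, new_h)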
