Commit

BasicLSTMCell implementation for now.

philipperemy committed Dec 15, 2016
1 parent 0388bc5 commit 3f1f355
Showing 6 changed files with 196 additions and 0 deletions.
16 changes: 16 additions & 0 deletions data_reader.py
@@ -0,0 +1,16 @@
import numpy as np
from numpy.random import uniform


def next_batch(bs):
    """
    Modify this function to ingest your data and return it.
    :return: (inputs, targets). Could be a Python generator.
    """
    x = np.array(uniform(size=(bs, 16, 1)), dtype='float32')
    y = np.mean(x, axis=1)
    return np.array(x, dtype='float32'), np.array(np.reshape(y, (bs, 1, 1)), dtype='float32')


if __name__ == '__main__':
    print(next_batch(4))
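
As a quick sanity check of the shapes next_batch produces, a minimal sketch (assuming data_reader.py above is importable as-is):

# Minimal shape check; reuses next_batch exactly as defined above.
import numpy as np
from data_reader import next_batch

x, y = next_batch(4)
assert x.shape == (4, 16, 1)   # 4 sequences of 16 scalar time steps
assert y.shape == (4, 1, 1)    # one target per sequence: the mean of its 16 steps
assert np.allclose(y[:, 0, 0], x.mean(axis=1)[:, 0])
print(x.dtype, y.dtype)        # float32 float32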
1 change: 1 addition & 0 deletions helpers/__init__.py
@@ -0,0 +1 @@
from .file_logger import FileLogger
17 changes: 17 additions & 0 deletions helpers/file_logger.py
@@ -0,0 +1,17 @@
class FileLogger(object):
    """Writes space-separated rows to a file: one header row, then one row per write() call."""

    def __init__(self, full_filename, headers):
        self._headers = headers
        self._out_fp = open(full_filename, 'w')
        self._write(headers)

    def write(self, line):
        assert len(line) == len(self._headers)
        self._write(line)

    def close(self):
        self._out_fp.close()

    def _write(self, arr):
        arr = [str(e) for e in arr]
        self._out_fp.write(' '.join(arr) + '\n')
        self._out_fp.flush()
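
A minimal usage sketch of FileLogger; the file name metrics.tsv and the two-column header are arbitrary examples, not part of the repository:

# FileLogger usage sketch; 'metrics.tsv' is an arbitrary example name.
from helpers import FileLogger

logger = FileLogger('metrics.tsv', ['step', 'loss'])
logger.write([1, 0.25])   # each row must match the number of header columns
logger.write([2, 0.19])
logger.close()
# metrics.tsv now contains space-separated rows: "step loss", "1 0.25", "2 0.19".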
58 changes: 58 additions & 0 deletions main.py
@@ -0,0 +1,58 @@
import collections

import numpy as np
import tensorflow as tf
from tensorflow.python.ops.rnn import dynamic_rnn

from data_reader import next_batch
from helpers import FileLogger
from ml_utils import create_adam_optimizer
from phased_lstm import PhasedLSTMCell


def main():
    batch_size = 4
    hidden_size = 32
    learning_rate = 1e-3
    momentum = 0.9

    file_logger = FileLogger('log.tsv', ['step', 'training_loss', 'benchmark_loss'])

    x = tf.placeholder(tf.float32, (None, 16, 1))
    y = tf.placeholder(tf.float32, (None, 1, 1))

    lstm = PhasedLSTMCell(hidden_size)

    initial_state = (tf.random_normal([batch_size, hidden_size], stddev=0.1),
                     tf.random_normal([batch_size, hidden_size], stddev=0.1))

    outputs, state = dynamic_rnn(lstm, x, initial_state=initial_state, dtype=tf.float32)
    _, final_hidden = state

    loss = tf.reduce_mean(tf.square(tf.sub(outputs, y)))
    optimizer = create_adam_optimizer(learning_rate, momentum)
    trainable = tf.trainable_variables()
    grad_update = optimizer.minimize(loss, var_list=trainable)

    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    init = tf.global_variables_initializer()
    sess.run(init)

    d = collections.deque(maxlen=10)
    benchmark_d = collections.deque(maxlen=10)
    for step in range(1, int(1e9)):
        x_s, y_s = next_batch(batch_size)
        loss_value, _, pred_value = sess.run([loss, grad_update, outputs],
                                             feed_dict={x: x_s, y: y_s})

        # The mean converges to 0.5 for IID U(0,1) random variables. Good benchmark.
        benchmark_d.append(np.mean(np.square(0.5 - y_s)))
        d.append(loss_value)
        mean_loss = np.mean(d)
        benchmark_mean_loss = np.mean(benchmark_d)
        file_logger.write([step, mean_loss, benchmark_mean_loss])
    file_logger.close()


if __name__ == '__main__':
    main()
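
For context on the benchmark comment above: y_s holds means of 16 IID U(0,1) samples, so always predicting 0.5 gives an expected squared error of Var(U)/16 = (1/12)/16 ≈ 0.0052. Training is doing something useful once training_loss drops below that value. A quick numerical check (a sketch, reusing next_batch from data_reader.py):

# benchmark_loss should hover around Var(U(0,1)) / 16 = 1 / 192.
import numpy as np
from data_reader import next_batch

_, y_s = next_batch(100000)
print(np.mean(np.square(0.5 - y_s)))   # ~0.0052
print(1.0 / (12 * 16))                 # 0.005208...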
16 changes: 16 additions & 0 deletions ml_utils.py
@@ -0,0 +1,16 @@
import tensorflow as tf


def create_convolution_variable(name, shape):
    initializer = tf.contrib.layers.xavier_initializer_conv2d()
    variable = tf.Variable(initializer(shape=shape), name=name)
    return variable


def create_bias_variable(name, shape):
    initializer = tf.constant_initializer(value=0.0, dtype=tf.float32)
    return tf.Variable(initializer(shape=shape), name=name)


def create_adam_optimizer(learning_rate, momentum):
    # Adam only uses the learning rate here; the momentum argument is accepted but unused.
    return tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=1e-4)
88 changes: 88 additions & 0 deletions phased_lstm.py
@@ -0,0 +1,88 @@
from tensorflow.python.ops import array_ops, math_ops, init_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops.math_ops import sigmoid
from tensorflow.python.ops.math_ops import tanh
from tensorflow.python.ops.rnn_cell import RNNCell
from tensorflow.python.util import nest


# this is going to change with v0.13

def _linear(args, output_size, bias, bias_start=0.0, scope=None):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.
    Args:
      args: a 2D Tensor or a list of 2D, batch x n, Tensors.
      output_size: int, second dimension of W[i].
      bias: boolean, whether to add a bias term or not.
      bias_start: starting value to initialize the bias; 0 by default.
      scope: VariableScope for the created subgraph; defaults to "Linear".
    Returns:
      A 2D Tensor with shape [batch x output_size] equal to
      sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
    Raises:
      ValueError: if some of the arguments have an unspecified or wrong shape.
    """
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError("Linear is expecting 2D arguments: %s" % str(shapes))
        if not shape[1]:
            raise ValueError("Linear expects shape[1] of arguments: %s" % str(shapes))
        else:
            total_arg_size += shape[1]

    dtype = [a.dtype for a in args][0]

    # Now the computation.
    with vs.variable_scope(scope or "Linear"):
        matrix = vs.get_variable(
            "Matrix", [total_arg_size, output_size], dtype=dtype)
        if len(args) == 1:
            res = math_ops.matmul(args[0], matrix)
        else:
            res = math_ops.matmul(array_ops.concat(1, args), matrix)
        if not bias:
            return res
        bias_term = vs.get_variable(
            "Bias", [output_size],
            dtype=dtype,
            initializer=init_ops.constant_initializer(
                bias_start, dtype=dtype))
    return res + bias_term


class PhasedLSTMCell(RNNCell):
    def __init__(self, num_units, activation=tanh):
        self._num_units = num_units
        self._activation = activation

    @property
    def state_size(self):
        return self._num_units, self._num_units

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM)."""
        with vs.variable_scope(scope or type(self).__name__):
            # Parameters of gates are concatenated into one multiply for efficiency.
            c, h = state
            concat = _linear([inputs, h], 4 * self._num_units, True)
            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(1, 4, concat)
            new_c = (c * sigmoid(f) + sigmoid(i) * self._activation(j))
            new_h = self._activation(new_c) * sigmoid(o)
            new_state = (new_c, new_h)
            return new_h, new_state
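
For clarity, the gate arithmetic in __call__ above is the standard BasicLSTMCell update (per the commit message, a BasicLSTMCell implementation for now). A NumPy sketch of a single step follows; W and b are hypothetical stand-ins for the "Matrix" and "Bias" variables that _linear creates:

# NumPy sketch of one cell step. W has shape (input_size + num_units, 4 * num_units),
# b has shape (4 * num_units,); both stand in for the variables created by _linear.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x_t, c, h, W, b):
    # concat = _linear([inputs, h], 4 * num_units, True)
    concat = np.dot(np.concatenate([x_t, h], axis=1), W) + b
    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = np.split(concat, 4, axis=1)
    new_c = c * sigmoid(f) + sigmoid(i) * np.tanh(j)
    new_h = np.tanh(new_c) * sigmoid(o)
    return new_h, (new_c, new_h)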
