Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
BasicLSTMCell implementation for now.
- Loading branch information
philipperemy
committed
Dec 15, 2016
1 parent
0388bc5
commit 3f1f355
Showing
6 changed files
with
196 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import numpy as np | ||
from numpy.random import uniform | ||
|
||
|
||
def next_batch(bs, seq_len=16):
    """Generate one random batch for the "mean of a sequence" toy task.

    Modify this function to ingest your own data and return it.

    :param bs: number of sequences in the batch.
    :param seq_len: number of time steps per sequence. Defaults to 16; callers
        that feed a fixed-size placeholder must keep the two in sync.
    :return: (inputs, targets) as float32 arrays of shape (bs, seq_len, 1)
        and (bs, 1, 1). Each target is the mean of its input sequence.
        Could be a python generator.
    """
    # Draw IID U(0, 1) samples and convert once; no extra copies are needed.
    x = uniform(size=(bs, seq_len, 1)).astype('float32')
    # keepdims=True keeps the reduced time axis, giving shape (bs, 1, 1).
    y = np.mean(x, axis=1, keepdims=True)
    return x, y
|
||
|
||
if __name__ == '__main__': | ||
print(next_batch(4)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .file_logger import FileLogger |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
class FileLogger(object):
    """Minimal line-oriented logger that writes space-separated rows to a file.

    The header row is written on construction. Every subsequent row must have
    as many fields as the header. Each row is flushed as soon as it is written.

    The logger can be used as a context manager so the underlying file is
    closed even when the body raises.
    """

    def __init__(self, full_filename, headers):
        """Open ``full_filename`` for writing (truncating it) and emit ``headers``."""
        self._headers = headers
        self._out_fp = open(full_filename, 'w')
        self._write(headers)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never swallow an exception raised inside the ``with`` body.
        return False

    def write(self, line):
        """Write one row.

        :param line: sequence of values; each one is converted with ``str``.
        :raises ValueError: if the row length differs from the header length.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if len(line) != len(self._headers):
            raise ValueError('expected %d fields, got %d'
                             % (len(self._headers), len(line)))
        self._write(line)

    def close(self):
        """Close the underlying file."""
        self._out_fp.close()

    def _write(self, arr):
        """Join the stringified fields with a single space and flush the line."""
        self._out_fp.write(' '.join(str(e) for e in arr) + '\n')
        self._out_fp.flush()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import collections | ||
|
||
import numpy as np | ||
import tensorflow as tf | ||
from tensorflow.python.ops.rnn import dynamic_rnn | ||
|
||
from data_reader import next_batch | ||
from helpers import FileLogger | ||
from ml_utils import create_adam_optimizer | ||
from phased_lstm import PhasedLSTMCell | ||
|
||
|
||
def main():
    """Train the recurrent cell on the toy mean-regression task.

    Builds the graph, then runs training steps on batches from ``next_batch``
    and writes, for every step, the rolling mean (last 10 steps) of the
    training loss and of a constant-0.5 benchmark loss to ``log.tsv``.

    The loop runs for up to 1e9 steps, so in practice it is stopped by
    interrupting the process; the log file and the session are released in
    that case as well.
    """
    batch_size = 4
    hidden_size = 32
    learning_rate = 1e-3
    momentum = 0.9

    file_logger = FileLogger('log.tsv', ['step', 'training_loss', 'benchmark_loss'])
    # Everything after opening the log file is guarded so the file is closed
    # even if graph construction or training raises (e.g. KeyboardInterrupt).
    try:
        x = tf.placeholder(tf.float32, (None, 16, 1))
        y = tf.placeholder(tf.float32, (None, 1, 1))

        lstm = PhasedLSTMCell(hidden_size)

        # Random (c, h) initial state; its batch dimension is fixed to
        # batch_size, so fed batches must have exactly that many rows.
        initial_state = (tf.random_normal([batch_size, hidden_size], stddev=0.1),
                         tf.random_normal([batch_size, hidden_size], stddev=0.1))

        outputs, _ = dynamic_rnn(lstm, x, initial_state=initial_state, dtype=tf.float32)

        # NOTE(review): y is (batch, 1, 1) and is broadcast against all of
        # `outputs` here, i.e. every output value is regressed to the target.
        loss = tf.reduce_mean(tf.square(tf.sub(outputs, y)))
        optimizer = create_adam_optimizer(learning_rate, momentum)
        trainable = tf.trainable_variables()
        grad_update = optimizer.minimize(loss, var_list=trainable)

        with tf.Session(config=tf.ConfigProto(log_device_placement=False)) as sess:
            sess.run(tf.global_variables_initializer())

            # Rolling windows over the last 10 steps.
            d = collections.deque(maxlen=10)
            benchmark_d = collections.deque(maxlen=10)
            for step in range(1, int(1e9)):
                x_s, y_s = next_batch(batch_size)
                loss_value, _ = sess.run([loss, grad_update],
                                         feed_dict={x: x_s, y: y_s})

                # The mean converges to 0.5 for IID U(0,1) random variables. Good benchmark.
                benchmark_d.append(np.mean(np.square(0.5 - y_s)))
                d.append(loss_value)
                mean_loss = np.mean(d)
                benchmark_mean_loss = np.mean(benchmark_d)
                file_logger.write([step, mean_loss, benchmark_mean_loss])
    finally:
        file_logger.close()
|
||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import tensorflow as tf | ||
|
||
|
||
def create_convolution_variable(name, shape):
    """Create a named variable initialized with the conv2d Xavier initializer.

    :param name: name given to the created variable.
    :param shape: shape passed to the initializer.
    :return: the new ``tf.Variable``.
    """
    xavier_init = tf.contrib.layers.xavier_initializer_conv2d()
    initial_value = xavier_init(shape=shape)
    return tf.Variable(initial_value, name=name)
|
||
|
||
def create_bias_variable(name, shape):
    """Create a named float32 variable initialized to zeros.

    :param name: name given to the created variable.
    :param shape: shape passed to the initializer.
    :return: the new ``tf.Variable``.
    """
    initializer = tf.constant_initializer(value=0.0, dtype=tf.float32)
    # `name` must be passed by keyword: the second positional parameter of
    # tf.Variable is `trainable`, so a positional name would be dropped.
    return tf.Variable(initializer(shape=shape), name=name)
|
||
|
||
def create_adam_optimizer(learning_rate, momentum):
    """Build an Adam optimizer with ``epsilon=1e-4``.

    :param learning_rate: learning rate forwarded to the optimizer.
    :param momentum: accepted but not used by this factory.
    :return: a ``tf.train.AdamOptimizer`` instance.
    """
    adam = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=1e-4)
    return adam
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
from tensorflow.python.ops import array_ops, math_ops, init_ops | ||
from tensorflow.python.ops import variable_scope as vs | ||
from tensorflow.python.ops.math_ops import sigmoid | ||
from tensorflow.python.ops.math_ops import tanh | ||
from tensorflow.python.ops.rnn_cell import RNNCell | ||
from tensorflow.python.util import nest | ||
|
||
|
||
# this is going to change with v0.13 | ||
|
||
def _linear(args, output_size, bias, bias_start=0.0, scope=None):
    """Apply a learned linear map to one or several 2D tensors.

    Computes sum_i(args[i] * W[i]) by concatenating the inputs along
    dimension 1 and multiplying by a single "Matrix" variable, optionally
    adding a "Bias" variable.

    Args:
      args: a 2D Tensor or a list of 2D, batch x n, Tensors.
      output_size: int, second dimension of the weight matrix.
      bias: boolean, whether to add a bias term or not.
      bias_start: starting value to initialize the bias; 0 by default.
      scope: VariableScope for the created subgraph; defaults to "Linear".

    Returns:
      A 2D Tensor with shape [batch x output_size].

    Raises:
      ValueError: if `args` is missing/empty, or if any argument is not 2D
        or has an unspecified second dimension.
    """
    is_seq = nest.is_sequence(args)
    if args is None or (is_seq and not args):
        raise ValueError("`args` must be specified")
    if not is_seq:
        args = [args]

    # Validate every input shape and accumulate the size along dimension 1.
    shapes = [tensor.get_shape().as_list() for tensor in args]
    total_arg_size = 0
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError("Linear is expecting 2D arguments: %s" % str(shapes))
        if not shape[1]:
            raise ValueError("Linear expects shape[1] of arguments: %s" % str(shapes))
        total_arg_size += shape[1]

    # The variables take the dtype of the first input.
    dtype = args[0].dtype

    # Now the computation.
    with vs.variable_scope(scope or "Linear"):
        matrix = vs.get_variable(
            "Matrix", [total_arg_size, output_size], dtype=dtype)
        # A single input needs no concatenation.
        stacked = args[0] if len(args) == 1 else array_ops.concat(1, args)
        res = math_ops.matmul(stacked, matrix)
        if bias:
            bias_term = vs.get_variable(
                "Bias", [output_size],
                dtype=dtype,
                initializer=init_ops.constant_initializer(
                    bias_start, dtype=dtype))
            return res + bias_term
        return res
|
||
|
||
class PhasedLSTMCell(RNNCell):
    """Recurrent cell with an LSTM-style update.

    As written, the step function is a plain LSTM update (input, new-input,
    forget and output gates); no additional gating is implemented here.
    The state is a ``(c, h)`` pair, each of width ``num_units``.
    """

    def __init__(self, num_units, activation=tanh):
        """Store the cell width and the activation used for the cell update."""
        self._activation = activation
        self._num_units = num_units

    @property
    def state_size(self):
        # One entry for the cell state c and one for the hidden state h.
        return (self._num_units,) * 2

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        """Run one step: return ``(new_h, (new_c, new_h))``."""
        with vs.variable_scope(scope or type(self).__name__):
            c, h = state
            # All four gate pre-activations come from a single matmul.
            gates = _linear([inputs, h], 4 * self._num_units, True)
            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(1, 4, gates)
            kept = c * sigmoid(f)
            written = sigmoid(i) * self._activation(j)
            new_c = kept + written
            new_h = self._activation(new_c) * sigmoid(o)
            return new_h, (new_c, new_h)