
Commit

Switch to the Keras LSTM/GRU implementation
Recent versions of TensorFlow Keras automatically switch
between the cuDNN and plain TensorFlow implementations. The trained
parameters work regardless of the selected implementation.

The conditions for using the cuDNN implementation are documented at:

https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM

They boil down to: (1) an NVIDIA GPU is available, and (2) certain
hyperparameters (e.g. activations) are set to specific values. When the
cuDNN implementation is selected, this results in a nice speedup.
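
As an illustration of those conditions (not part of this commit; the layer
size, input shape, and variable names are made up), here is a minimal sketch
of an LSTM configured so the cuDNN kernel can be picked, assuming a
TF 2.x-style Keras API (exposed as tf.compat.v2.keras on 1.15):

import tensorflow as tf

# Hypothetical input: batches of variable-length sequences with 100 features.
inputs = tf.keras.Input(shape=(None, 100))

# These are the Keras defaults; changing any of them (or masking with
# sequences that are not right-padded) forces the generic implementation.
lstm = tf.keras.layers.LSTM(
    50,
    activation='tanh',               # cuDNN requires tanh
    recurrent_activation='sigmoid',  # cuDNN requires sigmoid
    recurrent_dropout=0.0,           # recurrent dropout disables cuDNN
    unroll=False,
    use_bias=True,
    return_sequences=True)

outputs = lstm(inputs)  # cuDNN kernel is used when an NVIDIA GPU is visible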

The TensorFlow requirement is bumped to 1.15.0. This setup fails on
1.14.0 with a constant-folding error in Grappler:

tensorflow/tensorflow#29525
danieldk committed Nov 7, 2019
1 parent 8e5b956 commit 11b2d27
Showing 6 changed files with 57 additions and 139 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -13,7 +13,7 @@ matrix:
       python: 3.6
       before_script:
         - cd sticker-graph
-        - pip install tensorflow==1.14.0 toml
+        - pip install tensorflow==1.15.0 toml
       script:
         - python setup.py test
         - ./sticker-write-rnn-graph --rnn_layers 2 --hidden_size 100 testdata/sticker.shapes rnn.graph
4 changes: 2 additions & 2 deletions sticker-graph/setup.py
@@ -9,11 +9,11 @@
     license='BlueOak-1.0.0',
     tests_require=[
         'numpy',
-        'tensorflow == 1.13.1',
+        'tensorflow == 1.15.0',
         'toml',
     ],
     install_requires=[
-        'tensorflow == 1.13.1',
+        'tensorflow == 1.15.0',
         'toml',
     ],
     packages=['sticker_graph'],
15 changes: 10 additions & 5 deletions sticker-graph/sticker_graph/model.py
@@ -199,11 +199,16 @@ def subword_reprs(self):
         byte_lens = tf.reshape(subword_lens, [-1])
 
         with tf.compat.v1.variable_scope("byte_rnn"):
-            _, fw, bw = bidi_rnn_layers(self.is_training, byte_reprs, num_layers=self.args.subword_layers, output_size=self.args.subword_hidden_size,
-                                        output_keep_prob=self.args.subword_keep_prob, seq_lens=byte_lens, gru=self.args.subword_gru, residual_connections=self.args.subword_residual)
-
-            # Concat forward/backward states.
-            subword_reprs = tf.concat([fw[-1].h, bw[-1].h], axis=-1)
+            subword_reprs = bidi_rnn_layers(
+                self.is_training,
+                byte_reprs,
+                num_layers=self.args.subword_layers,
+                output_size=self.args.subword_hidden_size,
+                output_keep_prob=self.args.subword_keep_prob,
+                seq_lens=byte_lens,
+                gru=self.args.subword_gru,
+                residual_connections=self.args.subword_residual,
+                return_sequences=False)
 
         return tf.reshape(subword_reprs, [bytes_shape[0], bytes_shape[1],
                                           subword_reprs.shape[-1]])
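For intuition, a small standalone sketch (not code from this repository;
batch and layer sizes are invented, and a TF 2.x eager environment is
assumed) showing why return_sequences=False can replace the manual tf.concat
of the final forward/backward states above: Bidirectional already
concatenates the two directions' last hidden states.

import numpy as np
import tensorflow as tf

# Toy batch: 4 sequences of 10 steps with 8 features each.
inputs = np.random.rand(4, 10, 8).astype(np.float32)

# return_sequences=False: each direction emits only its final hidden state,
# and Bidirectional concatenates them, giving shape [batch, 2 * units].
bi_last = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(16, return_sequences=False))

# return_sequences=True: the full sequence of states, [batch, time, 2 * units].
bi_full = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(16, return_sequences=True))

print(bi_last(inputs).shape)  # (4, 32)
print(bi_full(inputs).shape)  # (4, 10, 32)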
89 changes: 43 additions & 46 deletions sticker-graph/sticker_graph/rnn.py
@@ -1,25 +1,6 @@
 import tensorflow as tf
 
-import sticker_graph.vendored
-
-
-def dropout_wrapper(
-        cell,
-        is_training,
-        output_keep_prob=1.0,
-        state_keep_prob=1.0):
-    output_keep_prob = tf.cond(
-        pred=is_training,
-        true_fn=lambda: tf.constant(output_keep_prob),
-        false_fn=lambda: tf.constant(1.0))
-    state_keep_prob = tf.cond(
-        pred=is_training,
-        true_fn=lambda: tf.constant(state_keep_prob),
-        false_fn=lambda: tf.constant(1.0))
-    return tf.compat.v1.nn.rnn_cell.DropoutWrapper(
-        cell,
-        output_keep_prob=output_keep_prob,
-        state_keep_prob=state_keep_prob)
+from sticker_graph.keras_vendored import GRU, LSTM
 
 
 def bidi_rnn_layers(
@@ -28,32 +9,48 @@ def bidi_rnn_layers(
         num_layers=1,
         output_size=50,
         output_keep_prob=1.0,
-        state_keep_prob=1.0,
-        seq_lens=None,
         gru=False,
-        residual_connections=False):
+        residual_connections=False,
+        return_sequences=True,
+        seq_lens=None):
     if gru:
-        cell = tf.compat.v1.nn.rnn_cell.GRUCell
+        rnn_layer = GRU
     else:
-        cell = tf.compat.v1.nn.rnn_cell.LSTMCell
-
-    fw_cells = [
-        dropout_wrapper(
-            cell=cell(output_size),
-            is_training=is_training,
-            state_keep_prob=state_keep_prob,
-            output_keep_prob=output_keep_prob) for i in range(num_layers)]
-
-    bw_cells = [
-        dropout_wrapper(
-            cell=cell(output_size),
-            is_training=is_training,
-            state_keep_prob=state_keep_prob,
-            output_keep_prob=output_keep_prob) for i in range(num_layers)]
-    return sticker_graph.vendored.stack_bidirectional_dynamic_rnn(
-        fw_cells,
-        bw_cells,
-        inputs,
-        dtype=tf.float32,
-        sequence_length=seq_lens,
-        residual_connections=residual_connections)
+        rnn_layer = LSTM
+
+    # Compute mask
+    mask = None
+    if seq_lens is not None:
+        mask = tf.sequence_mask(
+            seq_lens, maxlen=tf.shape(
+                inputs)[1])
+
+    layer = inputs
+    for i in range(num_layers):
+        # Keep a reference to the previous layer for residual connections.
+        prev_layer = layer
+
+        layer_return_sequences = True
+        if i == num_layers - 1:
+            layer_return_sequences = return_sequences
+
+        # Bidirectional RNN + state output dropout.
+        layer = tf.compat.v2.keras.layers.Bidirectional(
+            rnn_layer(
+                output_size,
+                return_sequences=layer_return_sequences))(
+            layer,
+            mask=mask)
+        layer = tf.compat.v2.keras.layers.Dropout(
+            1.0 -
+            output_keep_prob)(
+            layer,
+            training=is_training)
+
+        # Add a residual connection if requested. A residual connection
+        # is not added for the first layer, since input/output sizes
+        # may mismatch.
+        if i != 0 and residual_connections:
+            layer = layer + prev_layer
+
+    return layer
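
To make the new signature concrete, a hypothetical call follows (the
placeholders, sizes, and the sticker_graph.rnn import path are assumptions
for illustration). Note that output_keep_prob is still a keep probability;
the function converts it to the drop rate Keras expects via
1.0 - output_keep_prob.

import tensorflow as tf

from sticker_graph.rnn import bidi_rnn_layers

# Invented placeholders: a batch of padded sequences plus their lengths.
inputs = tf.compat.v1.placeholder(tf.float32, shape=[None, None, 100])
seq_lens = tf.compat.v1.placeholder(tf.int32, shape=[None])
is_training = tf.compat.v1.placeholder(tf.bool, shape=[])

# Two stacked bidirectional LSTMs over the full sequence, with residual
# connections between the stacked layers.
hidden_states = bidi_rnn_layers(
    is_training,
    inputs,
    num_layers=2,
    output_size=100,
    output_keep_prob=0.85,
    seq_lens=seq_lens,
    gru=False,
    residual_connections=True)

# return_sequences defaults to True, so the result has shape
# [batch, time, 2 * output_size].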
2 changes: 1 addition & 1 deletion sticker-graph/sticker_graph/rnn_model.py
@@ -14,7 +14,7 @@ def __init__(
 
         self.setup_placeholders()
 
-        hidden_states, _, _ = bidi_rnn_layers(
+        hidden_states = bidi_rnn_layers(
             self.is_training,
             self.inputs,
             num_layers=args.rnn_layers,
84 changes: 0 additions & 84 deletions sticker-graph/sticker_graph/vendored.py
@@ -17,90 +17,6 @@
 import tensorflow as tf
 
 
-def stack_bidirectional_dynamic_rnn(cells_fw,
-                                    cells_bw,
-                                    inputs,
-                                    initial_states_fw=None,
-                                    initial_states_bw=None,
-                                    dtype=None,
-                                    sequence_length=None,
-                                    parallel_iterations=None,
-                                    time_major=False,
-                                    scope=None,
-                                    residual_connections=False):
-    """
-    NOTE:
-    This is a modified copy of tf.contrib.rnn.stack_bidirectional_dynamic_rnn
-    that adds the option to have residual skip connections. It has been taken
-    from https://github.com/tensorflow/tensorflow/blob/r1.13/tensorflow/contrib/rnn/python/ops/rnn.py.
-    If residual connections is True, the input of a layer is summed with the
-    output of the layer. In order to allow for inputs with other dimensionality
-    than that of the concatenated RNN states, the input to the first layer is
-    not summed to its output.
-    """
-    if not cells_fw:
-        raise ValueError(
-            "Must specify at least one fw cell for BidirectionalRNN.")
-    if not cells_bw:
-        raise ValueError(
-            "Must specify at least one bw cell for BidirectionalRNN.")
-    if not isinstance(cells_fw, list):
-        raise ValueError(
-            "cells_fw must be a list of RNNCells (one per layer).")
-    if not isinstance(cells_bw, list):
-        raise ValueError(
-            "cells_bw must be a list of RNNCells (one per layer).")
-    if len(cells_fw) != len(cells_bw):
-        raise ValueError(
-            "Forward and Backward cells must have the same depth.")
-    if (initial_states_fw is not None and
-            (not isinstance(initial_states_fw, list) or
-             len(initial_states_fw) != len(cells_fw))):
-        raise ValueError(
-            "initial_states_fw must be a list of state tensors (one per layer).")
-    if (initial_states_bw is not None and
-            (not isinstance(initial_states_bw, list) or
-             len(initial_states_bw) != len(cells_bw))):
-        raise ValueError(
-            "initial_states_bw must be a list of state tensors (one per layer).")
-
-    states_fw = []
-    states_bw = []
-    prev_layer = inputs
-
-    with tf.compat.v1.variable_scope(scope or "stack_bidirectional_rnn"):
-        for i, (cell_fw, cell_bw) in enumerate(zip(cells_fw, cells_bw)):
-            initial_state_fw = None
-            initial_state_bw = None
-            if initial_states_fw:
-                initial_state_fw = initial_states_fw[i]
-            if initial_states_bw:
-                initial_state_bw = initial_states_bw[i]
-
-            with tf.compat.v1.variable_scope("cell_%d" % i):
-                shortcut = prev_layer
-                outputs, (state_fw, state_bw) = tf.compat.v1.nn.bidirectional_dynamic_rnn(
-                    cell_fw,
-                    cell_bw,
-                    prev_layer,
-                    initial_state_fw=initial_state_fw,
-                    initial_state_bw=initial_state_bw,
-                    sequence_length=sequence_length,
-                    parallel_iterations=parallel_iterations,
-                    dtype=dtype,
-                    time_major=time_major)
-                # Concat the outputs to create the new input.
-                prev_layer = tf.concat(outputs, 2)
-                if i != 0 and residual_connections:
-                    prev_layer += shortcut
-
-            states_fw.append(state_fw)
-            states_bw.append(state_bw)
-
-    return prev_layer, tuple(states_fw), tuple(states_bw)
-
-
 def _create_file_writer_generic_type(logdir,
                                      name="logdir",
                                      max_queue=None,
