Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Perform LMUFFT with raw convolution #42

Merged
merged 5 commits into from
Aug 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 18 additions & 4 deletions .nengobones.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@ manifest_in: {}

setup_py:
install_req:
- packaging>=20.9
- scipy>=1.0.0
- tensorflow>=2.1.0
tests_req:
- pytest>=6.1.0
- pytest-rng>=1.0.0
docs_req:
- matplotlib>=3.0.2
- matplotlib>=3.0.2,<3.4.3
- jupyter>=1.0.0
- seaborn>=0.9.0
- sphinx>=1.8
Expand Down Expand Up @@ -65,7 +66,7 @@ travis_yml:
TF_VERSION: tensorflow
jobs:
- script: static
- script: test
- script: remote-test
- script: test
env:
TF_VERSION: tensorflow==2.1.0
Expand Down Expand Up @@ -94,6 +95,19 @@ ci_scripts:
coverage: true
pip_install:
- $TF_VERSION
- template: remote-script
remote_script: test
output_name: remote-test
host: azure
travis_var_key: 2895d60e3414
azure_name: nengo-dl
azure_group: nengo-ci
coverage: true
remote_vars:
TF_FORCE_GPU_ALLOW_GROWTH: "true"
TF_VERSION: $TF_VERSION
remote_setup:
- conda install -y -c conda-forge cudatoolkit=11.3 cudnn=8.2
- template: remote-script
remote_script: docs
output_name: remote-docs
Expand All @@ -102,7 +116,7 @@ ci_scripts:
azure_name: nengo-dl-docs
azure_group: nengo-ci
remote_setup:
- conda install -y -c conda-forge cudatoolkit=11.2 cudnn=8.1
- conda install -y -c conda-forge cudatoolkit=11.3 cudnn=8.2
- template: remote-script
remote_script: examples
output_name: remote-examples
Expand All @@ -111,7 +125,7 @@ ci_scripts:
azure_name: nengo-dl-examples
azure_group: nengo-ci
remote_setup:
- conda install -y -c conda-forge cudatoolkit=11.2 cudnn=8.1
- conda install -y -c conda-forge cudatoolkit=11.3 cudnn=8.2
- template: deploy

codecov_yml: {}
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
SCRIPT="static"
-
env:
SCRIPT="test"
SCRIPT="remote-test"
-
env:
TF_VERSION="tensorflow==2.1.0"
Expand Down
11 changes: 11 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,26 @@ Release history
and ``B`` LMU matrices. This is mainly useful in combination with
``trainable_theta=True``, where setting ``discretizer="euler"`` may improve the
training speed (possibly at the cost of some accuracy). (`#41`_)
- The ``keras_lmu.LMUFFT`` layer can now use raw convolution internally (as opposed to
FFT-based convolution). The new ``conv_mode`` option exposes this. The new
``truncate_ir`` option allows truncating the impulse response when running with a
raw convolution mode, for efficiency. Whether FFT-based or raw convolution is faster
depends on the specific model, hardware, and amount of truncation. (`#42`_)

**Changed**

- The ``A`` and ``B`` matrices are now stored as constants instead of non-trainable
variables. This can improve the training/inference speed, but it means that saved
weights from previous versions will be incompatible. (`#41`_)
- Renamed ``keras_lmu.LMUFFT`` to ``keras_lmu.LMUFeedforward``. (`#42`_)

**Fixed**

- Fixed dropout support in TensorFlow 2.6. (`#42`_)

.. _#40: https://github.com/nengo/keras-lmu/pull/40
.. _#41: https://github.com/nengo/keras-lmu/pull/41
.. _#42: https://github.com/nengo/keras-lmu/pull/42

0.3.1 (November 16, 2020)
=========================
Expand Down
2 changes: 1 addition & 1 deletion keras_lmu/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""KerasLMU provides a package for deep learning with Legendre Memory Units."""

from .layers import LMU, LMUFFT, LMUCell
from .layers import LMU, LMUCell, LMUFeedforward
from .version import version as __version__

__copyright__ = "2019-2021, Applied Brain Research"
Expand Down
127 changes: 96 additions & 31 deletions keras_lmu/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@

import numpy as np
import tensorflow as tf
from tensorflow.python.keras.layers.recurrent import DropoutRNNCellMixin
from packaging import version

if version.parse(tf.__version__) < version.parse("2.6.0rc0"):
from tensorflow.python.keras.layers.recurrent import DropoutRNNCellMixin
else:
from keras.layers.recurrent import DropoutRNNCellMixin


class LMUCell(DropoutRNNCellMixin, tf.keras.layers.Layer):
Expand Down Expand Up @@ -516,7 +521,7 @@ def theta(self):
if self.built:
return (
self.layer.theta
if isinstance(self.layer, LMUFFT)
if isinstance(self.layer, LMUFeedforward)
else self.layer.cell.theta
)

Expand All @@ -541,7 +546,7 @@ def build(self, input_shapes):
and input_shapes[1] is not None
and not self.trainable_theta
):
self.layer = LMUFFT(
self.layer = LMUFeedforward(
memory_d=self.memory_d,
order=self.order,
theta=self._init_theta,
Expand Down Expand Up @@ -620,15 +625,14 @@ def from_config(cls, config):
return super().from_config(config)


class LMUFFT(tf.keras.layers.Layer):
class LMUFeedforward(tf.keras.layers.Layer):
"""
Layer class for the FFT variant of the LMU.
Layer class for the feedforward variant of the LMU.

This class assumes no recurrent connections are desired in the memory component.

Produces the output of the delay system by evaluating the convolution of the input
sequence with the impulse response from the LMU cell. The convolution operation is
calculated using the fast Fourier transform (FFT).
sequence with the impulse response from the LMU cell.

Parameters
----------
Expand Down Expand Up @@ -665,6 +669,15 @@ class LMUFFT(tf.keras.layers.Layer):
return_sequences : bool, optional
If True, return the full output sequence. Otherwise, return just the last
output in the output sequence.
conv_mode : "fft" or "raw"
The method for performing the impulse response convolution. "fft" uses FFT
convolution (default). "raw" uses explicit convolution, which may be faster
for particular models on particular hardware.
truncate_ir : float
The portion of the impulse response to truncate when using "raw"
convolution (see ``conv_mode``). This is an approximate upper bound on the error
relative to the exact implementation. Smaller ``theta`` values result in more
truncated elements for a given value of ``truncate_ir``, improving efficiency.
"""

def __init__(
drasmuss marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -678,13 +691,18 @@ def __init__(
kernel_initializer="glorot_uniform",
dropout=0,
return_sequences=False,
conv_mode="fft",
truncate_ir=1e-4,
**kwargs,
):
super().__init__(**kwargs)

if input_to_hidden and hidden_cell is None:
raise ValueError("input_to_hidden must be False if hidden_cell is None")

if conv_mode not in ("fft", "raw"):
raise ValueError(f"Unrecognized conv mode '{conv_mode}'")

self.memory_d = memory_d
self.order = order
self.theta = theta
Expand All @@ -694,6 +712,8 @@ def __init__(
self.kernel_initializer = kernel_initializer
self.dropout = dropout
self.return_sequences = return_sequences
self.conv_mode = conv_mode.lower()
self.truncate_ir = truncate_ir

# create a standard LMUCell to generate the impulse response during `build`
self.delay_layer = tf.keras.layers.RNN(
Expand Down Expand Up @@ -733,19 +753,40 @@ def build(self, input_shape):
# TODO: we could dynamically run the impulse response for longer if
# needed using stateful=True
raise ValueError(
f"LMUFFT requires that the input shape's temporal axis be fully "
f"specified (got {seq_len})"
f"LMUFeedforward requires that the input shape's temporal axis be "
f"fully specified (got {seq_len})"
)

impulse = tf.reshape(tf.eye(seq_len, 1), (1, -1, 1))

self.impulse_response = tf.signal.rfft(
tf.squeeze(
tf.transpose(self.delay_layer(impulse, training=False)), axis=-1
),
fft_length=[2 * seq_len],
self.impulse_response = tf.squeeze(
self.delay_layer(impulse, training=False), axis=0
)

if self.conv_mode == "fft":
self.impulse_response = tf.signal.rfft(
tf.transpose(self.impulse_response),
fft_length=[2 * seq_len],
)
else:
if self.truncate_ir is not None:
assert self.impulse_response.shape == (seq_len, self.order)

cumsum = tf.math.cumsum(
tf.math.abs(self.impulse_response), axis=0, reverse=True
)
cumsum = cumsum / cumsum[0]
to_drop = tf.reduce_all(cumsum < self.truncate_ir, axis=-1)
if to_drop[-1]:
cutoff = tf.where(to_drop)[0, -1]
self.impulse_response = self.impulse_response[:cutoff]

self.impulse_response = tf.reshape(
self.impulse_response,
(self.impulse_response.shape[0], 1, 1, self.order),
)
self.impulse_response = self.impulse_response[::-1, :, :, :]

if self.kernel_initializer is not None:
self.kernel = self.add_weight(
name="kernel",
Expand Down Expand Up @@ -781,8 +822,6 @@ def call(self, inputs, training=None):
if training is None:
training = tf.keras.backend.learning_phase()

seq_len = tf.shape(inputs)[1]

if self.dropout:
inputs = tf.keras.layers.Dropout(
self.dropout, noise_shape=(inputs.shape[0], 1) + inputs.shape[2:]
Expand All @@ -795,21 +834,10 @@ def call(self, inputs, training=None):
else tf.matmul(inputs, self.kernel, name="input_encoder_mult")
)

# FFT requires shape (batch, memory_d, timesteps)
u = tf.transpose(u, perm=[0, 2, 1])

# Pad sequences to avoid circular convolution
# Perform the FFT
fft_input = tf.signal.rfft(u, fft_length=[2 * seq_len], name="input_pad")

# Elementwise product of FFT (with broadcasting)
result = tf.expand_dims(fft_input, axis=-2) * self.impulse_response

# Inverse FFT
m = tf.signal.irfft(result, fft_length=[2 * seq_len])[..., :seq_len]

m = tf.reshape(m, (-1, self.order * self.memory_d, seq_len))
m = tf.transpose(m, perm=[0, 2, 1])
if self.conv_mode == "fft":
m = self._fft_convolution(u)
elif self.conv_mode == "raw":
m = self._raw_convolution(u)

# apply hidden cell
h_in = tf.concat((m, inputs), axis=-1) if self.input_to_hidden else m
Expand All @@ -831,6 +859,41 @@ def call(self, inputs, training=None):

return h

def _fft_convolution(self, u):
    """Compute the delay-system memory via FFT-based convolution."""
    n_steps = tf.shape(u)[1]
    fft_len = [2 * n_steps]  # zero-pad so the circular convolution acts linearly

    # rfft operates over the last axis, so move time there:
    # (batch, timesteps, memory_d) -> (batch, memory_d, timesteps)
    u_freq = tf.signal.rfft(tf.transpose(u, perm=[0, 2, 1]), fft_length=fft_len)

    # convolution theorem: elementwise product of the spectra,
    # broadcasting the impulse response across the new `order` axis
    prod = tf.expand_dims(u_freq, axis=-2) * self.impulse_response

    # back to the time domain, keeping only the first n_steps samples
    conv = tf.signal.irfft(prod, fft_length=fft_len)[..., :n_steps]

    # flatten the (order, memory_d) axes and restore (batch, timesteps, features)
    conv = tf.reshape(conv, (-1, self.order * self.memory_d, n_steps))
    return tf.transpose(conv, perm=[0, 2, 1])

def _raw_convolution(self, u):
    """Compute the delay-system memory with an explicit convolution."""
    n_steps = tf.shape(u)[1]
    kernel_len = self.impulse_response.shape[0]

    # conv2d wants a trailing channel axis; add a singleton one
    expanded = tf.expand_dims(u, -1)

    # left-pad the time axis by kernel_len - 1 so the convolution is causal
    # (each output step depends only on current/past inputs)
    conv = tf.nn.conv2d(
        expanded,
        self.impulse_response,
        strides=1,
        data_format="NHWC",
        padding=[[0, 0], [kernel_len - 1, 0], [0, 0], [0, 0]],
    )
    return tf.reshape(conv, (-1, n_steps, self.memory_d * self.order))

def get_config(self):
"""Return config of layer (for serialization during model saving/loading)."""

Expand All @@ -846,6 +909,8 @@ def get_config(self):
kernel_initializer=self.kernel_initializer,
dropout=self.dropout,
return_sequences=self.return_sequences,
conv_mode=self.conv_mode,
arvoelke marked this conversation as resolved.
Show resolved Hide resolved
truncate_ir=self.truncate_ir,
)
)

Expand Down
18 changes: 18 additions & 0 deletions keras_lmu/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# pylint: disable=missing-docstring

import subprocess
import sys

# Determine whether TensorFlow can see a GPU. The probe runs in a
# separate interpreter because calling list_physical_devices() fixes
# certain process-level TensorFlow configuration options the first
# time it is invoked, and we don't want to do that in this process.
_gpu_probe = (
    "import sys; "
    "import tensorflow as tf; "
    "sys.exit(len(tf.config.list_physical_devices('GPU')) == 0)"
)
tf_gpu_installed = subprocess.call([sys.executable, "-c", _gpu_probe]) == 0