Merge pull request #1009 from dwf/linear_interface_refactor
Refactor common stuff from Linear and Convolutional
dwf committed Mar 1, 2016
2 parents 47285eb + 687c90f commit 3f02718
Showing 8 changed files with 93 additions and 52 deletions.
7 changes: 4 additions & 3 deletions blocks/bricks/__init__.py
@@ -2,7 +2,8 @@
 from .base import application, Brick, lazy
 from .bn import (BatchNormalization, SpatialBatchNormalization,
                  BatchNormalizedMLP)
-from .interfaces import Activation, Feedforward, Initializable, Random
+from .interfaces import (Activation, Feedforward, Initializable, LinearLike,
+                         Random)
 from .simple import (Linear, Bias, Maxout, LinearMaxout, Identity, Tanh,
                      Logistic, Softplus, Rectifier, Softmax,
                      NDimensionalSoftmax)
@@ -11,8 +12,8 @@

 __all__ = ('application', 'Brick', 'lazy', 'BatchNormalization',
            'SpatialBatchNormalization', 'BatchNormalizedMLP',
-           'Activation', 'Feedforward', 'Initializable', 'Random',
-           'Linear', 'Bias', 'Maxout', 'LinearMaxout', 'Identity',
+           'Activation', 'Feedforward', 'Initializable', 'LinearLike',
+           'Random', 'Linear', 'Bias', 'Maxout', 'LinearMaxout', 'Identity',
            'Tanh', 'Logistic', 'Softplus', 'Rectifier', 'Softmax',
            'NDimensionalSoftmax', 'Sequence', 'FeedforwardSequence',
            'MLP', 'WithExtraDims')
24 changes: 6 additions & 18 deletions blocks/bricks/conv.py
@@ -3,13 +3,14 @@
     get_conv_output_shape)
 from theano.tensor.signal.pool import pool_2d, Pool

-from blocks.bricks import Initializable, Feedforward, Sequence, Activation
+from blocks.bricks import (Initializable, Feedforward, Sequence, Activation,
+                           LinearLike)
 from blocks.bricks.base import application, Brick, lazy
 from blocks.roles import add_role, FILTER, BIAS
 from blocks.utils import shared_floatx_nans


-class Convolutional(Initializable):
+class Convolutional(LinearLike):
     """Performs a 2D convolution.

     Parameters
@@ -106,14 +107,6 @@ def _allocate(self):
             self.parameters.append(b)
             self.add_auxiliary_variable(b.norm(2), name='b_norm')

-    def _initialize(self):
-        if self.use_bias:
-            W, b = self.parameters
-            self.biases_init.initialize(b, self.rng)
-        else:
-            W, = self.parameters
-        self.weights_init.initialize(W, self.rng)
-
     @application(inputs=['input_'], outputs=['output'])
     def apply(self, input_):
         """Perform the convolution.
@@ -136,29 +129,24 @@ def apply(self, input_):
             for 'valid' it is ``image_size - filter_size + 1`` while
             for 'full' it is ``image_size + filter_size - 1``.

         """
-        if self.use_bias:
-            W, b = self.parameters
-        else:
-            W, = self.parameters
-
         if self.image_size == (None, None):
             input_shape = None
         else:
             input_shape = (self.batch_size, self.num_channels)
             input_shape += self.image_size

         output = self.conv2d_impl(
-            input_, W,
+            input_, self.W,
             input_shape=input_shape,
             subsample=self.step,
             border_mode=self.border_mode,
             filter_shape=((self.num_filters, self.num_channels) +
                           self.filter_size))
         if self.use_bias:
             if self.tied_biases:
-                output += b.dimshuffle('x', 0, 'x', 'x')
+                output += self.b.dimshuffle('x', 0, 'x', 'x')
             else:
-                output += b.dimshuffle('x', 0, 1, 2)
+                output += self.b.dimshuffle('x', 0, 1, 2)
         return output

     def get_dim(self, name):
33 changes: 33 additions & 0 deletions blocks/bricks/interfaces.py
@@ -167,6 +167,39 @@ def _push_initialization_config(self):
                     child.biases_init = self.biases_init


+class LinearLike(Initializable):
+    """Initializable subclass with logic for :class:`Linear`-like classes.
+
+    Notes
+    -----
+    Provides `W` and `b` properties that can be overridden in subclasses
+    to implement pre-application transformations on the weights and
+    biases. Application methods should refer to ``self.W`` and ``self.b``
+    rather than accessing the parameters list directly.
+
+    This assumes a layout of the parameters list with the weights coming
+    first and biases (if ``use_bias`` is True) coming second.
+
+    """
+    @property
+    def W(self):
+        return self.parameters[0]
+
+    @property
+    def b(self):
+        if self.use_bias:
+            return self.parameters[1]
+        else:
+            raise AttributeError('use_bias is False')
+
+    def _initialize(self):
+        # Use self.parameters[] references in case W and b are overridden
+        # to return non-shared-variables.
+        if self.use_bias:
+            self.biases_init.initialize(self.parameters[1], self.rng)
+        self.weights_init.initialize(self.parameters[0], self.rng)
+
+
 class Random(Brick):
     """A mixin class for Bricks which need Theano RNGs.
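A note on the new interface: because application methods now read ``self.W`` and ``self.b`` while ``_initialize`` keeps writing to ``self.parameters`` directly, a subclass can transform its weights before application simply by overriding the ``W`` property. A minimal sketch of that idea, using the initialization schemes seen elsewhere in this diff (the ``ScaledLinear`` brick and its ``scale`` constant are hypothetical, for illustration only, not part of this change):

from theano import tensor

from blocks.bricks import Linear
from blocks.initialization import Constant, IsotropicGaussian


class ScaledLinear(Linear):
    # Hypothetical LinearLike subclass: Linear.apply reads self.W, so
    # overriding the W property changes the weights that get applied,
    # while _initialize still writes to self.parameters[0] as before.
    scale = 0.5  # assumed constant, purely for illustration

    @property
    def W(self):
        return super(ScaledLinear, self).W * self.scale


brick = ScaledLinear(input_dim=3, output_dim=4,
                     weights_init=IsotropicGaussian(0.01),
                     biases_init=Constant(0))
brick.initialize()                   # initializes the underlying shared variable
y = brick.apply(tensor.matrix('x'))  # applies the scaled weights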
17 changes: 13 additions & 4 deletions blocks/bricks/sequences.py
@@ -1,4 +1,5 @@
 """Bricks that compose together other bricks in linear sequences."""
+import copy
 from toolz import interleave
 from picklable_itertools.extras import equizip

@@ -86,6 +87,10 @@ class MLP(Sequence, Initializable, Feedforward):
     dims : list of ints
         A list of input dimensions, as well as the output dimension of the
         last layer. Required for :meth:`~.Brick.allocate`.
+    prototype : :class:`.Brick`, optional
+        The transformation prototype. A copy will be created for every
+        activation. If not provided, an instance of :class:`~simple.Linear`
+        will be used.

     Notes
     -----
@@ -107,11 +112,15 @@ class MLP(Sequence, Initializable, Feedforward):
     """
     @lazy(allocation=['dims'])
-    def __init__(self, activations, dims, **kwargs):
+    def __init__(self, activations, dims, prototype=None, **kwargs):
         self.activations = activations
-
-        self.linear_transformations = [Linear(name='linear_{}'.format(i))
-                                       for i in range(len(activations))]
+        self.prototype = Linear() if prototype is None else prototype
+        self.linear_transformations = []
+        for i in range(len(activations)):
+            linear = copy.deepcopy(self.prototype)
+            name = self.prototype.__class__.__name__.lower()
+            linear.name = '{}_{}'.format(name, i)
+            self.linear_transformations.append(linear)
         # Interleave the transformations and activations
         application_methods = []
         for entity in interleave([self.linear_transformations, activations]):
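For reference, a short sketch of how the new ``prototype`` argument might be used, following the copy-and-rename logic added above (the ``NoisyLinear`` class is a made-up stand-in for any custom ``Linear`` subclass):

from blocks.bricks import MLP, Linear, Tanh
from blocks.initialization import Constant, IsotropicGaussian


class NoisyLinear(Linear):
    # Stand-in for a user-defined transformation brick.
    pass


# Each layer is a deep copy of the prototype, named from
# prototype.__class__.__name__.lower() plus the layer index.
mlp = MLP(activations=[Tanh(), Tanh()], dims=[8, 16, 4],
          prototype=NoisyLinear(),
          weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
mlp.initialize()
assert [lt.name for lt in mlp.linear_transformations] == ['noisylinear_0',
                                                          'noisylinear_1']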
28 changes: 4 additions & 24 deletions blocks/bricks/simple.py
@@ -5,7 +5,7 @@

 from blocks.bricks.base import application, Brick, lazy
 from blocks.bricks.interfaces import Activation, Feedforward, Initializable
-from blocks.bricks.interfaces import Random  # noqa
+from blocks.bricks.interfaces import LinearLike, Random  # noqa

 from blocks.bricks.wrappers import WithExtraDims
 from blocks.roles import add_role, WEIGHT, BIAS
@@ -14,7 +14,7 @@
 logger = logging.getLogger(__name__)


-class Linear(Initializable, Feedforward):
+class Linear(LinearLike, Feedforward):
     r"""A linear transformation with optional bias.

     Brick which applies a linear (affine) transformation by multiplying
@@ -44,14 +44,6 @@ def __init__(self, input_dim, output_dim, **kwargs):
         self.input_dim = input_dim
         self.output_dim = output_dim

-    @property
-    def W(self):
-        return self.parameters[0]
-
-    @property
-    def b(self):
-        return self.parameters[1]
-
     def _allocate(self):
         W = shared_floatx_nans((self.input_dim, self.output_dim), name='W')
         add_role(W, WEIGHT)
@@ -63,14 +55,6 @@ def _allocate(self):
             self.parameters.append(b)
             self.add_auxiliary_variable(b.norm(2), name='b_norm')

-    def _initialize(self):
-        if self.use_bias:
-            W, b = self.parameters
-            self.biases_init.initialize(b, self.rng)
-        else:
-            W, = self.parameters
-        self.weights_init.initialize(W, self.rng)
-
     @application(inputs=['input_'], outputs=['output'])
     def apply(self, input_):
         """Apply the linear transformation.
@@ -86,13 +70,9 @@ def apply(self, input_):
             The transformed input plus optional bias

         """
-        if self.use_bias:
-            W, b = self.parameters
-        else:
-            W, = self.parameters
-        output = tensor.dot(input_, W)
+        output = tensor.dot(input_, self.W)
         if self.use_bias:
-            output += b
+            output += self.b
         return output

     def get_dim(self, name):
6 changes: 3 additions & 3 deletions blocks/serialization.py
@@ -321,7 +321,7 @@ def add_to_dump(object_, file_, name, parameters=None, use_cpickle=False,

     """
     if name in ['_pkl', '_parameters']:
-        raise ValueError("_pkl and _parameters are reserved names and can't" \
+        raise ValueError("_pkl and _parameters are reserved names and can't"
                          " be used as name for your object.")

     external_parameters = {}
@@ -337,7 +337,7 @@ def add_to_dump(object_, file_, name, parameters=None, use_cpickle=False,
         file_.seek(0)  # To be able to read what is in the tar file already.
         with closing(tarfile.TarFile(fileobj=file_, mode='r')) as tar_file:
             if '_parameters' not in tar_file.getnames():
-                raise ValueError("There is no parameters in the archive, so" \
+                raise ValueError("There is no parameters in the archive, so"
                                  " you can't use the argument parameters.")
             else:
                 parameters = numpy.load(
@@ -346,7 +346,7 @@ def add_to_dump(object_, file_, name, parameters=None, use_cpickle=False,
                 s2 = [_unmangle_parameter_name(x)[1] for x in
                       external_parameters.values()]
                 if not s1.issuperset(s2):
-                    raise ValueError('The set of parameters is different' \
+                    raise ValueError('The set of parameters is different'
                                      ' from the one in the archive.')

     if use_cpickle:
10 changes: 10 additions & 0 deletions tests/bricks/test_bricks.py
@@ -362,6 +362,16 @@ def test_mlp():
     assert mlp.rng == mlp.linear_transformations[0].rng


+def test_mlp_prototype_argument():
+    class MyLinear(Linear):
+        pass
+    mlp = MLP(activations=[Tanh(), Tanh(), None],
+              dims=[4, 5, 6, 7], prototype=MyLinear())
+    assert all(isinstance(lt, MyLinear) for lt in mlp.linear_transformations)
+    assert all(lt.name == 'mylinear_{}'.format(i)
+               for i, lt in enumerate(mlp.linear_transformations))
+
+
 def test_mlp_apply():
     x = tensor.matrix()
     x_val = numpy.random.rand(2, 16).astype(theano.config.floatX)
20 changes: 20 additions & 0 deletions tests/bricks/test_interfaces.py
@@ -0,0 +1,20 @@
+import numpy
+import theano
+from theano import tensor
+from blocks.bricks import Linear
+from blocks.initialization import Constant, IsotropicGaussian
+
+
+def test_linearlike_subclass_initialize_works_overridden_w():
+    class NotQuiteLinear(Linear):
+        @property
+        def W(self):
+            W = super(NotQuiteLinear, self).W
+            return W / tensor.sqrt((W ** 2).sum(axis=0))
+
+    brick = NotQuiteLinear(5, 10, weights_init=IsotropicGaussian(0.02),
+                           biases_init=Constant(1))
+    brick.initialize()
+    assert not numpy.isnan(brick.parameters[0].get_value()).any()
+    numpy.testing.assert_allclose((brick.W ** 2).sum(axis=0).eval(), 1,
+                                  rtol=1e-6)
