Skip to content

Commit

Permalink
Merge pull request #1041 from dwf/infer_shape_conv_transp
Browse files Browse the repository at this point in the history
ConvolutionalTranspose: infer original_image_size.
  • Loading branch information
vdumoulin committed Apr 1, 2016
2 parents 6a7ed66 + 9e9220e commit 61dcf8e
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 10 deletions.
67 changes: 59 additions & 8 deletions blocks/bricks/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,6 @@ class ConvolutionalTranspose(Convolutional):
Parameters
----------
original_image_size : tuple
The height and width of the image that forms the output of
the transpose operation, which is the input of the original
(non-transposed) convolution.
num_filters : int
Number of filters at the *output* of the transposed convolution,
i.e. the number of channels in the corresponding convolution.
Expand All @@ -190,19 +186,74 @@ class ConvolutionalTranspose(Convolutional):
Image size of the input to the *transposed* convolution, i.e.
the output of the corresponding convolution. Required for tied
biases. Defaults to ``None``.
unused_edge : tuple, optional
Tuple of pixels added to the inferred height and width of the
output image, whose values would be ignored in the corresponding
forward convolution. Must be such that 0 <= ``unused_edge[i]`` <=
``step[i]``. Note that this parameter is **ignored** if
``original_image_size`` is specified in the constructor or manually
set as an attribute.
original_image_size : tuple, optional
The height and width of the image that forms the output of
the transpose operation, which is the input of the original
(non-transposed) convolution. By default, this is inferred
from `image_size` to be the size that has each pixel of the
original image touched by at least one filter application
in the original convolution. Degenerate cases with dropped
border pixels (in the original convolution) are possible, and can
be manually specified via this argument. See notes below.
See Also
--------
:class:`Convolutional` : For the documentation of other parameters.
Notes
-----
By default, `original_image_size` is inferred from `image_size`
as being the *minimum* size of image that could have produced this
output. Let ``hanging[i] = original_image_size[i] - image_size[i]
* step[i]``. Any value of ``hanging[i]`` greater than
``filter_size[i] - step[i]`` will result in border pixels that are
ignored by the original convolution. With this brick, any
``original_image_size`` such that ``filter_size[i] - step[i] <
hanging[i] < filter_size[i]`` for all ``i`` can be validly specified.
However, no value will be output by the transposed convolution
itself for these extra hanging border pixels, and they will be
determined entirely by the bias.
"""
@lazy(allocation=['original_image_size', 'filter_size', 'num_filters',
'num_channels'])
def __init__(self, original_image_size, filter_size, num_filters,
num_channels, **kwargs):
@lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
def __init__(self, filter_size, num_filters, num_channels,
             original_image_size=None, unused_edge=(0, 0),
             **kwargs):
    """Initialize the transposed convolution brick.

    Parameters
    ----------
    filter_size : tuple
        Height and width of the filters (forwarded to
        :class:`Convolutional`).
    num_filters : int
        Number of output channels of the transposed convolution.
    num_channels : int
        Number of input channels of the transposed convolution.
    original_image_size : tuple, optional
        Height and width of the *output* of the transpose operation.
        When ``None`` (the default), it is inferred lazily by the
        ``original_image_size`` property from ``image_size``, ``step``,
        ``border_mode``, ``filter_size`` and ``unused_edge``.
    unused_edge : tuple, optional
        Extra border pixels added to the inferred output size; ignored
        when ``original_image_size`` is given. Defaults to ``(0, 0)``.

    """
    super(ConvolutionalTranspose, self).__init__(
        filter_size, num_filters, num_channels, **kwargs)
    # Stored via the property setter below; None means "infer on access".
    self.original_image_size = original_image_size
    self.unused_edge = unused_edge

@property
def original_image_size(self):
    """Height and width of the transposed convolution's output.

    Returns the explicitly configured value when one was set;
    otherwise infers the minimum original image size that the forward
    convolution could have reduced to ``image_size``, plus any
    ``unused_edge`` pixels.

    Raises
    ------
    ValueError
        If no value was set and ``image_size`` is unknown, so nothing
        can be inferred.

    """
    # An explicitly provided size always takes precedence.
    if self._original_image_size is not None:
        return self._original_image_size
    if all(dim is None for dim in self.image_size):
        raise ValueError("can't infer original_image_size, "
                         "no image_size set")
    # Translate border_mode into per-dimension implicit zero-padding
    # of the corresponding forward convolution.
    mode = self.border_mode
    if isinstance(mode, tuple):
        padding = mode
    elif mode == 'full':
        padding = tuple(fs - 1 for fs in self.filter_size)
    elif mode == 'half':
        padding = tuple(fs // 2 for fs in self.filter_size)
    else:
        padding = [0] * len(self.image_size)
    # Invert the forward-convolution output-size formula per dimension.
    inferred = []
    for size, stride, fs, pad, extra in zip(self.image_size, self.step,
                                            self.filter_size, padding,
                                            self.unused_edge):
        inferred.append(stride * (size - 1) + fs - 2 * pad + extra)
    return tuple(inferred)

@original_image_size.setter
def original_image_size(self, value):
    # Store the user-provided size; ``None`` means "infer from
    # image_size" in the getter above.
    self._original_image_size = value

def conv2d_impl(self, input_, W, input_shape, subsample, border_mode,
filter_shape):
Expand Down
92 changes: 90 additions & 2 deletions tests/bricks/test_conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from nose.tools import assert_raises_regexp

import theano
from numpy.testing import assert_allclose
from numpy.testing import assert_allclose, assert_raises
from theano import tensor
from theano import function

Expand Down Expand Up @@ -48,7 +48,8 @@ def test_convolutional_transpose():
filter_size = (3, 3)
step = (2, 2)
conv = ConvolutionalTranspose(
original_image_size, filter_size, num_filters, num_channels, step=step,
filter_size, num_filters, num_channels, step=step,
original_image_size=original_image_size,
image_size=image_size, weights_init=Constant(1.),
biases_init=Constant(5.))
conv.initialize()
Expand All @@ -65,6 +66,93 @@ def test_convolutional_transpose():
assert_allclose(func(x_val), expected_value + 5)


def test_convolutional_transpose_original_size_inference():
    """Inference of original_image_size with default ('valid') border."""
    brick = ConvolutionalTranspose(filter_size=(4, 5), num_filters=10,
                                   num_channels=5, step=(3, 2),
                                   image_size=(6, 9))
    brick.allocate()
    # Height: step 3, filter 4, 6 applications -> 3 * (6 - 1) + 4 == 19
    # (one dangling pixel beyond 6 * 3 == 18).
    # Width: step 2, filter 5, 9 applications -> 2 * (9 - 1) + 5 == 21
    # (three dangling pixels beyond 9 * 2 == 18).
    assert brick.original_image_size == (19, 21)
    x = tensor.tensor4()
    batch = numpy.empty((4, 5, 6, 9), dtype=theano.config.floatX)
    output = brick.apply(x).eval({x: batch})
    assert output.shape == (4, 10, 19, 21)


def test_convolutional_transpose_original_size_inference_padding():
    """Inference of original_image_size with explicit tuple padding."""
    brick = ConvolutionalTranspose(filter_size=(4, 5), num_filters=10,
                                   num_channels=5, step=(3, 2),
                                   border_mode=(2, 1),
                                   image_size=(6, 9))
    brick.allocate()
    # Padding (2, 1) shrinks the inferred size by 2 * padding per axis:
    # (19 - 4, 21 - 2) == (15, 19).
    assert brick.original_image_size == (15, 19)
    x = tensor.tensor4()
    batch = numpy.empty((4, 5, 6, 9), dtype=theano.config.floatX)
    output = brick.apply(x).eval({x: batch})
    assert output.shape == (4, 10, 15, 19)


def test_convolutional_transpose_original_size_inference_full_padding():
    """Inference of original_image_size with border_mode='full'."""
    brick = ConvolutionalTranspose(filter_size=(4, 5), num_filters=10,
                                   num_channels=5, step=(3, 2),
                                   border_mode='full',
                                   image_size=(6, 9))
    brick.allocate()
    # 'full' implies padding of filter_size - 1 == (3, 4) per side:
    # (19 - 6, 21 - 8) == (13, 13).
    assert brick.original_image_size == (13, 13)
    x = tensor.tensor4()
    batch = numpy.empty((4, 5, 6, 9), dtype=theano.config.floatX)
    output = brick.apply(x).eval({x: batch})
    assert output.shape == (4, 10, 13, 13)


def test_convolutional_transpose_original_size_inference_half_padding():
    """Inference of original_image_size with border_mode='half'."""
    brick = ConvolutionalTranspose(filter_size=(4, 5), num_filters=10,
                                   num_channels=5, step=(3, 2),
                                   border_mode='half',
                                   image_size=(6, 9))
    brick.allocate()
    # 'half' implies padding of filter_size // 2 == (2, 2) per side:
    # (19 - 4, 21 - 4) == (15, 17).
    assert brick.original_image_size == (15, 17)
    x = tensor.tensor4()
    batch = numpy.empty((4, 5, 6, 9), dtype=theano.config.floatX)
    output = brick.apply(x).eval({x: batch})
    assert output.shape == (4, 10, 15, 17)


def test_convolutional_transpose_original_size_inference_unused_edge():
    """Inference of original_image_size with extra unused_edge pixels."""
    brick = ConvolutionalTranspose(filter_size=(3, 3), num_filters=10,
                                   num_channels=5, step=(2, 2),
                                   border_mode=(1, 1), image_size=(4, 4),
                                   unused_edge=(1, 1))
    brick.allocate()
    # Base inference: 2 * (4 - 1) + 3 - 2 == 7 per axis; unused_edge
    # adds one more pixel, giving (8, 8).
    assert brick.original_image_size == (8, 8)
    x = tensor.tensor4()
    batch = numpy.empty((4, 5, 4, 4), dtype=theano.config.floatX)
    output = brick.apply(x).eval({x: batch})
    assert output.shape == (4, 10, 8, 8)


def test_convolutional_transpose_original_size_inferred_conv_sequence():
    """Allocation inside a ConvolutionalSequence should succeed.

    The brick is created without ``num_channels``/``image_size``; the
    enclosing :class:`ConvolutionalSequence` is expected to push those
    down so that ``original_image_size`` can be inferred at allocation
    time.
    """
    brick = ConvolutionalTranspose(filter_size=(4, 5), num_filters=10,
                                   step=(3, 2))
    seq = ConvolutionalSequence([brick], num_channels=5, image_size=(6, 9))
    # No try/except wrapper: an unexpected exception already fails the
    # test, and re-raising as AssertionError would only discard the
    # original traceback.
    seq.allocate()


def test_conv_transpose_exception():
    """Applying with tied biases but no image_size raises ValueError."""
    brick = ConvolutionalTranspose(filter_size=(4, 5), num_filters=10,
                                   num_channels=5, step=(3, 2),
                                   tied_biases=True)
    with assert_raises(ValueError):
        brick.apply(tensor.tensor4())


def test_border_mode_not_pushed():
layers = [Convolutional(border_mode='full'),
Convolutional(),
Expand Down

0 comments on commit 61dcf8e

Please sign in to comment.