Skip to content

Commit

Permalink
Merge pull request #1041 from dwf/infer_shape_conv_transp
Browse files Browse the repository at this point in the history
ConvolutionalTranspose: infer original_image_size.
  • Loading branch information
vdumoulin committed Apr 1, 2016
2 parents 6a7ed66 + 9e9220e commit 61dcf8e
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 10 deletions.
67 changes: 59 additions & 8 deletions blocks/bricks/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,6 @@ class ConvolutionalTranspose(Convolutional):
Parameters
----------
original_image_size : tuple
The height and width of the image that forms the output of
the transpose operation, which is the input of the original
(non-transposed) convolution.
num_filters : int
Number of filters at the *output* of the transposed convolution,
i.e. the number of channels in the corresponding convolution.
Expand All @@ -190,19 +186,74 @@ class ConvolutionalTranspose(Convolutional):
Image size of the input to the *transposed* convolution, i.e.
the output of the corresponding convolution. Required for tied
biases. Defaults to ``None``.
unused_edge : tuple, optional
Tuple of pixels added to the inferred height and width of the
output image, whose values would be ignored in the corresponding
forward convolution. Must be such that 0 <= ``unused_edge[i]`` <=
``step[i]``. Note that this parameter is **ignored** if
``original_image_size`` is specified in the constructor or manually
set as an attribute.
original_image_size : tuple, optional
The height and width of the image that forms the output of
the transpose operation, which is the input of the original
(non-transposed) convolution. By default, this is inferred
from `image_size` to be the size that has each pixel of the
original image touched by at least one filter application
in the original convolution. Degenerate cases with dropped
border pixels (in the original convolution) are possible, and can
be manually specified via this argument. See notes below.
See Also
--------
:class:`Convolutional` : For the documentation of other parameters.
Notes
-----
By default, `original_image_size` is inferred from `image_size`
as being the *minimum* size of image that could have produced this
output. Let ``hanging[i] = original_image_size[i] - image_size[i]
* step[i]``. Any value of ``hanging[i]`` greater than
``filter_size[i] - step[i]`` will result in border pixels that are
ignored by the original convolution. With this brick, any
``original_image_size`` such that ``filter_size[i] - step[i] <
hanging[i] < filter_size[i]`` for all ``i`` can be validly specified.
However, no value will be output by the transposed convolution
itself for these extra hanging border pixels, and they will be
determined entirely by the bias.
"""
@lazy(allocation=['original_image_size', 'filter_size', 'num_filters',
'num_channels'])
def __init__(self, original_image_size, filter_size, num_filters,
num_channels, **kwargs):
@lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
def __init__(self, filter_size, num_filters, num_channels,
             original_image_size=None, unused_edge=(0, 0),
             **kwargs):
    """Initialize the transposed convolution brick.

    Parameters
    ----------
    filter_size : tuple
        Height and width of the filters (forwarded to
        :class:`Convolutional`).
    num_filters : int
        Number of output channels of the transposed convolution.
    num_channels : int
        Number of input channels of the transposed convolution.
    original_image_size : tuple, optional
        Height and width of the *output* of the transpose operation.
        When ``None`` (the default), it is inferred lazily by the
        ``original_image_size`` property from ``image_size``, ``step``,
        ``border_mode``, ``filter_size`` and ``unused_edge``.
    unused_edge : tuple, optional
        Extra border pixels added to the inferred output size; ignored
        when ``original_image_size`` is given. Defaults to ``(0, 0)``.

    """
    super(ConvolutionalTranspose, self).__init__(
        filter_size, num_filters, num_channels, **kwargs)
    # Stored via the property setter below; None means "infer on access".
    self.original_image_size = original_image_size
    self.unused_edge = unused_edge

@property
def original_image_size(self):
    """Height and width of the transposed convolution's output.

    Returns the explicitly configured value when one was set;
    otherwise infers the minimum original image size that the forward
    convolution could have reduced to ``image_size``, plus any
    ``unused_edge`` pixels.

    Raises
    ------
    ValueError
        If no value was set and ``image_size`` is unknown, so nothing
        can be inferred.

    """
    # An explicitly provided size always takes precedence.
    if self._original_image_size is not None:
        return self._original_image_size
    if all(dim is None for dim in self.image_size):
        raise ValueError("can't infer original_image_size, "
                         "no image_size set")
    # Translate border_mode into per-dimension implicit zero-padding
    # of the corresponding forward convolution.
    mode = self.border_mode
    if isinstance(mode, tuple):
        padding = mode
    elif mode == 'full':
        padding = tuple(fs - 1 for fs in self.filter_size)
    elif mode == 'half':
        padding = tuple(fs // 2 for fs in self.filter_size)
    else:
        padding = [0] * len(self.image_size)
    # Invert the forward-convolution output-size formula per dimension.
    inferred = []
    for size, stride, fs, pad, extra in zip(self.image_size, self.step,
                                            self.filter_size, padding,
                                            self.unused_edge):
        inferred.append(stride * (size - 1) + fs - 2 * pad + extra)
    return tuple(inferred)

@original_image_size.setter
def original_image_size(self, value):
    # Store the user-provided size; ``None`` means "infer from
    # image_size" in the getter above.
    self._original_image_size = value

def conv2d_impl(self, input_, W, input_shape, subsample, border_mode,
filter_shape):
Expand Down
92 changes: 90 additions & 2 deletions tests/bricks/test_conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from nose.tools import assert_raises_regexp

import theano
from numpy.testing import assert_allclose
from numpy.testing import assert_allclose, assert_raises
from theano import tensor
from theano import function

Expand Down Expand Up @@ -48,7 +48,8 @@ def test_convolutional_transpose():
filter_size = (3, 3)
step = (2, 2)
conv = ConvolutionalTranspose(
original_image_size, filter_size, num_filters, num_channels, step=step,
filter_size, num_filters, num_channels, step=step,
original_image_size=original_image_size,
image_size=image_size, weights_init=Constant(1.),
biases_init=Constant(5.))
conv.initialize()
Expand All @@ -65,6 +66,93 @@ def test_convolutional_transpose():
assert_allclose(func(x_val), expected_value + 5)


def test_convolutional_transpose_original_size_inference():
    """Inference of original_image_size with default ('valid') border."""
    brick = ConvolutionalTranspose(filter_size=(4, 5), num_filters=10,
                                   num_channels=5, step=(3, 2),
                                   image_size=(6, 9))
    brick.allocate()
    # Height: step 3, filter 4, 6 applications -> 3 * (6 - 1) + 4 == 19
    # (one dangling pixel beyond 6 * 3 == 18).
    # Width: step 2, filter 5, 9 applications -> 2 * (9 - 1) + 5 == 21
    # (three dangling pixels beyond 9 * 2 == 18).
    assert brick.original_image_size == (19, 21)
    x = tensor.tensor4()
    batch = numpy.empty((4, 5, 6, 9), dtype=theano.config.floatX)
    output = brick.apply(x).eval({x: batch})
    assert output.shape == (4, 10, 19, 21)


def test_convolutional_transpose_original_size_inference_padding():
    """Inference of original_image_size with explicit tuple padding."""
    brick = ConvolutionalTranspose(filter_size=(4, 5), num_filters=10,
                                   num_channels=5, step=(3, 2),
                                   border_mode=(2, 1),
                                   image_size=(6, 9))
    brick.allocate()
    # Padding (2, 1) shrinks the inferred size by 2 * padding per axis:
    # (19 - 4, 21 - 2) == (15, 19).
    assert brick.original_image_size == (15, 19)
    x = tensor.tensor4()
    batch = numpy.empty((4, 5, 6, 9), dtype=theano.config.floatX)
    output = brick.apply(x).eval({x: batch})
    assert output.shape == (4, 10, 15, 19)


def test_convolutional_transpose_original_size_inference_full_padding():
    """Inference of original_image_size with border_mode='full'."""
    brick = ConvolutionalTranspose(filter_size=(4, 5), num_filters=10,
                                   num_channels=5, step=(3, 2),
                                   border_mode='full',
                                   image_size=(6, 9))
    brick.allocate()
    # 'full' implies padding of filter_size - 1 == (3, 4) per side:
    # (19 - 6, 21 - 8) == (13, 13).
    assert brick.original_image_size == (13, 13)
    x = tensor.tensor4()
    batch = numpy.empty((4, 5, 6, 9), dtype=theano.config.floatX)
    output = brick.apply(x).eval({x: batch})
    assert output.shape == (4, 10, 13, 13)


def test_convolutional_transpose_original_size_inference_half_padding():
    """Inference of original_image_size with border_mode='half'."""
    brick = ConvolutionalTranspose(filter_size=(4, 5), num_filters=10,
                                   num_channels=5, step=(3, 2),
                                   border_mode='half',
                                   image_size=(6, 9))
    brick.allocate()
    # 'half' implies padding of filter_size // 2 == (2, 2) per side:
    # (19 - 4, 21 - 4) == (15, 17).
    assert brick.original_image_size == (15, 17)
    x = tensor.tensor4()
    batch = numpy.empty((4, 5, 6, 9), dtype=theano.config.floatX)
    output = brick.apply(x).eval({x: batch})
    assert output.shape == (4, 10, 15, 17)


def test_convolutional_transpose_original_size_inference_unused_edge():
    """Inference of original_image_size with extra unused_edge pixels."""
    brick = ConvolutionalTranspose(filter_size=(3, 3), num_filters=10,
                                   num_channels=5, step=(2, 2),
                                   border_mode=(1, 1), image_size=(4, 4),
                                   unused_edge=(1, 1))
    brick.allocate()
    # Base inference: 2 * (4 - 1) + 3 - 2 == 7 per axis; unused_edge
    # adds one more pixel, giving (8, 8).
    assert brick.original_image_size == (8, 8)
    x = tensor.tensor4()
    batch = numpy.empty((4, 5, 4, 4), dtype=theano.config.floatX)
    output = brick.apply(x).eval({x: batch})
    assert output.shape == (4, 10, 8, 8)


def test_convolutional_transpose_original_size_inferred_conv_sequence():
    """Allocation inside a ConvolutionalSequence should succeed.

    The brick is created without ``num_channels``/``image_size``; the
    enclosing :class:`ConvolutionalSequence` is expected to push those
    down so that ``original_image_size`` can be inferred at allocation
    time.
    """
    brick = ConvolutionalTranspose(filter_size=(4, 5), num_filters=10,
                                   step=(3, 2))
    seq = ConvolutionalSequence([brick], num_channels=5, image_size=(6, 9))
    # No try/except wrapper: an unexpected exception already fails the
    # test, and re-raising as AssertionError would only discard the
    # original traceback.
    seq.allocate()


def test_conv_transpose_exception():
    """Applying with tied biases but no image_size raises ValueError."""
    brick = ConvolutionalTranspose(filter_size=(4, 5), num_filters=10,
                                   num_channels=5, step=(3, 2),
                                   tied_biases=True)
    with assert_raises(ValueError):
        brick.apply(tensor.tensor4())


def test_border_mode_not_pushed():
layers = [Convolutional(border_mode='full'),
Convolutional(),
Expand Down

0 comments on commit 61dcf8e

Please sign in to comment.