Merge pull request #1106 from dmitriy-serdyuk/test-documentation
Test documentation
dmitriy-serdyuk committed Jun 5, 2016
2 parents: 9d32e57 + 96cbf06 · commit 3a7dde7
Showing 21 changed files with 158 additions and 107 deletions.
16 changes: 16 additions & 0 deletions .travis.yml
@@ -21,6 +21,10 @@ matrix:
env: TESTS=blocks-examples FLOATX=float32 DB=sqlite
- python: 3.4
env: TESTS=blocks-examples FLOATX=float64
+ - python: 2.7
+   env: TESTS=documentation FLOATX=float32
+ - python: 3.4
+   env: TESTS=documentation FLOATX=float64
before_install:
- # Setup Python environment with BLAS libraries
- wget -q http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
@@ -32,7 +36,15 @@ before_install:
install:
# Install all Python dependencies
- conda install -q --yes python=$TRAVIS_PYTHON_VERSION --file req-travis-conda.txt
+ - |
+   if [[ $TESTS == 'documentation' ]]; then
+     conda install -q --yes python=$TRAVIS_PYTHON_VERSION graphviz==2.38.0
+   fi
- pip install -q -r req-travis-pip.txt
+ - |
+   if [[ $TESTS == 'documentation' ]]; then
+     pip install -q -r req-travis-docs-pip.txt
+   fi
script:
- pip install -e . -r requirements.txt # Tests setup.py
- curl https://raw.githubusercontent.com/mila-udem/fuel/master/.travis-data.sh | bash -s -- mnist
@@ -56,6 +68,10 @@ script:
cd blocks-examples
nose2 tests
fi
+ - |
+   if [[ $TESTS == 'documentation' ]]; then
+     sphinx-build -vW docs html
+   fi
after_script:
- |
if [[ $TESTS == 'blocks' ]]; then
4 changes: 2 additions & 2 deletions blocks/algorithms/__init__.py
@@ -798,9 +798,9 @@ class AdaGrad(StepRule):
-----
For more information, see [ADAGRAD]_.
- .. [ADADGRAD] Duchi J, Hazan E, Singer Y.,
+ .. [ADAGRAD] Duchi J, Hazan E, Singer Y.,
*Adaptive subgradient methods for online learning and
- stochastic optimization*,
+ stochastic optimization*,
http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf
"""
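The hunk above only fixes the ``[ADAGRAD]`` citation label. For readers skimming the diff, the rule the citation backs divides each update by the root of the accumulated squared gradients. A minimal sketch (the function name and the default learning rate and epsilon here are assumptions, not Blocks' ``AdaGrad`` step rule):

```python
import numpy as np

def adagrad_step(param, grad, accum, learning_rate=0.002, epsilon=1e-6):
    """One AdaGrad update (illustrative sketch only)."""
    accum = accum + grad ** 2                  # accumulate squared gradients
    step = learning_rate * grad / (np.sqrt(accum) + epsilon)
    return param - step, accum

# Repeated identical gradients produce shrinking steps:
p0, a0 = np.array([1.0]), np.array([0.0])
p1, a1 = adagrad_step(p0, np.array([1.0]), a0)
p2, a2 = adagrad_step(p1, np.array([1.0]), a1)
```

Because the accumulator only grows, frequently-updated parameters take ever smaller steps, which is the adaptive behaviour the cited paper analyses.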
6 changes: 3 additions & 3 deletions blocks/bricks/attention.py
@@ -288,7 +288,7 @@ class SequenceContentAttention(GenericSequenceAttention, Initializable):
The dimension of the sequence elements.
match_dim : int
The dimension of the match vector.
- state_transformer : :class:`.Brick`
+ state_transformer : :class:`~.bricks.Brick`
A prototype for state transformations. If ``None``,
a linear transformation is used.
attended_transformer : :class:`.Feedforward`
@@ -503,9 +503,9 @@ class AttentionRecurrent(AbstractAttentionRecurrent, Initializable):
----------
transition : :class:`.BaseRecurrent`
The recurrent transition.
- attention : :class:`.Brick`
+ attention : :class:`~.bricks.Brick`
The attention mechanism.
- distribute : :class:`.Brick`, optional
+ distribute : :class:`~.bricks.Brick`, optional
Distributes the information from glimpses across the input
sequences of the transition. By default a :class:`.Distribute` is
used, and those inputs containing the "mask" substring in their
2 changes: 1 addition & 1 deletion blocks/bricks/base.py
@@ -778,7 +778,7 @@ def get_unique_path(self):
def get_hierarchical_name(self, parameter, delimiter=BRICK_DELIMITER):
"""Return hierarchical name for a parameter.
- Returns a path of the form _brick1/brick2/brick3.parameter1_. The
+ Returns a path of the form ``brick1/brick2/brick3.parameter1``. The
delimiter is configurable.
Parameters
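The docstring fix above switches the path example to literal markup. As a sketch of the naming scheme it documents (a hypothetical helper, not the real ``get_hierarchical_name``, which walks the parameter's brick hierarchy):

```python
def hierarchical_name(brick_path, parameter_name, delimiter='/'):
    """Join brick names with `delimiter` and append `.parameter_name`."""
    return delimiter.join(brick_path) + '.' + parameter_name

# Yields the documented form: brick1/brick2/brick3.parameter1
name = hierarchical_name(['brick1', 'brick2', 'brick3'], 'parameter1')
```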
3 changes: 2 additions & 1 deletion blocks/bricks/bn.py
@@ -410,7 +410,8 @@ def _nested_brick_property_setter(self, value, property_name):
conserve_memory = property(partial(_nested_brick_property_getter,
property_name='conserve_memory'),
partial(_nested_brick_property_setter,
- property_name='conserve_memory'))
+ property_name='conserve_memory'),
+ doc="Conserve memory.")

def _push_allocation_config(self):
super(BatchNormalizedMLP, self)._push_allocation_config()
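The ``doc=`` argument added in this hunk gives the forwarded property a docstring; without it, the property's ``__doc__`` is ``None`` and autodoc has nothing to show. The pattern in isolation, with toy classes standing in for ``BatchNormalizedMLP`` and its children:

```python
from functools import partial

def _getter(self, property_name):
    return getattr(self.child, property_name)

def _setter(self, value, property_name):
    setattr(self.child, property_name, value)

class Child(object):
    conserve_memory = True

class Parent(object):
    def __init__(self):
        self.child = Child()
    # Without doc=..., this property would have no docstring at all.
    conserve_memory = property(partial(_getter, property_name='conserve_memory'),
                               partial(_setter, property_name='conserve_memory'),
                               doc="Conserve memory.")

p = Parent()
p.conserve_memory = False    # forwarded to the child through the setter
```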
4 changes: 2 additions & 2 deletions blocks/bricks/conv.py
@@ -386,7 +386,7 @@ class MaxPooling(Pooling):
input is captured by at least one pool by using the `padding`
argument to add zero padding prior to pooling being performed.
- .. [cuDNN]: `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.
+ .. [cuDNN] `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.
"""
@lazy(allocation=['pooling_size'])
@@ -476,7 +476,7 @@ class ConvolutionalSequence(Sequence, Initializable, Feedforward):
is, without specifying the batch_size, num_channels and image_size. The
main feature of :class:`ConvolutionalSequence` is that it will set the
input dimensions of a layer to the output dimensions of the previous
- layer by the :meth:`~.Brick.push_allocation_config` method.
+ layer by the :meth:`~bricks.Brick.push_allocation_config` method.
The push behaviour of `tied_biases` mirrors that of `use_bias` or any
initialization configuration: only an explicitly specified value is
16 changes: 8 additions & 8 deletions blocks/bricks/recurrent/architectures.py
@@ -19,7 +19,7 @@ class SimpleRecurrent(BaseRecurrent, Initializable):
----------
dim : int
The dimension of the hidden state
- activation : :class:`.Brick`
+ activation : :class:`~.bricks.Brick`
The brick to apply as activation.
Notes
@@ -114,10 +114,10 @@ class LSTM(BaseRecurrent, Initializable):
----------
dim : int
The dimension of the hidden state.
- activation : :class:`.Brick`, optional
+ activation : :class:`~.bricks.Brick`, optional
The activation function. The default and by far the most popular
is :class:`.Tanh`.
- gate_activation : :class:`.Brick` or None
+ gate_activation : :class:`~.bricks.Brick` or None
The brick to apply as activation for gates (input/output/forget).
If ``None`` a :class:`.Logistic` brick is used.
@@ -256,10 +256,10 @@ class GatedRecurrent(BaseRecurrent, Initializable):
----------
dim : int
The dimension of the hidden state.
- activation : :class:`.Brick` or None
+ activation : :class:`~.bricks.Brick` or None
The brick to apply as activation. If ``None`` a
:class:`.Tanh` brick is used.
- gate_activation : :class:`.Brick` or None
+ gate_activation : :class:`~.bricks.Brick` or None
The brick to apply as activation for gates. If ``None`` a
:class:`.Logistic` brick is used.
@@ -268,9 +268,9 @@ class GatedRecurrent(BaseRecurrent, Initializable):
See :class:`.Initializable` for initialization parameters.
.. [CvMG14] Kyunghyun Cho, Bart van Merriënboer, Çağlar Gülçehre,
- Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua
- Bengio, *Learning Phrase Representations using RNN Encoder-Decoder
- for Statistical Machine Translation*, EMNLP (2014), pp. 1724-1734.
+ Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua
+ Bengio, *Learning Phrase Representations using RNN Encoder-Decoder
+ for Statistical Machine Translation*, EMNLP (2014), pp. 1724-1734.
"""
@lazy(allocation=['dim'])
6 changes: 3 additions & 3 deletions blocks/bricks/recurrent/misc.py
@@ -359,17 +359,17 @@ def low_memory_apply(self, *args, **kwargs):

@application
def apply(self, *args, **kwargs):
- """Apply the stack of transitions.
+ r"""Apply the stack of transitions.
Parameters
----------
low_memory : bool
Use the slow, but also memory efficient, implementation of
this code.
- *args : :class:`~tensor.TensorVariable`, optional
+ \*args : :class:`~tensor.TensorVariable`, optional
Positional arguments in the order in which they appear in
`self.apply.sequences` followed by `self.apply.contexts`.
- **kwargs : :class:`~tensor.TensorVariable`
+ \*\*kwargs : :class:`~tensor.TensorVariable`
Named argument defined in `self.apply.sequences`,
`self.apply.states` or `self.apply.contexts`
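The ``r"""`` prefix added above keeps the backslashes in ``\*args`` and ``\*\*kwargs`` intact, so Sphinx renders literal asterisks instead of starting emphasis. The difference in plain Python:

```python
def plain():
    """Takes \\*args."""     # a plain string needs the backslash doubled

def raw():
    r"""Takes \*args."""     # a raw string keeps the single backslash as-is

# Both docstrings end up containing one backslash followed by an asterisk.
assert plain.__doc__ == raw.__doc__
```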
4 changes: 2 additions & 2 deletions blocks/bricks/sequence_generators.py
@@ -137,7 +137,7 @@ class BaseSequenceGenerator(Initializable):
The readout component of the sequence generator.
transition : instance of :class:`AbstractAttentionRecurrent`
The transition component of the sequence generator.
- fork : :class:`.Brick`
+ fork : :class:`~.bricks.Brick`
The brick to compute the transition's inputs from the feedback.
See Also
@@ -479,7 +479,7 @@ class Readout(AbstractReadout):
The emitter component.
feedback_brick : an instance of :class:`AbstractFeedback`
The feedback component.
- merge : :class:`.Brick`, optional
+ merge : :class:`~.bricks.Brick`, optional
A brick that takes the sources given in `source_names` as an input
and combines them into a single output. If given, `merge_prototype`
cannot be given.
4 changes: 2 additions & 2 deletions blocks/filter.py
@@ -44,8 +44,8 @@ class VariableFilter(object):
----------
roles : list of :class:`.VariableRole` instances, optional
Matches any variable which has one of the roles given.
- bricks : list of :class:`.Brick` classes or list of instances of
- :class:`.Brick`, optional
+ bricks : list of :class:`~.bricks.Brick` classes or list of
+ instances of :class:`~.bricks.Brick`, optional
Matches any variable that is instance of any of the given classes
or that is owned by any of the given brick instances.
each_role : bool, optional
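This hunk only re-wraps the ``bricks`` parameter description; the role-matching behaviour the docstring describes can be sketched as follows (toy stand-ins, not Blocks' ``VariableFilter``):

```python
class Role(object):
    def __init__(self, name):
        self.name = name

INPUT, OUTPUT = Role('input'), Role('output')

class Var(object):
    def __init__(self, name, roles):
        self.name, self.roles = name, roles

def filter_by_roles(variables, roles):
    """Keep variables carrying at least one of the given roles."""
    return [v for v in variables if any(r in v.roles for r in roles)]

vs = [Var('x', [INPUT]), Var('y', [OUTPUT]), Var('z', [INPUT, OUTPUT])]
```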
11 changes: 5 additions & 6 deletions blocks/initialization.py
@@ -163,13 +163,12 @@ class Orthogonal(NdarrayInitialization):
scale : float, optional
Multiply the resulting matrix with a scalar. Defaults to 1.
For a discussion of the importance of scale for training time
- and generalization refer to [Saxe2013]_.
+ and generalization refer to [Saxe2013]_.
- ..
- [Saxe2014] Saxe, A.M., McClelland, J.L., Ganguli, S., 2013.
- Exact solutions to the nonlinear dynamics of learning in deep
- linear neural networks.
- arXiv:1312.6120 [cond-mat, q-bio, stat].
+ .. [Saxe2013] Saxe, A.M., McClelland, J.L., Ganguli, S., 2013.,
+ *Exact solutions to the nonlinear dynamics of learning in deep
+ linear neural networks*,
+ arXiv:1312.6120 [cond-mat, q-bio, stat].
"""
def __init__(self, scale=1):
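Beyond the citation cleanup above, orthogonal initialization itself is commonly implemented with a QR decomposition. A hedged sketch (a common recipe; the details of Blocks' ``Orthogonal`` class may differ, and the seed is mine):

```python
import numpy as np

def orthogonal(shape, scale=1.0, rng=None):
    """Sample a scaled orthogonal matrix via QR decomposition."""
    rng = rng if rng is not None else np.random.RandomState(2016)
    q, r = np.linalg.qr(rng.normal(0.0, 1.0, shape))
    q *= np.sign(np.diag(r))   # sign fix: uniform over orthogonal matrices
    return scale * q

w = orthogonal((4, 4))   # rows and columns are orthonormal
```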
4 changes: 2 additions & 2 deletions blocks/roles.py
@@ -75,14 +75,14 @@ def __repr__(self):
class InputRole(VariableRole):
pass

- #: The input of a :class:`.Brick`
+ #: The input of a :class:`~.bricks.Brick`
INPUT = InputRole()


class OutputRole(VariableRole):
pass

- #: The output of a :class:`.Brick`
+ #: The output of a :class:`~.bricks.Brick`
OUTPUT = OutputRole()


2 changes: 1 addition & 1 deletion blocks/select.py
@@ -105,7 +105,7 @@ class Selector(object):
Parameters
----------
- bricks : list of :class:`.Brick`
+ bricks : list of :class:`~.bricks.Brick`
The bricks of the selection.
"""
2 changes: 1 addition & 1 deletion blocks/serialization.py
@@ -259,7 +259,7 @@ def load(file_, name='_pkl', use_cpickle=False, **kwargs):
Returns
-------
- The object saved in file_.
+ The object saved in ``file_``.
"""
file_.seek(0) # To be able to read several objects in one file
4 changes: 3 additions & 1 deletion docs/api/bricks.rst
@@ -10,7 +10,9 @@ Bricks

.. automodule:: blocks.bricks
:members:
- :exclude-members: Activation, ActivationDocumentation
+ :exclude-members: Activation, ActivationDocumentation, BaseRecurrent,
+ recurrent, SimpleRecurrent, LSTM, GatedRecurrent,
+ Bidirectional, RecurrentStack, RECURRENTSTACK_SEPARATOR
:undoc-members:
:show-inheritance:

29 changes: 16 additions & 13 deletions docs/bricks_overview.rst
@@ -26,8 +26,9 @@ The life-cycle of a brick is as follows:
Theano variable is created in this phase.

2. **Allocation:** (optional) allocate the Theano shared variables for the
- *parameters* of the Brick. When :meth:`.Brick.allocate` is called, the
- required Theano variables are allocated and initialized by default to ``NaN``.
+ *parameters* of the Brick. When :meth:`~.bricks.Brick.allocate` is
+ called, the required Theano variables are allocated and initialized by
+ default to ``NaN``.

3. **Application:** instantiate a part of the Theano computational graph,
linking the inputs and the outputs of the brick through its *parameters*
@@ -41,7 +42,7 @@ The life-cycle of a brick is as follows:
.. note::
If the Theano variables of the brick object have not been allocated when
:meth:`~.Application.apply` is called, Blocks will quietly call
- :meth:`.Brick.allocate`.
+ :meth:`~.bricks.Brick.allocate`.

Example
^^^^^^^
@@ -82,8 +83,8 @@ of a brick we refer to this as *allocation*.
array([ nan, nan, nan, nan, nan])

By default, all our parameters are set to ``NaN``. To initialize them, simply
- call the :meth:`.Brick.initialize` method. This is the last step in the
- brick lifecycle: *initialization*.
+ call the :meth:`~.bricks.Brick.initialize` method. This is the last
+ step in the brick lifecycle: *initialization*.

>>> linear.initialize()
>>> linear.parameters[1].get_value() # doctest: +SKIP
@@ -166,11 +167,11 @@ sensible defaults for a particular use case.
>>> mlp.children[0].input_dim
16

- We can see that the :class:`.MLP` brick automatically constructed two child
- bricks to perform the linear transformations. When we applied the MLP to
- ``x``, it automatically configured the input and output dimensions of its
- children. Likewise, when we call :meth:`.Brick.initialize`, it
- automatically pushed the weight matrix and biases initialization
+ We can see that the :class:`~.bricks.MLP` brick automatically constructed
+ two child bricks to perform the linear transformations. When we applied the MLP
+ to ``x``, it automatically configured the input and output dimensions of its
+ children. Likewise, when we call :meth:`~.bricks.Brick.initialize`,
+ it automatically pushed the weight matrix and biases initialization
configuration to its children.

>>> mlp.initialize()
@@ -192,9 +193,11 @@ brick:

When dealing with children, the life cycle actually becomes a bit more
complicated. (The full life cycle is documented as part of the
- :class:`.Brick` class.) Before allocating or initializing parameters, the
- parent brick calls its :meth:`.Brick.push_allocation_config` and
- :meth:`.Brick.push_initialization_config` methods, which configure the
+ :class:`~.bricks.Brick` class.) Before allocating or initializing
+ parameters, the parent brick calls its
+ :meth:`~.bricks.Brick.push_allocation_config` and
+ :meth:`~.bricks.Brick.push_initialization_config`
+ methods, which configure the
children. If you want to override the child configuration, you will need to
call these methods manually, after which you can override the child bricks'
configuration.
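The life-cycle this overview documents (configuration, allocation to ``NaN``, then initialization) can be imitated in a few lines. A toy class, not the Blocks API:

```python
import numpy as np

class ToyBrick(object):
    """Minimal imitation of the brick life-cycle described above."""
    def __init__(self, input_dim, output_dim, weights_init):
        self.input_dim, self.output_dim = input_dim, output_dim
        self.weights_init = weights_init
        self.parameters = []

    def allocate(self):
        # Allocation: parameters exist but hold NaN until initialized.
        self.parameters = [np.full((self.input_dim, self.output_dim), np.nan)]

    def initialize(self):
        if not self.parameters:   # apply() would trigger this quietly, too
            self.allocate()
        for p in self.parameters:
            p[...] = self.weights_init(p.shape)

brick = ToyBrick(3, 2, weights_init=lambda shape: np.zeros(shape))
brick.allocate()
assert np.isnan(brick.parameters[0]).all()   # allocated but uninitialized
brick.initialize()
```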
10 changes: 10 additions & 0 deletions docs/conf.py
@@ -15,6 +15,8 @@
import inspect
import sys
import os
+import sphinx.environment
+from docutils.utils import get_source_line
from mock import Mock as MagicMock
from sphinx.ext.autodoc import cut_lines

@@ -351,3 +353,11 @@ def linkcode_resolve(domain, info):
github = "https://github.com/mila-udem/blocks/blob/master/blocks/{}{}"
return github.format(fn, linespec)


+# Suppress "nonlocal image URI" warnings
+# http://stackoverflow.com/questions/12772927
+def _warn_node(self, msg, node, **kwargs):
+    if not msg.startswith('nonlocal image URI found:'):
+        self._warnfunc(msg, '%s:%s' % get_source_line(node), **kwargs)
+
+sphinx.environment.BuildEnvironment.warn_node = _warn_node
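Detached from Sphinx, the monkeypatched filter above behaves like this sketch (``FakeEnv`` and the simplified ``location`` argument are stand-ins for ``BuildEnvironment`` and ``get_source_line``):

```python
class FakeEnv(object):
    """Stand-in for sphinx's BuildEnvironment, just to exercise the filter."""
    def __init__(self):
        self.messages = []
    def _warnfunc(self, msg, location):
        self.messages.append(msg)

def warn_node(self, msg, location):
    # Same shape as the patched method: drop only the nonlocal-image warnings.
    if not msg.startswith('nonlocal image URI found:'):
        self._warnfunc(msg, location)

env = FakeEnv()
warn_node(env, 'nonlocal image URI found: https://example.com/img.png', 'doc:1')
warn_node(env, 'undefined label: foo', 'doc:2')
```

Only the second warning reaches the underlying warning function, so docs builds with remote badges can still pass ``sphinx-build -W``.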
