Commit bc59c00
Fix docs links to PyTorch documentation (#856)
fritzo authored and neerajprad committed Mar 7, 2018
1 parent 5981247 commit bc59c00
Showing 11 changed files with 75 additions and 65 deletions.
docs/source/distributions.rst (4 changes: 2 additions & 2 deletions)

@@ -11,9 +11,9 @@ PyTorch Distributions

 Most distributions in Pyro are thin wrappers around PyTorch distributions.
 For details on the PyTorch distribution interface, see
-:class:`torch.distributions.Distribution`.
+:class:`torch.distributions.distribution.Distribution`.
 For differences between the Pyro and PyTorch interfaces, see
-:class:`pyro.distributions.torch_distribution.TorchDistributionMixin`.
+:class:`~pyro.distributions.torch_distribution.TorchDistributionMixin`.

 .. automodule:: pyro.distributions.torch
     :members:
pyro/__init__.py (16 changes: 8 additions & 8 deletions)

@@ -170,18 +170,18 @@ def iarange(name, size=None, subsample_size=None, subsample=None, use_cuda=None)
     """
     Context manager for conditionally independent ranges of variables.

-    ``iarange`` is similar to ``torch.arange`` in that it yields an array
-    of indices by which other tensors can be indexed. ``iarange`` differs from
-    ``torch.arange`` in that it also informs inference algorithms that the
-    variables being indexed are conditionally independent. To do this,
-    ``iarange`` is a provided as context manager rather than a function, and
-    users must guarantee that all computation within an ``iarange`` context
-    is conditionally independent::
+    :func:`iarange` is similar to :func:`torch.arange` in that it yields an
+    array of indices by which other tensors can be indexed. :func:`iarange`
+    differs from :func:`torch.arange` in that it also informs inference
+    algorithms that the variables being indexed are conditionally independent.
+    To do this, :func:`iarange` is provided as a context manager rather than a
+    function, and users must guarantee that all computation within an
+    :func:`iarange` context is conditionally independent::

         with iarange("name", size) as ind:
             # ...do conditionally independent stuff with ind...

-    Additionally, ``iarange`` can take advantage of the conditional
+    Additionally, :func:`iarange` can take advantage of the conditional
     independence assumptions by subsampling the indices and informing inference
     algorithms to scale various computed values. This is typically used to
     subsample minibatches of data::
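The hunk ends just before the docstring's own subsampling example; as a minimal sketch of the pattern described above, written against the 0.x-era API of this commit (``iarange`` was later renamed ``pyro.plate``)::

    import torch
    import pyro
    import pyro.distributions as dist

    data = torch.randn(100)

    def model():
        # Subsample 10 of 100 points per step; log-probabilities inside the
        # context are rescaled by 100/10 so gradient estimates stay unbiased.
        with pyro.iarange("data", 100, subsample_size=10) as ind:
            pyro.sample("obs", dist.Normal(torch.zeros(10), torch.ones(10)),
                        obs=data[ind])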
pyro/contrib/gp/__init__.py (12 changes: 5 additions & 7 deletions)

@@ -13,11 +13,10 @@ class Parameterized(nn.Module):
     Parameterized class.

     This is a base class for other classes in Gaussian Process.
-    By default, a parameter will be a ``torch.nn.Parameter`` containing ``torch.FloatTensor``.
+    By default, a parameter will be a :class:`torch.nn.Parameter` containing :class:`torch.FloatTensor`.
     To cast them to the correct data type or GPU device, we can call methods such as
     ``.double()``, ``.cuda(device=0)``,...
-    See `torch.nn.Module
-    <http://pytorch.org/docs/master/nn.html#torch.nn.Module>`_ for more information.
+    See :class:`torch.nn.Module` for more information.

     :param str name: Name of this module.
     """
@@ -36,7 +35,8 @@ def set_prior(self, param, prior):
         Sets a prior to a parameter.

         :param str param: Name of a parameter.
-        :param pyro.distributions.Distribution prior: A prior distribution for random variable ``param``.
+        :param pyro.distributions.distribution.Distribution prior: A prior
+            distribution for random variable ``param``.
         """
         self._priors[param] = prior

@@ -46,9 +46,7 @@ def set_constraint(self, param, constraint):
         :param str param: Name of a parameter.
         :param torch.distributions.constraints.Constraint constraint: A Pytorch constraint.
-            See `Pytorch's docs
-            <http://pytorch.org/docs/master/distributions.html#module-torch.distributions.constraints>`_
-            for a list of constraints.
+            See :mod:`torch.distributions.constraints` for a list of constraints.
         """
         self._constraints[param] = constraint
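A sketch of how these two methods combine, assuming the GP module's API at the time of this commit (the ``Scale`` module and its ``variance`` parameter are hypothetical)::

    import torch
    from torch.distributions import constraints
    import pyro.distributions as dist
    from pyro.contrib.gp import Parameterized

    class Scale(Parameterized):
        def __init__(self, variance):
            super(Scale, self).__init__()
            self.variance = torch.nn.Parameter(variance)
            # A prior for inference and a constraint for optimization.
            self.set_prior("variance", dist.Gamma(torch.ones(1), torch.ones(1)))
            self.set_constraint("variance", constraints.positive)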

pyro/contrib/gp/kernels/isotropic.py (3 changes: 2 additions & 1 deletion)

@@ -9,7 +9,8 @@

 def _torch_sqrt(x, eps=1e-18):
     """
-    A convenient function to avoid the NaN gradient issue of ``torch.sqrt`` at 0.
+    A convenient function to avoid the NaN gradient issue of :func:`torch.sqrt`
+    at 0.
     """
     # Ref: https://github.com/pytorch/pytorch/issues/2421
     return (x + eps).sqrt()
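For context, the issue this helper works around, demonstrated with the modern ``requires_grad`` API (the Pyro of this commit still wrapped tensors in ``torch.autograd.Variable``)::

    import torch

    x = torch.zeros(1, requires_grad=True)
    torch.sqrt(x).backward()
    print(x.grad)  # inf: d/dx sqrt(x) = 1 / (2 sqrt(x)) diverges at 0, and
                   # downstream products such as 0 * inf then produce NaN

    y = torch.zeros(1, requires_grad=True)
    (y + 1e-18).sqrt().backward()
    print(y.grad)  # 5e+08: large but finite, so training stays NaN-free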
pyro/distributions/distribution.py (22 changes: 10 additions & 12 deletions)

@@ -12,8 +12,8 @@ class Distribution(object):
     """
     Base class for parameterized probability distributions.

-    Distributions in Pyro are stochastic function objects with ``.sample()`` and
-    ``.log_prob()`` methods. Distribution are stochastic functions with fixed
+    Distributions in Pyro are stochastic function objects with :meth:`sample` and
+    :meth:`log_prob` methods. Distributions are stochastic functions with fixed
     parameters::

         d = dist.Bernoulli(param)
@@ -22,10 +22,8 @@ class Distribution(object):
     **Implementing New Distributions**:

-    Derived classes must implement the following methods: ``.sample()``,
-    ``.log_prob()``, ``.batch_shape()``, and ``.event_shape()``.
-    Discrete classes may also implement the ``.enumerate_support()`` method to improve
-    gradient estimates and set ``.enumerable = True``.
+    Derived classes must implement the methods :meth:`sample` and
+    :meth:`log_prob`.

     **Examples**:
@@ -37,9 +35,9 @@ class Distribution(object):

     def __call__(self, *args, **kwargs):
         """
-        Samples a random value (just an alias for `.sample(*args, **kwargs)`).
+        Samples a random value (just an alias for ``.sample(*args, **kwargs)``).

-        For tensor distributions, the returned Variable should have the same `.size()` as the
+        For tensor distributions, the returned Variable should have the same ``.size()`` as the
         parameters.

         :return: A random value.
@@ -52,14 +50,14 @@ def sample(self, *args, **kwargs):
         """
         Samples a random value.

-        For tensor distributions, the returned Variable should have the same `.size()` as the
+        For tensor distributions, the returned Variable should have the same ``.size()`` as the
         parameters, unless otherwise noted.

         :param sample_shape: the size of the iid batch to be drawn from the
             distribution.
         :type sample_shape: torch.Size
         :return: A random value or batch of random values (if parameters are
-            batched). The shape of the result should be `self.size()`.
+            batched). The shape of the result should be ``self.size()``.
         :rtype: torch.autograd.Variable
         """
         raise NotImplementedError
@@ -72,8 +70,8 @@ def log_prob(self, x, *args, **kwargs):
         :param torch.autograd.Variable x: A single value or a batch of values
             batched along axis 0.
         :return: log probability densities as a one-dimensional
-            `torch.autograd.Variable` with same batch size as value and params.
-            The shape of the result should be `self.batch_size()`.
+            :class:`~torch.autograd.Variable` with same batch size as value and
+            params. The shape of the result should be ``self.batch_size``.
         :rtype: torch.autograd.Variable
         """
         raise NotImplementedError
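A short usage sketch of this interface (modern Pyro returns plain tensors where this era returned Variables)::

    import torch
    import pyro.distributions as dist

    d = dist.Bernoulli(torch.tensor([0.3, 0.9]))  # fixed parameters
    x = d.sample()         # same shape as the parameters: (2,)
    log_p = d.log_prob(x)  # one log-density per batch element: (2,)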
pyro/distributions/torch.py (4 changes: 2 additions & 2 deletions)

@@ -111,9 +111,9 @@ class _PyroDist(_Dist, TorchDistributionMixin):
     locals()[_name] = _PyroDist

     _PyroDist.__doc__ = '''
-    Wraps :class:`torch.distributions.{}` with
+    Wraps :class:`{}.{}` with
    :class:`~pyro.distributions.torch_distribution.TorchDistributionMixin`.
-    '''.format(_Dist.__name__)
+    '''.format(_Dist.__module__, _Dist.__name__)

     __all__.append(_name)
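The reason for threading ``_Dist.__module__`` through the template: PyTorch distribution classes live in per-class submodules, so the canonical Sphinx cross-reference target includes the module path. A quick check::

    import torch

    _Dist = torch.distributions.Normal
    print('{}.{}'.format(_Dist.__module__, _Dist.__name__))
    # torch.distributions.normal.Normal, a resolvable cross-reference target,
    # unlike the bare 'torch.distributions.Normal' used before this commit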

pyro/distributions/torch_distribution.py (55 changes: 34 additions & 21 deletions)

@@ -11,10 +11,12 @@ class TorchDistributionMixin(Distribution):
     """
     Mixin to provide Pyro compatibility for PyTorch distributions.

+    You should instead use `TorchDistribution` for new distribution classes.
+
     This is mainly useful for wrapping existing PyTorch distributions for
     use in Pyro. Derived classes must first inherit from
-    :class:`torch.distributions.Distribution` and then inherit from
-    :class:`TorchDistributionMixin`.
+    :class:`torch.distributions.distribution.Distribution` and then inherit
+    from :class:`TorchDistributionMixin`.
     """
     @property
     def reparameterized(self):
@@ -38,8 +40,9 @@ def __call__(self, sample_shape=torch.Size()):
         Samples a random value.

         This is reparameterized whenever possible, calling
-        :meth:`~torch.distributions.Distribution.rsample` for reparameterized
-        distributions and :meth:`~torch.distributions.Distribution.sample` for
+        :meth:`~torch.distributions.distribution.Distribution.rsample` for
+        reparameterized distributions and
+        :meth:`~torch.distributions.distribution.Distribution.sample` for
         non-reparameterized distributions.

         :param sample_shape: the size of the iid batch to be drawn from the
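The dispatch described here, in plain ``torch.distributions`` terms (a sketch; Pyro's mixin adds its own bookkeeping on top)::

    import torch
    from torch.distributions import Normal, Poisson

    d = Normal(torch.zeros(1), torch.ones(1))
    assert d.has_rsample      # pathwise gradients can flow through rsample()
    x = d.rsample()

    p = Poisson(torch.ones(1))
    assert not p.has_rsample  # discrete distribution: only sample() exists
    y = p.sample()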
@@ -78,7 +81,8 @@
     def reshape(self, sample_shape=torch.Size(), extra_event_dims=0):
         """
         Reshapes a distribution by adding ``sample_shape`` to its total shape
-        and adding ``extra_event_dims`` to its ``event_shape``.
+        and adding ``extra_event_dims`` to its
+        :attr:`~torch.distributions.distribution.Distribution.event_shape`.

         :param torch.Size sample_shape: The size of the iid batch to be drawn
             from the distribution.
@@ -92,7 +96,7 @@ def reshape(self, sample_shape=torch.Size(), extra_event_dims=0):
     def mask(self, mask):
         """
         Masks a distribution by a zero-one tensor that is broadcastable to the
-        distributions ``batch_shape``.
+        distribution's :attr:`~torch.distributions.distribution.Distribution.batch_shape`.

         :param Variable mask: A zero-one valued float tensor.
         :return: A masked copy of this distribution.
:return: A masked copy of this distribution.
Expand All @@ -115,8 +119,9 @@ class TorchDistribution(torch.distributions.Distribution, TorchDistributionMixin
.. note::
Parameters and data should be of type `torch.autograd.Variable` and all
methods return type `torch.autograd.Variable` unless otherwise noted.
Parameters and data should be of type :class:`~torch.autograd.Variable`
and all methods return type :class:`~torch.autograd.Variable` unless
otherwise noted.
**Tensor Shapes**:
Expand All @@ -139,13 +144,13 @@ class TorchDistribution(torch.distributions.Distribution, TorchDistributionMixin
These shapes are related by the equation::
assert d.shape(sample_shape, *args, **kwargs) == sample_shape +
d.batch_shape(*args, **kwargs) +
d.event_shape(*args, **kwargs)
assert d.shape(sample_shape) == sample_shape + d.batch_shape + d.event_shape
Distributions provide a vectorized ``.log_prob()`` method that evaluates
the log probability density of each event in a batch independently,
returning a tensor of shape ``sample_shape + d.batch_shape``::
Distributions provide a vectorized
:meth`~torch.distributions.distribution.Distribution.log_prob` method that
evaluates the log probability density of each event in a batch
independently, returning a tensor of shape
``sample_shape + d.batch_shape``::
x = d.sample(sample_shape)
assert x.size() == d.shape(sample_shape)
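A concrete instance of these shape relations, using a stock PyTorch distribution::

    import torch
    from torch.distributions import Normal

    d = Normal(torch.zeros(3), torch.ones(3))  # batch_shape (3,), event_shape ()
    sample_shape = torch.Size((2,))
    x = d.sample(sample_shape)
    assert x.shape == sample_shape + d.batch_shape + d.event_shape  # (2, 3)
    assert d.log_prob(x).shape == sample_shape + d.batch_shape      # (2, 3)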
@@ -154,19 +159,27 @@ class TorchDistribution(torch.distributions.Distribution, TorchDistributionMixin
     **Implementing New Distributions**:

-    Derived classes must implement the following methods: ``.rsample()``
-    (or ``.sample()`` if ``.has_rsample == True``),
-    ``.log_prob()``, ``.batch_shape``, and ``.event_shape``.
-    Discrete classes may also implement the ``.enumerate_support()`` method to improve
-    gradient estimates and set ``.has_enumerate_support = True``.
+    Derived classes must implement the methods
+    :meth:`~torch.distributions.distribution.Distribution.sample`
+    (or :meth:`~torch.distributions.distribution.Distribution.rsample` if
+    ``.has_rsample == True``) and
+    :meth:`~torch.distributions.distribution.Distribution.log_prob`, and must
+    implement the properties
+    :attr:`~torch.distributions.distribution.Distribution.batch_shape`
+    and :attr:`~torch.distributions.distribution.Distribution.event_shape`.
+    Discrete classes may also implement the
+    :meth:`~torch.distributions.distribution.Distribution.enumerate_support`
+    method to improve gradient estimates and set
+    ``.has_enumerate_support = True``.
     """
     pass


 class ReshapedDistribution(TorchDistribution):
     """
     Reshapes a distribution by adding ``sample_shape`` to its total shape
-    and adding ``extra_event_dims`` to its ``event_shape``.
+    and adding ``extra_event_dims`` to its
+    :attr:`~torch.distributions.distribution.Distribution.event_shape`.

     :param torch.Size sample_shape: The size of the iid batch to be drawn from
         the distribution.
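The "Implementing New Distributions" requirements above, as a toy sketch against the plain ``torch.distributions`` base class (Pyro's ``TorchDistribution`` layers the mixin on top; ``Uniform01`` is illustrative, not part of either library)::

    import torch
    from torch.distributions import Distribution, constraints

    class Uniform01(Distribution):
        arg_constraints = {}
        support = constraints.unit_interval
        has_rsample = True

        def __init__(self, batch_shape=torch.Size()):
            super(Uniform01, self).__init__(batch_shape, torch.Size())

        def rsample(self, sample_shape=torch.Size()):
            # shape == sample_shape + batch_shape + event_shape
            return torch.rand(self._extended_shape(sample_shape))

        def log_prob(self, value):
            return torch.zeros_like(value)  # log(1) on the unit interval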
@@ -237,7 +250,7 @@ def variance(self):
 class MaskedDistribution(TorchDistribution):
     """
     Masks a distribution by a zero-one tensor that is broadcastable to the
-    distribution's ``batch_shape``.
+    distribution's :attr:`~torch.distributions.distribution.Distribution.batch_shape`.

     :param Variable mask: A zero-one valued float tensor.
     """
pyro/distributions/util.py (10 changes: 5 additions & 5 deletions)

@@ -48,15 +48,15 @@ def decorator(destin_class):
 def is_identically_zero(x):
     """
     Check if argument is exactly the number zero. True for the number zero;
-    false for other numbers; false for ``torch.autograd.Variable``s.
+    false for other numbers; false for :class:`~torch.autograd.Variable`s.
     """
     return isinstance(x, numbers.Number) and x == 0


 def is_identically_one(x):
     """
     Check if argument is exactly the number one. True for the number one;
-    false for other numbers; false for ``torch.autograd.Variable``s.
+    false for other numbers; false for :class:`~torch.autograd.Variable`s.
     """
     return isinstance(x, numbers.Number) and x == 1

@@ -175,7 +175,7 @@ def scale_tensor(tensor, scale):
 def torch_eye(n, m=None, out=None):
     """
-    Like `torch.eye()`, but works with cuda tensors.
+    Like :func:`torch.eye`, but works with cuda tensors.
     """
     if m is None:
         m = n
@@ -194,7 +194,7 @@ def torch_eye(n, m=None, out=None):
 def torch_multinomial(input, num_samples, replacement=False):
     """
-    Like `torch.multinomial()` but works with cuda tensors.
+    Like :func:`torch.multinomial` but works with cuda tensors.
     Does not support keyword argument `out`.
     """
     if input.is_cuda:
@@ -205,7 +205,7 @@ def torch_multinomial(input, num_samples, replacement=False):
 def torch_sign(value):
     """
-    Like ``torch.sign()`` but also works for numbers.
+    Like :func:`torch.sign` but also works for numbers.
     """
     if isinstance(value, numbers.Number):
         return (value > 0) - (value < 0)
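The number branch relies on Python booleans being integers; a quick check::

    assert (-3.5 > 0) - (-3.5 < 0) == -1  # False - True
    assert (0 > 0) - (0 < 0) == 0         # False - False
    assert (2.0 > 0) - (2.0 < 0) == 1     # True - False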
pyro/infer/util.py (8 changes: 4 additions & 4 deletions)

@@ -11,7 +11,7 @@

 def torch_exp(x):
     """
-    Like ``x.exp()`` for a ``torch.autograd.Variable``, but also accepts
+    Like ``x.exp()`` for a :class:`~torch.autograd.Variable`, but also accepts
     numbers.
     """
     if isinstance(x, numbers.Number):
@@ -21,7 +21,7 @@ def torch_exp(x):
 def torch_data_sum(x):
     """
-    Like ``x.data.sum()`` for a ``torch.autograd.Variable``, but also works
+    Like ``x.data.sum()`` for a :class:`~torch.autograd.Variable`, but also works
     with numbers.
     """
     if isinstance(x, numbers.Number):
@@ -31,7 +31,7 @@ def torch_data_sum(x):
 def torch_sum(x):
     """
-    Like ``x.sum()`` for a ``torch.autograd.Variable``, but also works with
+    Like ``x.sum()`` for a :class:`~torch.autograd.Variable`, but also works with
     numbers.
     """
     if isinstance(x, numbers.Number):
@@ -41,7 +41,7 @@ def torch_sum(x):
 def torch_backward(x):
     """
-    Like ``x.backward()`` for a ``torch.autograd.Variable``, but also accepts
+    Like ``x.backward()`` for a :class:`~torch.autograd.Variable`, but also accepts
     numbers (a no-op if given a number).
     """
     if isinstance(x, torch.autograd.Variable):
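The shared pattern behind these four helpers, with one body filled in as a sketch (the real bodies are folded in the diff above)::

    import math
    import numbers
    import torch

    def torch_exp(x):
        # Dispatch on type: plain Python numbers take the math-module path,
        # tensor-like values use their own method.
        if isinstance(x, numbers.Number):
            return math.exp(x)
        return x.exp()

    torch_exp(0.0)             # 1.0, via math.exp
    torch_exp(torch.zeros(2))  # tensor([1., 1.]), via Tensor.exp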
pyro/ops/integrator.py (4 changes: 2 additions & 2 deletions)

@@ -8,9 +8,9 @@ def velocity_verlet(z, r, potential_fn, step_size, num_steps=1):
     Second order symplectic integrator that uses the velocity verlet algorithm.

     :param dict z: dictionary of sample site names and their current values
-        (type ``torch.autograd.Variable``).
+        (type :class:`~torch.autograd.Variable`).
     :param dict r: dictionary of sample site names and corresponding momenta
-        (type ``torch.autograd.Variable``).
+        (type :class:`~torch.autograd.Variable`).
     :param callable potential_fn: function that returns potential energy given z
         for each sample site. The negative gradient of the function with respect
         to ``z`` determines the rate of change of the corresponding sites'
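For reference, one velocity verlet step for a single site, sketched with modern autograd (the real integrator loops ``num_steps`` times over dicts of sites and returns the final ``z`` and ``r``)::

    import torch

    def velocity_verlet_step(z, r, potential_fn, step_size):
        # Half step for momentum, full step for position, half step for momentum.
        z = z.detach().requires_grad_(True)
        r = r - 0.5 * step_size * torch.autograd.grad(potential_fn(z), z)[0]
        z = (z + step_size * r).detach().requires_grad_(True)
        r = r - 0.5 * step_size * torch.autograd.grad(potential_fn(z), z)[0]
        return z.detach(), r

    z, r = torch.tensor([1.0]), torch.tensor([0.0])
    U = lambda q: 0.5 * (q ** 2).sum()  # harmonic potential energy
    z, r = velocity_verlet_step(z, r, U, step_size=0.1)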
pyro/shim.py (2 changes: 1 addition & 1 deletion)

@@ -7,7 +7,7 @@

 def parse_torch_version():
     """
-    Parses `torch.__version__` into a semver-ish version tuple.
+    Parses ``torch.__version__`` into a semver-ish version tuple.

     This is needed to handle subpatch `_n` parts outside of the semver spec.

     :returns: a tuple `(major, minor, patch, extra_stuff)`
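A sketch of such parsing (a hypothetical re-implementation; the actual helper is folded above and may differ in detail)::

    import re
    import torch

    match = re.match(r'(\d+)\.(\d+)\.(\d+)(.*)', torch.__version__)
    major, minor, patch = (int(g) for g in match.groups()[:3])
    extra_stuff = match.group(4)  # e.g. 'a0+af3964a' for '0.4.0a0+af3964a'
    print((major, minor, patch, extra_stuff))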
