Fix docstring to clarify logits usage for multiclass case #51053

17 changes: 11 additions & 6 deletions torch/distributions/categorical.py
@@ -16,14 +16,19 @@ class Categorical(Distribution):

Samples are integers from :math:`\{0, \ldots, K-1\}` where `K` is ``probs.size(-1)``.

- If :attr:`probs` is 1-dimensional with length-`K`, each element is the relative
- probability of sampling the class at that index.
+ If `probs` is 1-dimensional with length-`K`, each element is the relative probability
+ of sampling the class at that index.

- If :attr:`probs` is N-dimensional, the first N-1 dimensions are treated as a batch of
+ If `probs` is N-dimensional, the first N-1 dimensions are treated as a batch of
relative probability vectors.

- .. note:: :attr:`probs` must be non-negative, finite and have a non-zero sum,
-     and it will be normalized to sum to 1 along the last dimension.
+ .. note:: The `probs` argument must be non-negative, finite and have a non-zero sum,
+     and it will be normalized to sum to 1 along the last dimension. :attr:`probs`
+     will return this normalized value.
+     The `logits` argument will be interpreted as unnormalized log probabilities
+     and can therefore be any real number. It will likewise be normalized so that
+     the resulting probabilities sum to 1 along the last dimension. :attr:`logits`
+     will return this normalized value.

See also: :func:`torch.multinomial`

@@ -35,7 +40,7 @@ class Categorical(Distribution):

Args:
    probs (Tensor): event probabilities
-     logits (Tensor): event log-odds
+     logits (Tensor): event log probabilities (unnormalized)
"""
arg_constraints = {'probs': constraints.simplex,
'logits': constraints.real_vector}
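To make the note's normalization behavior concrete, here is a minimal sketch (illustrative values; assumes a PyTorch build that includes this patch):

    import torch
    from torch.distributions import Categorical

    # `logits` may be any real numbers; the distribution normalizes them.
    d = Categorical(logits=torch.tensor([2.0, 0.0, -1.0]))

    print(d.probs.sum(-1))         # tensor(1.) -- normalized along the last dim
    print(d.logits.logsumexp(-1))  # ~tensor(0.) -- normalized log probabilities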
2 changes: 1 addition & 1 deletion torch/distributions/half_cauchy.py
@@ -43,7 +43,7 @@ def scale(self):

@property
def mean(self):
-     return self.base_dist.mean
+     return torch.full(self._extended_shape(), math.inf, dtype=self.scale.dtype, device=self.scale.device)

@property
def variance(self):
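A quick sketch of what the patched property returns (the half-Cauchy has no finite mean, so an inf-filled tensor of the extended shape is the honest answer; assumes this patch is applied):

    import torch
    from torch.distributions import HalfCauchy

    d = HalfCauchy(scale=torch.tensor([1.0, 2.0]))
    print(d.mean)  # tensor([inf, inf]), matching the dtype/device of `scale`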
11 changes: 8 additions & 3 deletions torch/distributions/multinomial.py
@@ -15,8 +15,13 @@ class Multinomial(Distribution):
Note that :attr:`total_count` need not be specified if only :meth:`log_prob` is
called (see example below)

- .. note:: :attr:`probs` must be non-negative, finite and have a non-zero sum,
-     and it will be normalized to sum to 1.
+ .. note:: The `probs` argument must be non-negative, finite and have a non-zero sum,
+     and it will be normalized to sum to 1 along the last dimension. :attr:`probs`
+     will return this normalized value.
+     The `logits` argument will be interpreted as unnormalized log probabilities
+     and can therefore be any real number. It will likewise be normalized so that
+     the resulting probabilities sum to 1 along the last dimension. :attr:`logits`
+     will return this normalized value.

- :meth:`sample` requires a single shared `total_count` for all
parameters and samples.
@@ -35,7 +40,7 @@ class Multinomial(Distribution):
Args:
    total_count (int): number of trials
    probs (Tensor): event probabilities
-     logits (Tensor): event log probabilities
+     logits (Tensor): event log probabilities (unnormalized)
"""
arg_constraints = {'probs': constraints.simplex,
'logits': constraints.real_vector}
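As with Categorical, a short sketch of passing unnormalized logits to Multinomial (illustrative values; assumes this patch is applied):

    import torch
    from torch.distributions import Multinomial

    m = Multinomial(total_count=10, logits=torch.tensor([1.0, 2.0, 3.0]))
    counts = m.sample()        # counts over 3 events, summing to total_count
    print(counts.sum())        # tensor(10.)
    print(m.probs.sum(-1))     # tensor(1.) -- normalized probabilities
    print(m.log_prob(counts))  # log probability of the drawn counts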
11 changes: 8 additions & 3 deletions torch/distributions/one_hot_categorical.py
@@ -11,8 +11,13 @@ class OneHotCategorical(Distribution):

Samples are one-hot coded vectors of size ``probs.size(-1)``.

- .. note:: :attr:`probs` must be non-negative, finite and have a non-zero sum,
-     and it will be normalized to sum to 1.
+ .. note:: The `probs` argument must be non-negative, finite and have a non-zero sum,
+     and it will be normalized to sum to 1 along the last dimension. :attr:`probs`
+     will return this normalized value.
+     The `logits` argument will be interpreted as unnormalized log probabilities
+     and can therefore be any real number. It will likewise be normalized so that
+     the resulting probabilities sum to 1 along the last dimension. :attr:`logits`
+     will return this normalized value.

See also: :func:`torch.distributions.Categorical` for specifications of
:attr:`probs` and :attr:`logits`.
@@ -25,7 +30,7 @@ class OneHotCategorical(Distribution):

Args:
    probs (Tensor): event probabilities
-     logits (Tensor): event log probabilities
+     logits (Tensor): event log probabilities (unnormalized)
"""
arg_constraints = {'probs': constraints.simplex,
'logits': constraints.real_vector}
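A brief sketch of the one-hot case (illustrative values; assumes this patch is applied):

    import torch
    from torch.distributions import OneHotCategorical

    d = OneHotCategorical(logits=torch.tensor([0.5, -1.0, 3.0]))
    s = d.sample()                 # one-hot vector of size probs.size(-1)
    print(s)                       # e.g. tensor([0., 0., 1.])
    print(d.logits.exp().sum(-1))  # tensor(1.) -- logits are stored normalized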
10 changes: 5 additions & 5 deletions torch/distributions/relaxed_categorical.py
@@ -21,7 +21,7 @@ class ExpRelaxedCategorical(Distribution):
Args:
    temperature (Tensor): relaxation temperature
    probs (Tensor): event probabilities
-     logits (Tensor): the log probability of each event.
+     logits (Tensor): unnormalized log probability for each event

[1] The Concrete Distribution: A Continuous Relaxation of Discrete Random Variables
(Maddison et al, 2017)
Expand All @@ -30,8 +30,8 @@ class ExpRelaxedCategorical(Distribution):
(Jang et al, 2017)
"""
arg_constraints = {'probs': constraints.simplex,
-                    'logits': constraints.real}
- support = constraints.real
+                    'logits': constraints.real_vector}
+ support = constraints.real_vector
Contributor Author commented:

cc. @fritzo.

Collaborator commented:

LGTM. I'm not sure whether real_vector is the optimal constraint, but I believe it is the tightest valid constraint that we currently implement. I think that's fine since most users will use the wrapper class with the exact simplex constraint.

has_rsample = True

def __init__(self, temperature, probs=None, logits=None, validate_args=None):
@@ -101,10 +101,10 @@ class RelaxedOneHotCategorical(TransformedDistribution):
Args:
    temperature (Tensor): relaxation temperature
    probs (Tensor): event probabilities
-     logits (Tensor): the log probability of each event.
+     logits (Tensor): unnormalized log probability for each event
"""
arg_constraints = {'probs': constraints.simplex,
-                    'logits': constraints.real}
+                    'logits': constraints.real_vector}
support = constraints.simplex
has_rsample = True

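Finally, a sketch consistent with the relaxed real_vector constraint above (illustrative values; the temperature and logits are arbitrary choices):

    import torch
    from torch.distributions import RelaxedOneHotCategorical

    # `logits` is a real-valued vector (real_vector constraint); samples are
    # reparameterized (has_rsample = True) and lie on the simplex.
    d = RelaxedOneHotCategorical(temperature=torch.tensor(0.5),
                                 logits=torch.tensor([1.0, 0.0, -2.0]))
    y = d.rsample()   # differentiable relaxed one-hot sample
    print(y.sum(-1))  # tensor(1.) -- on the simplex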