Skip to content

Commit

Permalink
Improve shape handling in generate_samples (#3456)
Browse files Browse the repository at this point in the history
* Added shape_utils script

* Import from shape_utils and change generate samples

* Fixed lint and errors

* Still errors with broadcast shapes

* Fixed triangular errors

* Fixed distributions random errors.

* Fixed Multinomial error

* Fixed multinomial n shape error

* Added shape broadcasting tests

* Added broadcast samples tests

* Made all tests use fixtures. Finished shape broadcasting tests.

* Fixed shape_utils docstrings.

* Added random variable sampling to test_shape testsuite.

* Removed redundant broadcast_distribution_samples from random methods. This is now handled in generate_samples.

* Fixed lint

* Moved test_shape_handling to last travis environment

* Changed test skip to xfail

* Fixed Bound random error

* Changes to increase code coverage

* Moved to_tuple to shape_utils

* Used numpy vectorize to fix 3422

* Fixed signature for empty input and one_d reshaping.

* Removed unused fixture and added more details to RELEASE-NOTES

* Addressed ColCarroll's comments
  • Loading branch information
lucianopaz authored and ColCarroll committed May 2, 2019
1 parent a7f62a0 commit f5d8324
Show file tree
Hide file tree
Showing 13 changed files with 793 additions and 287 deletions.
8 changes: 4 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ install:
- conda list && pip freeze

env:
- FLOATX='float32' TESTCMD="--durations=10 --ignore=pymc3/tests/test_examples.py --cov-append --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py --ignore=pymc3/tests/test_posteriors.py --ignore=pymc3/tests/test_sampling.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_distribution_defaults.py --ignore=pymc3/tests/test_distributions_timeseries.py --ignore=pymc3/tests/test_random.py --ignore=pymc3/tests/test_gp.py"
- FLOATX='float32' TESTCMD="--durations=10 --ignore=pymc3/tests/test_examples.py --cov-append --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py --ignore=pymc3/tests/test_posteriors.py --ignore=pymc3/tests/test_sampling.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_distribution_defaults.py --ignore=pymc3/tests/test_distributions_timeseries.py --ignore=pymc3/tests/test_random.py --ignore=pymc3/tests/test_gp.py --ignore=pymc3/tests/test_shape_handling.py"
- FLOATX='float32' RUN_PYLINT="true" TESTCMD="--durations=10 --cov-append pymc3/tests/test_distributions_random.py pymc3/tests/test_shared.py pymc3/tests/test_smc.py pymc3/tests/test_sampling.py pymc3/tests/test_parallel_sampling.py pymc3/tests/test_dist_math.py pymc3/tests/test_distribution_defaults.py pymc3/tests/test_distributions_timeseries.py pymc3/tests/test_random.py"
- FLOATX='float32' TESTCMD="--durations=10 --cov-append pymc3/tests/test_examples.py pymc3/tests/test_posteriors.py pymc3/tests/test_gp.py"
- FLOATX='float32' TESTCMD="--durations=10 --cov-append pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py"
- FLOATX='float64' TESTCMD="--durations=10 --cov-append --ignore=pymc3/tests/test_examples.py --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py --ignore=pymc3/tests/test_posteriors.py --ignore=pymc3/tests/test_sampling.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_distribution_defaults.py --ignore=pymc3/tests/test_distributions_timeseries.py --ignore=pymc3/tests/test_random.py --ignore=pymc3/tests/test_gp.py"
- FLOATX='float32' TESTCMD="--durations=10 --cov-append pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py pymc3/tests/test_shape_handling.py"
- FLOATX='float64' TESTCMD="--durations=10 --cov-append --ignore=pymc3/tests/test_examples.py --ignore=pymc3/tests/test_distributions_random.py --ignore=pymc3/tests/test_variational_inference.py --ignore=pymc3/tests/test_shared.py --ignore=pymc3/tests/test_smc.py --ignore=pymc3/tests/test_updates.py --ignore=pymc3/tests/test_posteriors.py --ignore=pymc3/tests/test_sampling.py --ignore=pymc3/tests/test_parallel_sampling.py --ignore=pymc3/tests/test_dist_math.py --ignore=pymc3/tests/test_distribution_defaults.py --ignore=pymc3/tests/test_distributions_timeseries.py --ignore=pymc3/tests/test_random.py --ignore=pymc3/tests/test_gp.py --ignore=pymc3/tests/test_shape_handling.py"
- FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_distributions_random.py pymc3/tests/test_shared.py pymc3/tests/test_smc.py pymc3/tests/test_sampling.py pymc3/tests/test_parallel_sampling.py pymc3/tests/test_dist_math.py pymc3/tests/test_distribution_defaults.py pymc3/tests/test_distributions_timeseries.py pymc3/tests/test_random.py"
- FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_examples.py pymc3/tests/test_posteriors.py pymc3/tests/test_gp.py"
- FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py"
- FLOATX='float64' TESTCMD="--durations=10 --cov-append pymc3/tests/test_variational_inference.py pymc3/tests/test_updates.py pymc3/tests/test_shape_handling.py"

script:
- . ./scripts/test.sh $TESTCMD
Expand Down
6 changes: 6 additions & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
- Add function `set_data` to update variables defined as `Data`.
- `Mixture` now supports mixtures of multidimensional probability distributions, not just lists of 1D distributions.
- `GLM.from_formula` and `LinearComponent.from_formula` can extract variables from the calling scope. Customizable via the new `eval_env` argument. Fixing #3382.
- Added the `distributions.shape_utils` module with functions used to help broadcast samples drawn from distributions using the `size` keyword argument.
- Used `numpy.vectorize` in `distributions.distribution._compile_theano_function`. This enables `sample_prior_predictive` and `sample_posterior_predictive` to ask for tuples of samples instead of just integers. This fixes issue #3422.

### Maintenance
- All occurrences of `sd` as a parameter name have been renamed to `sigma`. `sd` will continue to function for backwards compatibility.
Expand All @@ -31,6 +33,10 @@
- Add `sigma`, `tau`, and `sd` to signature of `NormalMixture`.
- Resolved issue #3248. Set default lower and upper values of -inf and inf for pm.distributions.continuous.TruncatedNormal. This avoids errors caused by their previous values of None.
- Resolved issue #3399. Converted all calls to `pm.distributions.bound._ContinuousBounded` and `pm.distributions.bound._DiscreteBounded` to use only and all positional arguments.
- Restructured `distributions.distribution.generate_samples` to use the `shape_utils` module. This solves issues #3421 and #3147 by using the `size`-aware broadcasting functions in `shape_utils`.
- Fixed the `Multinomial.random` and `Multinomial.random_` methods to make them compatible with the new `generate_samples` function. In the process, a bug in the `Multinomial.random_` shape handling was discovered and fixed.
- Fixed a defect found in `Bound.random` where the `point` dictionary was passed to `generate_samples` as an `arg` instead of in `not_broadcast_kwargs`.
- Fixed a defect found in `Bound._random` where `total_size` could end up as a `float64` instead of being an integer if given `size=tuple()`.

### Deprecations

Expand Down
23 changes: 19 additions & 4 deletions pymc3/distributions/bound.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def _random(self, lower, upper, point=None, size=None):
"Drawing samples from distributions with "
"array-valued bounds is not supported."
)
total_size = np.prod(size)
total_size = np.prod(size).astype(np.int)
samples = []
s = 0
while s < total_size:
Expand All @@ -81,17 +81,32 @@ def random(self, point=None, size=None):
elif self.lower is not None and self.upper is not None:
lower, upper = draw_values([self.lower, self.upper], point=point, size=size)
return generate_samples(
self._random, lower, upper, point, dist_shape=self.shape, size=size
self._random,
lower,
upper,
dist_shape=self.shape,
size=size,
not_broadcast_kwargs={'point': point},
)
elif self.lower is not None:
lower = draw_values([self.lower], point=point, size=size)
return generate_samples(
self._random, lower, np.inf, point, dist_shape=self.shape, size=size
self._random,
lower,
np.inf,
dist_shape=self.shape,
size=size,
not_broadcast_kwargs={'point': point},
)
else:
upper = draw_values([self.upper], point=point, size=size)
return generate_samples(
self._random, -np.inf, upper, point, dist_shape=self.shape, size=size
self._random,
-np.inf,
upper,
dist_shape=self.shape,
size=size,
not_broadcast_kwargs={'point': point},
)


Expand Down
14 changes: 2 additions & 12 deletions pymc3/distributions/continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
alltrue_elemwise, betaln, bound, gammaln, i0e, incomplete_beta, logpow,
normal_lccdf, normal_lcdf, SplineWrapper, std_cdf, zvalue,
)
from .distribution import (Continuous, draw_values, generate_samples,
broadcast_distribution_samples)
from .distribution import (Continuous, draw_values, generate_samples)

__all__ = ['Uniform', 'Flat', 'HalfFlat', 'Normal', 'TruncatedNormal', 'Beta',
'Kumaraswamy', 'Exponential', 'Laplace', 'StudentT', 'Cauchy',
Expand Down Expand Up @@ -966,8 +965,6 @@ def random(self, point=None, size=None):
"""
mu, lam, alpha = draw_values([self.mu, self.lam, self.alpha],
point=point, size=size)
mu, lam, alpha = broadcast_distribution_samples([mu, lam, alpha],
size=size)
return generate_samples(self._random,
mu, lam, alpha,
dist_shape=self.shape,
Expand Down Expand Up @@ -1297,7 +1294,6 @@ def random(self, point=None, size=None):
"""
a, b = draw_values([self.a, self.b],
point=point, size=size)
a, b = broadcast_distribution_samples([a, b], size=size)
return generate_samples(self._random, a, b,
dist_shape=self.shape,
size=size)
Expand Down Expand Up @@ -1674,7 +1670,6 @@ def random(self, point=None, size=None):
array
"""
mu, tau = draw_values([self.mu, self.tau], point=point, size=size)
mu, tau = broadcast_distribution_samples([mu, tau], size=size)
return generate_samples(self._random, mu, tau,
dist_shape=self.shape,
size=size)
Expand Down Expand Up @@ -1965,7 +1960,6 @@ def random(self, point=None, size=None):
"""
alpha, m = draw_values([self.alpha, self.m],
point=point, size=size)
alpha, m = broadcast_distribution_samples([alpha, m], size=size)
return generate_samples(self._random, alpha, m,
dist_shape=self.shape,
size=size)
Expand Down Expand Up @@ -2090,7 +2084,6 @@ def random(self, point=None, size=None):
"""
alpha, beta = draw_values([self.alpha, self.beta],
point=point, size=size)
alpha, beta = broadcast_distribution_samples([alpha, beta], size=size)
return generate_samples(self._random, alpha, beta,
dist_shape=self.shape,
size=size)
Expand Down Expand Up @@ -2669,7 +2662,6 @@ def random(self, point=None, size=None):
"""
alpha, beta = draw_values([self.alpha, self.beta],
point=point, size=size)
alpha, beta = broadcast_distribution_samples([alpha, beta], size=size)

def _random(a, b, size=None):
return b * (-np.log(np.random.uniform(size=size)))**(1 / a)
Expand Down Expand Up @@ -2963,8 +2955,6 @@ def random(self, point=None, size=None):
"""
mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu],
point=point, size=size)
mu, sigma, nu = broadcast_distribution_samples([mu, sigma, nu],
size=size)

def _random(mu, sigma, nu, size=None):
return (np.random.normal(mu, sigma, size=size)
Expand Down Expand Up @@ -3369,7 +3359,7 @@ def random(self, point=None, size=None):
scale = upper - lower
c_ = (c - lower) / scale
return generate_samples(stats.triang.rvs, c=c_, loc=lower, scale=scale,
size=size, dist_shape=self.shape, random_state=None)
size=size, dist_shape=self.shape)

def logp(self, value):
"""
Expand Down
5 changes: 2 additions & 3 deletions pymc3/distributions/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

from pymc3.util import get_variable_name
from .dist_math import bound, factln, binomln, betaln, logpow, random_choice
from .distribution import (Discrete, draw_values, generate_samples,
broadcast_distribution_samples)
from .distribution import Discrete, draw_values, generate_samples
from .shape_utils import broadcast_distribution_samples
from pymc3.math import tround, sigmoid, logaddexp, logit, log1pexp
from ..theanof import floatX, intX

Expand Down Expand Up @@ -351,7 +351,6 @@ def _random(self, q, beta, size=None):

def random(self, point=None, size=None):
q, beta = draw_values([self.q, self.beta], point=point, size=size)
q, beta = broadcast_distribution_samples([q, beta], size=size)

return generate_samples(self._random, q, beta,
dist_shape=self.shape,
Expand Down
12 changes: 1 addition & 11 deletions pymc3/distributions/dist_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from theano.tensor.slinalg import Cholesky
from theano.scan_module import until
from theano import scan
from .shape_utils import to_tuple

from .special import gammaln
from pymc3.theanof import floatX
Expand All @@ -20,17 +21,6 @@
c = - .5 * np.log(2. * np.pi)


def to_tuple(shape):
    """Normalise a shape-like value into a tuple.

    Parameters
    ----------
    shape : None, int, sequence or array
        The value to convert. ``None`` is treated as "no shape".

    Returns
    -------
    tuple
        An empty tuple when ``shape`` is ``None`` or converts to an array
        with no elements; otherwise a tuple built by iterating over
        ``np.atleast_1d(shape)``.
    """
    if shape is None:
        return ()
    # atleast_1d promotes scalars to one-element arrays so that ints and
    # sequences can be handled by the same code path.
    dims = np.atleast_1d(shape)
    return tuple(dims) if dims.size else ()


def bound(logp, *conditions, **kwargs):
"""
Bounds a log probability density with several conditions.
Expand Down
Loading

0 comments on commit f5d8324

Please sign in to comment.