Skip to content
Permalink
Browse files

Refactor smc out of step methods (#3579)

* move smc from step_methods to its own family

* black

* update notebooks

* add release note and fix lint

* add release note and fix lint

* minor fix docstring

* reorder arguments and minor fix docstring
  • Loading branch information...
aloctavodia committed Aug 6, 2019
1 parent 24730cc commit 37466665efec5907c918f59a68edb6b11bea2b9e
@@ -14,6 +14,7 @@
- Moved math operations out of `Rice`, `TruncatedNormal`, `Triangular` and `ZeroInflatedNegativeBinomial` `random` methods. Math operations on values returned by `draw_values` might not broadcast well, and all the `size` aware broadcasting is left to `generate_samples`. Fixes [#3481](https://github.com/pymc-devs/pymc3/issues/3481) and [#3508](https://github.com/pymc-devs/pymc3/issues/3508)
- Parallelization of population steppers (`DEMetropolis`) is now set via the `cores` argument. ([#3559](https://github.com/pymc-devs/pymc3/pull/3559))
- SMC: stabilize covariance matrix [3573](https://github.com/pymc-devs/pymc3/pull/3573)
- SMC is no longer a step method of `pm.sample` now it should be called using `pm.sample_smc` [3579](https://github.com/pymc-devs/pymc3/pull/3579)

## PyMC3 3.7 (May 29 2019)

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

@@ -11,6 +11,7 @@
from .stats import *
from .sampling import *
from .step_methods import *
from .smc import *
from .theanof import *
from .tuning import *
from .variational import *
@@ -425,3 +425,22 @@ def __exit__(self, *args):
ProcessAdapter.terminate_all(self._samplers)
if self._progress is not None:
self._progress.close()

def _cpu_count():
"""Try to guess the number of CPUs in the system.
We use the number provided by psutil if that is installed.
If not, we use the number provided by multiprocessing, but assume
that half of the cpus are only hardware threads and ignore those.
"""
try:
import psutil
cpus = psutil.cpu_count(False)
except ImportError:
try:
cpus = multiprocessing.cpu_count() // 2
except NotImplementedError:
cpus = 1
if cpus is None:
cpus = 1
return cpus

Large diffs are not rendered by default.

@@ -0,0 +1 @@
from .smc import sample_smc
@@ -7,7 +7,7 @@
import multiprocessing as mp
import warnings

from .metropolis import MultivariateNormalProposal
from ..step_methods.metropolis import MultivariateNormalProposal
from .smc_utils import (
_initial_population,
_calc_covariance,
@@ -20,54 +20,96 @@
)
from ..theanof import inputvars, make_shared_replacements
from ..model import modelcontext
from ..parallel_sampling import _cpu_count


EXPERIMENTAL_WARNING = (
"Warning: SMC-ABC methods are experimental step methods and not yet"
" recommended for use in PyMC3!"
)

__all__ = ["SMC", "sample_smc"]
__all__ = ["sample_smc"]


class SMC:
r"""
Sequential Monte Carlo step
def sample_smc(
draws=1000,
kernel="metropolis",
n_steps=25,
parallel=False,
start=None,
cores=None,
tune_scaling=True,
tune_steps=True,
scaling=1.0,
p_acc_rate=0.99,
threshold=0.5,
epsilon=1.0,
dist_func="absolute_error",
sum_stat=False,
progressbar=False,
model=None,
random_seed=-1,
):
"""
Sequential Monte Carlo based sampling
Parameters
----------
draws : int
The number of samples to draw from the posterior (i.e. last stage). And also the number of
independent chains. Defaults to 1000.
kernel : str
Kernel method for the SMC sampler. Available option are ``metropolis`` (default) and `ABC`.
Use `ABC` for likelihood free inference togheter with a ``pm.Simulator``.
n_steps : int
The number of steps of a Markov Chain. If `tune_steps == True` `n_steps` will be used for
the first stage and the number of steps of the other stages will be determined
automatically based on the acceptance rate and `p_acc_rate`.
The number of steps will never be larger than `n_steps`.
scaling : float
Factor applied to the proposal distribution i.e. the step size of the Markov Chain. Only
works if `tune_scaling == False` otherwise is determined automatically.
p_acc_rate : float
Used to compute `n_steps` when `tune_steps == True`. The higher the value of `p_acc_rate`
the higher the number of steps computed automatically. Defaults to 0.99. It should be
between 0 and 1.
The number of steps of each Markov Chain. If ``tune_steps == True`` ``n_steps`` will be used
for the first stage and for the others it will be determined automatically based on the
acceptance rate and `p_acc_rate`, the max number of steps is ``n_steps``.
parallel : bool
Distribute computations across cores if the number of cores is larger than 1.
Defaults to False.
start : dict, or array of dict
Starting point in parameter space. It should be a list of dict with length `chains`.
When None (default) the starting point is sampled from the prior distribution.
cores : int
The number of chains to run in parallel. If ``None`` (default), it will be automatically
set to the number of CPUs in the system.
tune_scaling : bool
Whether to compute the scaling automatically or not. Defaults to True
Whether to compute the scaling factor automatically or not. Defaults to True
tune_steps : bool
Whether to compute the number of steps automatically or not. Defaults to True
scaling : float
Scaling factor applied to the proposal distribution i.e. the step size of the Markov Chain.
If ``tune_scaling == True`` (defaults) it will be determined automatically at each stage.
p_acc_rate : float
Used to compute ``n_steps`` when ``tune_steps == True``. The higher the value of
``p_acc_rate`` the higher the number of steps computed automatically. Defaults to 0.99.
It should be between 0 and 1.
threshold : float
Determines the change of beta from stage to stage, i.e.indirectly the number of stages,
the higher the value of `threshold` the higher the number of stages. Defaults to 0.5.
It should be between 0 and 1.
parallel : bool
Distribute computations across cores if the number of cores is larger than 1
(see `pm.sample()` for details). Defaults to True.
model : :class:`pymc3.Model`
Optional model for sampling step. Defaults to None (taken from context).
epsilon : float
Standard deviation of the gaussian pseudo likelihood. Only works with `kernel = ABC`
dist_func : str
Distance function. Available options are ``absolute_error`` (default) and
``sum_of_squared_distance``. Only works with ``kernel = ABC``
sum_stat : bool
Whether to use or not a summary statistics. Defaults to False. Only works with
``kernel = ABC``
progressbar : bool
Flag for displaying a progress bar. Defaults to False.
model : Model (optional if in ``with`` context)).
random_seed : int
random seed
Notes
-----
SMC works by moving through successive stages. At each stage the inverse temperature \beta is
increased a little bit (starting from 0 up to 1). When \beta = 0 we have the prior distribution
and when \beta =1 we have the posterior distribution. So in more general terms we are always
computing samples from a tempered posterior that we can write as:
SMC works by moving through successive stages. At each stage the inverse temperature
:math:`\beta` is increased a little bit (starting from 0 up to 1). When :math:`\beta` = 0
we have the prior distribution and when :math:`\beta` =1 we have the posterior distribution.
So in more general terms we are always computing samples from a tempered posterior that we can
write as:
.. math::
@@ -76,10 +118,10 @@ class SMC:
A summary of the algorithm is:
1. Initialize :math:`\beta` at zero and stage at zero.
2. Generate N samples :math:`S_{\beta}` from the prior (because when :math `\beta = 0` the tempered posterior
is the prior).
3. Increase :math:`\beta` in order to make the effective sample size equals some predefined value
(we use :math:`Nt`, where :math:`t` is 0.5 by default).
2. Generate N samples :math:`S_{\beta}` from the prior (because when :math `\beta = 0` the
tempered posterior is the prior).
3. Increase :math:`\beta` in order to make the effective sample size equals some predefined
value (we use :math:`Nt`, where :math:`t` is 0.5 by default).
4. Compute a set of N importance weights W. The weights are computed as the ratio of the
likelihoods of a sample at stage i+1 and stage i.
5. Obtain :math:`S_{w}` by re-sampling according to W.
@@ -106,69 +148,20 @@ class SMC:
%282007%29133:7%28816%29>`__
"""

def __init__(
self,
n_steps=25,
scaling=1.0,
p_acc_rate=0.99,
tune_scaling=True,
tune_steps=True,
threshold=0.5,
parallel=True,
ABC=False,
epsilon=1,
dist_func="absolute_error",
sum_stat=False,
):

self.n_steps = n_steps
self.max_steps = n_steps
self.scaling = scaling
self.p_acc_rate = 1 - p_acc_rate
self.tune_scaling = tune_scaling
self.tune_steps = tune_steps
self.threshold = threshold
self.parallel = parallel
self.ABC = ABC
self.epsilon = epsilon
self.dist_func = dist_func
self.sum_stat = sum_stat


def sample_smc(
draws=5000, step=None, start=None, cores=None, progressbar=False, model=None, random_seed=-1
):
"""
Sequential Monte Carlo sampling
Parameters
----------
draws : int
The number of samples to draw from the posterior (i.e. last stage). And also the number of
independent Markov Chains. Defaults to 5000.
step : :class:`SMC`
SMC initialization object
start : dict, or array of dict
Starting point in parameter space. It should be a list of dict with length `chains`.
cores : int
The number of chains to run in parallel.
progressbar : bool
Flag for displaying a progress bar
model : pymc3 Model
optional if in `with` context
random_seed : int
random seed
"""
model = modelcontext(model)

if random_seed != -1:
np.random.seed(random_seed)

if cores is None:
cores = _cpu_count()

beta = 0
stage = 0
accepted = 0
acc_rate = 1.0
proposed = draws * step.n_steps
max_steps = n_steps
proposed = draws * n_steps
model.marginal_likelihood = 1
variables = inputvars(model.vars)
discrete = np.concatenate([[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in variables])
@@ -181,33 +174,29 @@ def sample_smc(

posterior, var_info = _initial_population(draws, model, variables, start)

if step.ABC:
if kernel.lower() == "abc":
warnings.warn(EXPERIMENTAL_WARNING)
simulator = model.observed_RVs[0]
likelihood_logp = PseudoLikelihood(
step.epsilon,
epsilon,
simulator.observations,
simulator.distribution.function,
model,
var_info,
step.dist_func,
step.sum_stat,
dist_func,
sum_stat,
)
else:
elif kernel.lower() == "metropolis":
likelihood_logp = logp_forw([model.datalogpt], variables, shared)

while beta < 1:

if step.parallel and cores > 1:
if parallel and cores > 1:
pool = mp.Pool(processes=cores)
results = pool.starmap(
likelihood_logp,
[(sample,) for sample in posterior],
)
results = pool.starmap(likelihood_logp, [(sample,) for sample in posterior])
else:
results = [likelihood_logp(sample) for sample in posterior]
likelihoods = np.array(results).squeeze()
beta, old_beta, weights, sj = calc_beta(beta, likelihoods, step.threshold)
beta, old_beta, weights, sj = calc_beta(beta, likelihoods, threshold)
model.marginal_likelihood *= sj
# resample based on plausibility weights (selection)
resampling_indexes = np.random.choice(np.arange(draws), size=draws, p=weights)
@@ -220,31 +209,39 @@ def sample_smc(

# compute scaling (optional) and number of Markov chains steps (optional), based on the
# acceptance rate of the previous stage
if (step.tune_scaling or step.tune_steps) and stage > 0:
_tune(acc_rate, proposed, step)
if (tune_scaling or tune_steps) and stage > 0:
scaling, n_steps = _tune(
acc_rate,
proposed,
tune_scaling,
tune_steps,
scaling,
n_steps,
max_steps,
p_acc_rate,
)

pm._log.info("Stage: {:d} Beta: {:.3f} Steps: {:d}".format(stage, beta, step.n_steps))
pm._log.info("Stage: {:d} Beta: {:.3f} Steps: {:d}".format(stage, beta, n_steps))
# Apply Metropolis kernel (mutation)
proposed = draws * step.n_steps
proposed = draws * n_steps
priors = np.array([prior_logp(sample) for sample in posterior]).squeeze()
tempered_logp = priors + likelihoods * beta
deltas = np.squeeze(proposal(step.n_steps) * step.scaling)
deltas = np.squeeze(proposal(n_steps) * scaling)

parameters = (
proposal,
step.scaling,
scaling,
accepted,
any_discrete,
all_discrete,
discrete,
step.n_steps,
n_steps,
prior_logp,
likelihood_logp,
beta,
step.ABC,
)

if step.parallel and cores > 1:
if parallel and cores > 1:
pool = mp.Pool(processes=cores)
results = pool.starmap(
metrop_kernel,

0 comments on commit 3746666

Please sign in to comment.
You can’t perform that action at this time.