diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 6c80b49472..69931ca21c 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -100,10 +100,14 @@ def sample(draws, step=None, init='advi', n_init=200000, start=None, automatically (defaults to None). init : str {'advi', 'advi_map', 'map', 'nuts', None} Initialization method to use. - * advi : Run ADVI to estimate posterior mean and diagonal covariance matrix. + * advi : Run ADVI to estimate starting points and diagonal covariance + matrix. If njobs > 1 it will sample starting points from the estimated + posterior, otherwise it will use the estimated posterior mean. * advi_map: Initialize ADVI with MAP and use MAP as starting point. * map : Use the MAP as starting point. - * nuts : Run NUTS and estimate posterior mean and covariance matrix. + * nuts : Run NUTS to estimate starting points and covariance matrix. If + njobs > 1 it will sample starting points from the estimated posterior, + otherwise it will use the estimated posterior mean. * None : Do not initialize. n_init : int Number of iterations of initializer @@ -142,11 +146,12 @@ def sample(draws, step=None, init='advi', n_init=200000, start=None, MultiTrace object with access to sampling values """ model = modelcontext(model) + if step is None and init is not None and pm.model.all_continuous(model.vars): # By default, use NUTS sampler pm._log.info('Auto-assigning NUTS sampler...') - start_, step = init_nuts(init=init, n_init=n_init, model=model) + start_, step = init_nuts(init=init, njobs=njobs, n_init=n_init, model=model, random_seed=random_seed) if start is None: start = start_ else: @@ -393,7 +398,8 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None, random_see return {k: np.asarray(v) for k, v in ppc.items()} -def init_nuts(init='advi', n_init=500000, model=None, **kwargs): +def init_nuts(init='advi', njobs=1, n_init=500000, model=None, + random_seed=-1, **kwargs): """Initialize and sample from posterior of a continuous model. This is a convenience function. NUTS convergence and sampling speed is extremely @@ -409,6 +415,8 @@ def init_nuts(init='advi', n_init=500000, model=None, **kwargs): * advi_map: Initialize ADVI with MAP and use MAP as starting point. * map : Use the MAP as starting point. * nuts : Run NUTS and estimate posterior mean and covariance matrix. + njobs : int + Number of parallel jobs to start. n_init : int Number of iterations of initializer If 'advi', number of iterations, if 'metropolis', number of draws. @@ -430,23 +438,31 @@ def init_nuts(init='advi', n_init=500000, model=None, **kwargs): pm._log.info('Initializing NUTS using {}...'.format(init)) + random_seed = int(np.atleast_1d(random_seed)[0]) + if init == 'advi': - v_params = pm.variational.advi(n=n_init) - start = pm.variational.sample_vp(v_params, 1, progressbar=False, hide_transformed=False)[0] + v_params = pm.variational.advi(n=n_init, random_seed=random_seed) + start = pm.variational.sample_vp(v_params, njobs, progressbar=False, + hide_transformed=False, + random_seed=random_seed) + if njobs == 1: + start = start[0] cov = np.power(model.dict_to_array(v_params.stds), 2) elif init == 'advi_map': start = pm.find_MAP() - v_params = pm.variational.advi(n=n_init, start=start) + v_params = pm.variational.advi(n=n_init, start=start, + random_seed=random_seed) cov = np.power(model.dict_to_array(v_params.stds), 2) elif init == 'map': start = pm.find_MAP() cov = pm.find_hessian(point=start) - elif init == 'nuts': - init_trace = pm.sample(step=pm.NUTS(), draws=n_init) - cov = pm.trace_cov(init_trace[n_init//2:]) - - start = {varname: np.mean(init_trace[varname]) for varname in init_trace.varnames} + init_trace = pm.sample(step=pm.NUTS(), draws=n_init, + random_seed=random_seed)[n_init // 2:] + cov = np.atleast_1d(pm.trace_cov(init_trace)) + start = np.random.choice(init_trace, njobs) + if njobs == 1: + start = start[0] else: raise NotImplemented('Initializer {} is not supported.'.format(init))