From 15ba923e29ea7c5eb2276fc0a526e9eda2f883b2 Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Sat, 7 Jan 2017 12:56:25 -0300 Subject: [PATCH 1/3] initialize chains from estimated posterior samples --- pymc3/sampling.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 6c80b49472..ac68d2d680 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -100,10 +100,14 @@ def sample(draws, step=None, init='advi', n_init=200000, start=None, automatically (defaults to None). init : str {'advi', 'advi_map', 'map', 'nuts', None} Initialization method to use. - * advi : Run ADVI to estimate posterior mean and diagonal covariance matrix. + * advi : Run ADVI to estimate starting points and diagonal covariance + matrix. If njobs > 1 it will sample starting points from the estimated + posterior, otherwise it will use the estimated posterior mean. * advi_map: Initialize ADVI with MAP and use MAP as starting point. * map : Use the MAP as starting point. - * nuts : Run NUTS and estimate posterior mean and covariance matrix. + * nuts : Run NUTS to estimate starting points and covariance matrix. If + njobs > 1 it will sample starting points from the estimated posterior, + otherwise it will use the estimated posterior mean. * None : Do not initialize. n_init : int Number of iterations of initializer @@ -146,7 +150,7 @@ def sample(draws, step=None, init='advi', n_init=200000, start=None, if step is None and init is not None and pm.model.all_continuous(model.vars): # By default, use NUTS sampler pm._log.info('Auto-assigning NUTS sampler...') - start_, step = init_nuts(init=init, n_init=n_init, model=model) + start_, step = init_nuts(init=init, njobs=njobs, n_init=n_init, model=model) if start is None: start = start_ else: @@ -393,7 +397,7 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None, random_see return {k: np.asarray(v) for k, v in ppc.items()} -def init_nuts(init='advi', n_init=500000, model=None, **kwargs): +def init_nuts(init='advi', njobs=1, n_init=500000, model=None, **kwargs): """Initialize and sample from posterior of a continuous model. This is a convenience function. NUTS convergence and sampling speed is extremely @@ -409,6 +413,8 @@ def init_nuts(init='advi', n_init=500000, model=None, **kwargs): * advi_map: Initialize ADVI with MAP and use MAP as starting point. * map : Use the MAP as starting point. * nuts : Run NUTS and estimate posterior mean and covariance matrix. + njobs : int + Number of parallel jobs to start. n_init : int Number of iterations of initializer If 'advi', number of iterations, if 'metropolis', number of draws. @@ -432,7 +438,10 @@ def init_nuts(init='advi', n_init=500000, model=None, **kwargs): if init == 'advi': v_params = pm.variational.advi(n=n_init) - start = pm.variational.sample_vp(v_params, 1, progressbar=False, hide_transformed=False)[0] + if njobs > 1: + start = pm.variational.sample_vp(v_params, njobs, progressbar=False, hide_transformed=False) + else: + start = v_params.means cov = np.power(model.dict_to_array(v_params.stds), 2) elif init == 'advi_map': start = pm.find_MAP() @@ -441,12 +450,13 @@ def init_nuts(init='advi', n_init=500000, model=None, **kwargs): elif init == 'map': start = pm.find_MAP() cov = pm.find_hessian(point=start) - elif init == 'nuts': - init_trace = pm.sample(step=pm.NUTS(), draws=n_init) - cov = pm.trace_cov(init_trace[n_init//2:]) - - start = {varname: np.mean(init_trace[varname]) for varname in init_trace.varnames} + init_trace = pm.sample(step=pm.NUTS(), draws=n_init)[n_init//2:] + cov = np.atleast_1d(pm.trace_cov(init_trace)) + if njobs > 1: + start = np.random.choice(init_trace, njobs) + else: + start = {varname: np.mean(init_trace[varname]) for varname in init_trace.varnames} else: raise NotImplemented('Initializer {} is not supported.'.format(init)) From e13d1868073888bc64d80efd8a68e54f0b5d1c97 Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Sun, 8 Jan 2017 14:42:05 -0300 Subject: [PATCH 2/3] add random_seed to fix starting points --- pymc3/sampling.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index ac68d2d680..20c3a07cda 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -146,11 +146,12 @@ def sample(draws, step=None, init='advi', n_init=200000, start=None, MultiTrace object with access to sampling values """ model = modelcontext(model) + if step is None and init is not None and pm.model.all_continuous(model.vars): # By default, use NUTS sampler pm._log.info('Auto-assigning NUTS sampler...') - start_, step = init_nuts(init=init, njobs=njobs, n_init=n_init, model=model) + start_, step = init_nuts(init=init, njobs=njobs, n_init=n_init, model=model, random_seed=random_seed) if start is None: start = start_ else: @@ -397,7 +398,8 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None, random_see return {k: np.asarray(v) for k, v in ppc.items()} -def init_nuts(init='advi', njobs=1, n_init=500000, model=None, **kwargs): +def init_nuts(init='advi', njobs=1, n_init=500000, model=None, + random_seed=-1, **kwargs): """Initialize and sample from posterior of a continuous model. This is a convenience function. NUTS convergence and sampling speed is extremely @@ -436,27 +438,30 @@ def init_nuts(init='advi', njobs=1, n_init=500000, model=None, **kwargs): pm._log.info('Initializing NUTS using {}...'.format(init)) + random_seed = int(np.atleast_1d(random_seed)[0]) + if init == 'advi': - v_params = pm.variational.advi(n=n_init) - if njobs > 1: - start = pm.variational.sample_vp(v_params, njobs, progressbar=False, hide_transformed=False) - else: - start = v_params.means + v_params = pm.variational.advi(n=n_init, random_seed=random_seed) + start = pm.variational.sample_vp(v_params, njobs, progressbar=False, + hide_transformed=False, random_seed=random_seed) + if njobs == 1: + start = start[0] cov = np.power(model.dict_to_array(v_params.stds), 2) elif init == 'advi_map': start = pm.find_MAP() - v_params = pm.variational.advi(n=n_init, start=start) + v_params = pm.variational.advi(n=n_init, start=start, + random_seed=random_seed) cov = np.power(model.dict_to_array(v_params.stds), 2) elif init == 'map': start = pm.find_MAP() cov = pm.find_hessian(point=start) elif init == 'nuts': - init_trace = pm.sample(step=pm.NUTS(), draws=n_init)[n_init//2:] + init_trace = pm.sample(step=pm.NUTS(), draws=n_init, + random_seed=random_seed)[n_init//2:] cov = np.atleast_1d(pm.trace_cov(init_trace)) - if njobs > 1: - start = np.random.choice(init_trace, njobs) - else: - start = {varname: np.mean(init_trace[varname]) for varname in init_trace.varnames} + start = np.random.choice(init_trace, njobs) + if njobs == 1: + start = start[0] else: raise NotImplemented('Initializer {} is not supported.'.format(init)) From a2baf670e8859f017efce8dd4e845ecc398fcd23 Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Wed, 18 Jan 2017 10:40:48 -0300 Subject: [PATCH 3/3] fix indentation --- pymc3/sampling.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 20c3a07cda..69931ca21c 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -399,7 +399,7 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None, random_see def init_nuts(init='advi', njobs=1, n_init=500000, model=None, - random_seed=-1, **kwargs): + random_seed=-1, **kwargs): """Initialize and sample from posterior of a continuous model. This is a convenience function. NUTS convergence and sampling speed is extremely @@ -443,21 +443,22 @@ def init_nuts(init='advi', njobs=1, n_init=500000, model=None, if init == 'advi': v_params = pm.variational.advi(n=n_init, random_seed=random_seed) start = pm.variational.sample_vp(v_params, njobs, progressbar=False, - hide_transformed=False, random_seed=random_seed) + hide_transformed=False, + random_seed=random_seed) if njobs == 1: start = start[0] cov = np.power(model.dict_to_array(v_params.stds), 2) elif init == 'advi_map': start = pm.find_MAP() - v_params = pm.variational.advi(n=n_init, start=start, - random_seed=random_seed) + v_params = pm.variational.advi(n=n_init, start=start, + random_seed=random_seed) cov = np.power(model.dict_to_array(v_params.stds), 2) elif init == 'map': start = pm.find_MAP() cov = pm.find_hessian(point=start) elif init == 'nuts': init_trace = pm.sample(step=pm.NUTS(), draws=n_init, - random_seed=random_seed)[n_init//2:] + random_seed=random_seed)[n_init // 2:] cov = np.atleast_1d(pm.trace_cov(init_trace)) start = np.random.choice(init_trace, njobs) if njobs == 1: