Skip to content

Commit

Permalink
Implement quadpotential adaptation
Browse files Browse the repository at this point in the history
  • Loading branch information
aseyboldt committed Jul 16, 2017
1 parent 971db07 commit a4f4dc3
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 34 deletions.
35 changes: 29 additions & 6 deletions pymc3/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
BinaryGibbsMetropolis, CategoricalGibbsMetropolis,
Slice, CompoundStep)
from .plots.traceplot import traceplot
from .util import update_start_vals
from .util import is_transformed_name, get_untransformed_name, update_start_vals
from pymc3.step_methods.hmc import quadpotential
from tqdm import tqdm

import sys
Expand Down Expand Up @@ -236,8 +237,6 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None,
pm._log.info('Auto-assigning NUTS sampler...')
args = step_kwargs if step_kwargs is not None else {}
args = args.get('nuts', {})
if init == 'auto':
init = 'ADVI'
start_, step = init_nuts(init=init, njobs=njobs, n_init=n_init,
model=model, random_seed=random_seed,
progressbar=progressbar, **args)
Expand Down Expand Up @@ -654,8 +653,9 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None,
Parameters
----------
init : str {'ADVI', 'ADVI_MAP', 'MAP', 'NUTS'}
init : str {'auto', 'ADVI', 'ADVI_MAP', 'MAP', 'NUTS'}
Initialization method to use.
* auto : TODO
* ADVI : Run ADVI to estimate posterior mean and diagonal covariance matrix.
* ADVI_MAP: Initialize ADVI with MAP and use MAP as starting point.
* MAP : Use the MAP as starting point.
Expand Down Expand Up @@ -691,7 +691,27 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None,
pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff='absolute'),
pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff='relative'),
]
if init == 'advi':
if init == 'auto':
init = 'advi+adapt_diag'

if init == 'advi+adapt_diag':
approx = pm.fit(
random_seed=random_seed,
n=n_init, method='advi', model=model,
callbacks=cb,
progressbar=progressbar,
obj_optimizer=pm.adagrad_window,
)
start = approx.sample(draws=njobs)
stds = approx.gbij.rmap(approx.std.eval())
cov = model.dict_to_array(stds) ** 2
mean = approx.gbij.rmap(approx.mean.get_value())
mean = model.dict_to_array(mean)
weight = 30
potential = quadpotential.QuadPotentialDiagAdapt(mean, cov, weight)
if njobs == 1:
start = start[0]
elif init == 'advi':
approx = pm.fit(
random_seed=random_seed,
n=n_init, method='advi', model=model,
Expand All @@ -702,6 +722,7 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None,
start = approx.sample(draws=njobs)
stds = approx.gbij.rmap(approx.std.eval())
cov = model.dict_to_array(stds) ** 2
potential = quadpotential.QuadPotentialDiag(cov)
if njobs == 1:
start = start[0]
elif init == 'advi_map':
Expand All @@ -717,6 +738,7 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None,
start = approx.sample(draws=njobs)
stds = approx.gbij.rmap(approx.std.eval())
cov = model.dict_to_array(stds) ** 2
potential = quadpotential.QuadPotentialDiag(cov)
if njobs == 1:
start = start[0]
elif init == 'map':
Expand All @@ -728,11 +750,12 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None,
random_seed=random_seed)
cov = np.atleast_1d(pm.trace_cov(init_trace))
start = np.random.choice(init_trace, njobs)
potential = quadpotential.QuadPotentialFull(cov)
if njobs == 1:
start = start[0]
else:
raise NotImplementedError('Initializer {} is not supported.'.format(init))

step = pm.NUTS(scaling=cov, is_cov=True, **kwargs)
step = pm.NUTS(potential=potential, **kwargs)

return start, step
2 changes: 1 addition & 1 deletion pymc3/step_methods/hmc/base_hmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False,
if potential is not None:
self.potential = potential
else:
self.potential = quad_potential(scaling, is_cov, as_cov=False)
self.potential = quad_potential(scaling, is_cov)

shared = make_shared_replacements(vars, model)
if theano_kwargs is None:
Expand Down
23 changes: 20 additions & 3 deletions pymc3/step_methods/hmc/nuts.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ class NUTS(BaseHMC):

def __init__(self, vars=None, Emax=1000, target_accept=0.8,
gamma=0.05, k=0.75, t0=10, adapt_step_size=True,
max_treedepth=10, on_error='summary', **kwargs):
max_treedepth=10, on_error='summary',
adapt_mass_matrix=True, early_max_treedepth=8,
**kwargs):
R"""
Parameters
----------
Expand All @@ -113,9 +115,14 @@ def __init__(self, vars=None, Emax=1000, target_accept=0.8,
adapt_step_size : bool, default=True
Whether step size adaptation should be enabled. If this is
disabled, `k`, `t0`, `gamma` and `target_accept` are ignored.
adapt_mass_matrix : bool, default=True
Whether to adapt the mass matrix during tuning if the
potential supports tuning.
max_treedepth : int, default=10
The maximum tree depth. Trajectories are stoped when this
depth is reached.
early_max_treedepth : int, default=8
The maximum tree depth during tuning.
integrator : str, default "leapfrog"
The integrator to use for the trajectories. One of "leapfrog",
"two-stage" or "three-stage". The second two can increase
Expand Down Expand Up @@ -161,7 +168,9 @@ def __init__(self, vars=None, Emax=1000, target_accept=0.8,
self.log_step_size_bar = 0
self.m = 1
self.adapt_step_size = adapt_step_size
self.adapt_mass_matrix = adapt_mass_matrix
self.max_treedepth = max_treedepth
self.early_max_treedepth = early_max_treedepth

self.tune = True
self.report = NutsReport(on_error, max_treedepth, target_accept)
Expand All @@ -170,7 +179,7 @@ def astep(self, q0):
p0 = self.potential.random()
v0 = self.compute_velocity(p0)
start_energy = self.compute_energy(q0, p0)
if not np.isfinite(start_energy):
if not np.all(np.isfinite(start_energy)):
raise ValueError('Bad initial energy: %s. The model '
'might be misspecified.' % start_energy)

Expand All @@ -181,10 +190,15 @@ def astep(self, q0):
else:
step_size = np.exp(self.log_step_size_bar)

if self.tune:
max_treedepth = self.early_max_treedepth
else:
max_treedepth = self.max_treedepth

start = Edge(q0, p0, v0, self.dlogp(q0), start_energy)
tree = _Tree(len(p0), self.leapfrog, start, step_size, self.Emax)

for _ in range(self.max_treedepth):
for _ in range(max_treedepth):
direction = logbern(np.log(0.5)) * 2 - 1
diverging, turning = tree.extend(direction)
q = tree.proposal.q
Expand All @@ -205,6 +219,9 @@ def astep(self, q0):

self.m += 1

if self.tune and self.adapt_mass_matrix:
self.potential.adapt(q)

stats = {
'step_size': step_size,
'tune': self.tune,
Expand Down
147 changes: 124 additions & 23 deletions pymc3/step_methods/hmc/quadpotential.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@

import numpy as np

__all__ = ['quad_potential', 'ElemWiseQuadPotential', 'QuadPotential',
'QuadPotential_Inv', 'isquadpotential']
__all__ = ['quad_potential', 'QuadPotentialDiag', 'QuadPotentialFull',
'QuadPotentialFullInv', 'QuadPotentialDiagAdapt', 'isquadpotential']


def quad_potential(C, is_cov, as_cov):
def quad_potential(C, is_cov):
"""
Parameters
----------
Expand All @@ -22,9 +22,6 @@ def quad_potential(C, is_cov, as_cov):
vector treated as diagonal matrix.
is_cov : Boolean
whether C is provided as a covariance matrix or hessian
as_cov : Boolean
whether the random draws should come from the normal dist
using the covariance matrix above or the inverse
Returns
-------
Expand All @@ -33,22 +30,22 @@ def quad_potential(C, is_cov, as_cov):
if issparse(C):
if not chol_available:
raise ImportError("Sparse mass matrices require scikits.sparse")
if is_cov != as_cov:
return QuadPotential_Sparse(C)
if is_cov:
return QuadPotentialSparse(C)
else:
raise ValueError("Sparse precission matrices are not supported")

partial_check_positive_definite(C)
if C.ndim == 1:
if is_cov != as_cov:
return ElemWiseQuadPotential(C)
if is_cov:
return QuadPotentialDiag(C)
else:
return ElemWiseQuadPotential(1. / C)
return QuadPotentialDiag(1. / C)
else:
if is_cov != as_cov:
return QuadPotential(C)
if is_cov:
return QuadPotentialFull(C)
else:
return QuadPotential_Inv(C)
return QuadPotentialFullInv(C)


def partial_check_positive_definite(C):
Expand All @@ -65,21 +62,124 @@ def partial_check_positive_definite(C):


class PositiveDefiniteError(ValueError):

def __init__(self, msg, idx):
self.idx = idx
self.msg = msg

def __str__(self):
return "Scaling is not positive definite. " + self.msg + ". Check indexes " + str(self.idx)
return ("Scaling is not positive definite: %s. Check indexes %s."
% (self.msg, self.idx))


class QuadPotential(object):
def velocity(self, x):
raise NotImplementedError('Abstract method')

def energy(self, x):
raise NotImplementedError('Abstract method')

def random(self, x):
raise NotImplementedError('Abstract method')

def isquadpotential(o):
return all(hasattr(o, attr) for attr in ["velocity", "random", "energy"])
def adapt(self, sample):
pass


class ElemWiseQuadPotential(object):
def isquadpotential(value):
return isinstance(value, QuadPotential)


class QuadPotentialDiagAdapt(QuadPotential):
def __init__(self, initial_mean=None, initial_diag=None, initial_weight=0):
if initial_diag.ndim != 1:
raise ValueError('Initial diagonal must be one-dimensional.')

self._n = len(initial_diag)
self._var = np.array(initial_diag, dtype=theano.config.floatX, copy=True)
self._var_theano = theano.shared(self._var)
self._stds = np.sqrt(initial_diag)
self._inv_stds = floatX(1.) / self._stds
self._weight_var = WeightedVariance(
self._n, initial_mean, initial_diag, initial_weight)
self._weight_var2 = WeightedVariance(self._n)
self._n_samples = 0

def velocity(self, x):
return self._var_theano * x

def energy(self, x):
return 0.5 * x.dot(self._var_theano * x)

def random(self):
vals = floatX(normal(size=self._n))
return self._inv_stds * vals

def _update_from_weightvar(self, weightvar):
weightvar.current_variance(out=self._var)
np.sqrt(self._var, out=self._stds)
np.divide(1, self._stds, out=self._inv_stds)
self._var_theano.set_value(self._var)

def adapt(self, sample):
weight = 1
if self._n_samples < 200:
self._weight_var.add_sample(sample, weight)
self._update_from_weightvar(self._weight_var)
elif self._n_samples < 400:
self._weight_var.add_sample(sample, weight)
self._weight_var2.add_sample(sample, weight)
self._update_from_weightvar(self._weight_var)
else:
self._weight_var2.add_sample(sample, weight)
self._update_from_weightvar(self._weight_var2)

self._n_samples += 1


class WeightedVariance(object):
def __init__(self, nelem, initial_mean=None, initial_variance=None,
initial_weight=0):
self.w_sum = initial_weight
self.w_sum2 = initial_weight ** 2
if initial_mean is None:
self.mean = np.zeros(nelem, dtype='d')
else:
self.mean = np.array(initial_mean, dtype='d', copy=True)
if initial_variance is None:
self.raw_var = np.zeros(nelem, dtype='d')
else:
self.raw_var = np.array(initial_variance, dtype='d', copy=True)

self.raw_var[:] *= self.w_sum

if self.raw_var.shape != (nelem,):
raise ValueError('Invalid shape for initial variance.')
if self.mean.shape != (nelem,):
raise ValueError('Invalid shape for initial mean.')

def add_sample(self, x, weight):
x = np.asarray(x)
self.w_sum += weight
self.w_sum2 += weight * weight
prop = weight / self.w_sum
old_diff = x - self.mean
self.mean[:] += prop * old_diff
new_diff = x - self.mean
self.raw_var[:] += weight * old_diff * new_diff

def current_variance(self, out=None):
if self.w_sum == 0:
raise ValueError('Can not compute variance for empty set of samples.')
if out is not None:
return np.divide(self.raw_var, self.w_sum, out=out)
else:
return floatX(self.raw_var / self.w_sum)

def current_mean(self):
return floatX(self.mean.copy())


class QuadPotentialDiag(QuadPotential):
def __init__(self, v):
v = floatX(v)
s = v ** .5
Expand All @@ -98,7 +198,7 @@ def energy(self, x):
return .5 * x.dot(self.v * x)


class QuadPotential_Inv(object):
class QuadPotentialFullInv(QuadPotential):

def __init__(self, A):
self.L = floatX(scipy.linalg.cholesky(A, lower=True))
Expand All @@ -117,7 +217,7 @@ def energy(self, x):
return .5 * L1x.T.dot(L1x)


class QuadPotential(object):
class QuadPotentialFull(QuadPotential):

def __init__(self, A):
self.A = floatX(A)
Expand All @@ -135,19 +235,20 @@ def energy(self, x):

__call__ = random


try:
import sksparse.cholmod as cholmod
chol_available = True
except ImportError:
chol_available = False

if chol_available:
__all__ += ['QuadPotential_Sparse']
__all__ += ['QuadPotentialSparse']

import theano
import theano.sparse

class QuadPotential_Sparse(object):
class QuadPotentialSparse(QuadPotential):
def __init__(self, A):
self.A = A
self.size = A.shape[0]
Expand Down

0 comments on commit a4f4dc3

Please sign in to comment.