Commit b6aae3d: Merge 43a8638 into 32a2eb7

ferrine committed Dec 19, 2016
2 parents 32a2eb7 + 43a8638
Showing 6 changed files with 735 additions and 3 deletions.
38 changes: 38 additions & 0 deletions pymc3/distributions/dist_math.py
@@ -10,6 +10,8 @@

from .special import gammaln, multigammaln

c = - 0.5 * np.log(2 * np.pi)


def bound(logp, *conditions):
"""
@@ -77,3 +79,39 @@ def i1(x):
x**9 / 1474560 + x**11 / 176947200 + x**13 / 29727129600,
np.e**x / (2 * np.pi * x)**0.5 * (1 - 3 / (8 * x) + 15 / (128 * x**2) + 315 / (3072 * x**3)
+ 14175 / (98304 * x**4)))


def sd2rho(sd):
"""sd -> rho
theano converter
mu + sd*e = mu + log(1+exp(rho))*e"""
return tt.log(tt.exp(sd) - 1)


def rho2sd(rho):
"""rho -> sd
theano converter
mu + sd*e = mu + log(1+exp(rho))*e"""
return tt.log1p(tt.exp(rho))
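
rho2sd is the softplus function log1p(exp(rho)) and sd2rho is its exact inverse, so the two converters round-trip. A minimal sanity check (a sketch, assuming NumPy inputs are lifted to Theano constants):

    import numpy as np
    sd = np.array([0.1, 1.0, 5.0])
    rho2sd(sd2rho(sd)).eval()  # -> approximately [0.1, 1.0, 5.0]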


def kl_divergence_normal_pair(mu1, mu2, sd1, sd2):
elemwise_kl = (tt.log(sd2/sd1) +
(sd1**2 + (mu1-mu2)**2)/(2.*sd2**2) -
0.5)
return tt.sum(elemwise_kl)


def kl_divergence_normal_pair3(mu1, mu2, rho1, rho2):
sd1, sd2 = rho2sd(rho1), rho2sd(rho2)
return kl_divergence_normal_pair(mu1, mu2, sd1, sd2)
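
Given the closed form above, KL(N(0, 2) || N(0, 1)) = log(1/2) + 4/2 - 1/2 ≈ 0.8069. A quick numeric check (a sketch; plain Python floats here stand in for Theano tensors):

    kl_divergence_normal_pair(0., 0., 2., 1.).eval()  # -> approximately 0.8069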


def log_normal(x, mean, std, eps=0.0):
std += eps
return c - tt.log(tt.abs_(std)) - (x - mean) ** 2 / (2 * std ** 2)


def log_normal3(x, mean, rho, eps=0.0):
std = rho2sd(rho)
return log_normal(x, mean, std, eps)
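
log_normal evaluates the Gaussian log-density using the module-level constant c = -0.5*log(2*pi), so it should match the textbook formula. A comparison sketch, assuming SciPy is available:

    from scipy import stats
    log_normal(1.0, 0.0, 2.0).eval()            # -> approximately -1.7371
    stats.norm.logpdf(1.0, loc=0.0, scale=2.0)  # -> same value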
4 changes: 4 additions & 0 deletions pymc3/math.py
@@ -22,3 +22,7 @@ def invlogit(x, eps=sys.float_info.epsilon):

def logit(p):
return tt.log(p / (1 - p))


def flatten_list(tensors):
return tt.concatenate([var.ravel() for var in tensors])
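
flatten_list ravels each tensor and concatenates the results into one flat vector, e.g. a 2x2 matrix and a length-3 vector flatten to 7 elements. A usage sketch with hypothetical variables a and b:

    import numpy as np
    import theano.tensor as tt
    a, b = tt.matrix('a'), tt.vector('b')
    flatten_list([a, b]).eval({a: np.eye(2), b: np.ones(3)}).shape  # -> (7,)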
238 changes: 238 additions & 0 deletions pymc3/step_methods/nuts.py
@@ -0,0 +1,238 @@
from .quadpotential import quad_potential
from .arraystep import ArrayStepShared, ArrayStep, SamplerHist, Competence
from ..model import modelcontext, Point
from ..vartypes import continuous_types
from numpy import exp, log, array
from numpy.random import uniform
from .hmc import leapfrog, Hamiltonian, bern, energy
from ..tuning import guess_scaling
import theano
from ..theanof import (make_shared_replacements, join_nonshared_inputs, CallableTensor,
gradient, inputvars)
import theano.tensor as tt

__all__ = ['NUTS']


class NUTS(ArrayStepShared):
"""
Automatically tunes the step size and adjusts the number of steps for good performance.
Implements "Algorithm 6: Efficient No-U-Turn Sampler with Dual Averaging" in:
Hoffman, Matthew D., & Gelman, Andrew. (2011).
The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo.
"""
default_blocked = True

def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, state=None,
Emax=1000,
target_accept=0.8,
gamma=0.05,
k=0.75,
t0=10,
model=None,
profile=False,
mode=None, **kwargs):
"""
Parameters
----------
vars : list of Theano variables, default continuous vars
scaling : array_like, ndim = {1,2} or point
Scaling for momentum distribution. 1d arrays are interpreted as a matrix diagonal.
step_scale : float, default=.25
Size of steps to take, automatically scaled down by 1/n**(1/4)
is_cov : bool, default=False
Treat scaling as a covariance matrix/vector if True, else treat it as a precision matrix/vector
state
state to start from
Emax : float, default 1000
maximum energy change allowed before a trajectory is treated as divergent
target_accept : float (0,1), default .8
target average acceptance probability between the final branch and the initial position
gamma : float, default .05
shrinkage parameter for dual-averaging step-size adaptation
k : float (.5,1), default .75
exponent controlling how quickly step-size adaptation decays
t0 : int, default 10
slows initial adaptation
model : Model
profile : bool or ProfileStats
sets the functions to be profiled
mode : string or `Mode` instance.
compilation mode passed to Theano functions
"""
model = modelcontext(model)

if vars is None:
vars = model.cont_vars
vars = inputvars(vars)

if scaling is None:
scaling = model.test_point

if isinstance(scaling, dict):
scaling = guess_scaling(
Point(scaling, model=model), model=model, vars=vars)

n = scaling.shape[0]

self.step_size = step_scale / n**(1 / 4.)

self.potential = quad_potential(scaling, is_cov, as_cov=False)

if state is None:
state = SamplerHist()
self.state = state
self.Emax = Emax

self.target_accept = target_accept
self.gamma = gamma
self.t0 = t0
self.k = k

self.Hbar = 0
self.u = log(self.step_size * 10)
self.m = 1
self.mode = mode

shared = make_shared_replacements(vars, model)

def create_hamiltonian(vars, shared, model):
dlogp = gradient(model.logpt, vars)
(logp, dlogp), q = join_nonshared_inputs(
[model.logpt, dlogp], vars, shared)
logp = CallableTensor(logp)
dlogp = CallableTensor(dlogp)

return Hamiltonian(logp, dlogp, self.potential), q

def create_energy_func(q):
p = tt.dvector('p')
p.tag.test_value = q.tag.test_value
E0 = energy(self.H, q, p)
E0_func = theano.function([q, p], E0, mode=self.mode)
E0_func.trust_input = True

return E0_func

self.H, q = create_hamiltonian(vars, shared, model)
self.compute_energy = create_energy_func(q)

self.leapfrog1_dE = leapfrog1_dE(self.H, q, profile=profile,
mode=self.mode)

super(NUTS, self).__init__(vars, shared, **kwargs)

def astep(self, q0):
leapfrog = self.leapfrog1_dE
Emax = self.Emax
e = self.step_size

p0 = self.potential.random()
E0 = self.compute_energy(q0, p0)

u = uniform()
q = qn = qp = q0
p = pn = pp = p0

n, s, j = 1, 1, 0

while s == 1:
v = bern(.5) * 2 - 1

if v == -1:
qn, pn, _, _, q1, n1, s1, a, na = buildtree(
leapfrog, qn, pn, u, v, j, e, Emax, E0)
else:
_, _, qp, pp, q1, n1, s1, a, na = buildtree(
leapfrog, qp, pp, u, v, j, e, Emax, E0)

if s1 == 1 and bern(min(1, n1 * 1. / n)):
q = q1

n = n + n1

span = qp - qn
s = s1 * (span.dot(pn) >= 0) * (span.dot(pp) >= 0)
j = j + 1

p = -p
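
# Dual-averaging step-size adaptation (Hoffman & Gelman, Algorithm 6):
# Hbar averages the gap between target_accept and the observed acceptance
# rate, and the log step size is shrunk toward self.u = log(10 * initial
# step size) in proportion to Hbar; t0 damps the earliest updates.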

w = 1. / (self.m + self.t0)
self.Hbar = (1 - w) * self.Hbar + w * \
(self.target_accept - a * 1. / na)

self.step_size = exp(self.u - (self.m**self.k / self.gamma) * self.Hbar)
self.m += 1

return q

@staticmethod
def competence(var):
if var.dtype in continuous_types:
return Competence.IDEAL
return Competence.INCOMPATIBLE


def buildtree(leapfrog1_dE, q, p, u, v, j, e, Emax, E0):
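# The doubling procedure of Algorithm 6: the base case (j == 0) takes a
# single leapfrog step of size v*e; n1 counts whether the slice variable u
# admits the new point, and s1 flags divergence via the Emax threshold.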
if j == 0:
q1, p1, dE = leapfrog1_dE(q, p, array(v * e), E0)

n1 = int(log(u) + dE <= 0)
s1 = int(log(u) + dE < Emax)
return q1, p1, q1, p1, q1, n1, s1, min(1, exp(-dE)), 1
else:
qn, pn, qp, pp, q1, n1, s1, a1, na1 = buildtree(
leapfrog1_dE, q, p, u, v, j - 1, e, Emax, E0)
if s1 == 1:
if v == -1:
qn, pn, _, _, q11, n11, s11, a11, na11 = buildtree(
leapfrog1_dE, qn, pn, u, v, j - 1, e, Emax, E0)
else:
_, _, qp, pp, q11, n11, s11, a11, na11 = buildtree(
leapfrog1_dE, qp, pp, u, v, j - 1, e, Emax, E0)

if bern(n11 * 1. / (max(n1 + n11, 1))):
q1 = q11

a1 = a1 + a11
na1 = na1 + na11

span = qp - qn
s1 = s11 * (span.dot(pn) >= 0) * (span.dot(pp) >= 0)
n1 = n1 + n11
return qn, pn, qp, pp, q1, n1, s1, a1, na1


def leapfrog1_dE(H, q, profile, mode):
"""Computes a theano function that computes one leapfrog step and the energy difference between the beginning and end of the trajectory.
Parameters
----------
H : Hamiltonian
q : theano.tensor
profile : Boolean
Returns
-------
theano function which returns
q_new, p_new, dE
"""
p = tt.dvector('p')
p.tag.test_value = q.tag.test_value

e = tt.dscalar('e')
e.tag.test_value = 1

q1, p1 = leapfrog(H, q, p, 1, e)
E = energy(H, q1, p1)

E0 = tt.dscalar('E0')
E0.tag.test_value = 1

dE = E - E0

f = theano.function([q, p, e, E0], [q1, p1, dE],
profile=profile, mode=mode)
f.trust_input = True
return f
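
A minimal usage sketch for the sampler (assuming the pymc3 sampling API of this era; the model below is hypothetical, not part of the diff):

    import pymc3 as pm
    with pm.Model():
        x = pm.Normal('x', mu=0, sd=1)
        trace = pm.sample(500, step=pm.NUTS())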
60 changes: 60 additions & 0 deletions pymc3/tests/test_replacements_mean_field.py
@@ -0,0 +1,60 @@
import unittest
import numpy as np
import theano
import theano.tensor as tt
from pymc3 import Model, Normal
from pymc3.variational.replacements import MeanField
from . import models


class TestMeanField(unittest.TestCase):
def test_elbo(self):
mu0 = 1.5
sigma = 1.0
y_obs = np.array([1.6, 1.4])

post_mu = np.array([1.88])
post_sd = np.array([1])
# Create a model for test
with Model() as model:
mu = Normal('mu', mu=mu0, sd=sigma)
Normal('y', mu=mu, sd=1, observed=y_obs)

# Create variational gradient tensor
mean_field = MeanField(model)
elbo = mean_field.elbo(samples=10000)

mean_field.shared_params['mu'].set_value(post_mu)
mean_field.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))

f = theano.function([], elbo.mean())
elbo_mc = f()

# Exact value
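# (closed-form ELBO for this conjugate normal-normal model with unit
# observation noise, evaluated at the approximation N(post_mu, post_sd))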
elbo_true = (-0.5 * (
3 + 3 * post_mu**2 - 2 * (y_obs[0] + y_obs[1] + mu0) * post_mu +
y_obs[0]**2 + y_obs[1]**2 + mu0**2 + 3 * np.log(2 * np.pi)) +
0.5 * (np.log(2 * np.pi) + 1))
np.testing.assert_allclose(elbo_mc, elbo_true, rtol=0, atol=1e-1)

def test_vary_samples(self):
_, model, _ = models.simple_model()
i = tt.iscalar('i')
i.tag.test_value = 1
with model:
mf = MeanField()
elbo = mf.elbo(i)
elbos = theano.function([i], elbo)
self.assertEqual(elbos(1).shape[0], 1)
self.assertEqual(elbos(10).shape[0], 10)

def test_vars_view(self):
_, model, _ = models.multidimensional_model()
with model:
mf = MeanField()
posterior = mf.posterior(mf.initial(10))
x_sampled = mf.view_from(posterior, 'x').eval()
self.assertEqual(x_sampled.shape, (10,) + model['x'].dshape)

if __name__ == '__main__':
unittest.main()
