Merge 43a8638 into 32a2eb7

pymc-devs · Dec 19, 2016 · b6aae3d · b6aae3d
2 parents 32a2eb7 + 43a8638
commit b6aae3d
Show file tree

Hide file tree

Showing 6 changed files with 735 additions and 3 deletions.
diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py
@@ -10,6 +10,8 @@
 
 from .special import gammaln, multigammaln
 
+c = - 0.5 * np.log(2 * np.pi)
+
 
 def bound(logp, *conditions):
     """
@@ -77,3 +79,39 @@ def i1(x):
                      x**9 / 1474560 + x**11 / 176947200 + x**13 / 29727129600,
                      np.e**x / (2 * np.pi * x)**0.5 * (1 - 3 / (8 * x) + 15 / (128 * x**2) + 315 / (3072 * x**3)
                                                        + 14175 / (98304 * x**4)))
+
+
+def sd2rho(sd):
+    """sd -> rho
+    theano converter
+    mu + sd*e = mu + log(1+exp(rho))*e"""
+    return tt.log(tt.exp(sd) - 1)
+
+
+def rho2sd(rho):
+    """rho -> sd
+    theano converter
+    mu + sd*e = mu + log(1+exp(rho))*e"""
+    return tt.log1p(tt.exp(rho))
+
+
+def kl_divergence_normal_pair(mu1, mu2, sd1, sd2):
+    elemwise_kl = (tt.log(sd2/sd1) +
+                   (sd2**2 + (mu1-mu2)**2)/(2.*sd2**2) -
+                   0.5)
+    return tt.sum(elemwise_kl)
+
+
+def kl_divergence_normal_pair3(mu1, mu2, rho1, rho2):
+    sd1, sd2 = rho2sd(rho1), rho2sd(rho2)
+    return kl_divergence_normal_pair(mu1, mu2, sd1, sd2)
+
+
+def log_normal(x, mean, std, eps=0.0):
+    std += eps
+    return c - tt.log(tt.abs_(std)) - (x - mean) ** 2 / (2 * std ** 2)
+
+
+def log_normal3(x, mean, rho, eps=0.0):
+    std = rho2sd(rho)
+    return log_normal(x, mean, std, eps)
diff --git a/pymc3/math.py b/pymc3/math.py
@@ -22,3 +22,7 @@ def invlogit(x, eps=sys.float_info.epsilon):
 
 def logit(p):
     return tt.log(p / (1 - p))
+
+
+def flatten_list(tensors):
+    return tt.concatenate([var.ravel() for var in tensors])
diff --git a/pymc3/step_methods/nuts.py b/pymc3/step_methods/nuts.py
@@ -0,0 +1,238 @@
+from .quadpotential import quad_potential
+from .arraystep import ArrayStepShared, ArrayStep, SamplerHist, Competence
+from ..model import modelcontext, Point
+from ..vartypes import continuous_types
+from numpy import exp, log, array
+from numpy.random import uniform
+from .hmc import leapfrog, Hamiltonian, bern, energy
+from ..tuning import guess_scaling
+import theano
+from ..theanof import (make_shared_replacements, join_nonshared_inputs, CallableTensor,
+                       gradient, inputvars)
+import theano.tensor as tt
+
+__all__ = ['NUTS']
+
+
+class NUTS(ArrayStepShared):
+    """
+    Automatically tunes step size and adjust number of steps for good performance.
+
+    Implements "Algorithm 6: Efficient No-U-Turn Sampler with Dual Averaging" in:
+
+    Hoffman, Matthew D., & Gelman, Andrew. (2011).
+    The No-U-Turn Sampler: Adaptively Setting Path Lengths in Hamiltonian Monte Carlo.
+    """
+    default_blocked = True
+
+    def __init__(self, vars=None, scaling=None, step_scale=0.25, is_cov=False, state=None,
+                 Emax=1000,
+                 target_accept=0.8,
+                 gamma=0.05,
+                 k=0.75,
+                 t0=10,
+                 model=None,
+                 profile=False,
+                 mode=None, **kwargs):
+        """
+        Parameters
+        ----------
+            vars : list of Theano variables, default continuous vars
+            scaling : array_like, ndim = {1,2} or point
+                Scaling for momentum distribution. 1d arrays interpreted matrix diagonal.
+            step_scale : float, default=.25
+                Size of steps to take, automatically scaled down by 1/n**(1/4)
+            is_cov : bool, default=False
+                Treat C as a covariance matrix/vector if True, else treat it as a precision matrix/vector
+            state
+                state to start from
+            Emax : float, default 1000
+                maximum energy
+            target_accept : float (0,1) default .8
+                target for avg accept probability between final branch and initial position
+            gamma : float, default .05
+            k : float (.5,1) default .75
+                scaling of speed of adaptation
+            t0 : int, default 10
+                slows inital adapatation
+            model : Model
+            profile : bool or ProfileStats
+                sets the functions to be profiled
+            mode :  string or `Mode` instance.
+                compilation mode passed to Theano functions
+        """
+        model = modelcontext(model)
+
+        if vars is None:
+            vars = model.cont_vars
+        vars = inputvars(vars)
+
+        if scaling is None:
+            scaling = model.test_point
+
+        if isinstance(scaling, dict):
+            scaling = guess_scaling(
+                Point(scaling, model=model), model=model, vars=vars)
+
+        n = scaling.shape[0]
+
+        self.step_size = step_scale / n**(1 / 4.)
+
+        self.potential = quad_potential(scaling, is_cov, as_cov=False)
+
+        if state is None:
+            state = SamplerHist()
+        self.state = state
+        self.Emax = Emax
+
+        self.target_accept = target_accept
+        self.gamma = gamma
+        self.t0 = t0
+        self.k = k
+
+        self.Hbar = 0
+        self.u = log(self.step_size * 10)
+        self.m = 1
+        self.mode = mode
+
+        shared = make_shared_replacements(vars, model)
+
+        def create_hamiltonian(vars, shared, model):
+            dlogp = gradient(model.logpt, vars)
+            (logp, dlogp), q = join_nonshared_inputs(
+                [model.logpt, dlogp], vars, shared)
+            logp = CallableTensor(logp)
+            dlogp = CallableTensor(dlogp)
+
+            return Hamiltonian(logp, dlogp, self.potential), q
+
+        def create_energy_func(q):
+            p = tt.dvector('p')
+            p.tag.test_value = q.tag.test_value
+            E0 = energy(self.H, q, p)
+            E0_func = theano.function([q, p], E0, mode=self.mode)
+            E0_func.trust_input = True
+
+            return E0_func
+
+        self.H, q = create_hamiltonian(vars, shared, model)
+        self.compute_energy = create_energy_func(q)
+
+        self.leapfrog1_dE = leapfrog1_dE(self.H, q, profile=profile, 
+                                        mode=self.mode)
+
+        super(NUTS, self).__init__(vars, shared, **kwargs)
+
+    def astep(self, q0):
+        leapfrog = self.leapfrog1_dE
+        Emax = self.Emax
+        e = self.step_size
+
+        p0 = self.potential.random()
+        E0 = self.compute_energy(q0, p0)
+
+        u = uniform()
+        q = qn = qp = q0
+        p = pn = pp = p0
+
+        n, s, j = 1, 1, 0
+
+        while s == 1:
+            v = bern(.5) * 2 - 1
+
+            if v == -1:
+                qn, pn, _, _, q1, n1, s1, a, na = buildtree(
+                    leapfrog, qn, pn, u, v, j, e, Emax, E0)
+            else:
+                _, _, qp, pp, q1, n1, s1, a, na = buildtree(
+                    leapfrog, qp, pp, u, v, j, e, Emax, E0)
+
+            if s1 == 1 and bern(min(1, n1 * 1. / n)):
+                q = q1
+
+            n = n + n1
+
+            span = qp - qn
+            s = s1 * (span.dot(pn) >= 0) * (span.dot(pp) >= 0)
+            j = j + 1
+
+        p = -p
+
+        w = 1. / (self.m + self.t0)
+        self.Hbar = (1 - w) * self.Hbar + w * \
+            (self.target_accept - a * 1. / na)
+
+        self.step_size = exp(self.u - (self.m**self.k / self.gamma) * self.Hbar)
+        self.m += 1
+
+        return q
+
+    @staticmethod
+    def competence(var):
+        if var.dtype in continuous_types:
+            return Competence.IDEAL
+        return Competence.INCOMPATIBLE
+
+
+def buildtree(leapfrog1_dE, q, p, u, v, j, e, Emax, E0):
+    if j == 0:
+        q1, p1, dE = leapfrog1_dE(q, p, array(v * e), E0)
+
+        n1 = int(log(u) + dE <= 0)
+        s1 = int(log(u) + dE < Emax)
+        return q1, p1, q1, p1, q1, n1, s1, min(1, exp(-dE)), 1
+    else:
+        qn, pn, qp, pp, q1, n1, s1, a1, na1 = buildtree(
+            leapfrog1_dE, q, p, u, v, j - 1, e, Emax, E0)
+        if s1 == 1:
+            if v == -1:
+                qn, pn, _, _, q11, n11, s11, a11, na11 = buildtree(
+                    leapfrog1_dE, qn, pn, u, v, j - 1, e, Emax, E0)
+            else:
+                _, _, qp, pp, q11, n11, s11, a11, na11 = buildtree(
+                    leapfrog1_dE, qp, pp, u, v, j - 1, e, Emax, E0)
+
+            if bern(n11 * 1. / (max(n1 + n11, 1))):
+                q1 = q11
+
+            a1 = a1 + a11
+            na1 = na1 + na11
+
+            span = qp - qn
+            s1 = s11 * (span.dot(pn) >= 0) * (span.dot(pp) >= 0)
+            n1 = n1 + n11
+        return qn, pn, qp, pp, q1, n1, s1, a1, na1
+    return
+
+
+def leapfrog1_dE(H, q, profile, mode):
+    """Computes a theano function that computes one leapfrog step and the energy difference between the beginning and end of the trajectory.
+    Parameters
+    ----------
+    H : Hamiltonian
+    q : theano.tensor
+    profile : Boolean
+
+    Returns
+    -------
+    theano function which returns
+    q_new, p_new, dE
+    """
+    p = tt.dvector('p')
+    p.tag.test_value = q.tag.test_value
+
+    e = tt.dscalar('e')
+    e.tag.test_value = 1
+
+    q1, p1 = leapfrog(H, q, p, 1, e)
+    E = energy(H, q1, p1)
+
+    E0 = tt.dscalar('E0')
+    E0.tag.test_value = 1
+
+    dE = E - E0
+
+    f = theano.function([q, p, e, E0], [q1, p1, dE], 
+                        profile=profile, mode=mode)
+    f.trust_input = True
+    return f
diff --git a/pymc3/tests/test_replacements_mean_field.py b/pymc3/tests/test_replacements_mean_field.py
@@ -0,0 +1,60 @@
+import unittest
+import numpy as np
+import theano
+import theano.tensor as tt
+from pymc3 import Model, Normal
+from pymc3.variational.replacements import MeanField
+from . import models
+
+
+class TestMeanField(unittest.TestCase):
+    def test_elbo(self):
+        mu0 = 1.5
+        sigma = 1.0
+        y_obs = np.array([1.6, 1.4])
+
+        post_mu = np.array([1.88])
+        post_sd = np.array([1])
+        # Create a model for test
+        with Model() as model:
+            mu = Normal('mu', mu=mu0, sd=sigma)
+            Normal('y', mu=mu, sd=1, observed=y_obs)
+
+        # Create variational gradient tensor
+        mean_field = MeanField(model)
+        elbo = mean_field.elbo(samples=10000)
+
+        mean_field.shared_params['mu'].set_value(post_mu)
+        mean_field.shared_params['rho'].set_value(np.log(np.exp(post_sd) - 1))
+
+        f = theano.function([], elbo.mean())
+        elbo_mc = f()
+
+        # Exact value
+        elbo_true = (-0.5 * (
+            3 + 3 * post_mu**2 - 2 * (y_obs[0] + y_obs[1] + mu0) * post_mu +
+            y_obs[0]**2 + y_obs[1]**2 + mu0**2 + 3 * np.log(2 * np.pi)) +
+            0.5 * (np.log(2 * np.pi) + 1))
+        np.testing.assert_allclose(elbo_mc, elbo_true, rtol=0, atol=1e-1)
+
+    def test_vary_samples(self):
+        _, model, _ = models.simple_model()
+        i = tt.iscalar('i')
+        i.tag.test_value = 1
+        with model:
+            mf = MeanField()
+            elbo = mf.elbo(i)
+        elbos = theano.function([i], elbo)
+        self.assertEqual(elbos(1).shape[0], 1)
+        self.assertEqual(elbos(10).shape[0], 10)
+
+    def test_vars_view(self):
+        _, model, _ = models.multidimensional_model()
+        with model:
+            mf = MeanField()
+            posterior = mf.posterior(mf.initial(10))
+            x_sampled = mf.view_from(posterior, 'x').eval()
+        self.assertEqual(x_sampled.shape, (10,) + model['x'].dshape)
+
+if __name__ == '__main__':
+    unittest.main()