Merge branch 'master' into latex_dists
fonnesbeck committed May 23, 2017
2 parents e911982 + fab6938 commit 73e2773
Showing 15 changed files with 636 additions and 264 deletions.
31 changes: 19 additions & 12 deletions pymc3/distributions/continuous.py
@@ -1590,23 +1590,30 @@ def _repr_latex_(self, name=None, dist=None):

class Interpolated(Continuous):
R"""
Probability distribution defined as a linear interpolation of
a set of points and values of the probability density function
evaluated on them.
The points are not variables, but plain array-like objects, so
they are constant and cannot be sampled.
======== =========================================
Support :math:`x \in [x_points[0], x_points[-1]]`
======== =========================================
Univariate probability distribution defined as a linear interpolation
of probability density function evaluated on some lattice of points.
The lattice can be uneven, so the steps between different points can have
different sizes, and it is possible to vary the precision between regions
of the support.
The probability density function values do not have to be normalized, as the
interpolated density is normalized anyway to make the total probability
equal to $1$.
Both the `x_points` and the corresponding `pdf_points` are not variables, but
plain array-like objects, so they are constant and cannot be sampled.
======== ===========================================
Support :math:`x \in [x\_points[0], x\_points[-1]]`
======== ===========================================
Parameters
----------
x_points : array-like
A monotonically increasing list of values
pdf_points : array-like
Probability density function evaluated at points from `x`
Probability density function evaluated on the lattice `x_points`
"""

def __init__(self, x_points, pdf_points, transform='interval',
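For context, a minimal usage sketch of the reworked `Interpolated` docstring; the model, lattice, and the normal density used for `pdf_points` are illustrative, not part of this diff.

import numpy as np
from scipy import stats
import pymc3 as pm

# Evaluate an (unnormalized) density on a monotonically increasing lattice;
# Interpolated renormalizes it so the total probability equals 1.
x_points = np.linspace(-5., 5., 1000)
pdf_points = stats.norm.pdf(x_points)

with pm.Model():
    x = pm.Interpolated('x', x_points=x_points, pdf_points=pdf_points)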
10 changes: 5 additions & 5 deletions pymc3/distributions/distribution.py
@@ -48,7 +48,7 @@ def dist(cls, *args, **kwargs):
dist.__init__(*args, **kwargs)
return dist

def __init__(self, shape, dtype, testval=None, defaults=[], transform=None):
def __init__(self, shape, dtype, testval=None, defaults=(), transform=None):
self.shape = np.atleast_1d(shape)
if False in (np.floor(self.shape) == self.shape):
raise TypeError("Expected int elements in shape")
@@ -59,7 +59,7 @@ def __init__(self, shape, dtype, testval=None, defaults=[], transform=None):
self.transform = transform

def default(self):
return self.get_test_val(self.testval, self.defaults)
return np.asarray(self.get_test_val(self.testval, self.defaults), self.dtype)

def get_test_val(self, val, defaults):
if val is None:
@@ -95,7 +95,7 @@ def TensorType(dtype, shape):

class NoDistribution(Distribution):

def __init__(self, shape, dtype, testval=None, defaults=[], transform=None, parent_dist=None, *args, **kwargs):
def __init__(self, shape, dtype, testval=None, defaults=(), transform=None, parent_dist=None, *args, **kwargs):
super(NoDistribution, self).__init__(shape=shape, dtype=dtype,
testval=testval, defaults=defaults,
*args, **kwargs)
@@ -114,7 +114,7 @@ def logp(self, x):
class Discrete(Distribution):
"""Base class for discrete distributions"""

def __init__(self, shape=(), dtype=None, defaults=['mode'],
def __init__(self, shape=(), dtype=None, defaults=('mode', ),
*args, **kwargs):
if dtype is None:
if theano.config.floatX == 'float32':
@@ -130,7 +130,7 @@ def __init__(self, shape=(), dtype=None, defaults=['mode'],
class Continuous(Distribution):
"""Base class for continuous distributions"""

def __init__(self, shape=(), dtype=None, defaults=['median', 'mean', 'mode'], *args, **kwargs):
def __init__(self, shape=(), dtype=None, defaults=('median', 'mean', 'mode'), *args, **kwargs):
if dtype is None:
dtype = theano.config.floatX
super(Continuous, self).__init__(
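The `defaults=[]` to `defaults=()` changes above swap mutable default arguments for immutable ones. A standalone sketch of why that matters (the `collect` helpers are hypothetical, not pymc3 code):

def collect(item, bucket=[]):
    # The list is created once at definition time and shared by every call.
    bucket.append(item)
    return bucket

def collect_safe(item, bucket=()):
    # An immutable default forces a fresh list to be built per call.
    return list(bucket) + [item]

assert collect('a') == ['a']
assert collect('b') == ['a', 'b']   # state leaked in from the previous call
assert collect_safe('a') == ['a']
assert collect_safe('b') == ['b']   # calls stay independent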
15 changes: 10 additions & 5 deletions pymc3/sampling.py
@@ -574,13 +574,17 @@ def init_nuts(init='ADVI', njobs=1, n_init=500000, model=None,

if init is not None:
init = init.lower()

cb = [
pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff='absolute'),
pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff='relative'),
]
if init == 'advi':
approx = pm.fit(
random_seed=random_seed,
n=n_init, method='advi', model=model,
callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-2)],
progressbar=progressbar
callbacks=cb,
progressbar=progressbar,
obj_optimizer=pm.adagrad_window
) # type: pm.MeanField
start = approx.sample(draws=njobs)
stds = approx.gbij.rmap(approx.std.eval())
@@ -593,8 +597,9 @@
pm.fit(
random_seed=random_seed,
n=n_init, method=pm.ADVI.from_mean_field(approx),
callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-2)],
progressbar=progressbar
callbacks=cb,
progressbar=progressbar,
obj_optimizer=pm.adagrad_window
)
start = approx.sample(draws=njobs)
cov = approx.cov.eval()
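The same initialization pattern, seen from user code: a rough sketch in which the toy model is invented for illustration, while `pm.fit`, `pm.callbacks.CheckParametersConvergence`, and `pm.adagrad_window` are the calls used in the diff above.

import pymc3 as pm

cb = [
    pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff='absolute'),
    pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff='relative'),
]

with pm.Model():
    mu = pm.Normal('mu', mu=0., sd=1.)
    pm.Normal('obs', mu=mu, sd=1., observed=[0.1, -0.3, 0.2])
    approx = pm.fit(n=50000, method='advi', callbacks=cb,
                    obj_optimizer=pm.adagrad_window)  # type: pm.MeanField
    start = approx.sample(draws=4)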
13 changes: 8 additions & 5 deletions pymc3/tests/test_distributions.py
@@ -49,9 +49,11 @@ def get_lkj_cases():


class Domain(object):

def __init__(self, vals, dtype=None, edges=None, shape=None):
avals = array(vals)
avals = array(vals, dtype=dtype)
if dtype is None and not str(avals.dtype).startswith('int'):
avals = avals.astype(theano.config.floatX)
vals = [array(v, dtype=avals.dtype) for v in vals]

if edges is None:
edges = array(vals[0]), array(vals[-1])
@@ -283,7 +285,6 @@ def mvt_logpdf(value, nu, Sigma, mu=0):


class Simplex(object):

def __init__(self, n):
self.vals = list(simplex_values(n))
self.shape = (n,)
@@ -322,6 +323,7 @@ def PdMatrix(n):
PdMatrixChol3 = Domain([np.eye(3), [[0.1, 0, 0], [10, 100, 0], [0, 1, 10]]],
edges=(None, None))


def PdMatrixChol(n):
if n == 1:
return PdMatrixChol1
@@ -336,7 +338,8 @@ def PdMatrixChol(n):
PdMatrixCholUpper1 = Domain([np.eye(1), [[0.001]]], edges=(None, None))
PdMatrixCholUpper2 = Domain([np.eye(2), [[0.1, 10], [0, 1]]], edges=(None, None))
PdMatrixCholUpper3 = Domain([np.eye(3), [[0.1, 10, 0], [0, 100, 1], [0, 0, 10]]],
edges=(None, None))
edges=(None, None))


def PdMatrixCholUpper(n):
if n == 1:
@@ -549,6 +552,7 @@ def test_student_tpos(self):
def test_skew_normal(self):
self.pymc3_matches_scipy(SkewNormal, R, {'mu': R, 'sd': Rplusbig, 'alpha': R},
lambda value, alpha, mu, sd: sp.skewnorm.logpdf(value, alpha, mu, sd))

def test_binomial(self):
self.pymc3_matches_scipy(Binomial, Nat, {'n': NatSmall, 'p': Unit},
lambda value, n, p: sp.binom.logpmf(value, n, p))
@@ -801,7 +805,6 @@ def test_interpolated(self):
xmax = mu + 5 * sd

class TestedInterpolated (Interpolated):

def __init__(self, **kwargs):
x_points = np.linspace(xmin, xmax, 100000)
pdf_points = sp.norm.pdf(x_points, loc=mu, scale=sd)
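The `Domain` change above keys its dtype handling off `theano.config.floatX`. A small standalone sketch of that rule, with arbitrary sample values:

import numpy as np
import theano

vals = [[0.1, 0.5], [1.0, 2.0]]
avals = np.array(vals)
# Integer domains keep their dtype; anything else is cast to floatX so the
# test values agree with theano's configured precision.
if not str(avals.dtype).startswith('int'):
    avals = avals.astype(theano.config.floatX)
print(avals.dtype)  # float32 when theano.config.floatX == 'float32'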
48 changes: 35 additions & 13 deletions pymc3/tests/test_updates.py
@@ -1,4 +1,5 @@
import pytest
import numpy as np
import theano
from theano.configparser import change_flags
from pymc3.variational.updates import (
@@ -9,43 +10,64 @@
rmsprop,
adadelta,
adam,
adamax
adamax,
adagrad_window
)

_a = theano.shared(1.)
_b = _a*2

_with_params = dict(loss_or_grads=_b, params=[_a])
_without_first_param = dict(loss_or_grads=None, params=[_a])
_without_second_param = dict(loss_or_grads=_b, params=None)
_without_params = dict(loss_or_grads=None, params=None)
_m = theano.shared(np.empty((10, ), theano.config.floatX))
_n = _m.sum()
_m2 = theano.shared(np.empty((10, 10, 10), theano.config.floatX))
_n2 = _b + _n + _m2.sum()


@pytest.mark.parametrize(
'opt',
[sgd, momentum, nesterov_momentum, adagrad, rmsprop, adadelta, adam, adamax]
[sgd, momentum, nesterov_momentum,
adagrad, rmsprop, adadelta, adam,
adamax, adagrad_window],
ids=['sgd', 'momentum', 'nesterov_momentum',
'adagrad', 'rmsprop', 'adadelta', 'adam',
'adamax', 'adagrad_window']
)
@pytest.mark.parametrize(
'loss_and_params',
[_with_params, _without_first_param, _without_second_param, _without_params]
'getter',
[lambda t: t, # all params -> ok
lambda t: (None, t[1]), # missing loss -> fail
lambda t: (t[0], None), # missing params -> fail
lambda t: (None, None)], # all missing -> partial
ids=['all_params',
'missing_loss',
'missing_params',
'all_missing']
)
@pytest.mark.parametrize(
'kwargs',
[dict(), dict(learning_rate=1e-2)]
[dict(), dict(learning_rate=1e-2)],
ids=['without_args', 'with_args']
)
@pytest.mark.parametrize(
'loss_and_params',
[(_b, [_a]), (_n, [_m]), (_n2, [_a, _m, _m2])],
ids=['scalar', 'matrix', 'mixed']
)
def test_updates_fast(opt, loss_and_params, kwargs):
def test_updates_fast(opt, loss_and_params, kwargs, getter):
with change_flags(compute_test_value='ignore'):
loss, param = getter(loss_and_params)
args = dict()
args.update(**kwargs)
args.update(**loss_and_params)
if loss_and_params['loss_or_grads'] is None and loss_and_params['params'] is None:
args.update(dict(loss_or_grads=loss, params=param))
if loss is None and param is None:
updates = opt(**args)
# Here we should get a new callable
assert callable(updates)
# And be able to get updates
updates = opt(_b, [_a])
assert isinstance(updates, dict)
elif loss_and_params['loss_or_grads'] is None or loss_and_params['params'] is None:
# case when both are None is above
elif loss is None or param is None:
# Here something goes wrong: the user provides an incomplete set of [params + loss_or_grads]
# We raise ValueError
with pytest.raises(ValueError):
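The reworked test exercises the calling conventions of the optimizers in `pymc3.variational.updates`: a full call returns a dict of updates, a call with neither loss nor params returns a partially applied optimizer, and supplying only one of the two raises `ValueError`. A sketch of the first two cases, following the test rather than any documented API:

import numpy as np
import theano
from pymc3.variational.updates import adagrad_window

a = theano.shared(np.asarray(1., dtype=theano.config.floatX))
loss = a * 2

# Full call: an update dictionary ready for theano.function(updates=...).
updates = adagrad_window(loss_or_grads=loss, params=[a], learning_rate=1e-2)
assert isinstance(updates, dict)

# Neither loss nor params: a partially applied optimizer comes back.
opt = adagrad_window(loss_or_grads=None, params=None, learning_rate=1e-2)
assert callable(opt)
assert isinstance(opt(loss, [a]), dict)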
58 changes: 39 additions & 19 deletions pymc3/tests/test_variational_inference.py
@@ -7,11 +7,10 @@
from pymc3 import Model, Normal
from pymc3.variational import (
ADVI, FullRankADVI, SVGD,
Empirical,
fit
Empirical, ASVGD,
MeanField, fit
)
from pymc3.variational.operators import KL
from pymc3.variational.approximations import MeanField

from pymc3.tests import models
from pymc3.tests.helpers import SeededTest
@@ -70,7 +69,15 @@ class TestApproximates:
class Base(SeededTest):
inference = None
NITER = 12000
optimizer = functools.partial(pm.adam, learning_rate=.01)
optimizer = pm.adagrad_window(learning_rate=0.01)
conv_cb = property(lambda self: [
pm.callbacks.CheckParametersConvergence(
every=500,
diff='relative', tolerance=0.001),
pm.callbacks.CheckParametersConvergence(
every=500,
diff='absolute', tolerance=0.0001)
])

def test_vars_view(self):
_, model, _ = models.multidimensional_model()
@@ -147,11 +154,10 @@ def test_optimizer_with_full_data(self):
inf.fit(10)
approx = inf.fit(self.NITER,
obj_optimizer=self.optimizer,
callbacks=
[pm.callbacks.CheckParametersConvergence()],)
callbacks=self.conv_cb,)
trace = approx.sample(10000)
np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.1)
np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4)
np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.05)
np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.1)

def test_optimizer_minibatch_with_generator(self):
n = 1000
@@ -176,11 +182,10 @@ def create_minibatch(data):
Normal('x', mu=mu_, sd=sd, observed=minibatches, total_size=n)
inf = self.inference()
approx = inf.fit(self.NITER * 3, obj_optimizer=self.optimizer,
callbacks=
[pm.callbacks.CheckParametersConvergence()])
callbacks=self.conv_cb)
trace = approx.sample(10000)
np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.1)
np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4)
np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.05)
np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.1)

def test_optimizer_minibatch_with_callback(self):
n = 1000
@@ -208,12 +213,21 @@ def cb(*_):
mu_ = Normal('mu', mu=mu0, sd=sd0, testval=0)
Normal('x', mu=mu_, sd=sd, observed=data_t, total_size=n)
inf = self.inference(scale_cost_to_minibatch=True)
approx = inf.fit(self.NITER * 3, callbacks=
[cb, pm.callbacks.CheckParametersConvergence()],
obj_n_mc=10, obj_optimizer=self.optimizer)
approx = inf.fit(
self.NITER * 3, callbacks=[cb] + self.conv_cb, obj_optimizer=self.optimizer)
trace = approx.sample(10000)
np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.4)
np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.4)
np.testing.assert_allclose(np.mean(trace['mu']), mu_post, rtol=0.05)
np.testing.assert_allclose(np.std(trace['mu']), np.sqrt(1. / d), rtol=0.1)

def test_n_obj_mc(self):
n_samples = 100
xs = np.random.binomial(n=1, p=0.2, size=n_samples)
with pm.Model():
p = pm.Beta('p', alpha=1, beta=1)
pm.Binomial('xs', n=1, p=p, observed=xs)
inf = self.inference(scale_cost_to_minibatch=True)
# should just work
inf.fit(10, obj_n_mc=10, obj_optimizer=self.optimizer)

def test_pickling(self):
with models.multidimensional_model()[1]:
@@ -277,8 +291,14 @@ def test_from_advi(self):

class TestSVGD(TestApproximates.Base):
inference = functools.partial(SVGD, n_particles=100)
NITER = 2500
optimizer = functools.partial(pm.adam, learning_rate=.1)


class TestASVGD(TestApproximates.Base):
NITER = 15000
inference = ASVGD
test_aevb = _test_aevb
optimizer = pm.adagrad_window(learning_rate=0.002)
conv_cb = []


class TestEmpirical(SeededTest):
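A sketch of what the new `test_n_obj_mc` exercise looks like as user code; the iteration count and learning rate are illustrative, and `obj_n_mc` is taken to be the number of Monte Carlo samples per objective-gradient estimate.

import numpy as np
import pymc3 as pm

n_samples = 100
xs = np.random.binomial(n=1, p=0.2, size=n_samples)

with pm.Model():
    p = pm.Beta('p', alpha=1, beta=1)
    pm.Binomial('xs', n=1, p=p, observed=xs)
    inf = pm.ADVI(scale_cost_to_minibatch=True)
    # More Monte Carlo samples per gradient step; should just work.
    inf.fit(1000, obj_n_mc=10,
            obj_optimizer=pm.adagrad_window(learning_rate=0.01))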
