294 changes: 292 additions & 2 deletions pymc3/distributions/continuous.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# coding: utf-8
"""
A collection of common probability distributions for stochastic
nodes in PyMC.
Expand All @@ -19,8 +20,8 @@
from .special import log_i0
from ..math import invlogit, logit, logdiffexp
from .dist_math import (
bound, logpow, gammaln, betaln, std_cdf, alltrue_elemwise,
SplineWrapper, i0e, normal_lcdf, normal_lccdf
alltrue_elemwise, betaln, bound, gammaln, i0e, incomplete_beta, logpow,
normal_lccdf, normal_lcdf, SplineWrapper, std_cdf, zvalue,
)
from .distribution import Continuous, draw_values, generate_samples

Expand Down Expand Up @@ -239,6 +240,18 @@ def _repr_latex_(self, name=None, dist=None):
return r'${} \sim \text{{Uniform}}(\mathit{{lower}}={},~\mathit{{upper}}={})$'.format(
name, get_variable_name(lower), get_variable_name(upper))

def logcdf(self, value):
return tt.switch(
tt.or_(tt.lt(value, self.lower), tt.gt(value, self.upper)),
-np.inf,
tt.switch(
tt.eq(value, self.upper),
0,
tt.log((value - self.lower)) -
tt.log((self.upper - self.lower))
)
)


class Flat(Continuous):
"""
Expand Down Expand Up @@ -284,6 +297,17 @@ def _repr_latex_(self, name=None, dist=None):
name = r'\text{%s}' % name
return r'${} \sim \text{{Flat}}()$'.format(name)

def logcdf(self, value):
return tt.switch(
tt.eq(value, -np.inf),
-np.inf,
tt.switch(
tt.eq(value, np.inf),
0,
tt.log(0.5)
)
)


class HalfFlat(PositiveContinuous):
"""Improper flat prior over the positive reals."""
Expand Down Expand Up @@ -326,6 +350,17 @@ def _repr_latex_(self, name=None, dist=None):
name = r'\text{%s}' % name
return r'${} \sim \text{{HalfFlat}}()$'.format(name)

def logcdf(self, value):
return tt.switch(
tt.lt(value, np.inf),
-np.inf,
tt.switch(
tt.eq(value, np.inf),
0,
-np.inf
)
)


class Normal(Continuous):
R"""
Expand Down Expand Up @@ -457,6 +492,9 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(mu),
get_variable_name(sd))

def logcdf(self, value):
return normal_lcdf(self.mu, self.sd, value)


class TruncatedNormal(BoundedContinuous):
R"""
Expand Down Expand Up @@ -778,6 +816,15 @@ def _repr_latex_(self, name=None, dist=None):
return r'${} \sim \text{{HalfNormal}}(\mathit{{sd}}={})$'.format(name,
get_variable_name(sd))

def logcdf(self, value):
sd = self.sd
z = zvalue(value, mu=0, sd=sd)
return tt.switch(
tt.lt(z, -1.0),
tt.log(tt.erfcx(-z / tt.sqrt(2.))) - tt.sqr(z),
tt.log1p(-tt.erfc(z / tt.sqrt(2.)))
)


class Wald(PositiveContinuous):
R"""
Expand Down Expand Up @@ -852,6 +899,9 @@ class Wald(PositiveContinuous):
.. [Michael1976] Michael, J. R., Schucany, W. R. and Hass, R. W. (1976).
Generating Random Variates Using Transformations with Multiple Roots.
The American Statistician, Vol. 30, No. 2, pp. 88-90
.. [Giner2016] Göknur Giner, Gordon K. Smyth (2016)
statmod: Probability Calculations for the Inverse Gaussian Distribution
"""

def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs):
Expand Down Expand Up @@ -959,6 +1009,46 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(lam),
get_variable_name(alpha))

def logcdf(self, value):
# Distribution parameters
mu = self.mu
lam = self.lam
alpha = self.alpha

value -= alpha
q = value / mu
l = lam * mu
r = tt.sqrt(value * lam)

a = normal_lcdf(0, 1, (q - 1.)/r)
b = 2./l + normal_lcdf(0, 1, -(q + 1.)/r)
return tt.switch(
(
# Left limit
tt.lt(value, 0) |
(tt.eq(value, 0) & tt.gt(mu, 0) & tt.lt(lam, np.inf)) |
(tt.lt(value, mu) & tt.eq(lam, 0))
),
-np.inf,
tt.switch(
(
# Right limit
tt.eq(value, np.inf) |
(tt.eq(lam, 0) & tt.gt(value, mu)) |
(tt.gt(value, 0) & tt.eq(lam, np.inf)) |
# Degenerate distribution
(
tt.lt(mu, np.inf) &
tt.eq(mu, value) &
tt.eq(lam, 0)
) |
(tt.eq(value, 0) & tt.eq(lam, np.inf))
),
0,
a + tt.log1p(tt.exp(b - a))
)
)


class Beta(UnitContinuous):
R"""
Expand Down Expand Up @@ -1101,6 +1191,20 @@ def logp(self, value):
value >= 0, value <= 1,
alpha > 0, beta > 0)

def logcdf(self, value):
value = floatX(tt.as_tensor(value))
a = floatX(tt.as_tensor(self.alpha))
b = floatX(tt.as_tensor(self.beta))
return tt.switch(
tt.le(value, 0),
-np.inf,
tt.switch(
tt.ge(value, 1),
0,
tt.log(incomplete_beta(a, b, value))
)
)

def _repr_latex_(self, name=None, dist=None):
if dist is None:
dist = self
Expand Down Expand Up @@ -1227,6 +1331,7 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(a),
get_variable_name(b))


class Exponential(PositiveContinuous):
R"""
Exponential log-likelihood.
Expand Down Expand Up @@ -1322,6 +1427,30 @@ def _repr_latex_(self, name=None, dist=None):
return r'${} \sim \text{{Exponential}}(\mathit{{lam}}={})$'.format(name,
get_variable_name(lam))

def logcdf(self, value):
"""
Compute the log CDF for the Exponential distribution
References
----------
.. [Machler2012] Martin Mächler (2012).
"Accurately computing log(1-exp(-|a|)) Assessed by the Rmpfr
package"
"""
value = floatX(tt.as_tensor(value))
lam = self.lam
a = lam * value
return tt.switch(
tt.le(value, 0.0),
-np.inf,
tt.switch(
tt.le(a, tt.log(2.0)),
tt.log(-tt.expm1(-a)),
tt.log1p(-tt.exp(-a)),
)
)


class Laplace(Continuous):
R"""
Laplace log-likelihood.
Expand Down Expand Up @@ -1424,6 +1553,20 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(mu),
get_variable_name(b))

def logcdf(self, value):
a = self.mu
b = self.b
y = (value - a) / b
return tt.switch(
tt.le(value, a),
tt.log(0.5) + y,
tt.switch(
tt.gt(y, 1),
tt.log1p(-0.5 * tt.exp(-y)),
tt.log(1 - 0.5 * tt.exp(-y))
)
)


class Lognormal(PositiveContinuous):
R"""
Expand Down Expand Up @@ -1559,6 +1702,22 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(mu),
get_variable_name(tau))

def logcdf(self, value):
mu = self.mu
sd = self.sd
z = zvalue(tt.log(value), mu=mu, sd=sd)

return tt.switch(
tt.le(value, 0),
-np.inf,
tt.switch(
tt.lt(z, -1.0),
tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) -
tt.sqr(z) / 2,
tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
)
)


class StudentT(Continuous):
R"""
Expand Down Expand Up @@ -1698,6 +1857,15 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(mu),
get_variable_name(lam))

def logcdf(self, value):
nu = self.nu
mu = self.mu
sd = self.sd
t = (value - mu)/sd
sqrt_t2_nu = tt.sqrt(t**2 + nu)
x = (t + sqrt_t2_nu)/(2.0 * sqrt_t2_nu)
return tt.log(incomplete_beta(nu/2., nu/2., x))


class Pareto(Continuous):
R"""
Expand Down Expand Up @@ -1820,6 +1988,20 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(alpha),
get_variable_name(m))

def logcdf(self, value):
m = self.m
alpha = self.alpha
arg = (m / value) ** alpha
return tt.switch(
tt.lt(value, m),
-np.inf,
tt.switch(
tt.le(arg, 1e-5),
tt.log1p(-arg),
tt.log(1 - arg)
)
)


class Cauchy(Continuous):
R"""
Expand Down Expand Up @@ -1930,6 +2112,11 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(alpha),
get_variable_name(beta))

def logcdf(self, value):
return tt.log(
0.5 + tt.arctan((value - self.alpha) / self.beta) / np.pi
)


class HalfCauchy(PositiveContinuous):
R"""
Expand Down Expand Up @@ -2030,6 +2217,15 @@ def _repr_latex_(self, name=None, dist=None):
return r'${} \sim \text{{HalfCauchy}}(\mathit{{beta}}={})$'.format(name,
get_variable_name(beta))

def logcdf(self, value):
return tt.switch(
tt.le(value, 0),
-np.inf,
tt.log(
2 * tt.arctan(value / self.beta) / np.pi
))


class Gamma(PositiveContinuous):
R"""
Gamma log-likelihood.
Expand Down Expand Up @@ -2459,6 +2655,28 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(alpha),
get_variable_name(beta))

def logcdf(self, value):
'''
Compute the log CDF for the Weibull distribution
References
----------
.. [Machler2012] Martin Mächler (2012).
"Accurately computing log(1-exp(-|a|)) Assessed by the Rmpfr
package"
'''
alpha = self.alpha
beta = self.beta
a = (value / beta)**alpha
return tt.switch(
tt.le(value, 0.0),
-np.inf,
tt.switch(
tt.le(a, tt.log(2.0)),
tt.log(-tt.expm1(-a)),
tt.log1p(-tt.exp(-a)))
)


class HalfStudentT(PositiveContinuous):
R"""
Expand Down Expand Up @@ -2729,6 +2947,30 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(sigma),
get_variable_name(nu))

def logcdf(self, value):
"""
Compute the log CDF for the ExGaussian distribution
References
----------
.. [Rigby2005] R.A. Rigby (2005).
"Generalized additive models for location, scale and shape"
http://dx.doi.org/10.1111/j.1467-9876.2005.00510.x
"""
mu = self.mu
sigma = self.sigma
sigma_2 = sigma**2
nu = self.nu
z = value - mu - sigma_2/nu
return tt.switch(
tt.gt(nu, 0.05 * sigma),
tt.log(std_cdf((value - mu)/sigma) -
std_cdf(z/sigma) * tt.exp(
((mu + (sigma_2/nu))**2 -
(mu**2) -
2 * value * ((sigma_2)/nu))/(2 * sigma_2))),
normal_lcdf(mu, sigma, value))


class VonMises(Continuous):
R"""
Expand Down Expand Up @@ -3093,6 +3335,24 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(lower),
get_variable_name(upper))

def logcdf(self, value):
l = self.lower
u = self.upper
c = self.c
return tt.switch(
tt.le(value, l),
-np.inf,
tt.switch(
tt.le(value, c),
tt.log(((value - l) ** 2) / ((u - l) * (c - l))),
tt.switch(
tt.lt(value, u),
tt.log1p(-((u - value) ** 2) / ((u - l) * (u - c))),
0
)
)
)


class Gumbel(Continuous):
R"""
Expand Down Expand Up @@ -3198,6 +3458,12 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(mu),
get_variable_name(beta))

def logcdf(self, value):
beta = self.beta
mu = self.mu

return -tt.exp(-(value - mu)/beta)


class Rice(PositiveContinuous):
R"""
Expand Down Expand Up @@ -3388,6 +3654,30 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(mu),
get_variable_name(s))

def logcdf(self, value):
"""
Compute the log CDF for the Logistic distribution
References
----------
.. [Machler2012] Martin Mächler (2012).
"Accurately computing log(1-exp(-|a|)) Assessed by the Rmpfr
package"
"""
mu = self.mu
s = self.s
a = -(value - mu)/s
return - tt.switch(
tt.le(a, -37),
tt.exp(a),
tt.switch(
tt.le(a, 18),
tt.log1p(tt.exp(a)),
tt.switch(
tt.le(a, 33.3),
tt.exp(-a) + a,
a)))


class LogitNormal(UnitContinuous):
R"""
Expand Down
194 changes: 194 additions & 0 deletions pymc3/distributions/dist_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import theano
from theano.scalar import UnaryScalarOp, upgrade_to_float
from theano.tensor.slinalg import Cholesky
from theano.scan_module import until
from theano import scan

from .special import gammaln
from pymc3.theanof import floatX
Expand Down Expand Up @@ -308,3 +310,195 @@ def random_choice(*args, **kwargs):
samples = np.random.choice(k, p=p, size=size)
return samples


def zvalue(value, sd, mu):
"""
Calculate the z-value for a normal distribution.
"""
return (value - mu) / sd


def incomplete_beta_cfe(a, b, x, small):
'''Incomplete beta continued fraction expansions
based on Cephes library by Steve Moshier (incbet.c).
small: Choose element-wise which continued fraction expansion to use.
'''
BIG = tt.constant(4.503599627370496e15, dtype='float64')
BIGINV = tt.constant(2.22044604925031308085e-16, dtype='float64')
THRESH = tt.constant(3. * np.MachAr().eps, dtype='float64')

zero = tt.constant(0., dtype='float64')
one = tt.constant(1., dtype='float64')
two = tt.constant(2., dtype='float64')

r = one
k1 = a
k3 = a
k4 = a + one
k5 = one
k8 = a + two

k2 = tt.switch(small, a + b, b - one)
k6 = tt.switch(small, b - one, a + b)
k7 = tt.switch(small, k4, a + one)
k26update = tt.switch(small, one, -one)
x = tt.switch(small, x, x / (one - x))

pkm2 = zero
qkm2 = one
pkm1 = one
qkm1 = one
r = one

def _step(
i,
pkm1, pkm2, qkm1, qkm2,
k1, k2, k3, k4, k5, k6, k7, k8, r
):
xk = -(x * k1 * k2) / (k3 * k4)
pk = pkm1 + pkm2 * xk
qk = qkm1 + qkm2 * xk
pkm2 = pkm1
pkm1 = pk
qkm2 = qkm1
qkm1 = qk

xk = (x * k5 * k6) / (k7 * k8)
pk = pkm1 + pkm2 * xk
qk = qkm1 + qkm2 * xk
pkm2 = pkm1
pkm1 = pk
qkm2 = qkm1
qkm1 = qk

old_r = r
r = tt.switch(tt.eq(qk, zero), r, pk/qk)

k1 += one
k2 += k26update
k3 += two
k4 += two
k5 += one
k6 -= k26update
k7 += two
k8 += two

big_cond = tt.gt(tt.abs_(qk) + tt.abs_(pk), BIG)
biginv_cond = tt.or_(
tt.lt(tt.abs_(qk), BIGINV),
tt.lt(tt.abs_(pk), BIGINV)
)

pkm2 = tt.switch(big_cond, pkm2 * BIGINV, pkm2)
pkm1 = tt.switch(big_cond, pkm1 * BIGINV, pkm1)
qkm2 = tt.switch(big_cond, qkm2 * BIGINV, qkm2)
qkm1 = tt.switch(big_cond, qkm1 * BIGINV, qkm1)

pkm2 = tt.switch(biginv_cond, pkm2 * BIG, pkm2)
pkm1 = tt.switch(biginv_cond, pkm1 * BIG, pkm1)
qkm2 = tt.switch(biginv_cond, qkm2 * BIG, qkm2)
qkm1 = tt.switch(biginv_cond, qkm1 * BIG, qkm1)

return ((pkm1, pkm2, qkm1, qkm2,
k1, k2, k3, k4, k5, k6, k7, k8, r),
until(tt.abs_(old_r - r) < (THRESH * tt.abs_(r))))

(pkm1, pkm2, qkm1, qkm2,
k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan(
_step,
sequences=[tt.arange(0, 300)],
outputs_info=[
e for e in
tt.cast((pkm1, pkm2, qkm1, qkm2,
k1, k2, k3, k4, k5, k6, k7, k8, r),
'float64')
]
)

return r[-1]


def incomplete_beta_ps(a, b, value):
'''Power series for incomplete beta
Use when b*x is small and value not too close to 1.
Based on Cephes library by Steve Moshier (incbet.c)
'''
one = tt.constant(1, dtype='float64')
ai = one / a
u = (one - b) * value
t1 = u / (a + one)
t = u
threshold = np.MachAr().eps * ai
s = tt.constant(0, dtype='float64')

def _step(i, t, s):
t *= (i - b) * value / i
step = t / (a + i)
s += step
return ((t, s), until(tt.abs_(step) < threshold))

(t, s), _ = scan(
_step,
sequences=[tt.arange(2, 302)],
outputs_info=[
e for e in
tt.cast((t, s),
'float64')
]
)

s = s[-1] + t1 + ai

t = (
gammaln(a + b) - gammaln(a) - gammaln(b) +
a * tt.log(value) +
tt.log(s)
)
return tt.exp(t)


def incomplete_beta(a, b, value):
'''Incomplete beta implementation
Power series and continued fraction expansions chosen for best numerical
convergence across the board based on inputs.
'''
machep = tt.constant(np.MachAr().eps, dtype='float64')
one = tt.constant(1, dtype='float64')
w = one - value

ps = incomplete_beta_ps(a, b, value)

flip = tt.gt(value, (a / (a + b)))
aa, bb = a, b
a = tt.switch(flip, bb, aa)
b = tt.switch(flip, aa, bb)
xc = tt.switch(flip, value, w)
x = tt.switch(flip, w, value)

tps = incomplete_beta_ps(a, b, x)
tps = tt.switch(tt.le(tps, machep), one - machep, one - tps)

# Choose which continued fraction expansion for best convergence.
small = tt.lt(x * (a + b - 2.0) - (a - one), 0.0)
cfe = incomplete_beta_cfe(a, b, x, small)
w = tt.switch(small, cfe, cfe / xc)

# Direct incomplete beta accounting for flipped a, b.
t = tt.exp(
a * tt.log(x) + b * tt.log(xc) +
gammaln(a + b) - gammaln(a) - gammaln(b) +
tt.log(w / a)
)

t = tt.switch(
flip,
tt.switch(tt.le(t, machep), one - machep, one - t),
t
)
return tt.switch(
tt.and_(flip, tt.and_(tt.le((b * x), one), tt.le(x, 0.95))),
tps,
tt.switch(
tt.and_(tt.le(b * value, one), tt.le(value, 0.95)),
ps,
t))
81 changes: 79 additions & 2 deletions pymc3/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,19 @@ def check_logp(self, model, value, domain, paramdomains, logp_reference, decimal
decimal = select_by_precision(float64=6, float32=3)
assert_almost_equal(logp(pt), logp_reference(pt), decimal=decimal, err_msg=str(pt))

def check_logcdf(self, pymc3_dist, domain, paramdomains, scipy_logcdf, decimal=None):
domains = paramdomains.copy()
domains['value'] = domain
if decimal is None:
decimal = select_by_precision(float64=6, float32=3)
for pt in product(domains, n_samples=100):
params = dict(pt)
scipy_cdf = scipy_logcdf(**params)
value = params.pop('value')
dist = pymc3_dist.dist(**params)
assert_almost_equal(dist.logcdf(value).tag.test_value, scipy_cdf,
decimal=decimal, err_msg=str(pt))

def check_int_to_1(self, model, value, domain, paramdomains):
pdf = model.fastfn(exp(model.logpt))
for pt in product(paramdomains, n_samples=10):
Expand Down Expand Up @@ -498,11 +511,15 @@ def test_uniform(self):
self.pymc3_matches_scipy(
Uniform, Runif, {'lower': -Rplusunif, 'upper': Rplusunif},
lambda value, lower, upper: sp.uniform.logpdf(value, lower, upper - lower))
self.check_logcdf(Uniform, Runif, {'lower': -Rplusunif, 'upper': Rplusunif},
lambda value, lower, upper: sp.uniform.logcdf(value, lower, upper - lower))

def test_triangular(self):
self.pymc3_matches_scipy(
Triangular, Runif, {'lower': -Rplusunif, 'c': Runif, 'upper': Rplusunif},
lambda value, c, lower, upper: sp.triang.logpdf(value, c-lower, lower, upper-lower))
self.check_logcdf(Triangular, Runif, {'lower': -Rplusunif, 'c': Runif, 'upper': Rplusunif},
lambda value, c, lower, upper: sp.triang.logcdf(value, c-lower, lower, upper-lower))

def test_bound_normal(self):
PositiveNormal = Bound(Normal, lower=0.)
Expand All @@ -522,19 +539,29 @@ def test_flat(self):
with Model():
x = Flat('a')
assert_allclose(x.tag.test_value, 0)
self.check_logcdf(Flat, Runif, {}, lambda value: np.log(0.5))
# Check infinite cases individually.
assert 0. == Flat.dist().logcdf(np.inf).tag.test_value
assert -np.inf == Flat.dist().logcdf(-np.inf).tag.test_value

def test_half_flat(self):
self.pymc3_matches_scipy(HalfFlat, Rplus, {}, lambda value: 0)
with Model():
x = HalfFlat('a', shape=2)
assert_allclose(x.tag.test_value, 1)
assert x.tag.test_value.shape == (2,)
self.check_logcdf(HalfFlat, Runif, {}, lambda value: -np.inf)
# Check infinite cases individually.
assert 0. == HalfFlat.dist().logcdf(np.inf).tag.test_value
assert -np.inf == HalfFlat.dist().logcdf(-np.inf).tag.test_value

def test_normal(self):
self.pymc3_matches_scipy(Normal, R, {'mu': R, 'sd': Rplus},
lambda value, mu, sd: sp.norm.logpdf(value, mu, sd),
decimal=select_by_precision(float64=6, float32=1)
)
self.check_logcdf(Normal, R, {'mu': R, 'sd': Rplus},
lambda value, mu, sd: sp.norm.logcdf(value, mu, sd))

def test_truncated_normal(self):
def scipy_logp(value, mu, sd, lower, upper):
Expand All @@ -558,16 +585,21 @@ def test_half_normal(self):
lambda value, sd: sp.halfnorm.logpdf(value, scale=sd),
decimal=select_by_precision(float64=6, float32=-1)
)
self.check_logcdf(HalfNormal, Rplus, {'sd': Rplus},
lambda value, sd: sp.halfnorm.logcdf(value, scale=sd))

def test_chi_squared(self):
self.pymc3_matches_scipy(ChiSquared, Rplus, {'nu': Rplusdunif},
lambda value, nu: sp.chi2.logpdf(value, df=nu))

@pytest.mark.xfail(reason="Poor CDF in SciPy. See scipy/scipy#869 for details.")
def test_wald_scipy(self):
self.pymc3_matches_scipy(Wald, Rplus, {'mu': Rplus},
lambda value, mu: sp.invgauss.logpdf(value, mu),
self.pymc3_matches_scipy(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus},
lambda value, mu, alpha: sp.invgauss.logpdf(value, mu=mu, loc=alpha),
decimal=select_by_precision(float64=6, float32=1)
)
self.check_logcdf(Wald, Rplus, {'mu': Rplus, 'alpha': Rplus},
lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha))

@pytest.mark.parametrize('value,mu,lam,phi,alpha,logp', [
(.5, .001, .5, None, 0., -124500.7257914),
Expand Down Expand Up @@ -599,6 +631,8 @@ def test_beta(self):
self.pymc3_matches_scipy(Beta, Unit, {'alpha': Rplus, 'beta': Rplus},
lambda value, alpha, beta: sp.beta.logpdf(value, alpha, beta))
self.pymc3_matches_scipy(Beta, Unit, {'mu': Unit, 'sd': Rplus}, beta_mu_sd)
self.check_logcdf(Beta, Unit, {'alpha': Rplus, 'beta': Rplus},
lambda value, alpha, beta: sp.beta.logcdf(value, alpha, beta))

def test_kumaraswamy(self):
# Scipy does not have a built-in Kumaraswamy pdf
Expand All @@ -609,6 +643,8 @@ def scipy_log_pdf(value, a, b):
def test_exponential(self):
self.pymc3_matches_scipy(Exponential, Rplus, {'lam': Rplus},
lambda value, lam: sp.expon.logpdf(value, 0, 1 / lam))
self.check_logcdf(Exponential, Rplus, {'lam': Rplus},
lambda value, lam: sp.expon.logcdf(value, 0, 1 / lam))

def test_geometric(self):
self.pymc3_matches_scipy(Geometric, Nat, {'p': Unit},
Expand All @@ -623,23 +659,33 @@ def test_fun(value, mu, alpha):
def test_laplace(self):
self.pymc3_matches_scipy(Laplace, R, {'mu': R, 'b': Rplus},
lambda value, mu, b: sp.laplace.logpdf(value, mu, b))
self.check_logcdf(Laplace, R, {'mu': R, 'b': Rplus},
lambda value, mu, b: sp.laplace.logcdf(value, mu, b))

def test_lognormal(self):
self.pymc3_matches_scipy(
Lognormal, Rplus, {'mu': R, 'tau': Rplusbig},
lambda value, mu, tau: floatX(sp.lognorm.logpdf(value, tau**-.5, 0, np.exp(mu))))
self.check_logcdf(Lognormal, Rplus, {'mu': R, 'tau': Rplusbig},
lambda value, mu, tau: sp.lognorm.logcdf(value, tau**-.5, 0, np.exp(mu)))

def test_t(self):
self.pymc3_matches_scipy(StudentT, R, {'nu': Rplus, 'mu': R, 'lam': Rplus},
lambda value, nu, mu, lam: sp.t.logpdf(value, nu, mu, lam**-0.5))
self.check_logcdf(StudentT, R, {'nu': Rplus, 'mu': R, 'lam': Rplus},
lambda value, nu, mu, lam: sp.t.logcdf(value, nu, mu, lam**-0.5))

def test_cauchy(self):
self.pymc3_matches_scipy(Cauchy, R, {'alpha': R, 'beta': Rplusbig},
lambda value, alpha, beta: sp.cauchy.logpdf(value, alpha, beta))
self.check_logcdf(Cauchy, R, {'alpha': R, 'beta': Rplusbig},
lambda value, alpha, beta: sp.cauchy.logcdf(value, alpha, beta))

def test_half_cauchy(self):
self.pymc3_matches_scipy(HalfCauchy, Rplus, {'beta': Rplusbig},
lambda value, beta: sp.halfcauchy.logpdf(value, scale=beta))
self.check_logcdf(HalfCauchy, Rplus, {'beta': Rplusbig},
lambda value, beta: sp.halfcauchy.logcdf(value, scale=beta))

def test_gamma(self):
self.pymc3_matches_scipy(
Expand All @@ -659,12 +705,17 @@ def test_inverse_gamma(self):
def test_pareto(self):
self.pymc3_matches_scipy(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig},
lambda value, alpha, m: sp.pareto.logpdf(value, alpha, scale=m))
self.check_logcdf(Pareto, Rplus, {'alpha': Rplusbig, 'm': Rplusbig},
lambda value, alpha, m: sp.pareto.logcdf(value, alpha, scale=m))

@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32 due to inf issues")
def test_weibull(self):
self.pymc3_matches_scipy(Weibull, Rplus, {'alpha': Rplusbig, 'beta': Rplusbig},
lambda value, alpha, beta: sp.exponweib.logpdf(value, 1, alpha, scale=beta),
)
self.check_logcdf(Weibull, Rplus, {'alpha': Rplusbig, 'beta': Rplusbig},
lambda value, alpha, beta:
sp.exponweib.logcdf(value, 1, alpha, scale=beta),)

def test_half_studentt(self):
# this is only testing for nu=1 (halfcauchy)
Expand Down Expand Up @@ -1064,6 +1115,25 @@ def test_ex_gaussian(self, value, mu, sigma, nu, logp):
pt = {'eg': value}
assert_almost_equal(model.fastlogp(pt), logp, decimal=select_by_precision(float64=6, float32=2), err_msg=str(pt))

@pytest.mark.parametrize('value,mu,sigma,nu,logcdf', [
(0.5, -50.000, 0.500, 0.500, 0.0000000),
(1.0, -1.000, 0.001, 0.001, 0.0000000),
(2.0, 0.001, 1.000, 1.000, -0.2365674),
(5.0, 0.500, 2.500, 2.500, -0.2886489),
(7.5, 2.000, 5.000, 5.000, -0.5655104),
(15.0, 5.000, 7.500, 7.500, -0.4545255),
(50.0, 50.000, 10.000, 10.000, -1.433714),
(1000.0, 500.000, 10.000, 20.000, -1.573708e-11),
])
def test_ex_gaussian_cdf(self, value, mu, sigma, nu, logcdf):
"""Log probabilities calculated using the pexGAUS function from the R package gamlss.
See e.g., doi: 10.1111/j.1467-9876.2005.00510.x, or http://www.gamlss.org/."""
assert_almost_equal(
ExGaussian.dist(mu=mu, sigma=sigma, nu=nu).logcdf(value).tag.test_value,
logcdf,
decimal=select_by_precision(float64=6, float32=2),
err_msg=str((value, mu, sigma, nu, logcdf)))

@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32")
def test_vonmises(self):
self.pymc3_matches_scipy(
Expand All @@ -1075,10 +1145,17 @@ def gumbel(value, mu, beta):
return floatX(sp.gumbel_r.logpdf(value, loc=mu, scale=beta))
self.pymc3_matches_scipy(Gumbel, R, {'mu': R, 'beta': Rplusbig}, gumbel)

def gumbellcdf(value, mu, beta):
return floatX(sp.gumbel_r.logcdf(value, loc=mu, scale=beta))
self.check_logcdf(Gumbel, R, {'mu': R, 'beta': Rplusbig}, gumbellcdf)

def test_logistic(self):
self.pymc3_matches_scipy(Logistic, R, {'mu': R, 's': Rplus},
lambda value, mu, s: sp.logistic.logpdf(value, mu, s),
decimal=select_by_precision(float64=6, float32=1))
self.check_logcdf(Logistic, R, {'mu': R, 's': Rplus},
lambda value, mu, s: sp.logistic.logcdf(value, mu, s),
decimal=select_by_precision(float64=6, float32=1))

def test_logitnormal(self):
self.pymc3_matches_scipy(LogitNormal, Unit, {'mu': R, 'sd': Rplus},
Expand Down