Skip to content

Commit

Permalink
Merge d6c2978 into 801accb
Browse files Browse the repository at this point in the history
  • Loading branch information
bwengals committed Feb 22, 2018
2 parents 801accb + d6c2978 commit 6d515e9
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 13 deletions.
1 change: 1 addition & 0 deletions docs/source/api/distributions/discrete.rst
Expand Up @@ -18,6 +18,7 @@ Discrete
Categorical
DiscreteWeibull
Constant
OrderedLogistic

.. automodule:: pymc3.distributions.discrete
:members:
1 change: 1 addition & 0 deletions pymc3/distributions/__init__.py
Expand Up @@ -42,6 +42,7 @@
from .discrete import DiscreteUniform
from .discrete import Geometric
from .discrete import Categorical
from .discrete import OrderedLogistic

from .distribution import DensityDist
from .distribution import Distribution
Expand Down
95 changes: 92 additions & 3 deletions pymc3/distributions/discrete.py
Expand Up @@ -8,13 +8,13 @@
from pymc3.util import get_variable_name
from .dist_math import bound, factln, binomln, betaln, logpow
from .distribution import Discrete, draw_values, generate_samples, reshape_sampled
from pymc3.math import tround
from ..math import logaddexp, logit, log1pexp
from pymc3.math import tround, sigmoid, logaddexp, logit, log1pexp


__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'DiscreteWeibull',
'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant',
'ZeroInflatedPoisson', 'ZeroInflatedBinomial', 'ZeroInflatedNegativeBinomial',
'DiscreteUniform', 'Geometric', 'Categorical']
'DiscreteUniform', 'Geometric', 'Categorical', 'OrderedLogistic']


class Binomial(Discrete):
Expand Down Expand Up @@ -851,3 +851,92 @@ def _repr_latex_(self, name=None, dist=None):
r'(\mathit{{mu}}={},~\mathit{{alpha}}={},~'
r'\mathit{{psi}}={})$'
.format(name, name_mu, name_alpha, name_psi))


class OrderedLogistic(Categorical):
R"""
Ordered Logistic log-likelihood.
Useful for regression on ordinal data values whose values range
from 1 to K as a function of some predictor, :math:`\eta`. The
cutpoints, :math:`c`, separate which ranges of :math:`\eta` are
mapped to which of the K observed dependent variables. The number
of cutpoints is K - 1. It is recommended that the cutpoints are
constrained to be ordered.
.. math::
f(k \mid \eta, c) = \left\{
\begin{array}{l}
1 - \text{logit}^{-1}(\eta - c_1)
\,, \text{if } k = 0 \\
\text{logit}^{-1}(\eta - c_{k - 1}) -
\text{logit}^{-1}(\eta - c_{k})
\,, \text{if } 0 < k < K \\
\text{logit}^{-1}(\eta - c_{K - 1})
\,, \text{if } k = K \\
\end{array}
\right.
Parameters
----------
eta : float
The predictor.
c : array
The length K - 1 array of cutpoints which break :math:`\eta` into
ranges. Do not explicitly set the first and last elements of
:math:`c` to negative and positive infinity.
Example
--------
.. code:: python
# Generate data for a simple 1 dimensional example problem
n1 = 300; n2 = 300; n3 = 300
cluster1 = np.random.randn(n1_c) + -1
cluster2 = np.random.randn(n2_c) + 0
cluster3 = np.random.randn(n3_c) + 2
x = np.concatenate((cluster1, cluster2, cluster3))
y = np.concatenate((1*np.ones(n1_c),
2*np.ones(n2_c),
3*np.ones(n3_c))) - 1
# Ordered logistic regression
with pm.Model() as model:
cutpoints = pm.Normal("cutpoints", mu=[-1,1], sd=10, shape=2,
transform=pm.distributions.transforms.ordered)
y_ = pm.OrderedLogistic("y", cutpoints=cutpoints, eta=x, observed=y)
tr = pm.sample(1000)
# Plot the results
plt.hist(cluster1, 30, alpha=0.5);
plt.hist(cluster2, 30, alpha=0.5);
plt.hist(cluster3, 30, alpha=0.5);
plt.hist(tr["cutpoints"][:,0], 80, alpha=0.2, color='k');
plt.hist(tr["cutpoints"][:,1], 80, alpha=0.2, color='k');
"""

def __init__(self, eta, cutpoints, *args, **kwargs):
self.eta = tt.as_tensor_variable(eta)
self.cutpoints = tt.as_tensor_variable(cutpoints)

pa = sigmoid(tt.shape_padleft(self.cutpoints) - tt.shape_padright(self.eta))
p_cum = tt.concatenate([
tt.zeros_like(tt.shape_padright(pa[:, 0])),
pa,
tt.ones_like(tt.shape_padright(pa[:, 0]))
], axis=1)
p = p_cum[:, 1:] - p_cum[:, :-1]

super(OrderedLogistic, self).__init__(p=p, *args, **kwargs)

def _repr_latex_(self, name=None, dist=None):
if dist is None:
dist = self
name_eta = get_variable_name(dist.eta)
name_cutpoints = get_variable_name(dist.cutpoints)
return (r'${} \sim \text{{OrderedLogistic}}'
r'(\mathit{{eta}}={}, \mathit{{cutpoints}}={}$'
.format(name, name_eta, name_cutpoints))
60 changes: 54 additions & 6 deletions pymc3/distributions/transforms.py
Expand Up @@ -9,7 +9,8 @@
import numpy as np

__all__ = ['transform', 'stick_breaking', 'logodds', 'interval', 'log_exp_m1',
'lowerbound', 'upperbound', 'log', 'sum_to_1', 't_stick_breaking']
'lowerbound', 'upperbound', 'ordered', 'log', 'sum_to_1',
't_stick_breaking']


class Transform(object):
Expand Down Expand Up @@ -107,20 +108,20 @@ def jacobian_det(self, x):

class LogExpM1(ElemwiseTransform):
name = "log_exp_m1"

def backward(self, x):
return tt.nnet.softplus(x)

def forward(self, x):
"""Inverse operation of softplus
y = Log(Exp(x) - 1)
y = Log(Exp(x) - 1)
= Log(1 - Exp(-x)) + x
"""
return tt.log(1.-tt.exp(-x)) + x

def forward_val(self, x, point=None):
return self.forward(x)

def jacobian_det(self, x):
return -tt.nnet.softplus(-x)

Expand Down Expand Up @@ -237,6 +238,53 @@ def jacobian_det(self, x):
upperbound = UpperBound


class Ordered(ElemwiseTransform):
name = "ordered"

def backward(self, x):
out = tt.zeros(x.shape)
out = tt.inc_subtensor(out[0], x[0])
out = tt.inc_subtensor(out[1:], tt.log(x[1:] - x[:-1]))
return out

def forward(self, y):
out = tt.zeros(y.shape)
out = tt.inc_subtensor(out[0], y[0])
out = tt.inc_subtensor(out[1:], tt.exp(y[1:]))
return tt.cumsum(out)

def forward_val(self, x, point=None):
x, = draw_values([x], point=point)
return self.forward(x)

def jacobian_det(self, y):
return tt.sum(y[1:])

ordered = Ordered()


class Composed(Transform):
def __init__(self, transform1, transform2):
self._transform1 = transform1
self._transform2 = transform2
self.name = '_'.join([transform1.name, transform2.name])

def forward(self, x):
return self._transform2.forward(self._transform1.forward(x))

def forward_val(self, x, point=None):
return self.forward(x)

def backward(self, y):
return self._transform1.backward(self._transform2.backward(y))

def jacobian_det(self, y):
y2 = self._transform2.backward(y)
det1 = self._transform1.jacobian_det(y2)
det2 = self._transform2.jacobian_det(y)
return det1 + det2


class SumTo1(Transform):
"""Transforms K dimensional simplex space (values in [0,1] and sum to 1) to K - 1 vector of values in [0,1]
"""
Expand Down
20 changes: 17 additions & 3 deletions pymc3/tests/test_distributions.py
@@ -1,6 +1,7 @@
from __future__ import division

import itertools
import sys

from .helpers import SeededTest, select_by_precision
from ..vartypes import continuous_types
Expand All @@ -15,7 +16,8 @@
Flat, LKJCorr, Wald, ChiSquared, HalfNormal, DiscreteUniform,
Bound, Uniform, Triangular, Binomial, SkewNormal, DiscreteWeibull,
Gumbel, Logistic, Interpolated, ZeroInflatedBinomial, HalfFlat, AR1,
KroneckerNormal)
KroneckerNormal, OrderedLogistic)

from ..distributions import continuous
from pymc3.theanof import floatX
from numpy import array, inf, log, exp
Expand Down Expand Up @@ -315,7 +317,6 @@ def categorical_logpdf(value, p):
else:
return -inf


def mvt_logpdf(value, nu, Sigma, mu=0):
d = len(Sigma)
dist = np.atleast_2d(value) - mu
Expand All @@ -328,11 +329,18 @@ def mvt_logpdf(value, nu, Sigma, mu=0):
logp = norm - logdet - (nu + d) / 2. * np.log1p((trafo * trafo).sum(-1) / nu)
return logp.sum()


def AR1_logpdf(value, k, tau_e):
return (sp.norm(loc=0,scale=1/np.sqrt(tau_e)).logpdf(value[0]) +
sp.norm(loc=k*value[:-1],scale=1/np.sqrt(tau_e)).logpdf(value[1:]).sum())

def invlogit(x, eps=sys.float_info.epsilon):
return (1. - 2. * eps) / (1. + np.exp(-x)) + eps

def orderedlogistic_logpdf(value, eta, cutpoints):
c = np.concatenate(([-np.inf], cutpoints, [np.inf]))
p = invlogit(eta - c[value]) - invlogit(eta - c[value + 1])
return np.log(p)

class Simplex(object):
def __init__(self, n):
self.vals = list(simplex_values(n))
Expand Down Expand Up @@ -978,6 +986,12 @@ def test_categorical(self, n):
self.pymc3_matches_scipy(Categorical, Domain(range(n), 'int64'), {'p': Simplex(n)},
lambda value, p: categorical_logpdf(value, p))

@pytest.mark.parametrize('n', [2, 3, 4])
def test_orderedlogistic(self, n):
self.pymc3_matches_scipy(OrderedLogistic, Domain(range(n), 'int64'),
{'eta': R, 'cutpoints': Vector(R, n-1)},
lambda value, eta, cutpoints: orderedlogistic_logpdf(value, eta, cutpoints))

def test_densitydist(self):
def logp(x):
return -log(2 * .5) - abs(x - .5) / .5
Expand Down
13 changes: 12 additions & 1 deletion pymc3/tests/test_transforms.py
Expand Up @@ -10,7 +10,7 @@

# some transforms (stick breaking) require additon of small slack in order to be numerically
# stable. The minimal addable slack for float32 is higher thus we need to be less strict
tol = 1e-7 if theano.config.floatX == 'flaot64' else 1e-6
tol = 1e-7 if theano.config.floatX == 'float64' else 1e-6


def check_transform_identity(transform, domain, constructor=tt.dscalar, test=0):
Expand Down Expand Up @@ -93,6 +93,17 @@ def check_jacobian_det(transform, domain,
actual_ljd(yval),
computed_ljd(yval), tol)

def test_ordered():
check_vector_transform_identity(tr.ordered, Vector(R, 6))

def test_ordered_jacobian_det():
check_jacobian_det(tr.ordered, Vector(R, 2),
tt.dvector, np.array([0, 0]), elemwise=True)

def test_ordered_vals():
vals = get_values(tr.ordered)
close_to_logicals(vals > 0, True, tol)


def test_log():
check_transform_identity(tr.log, Rplusbig)
Expand Down

0 comments on commit 6d515e9

Please sign in to comment.