Skip to content

Commit

Permalink
Implement Hurdle distributions
Browse files Browse the repository at this point in the history
  • Loading branch information
tomicapretto authored and ricardoV94 committed May 10, 2023
1 parent 970db18 commit a53e865
Show file tree
Hide file tree
Showing 4 changed files with 420 additions and 1 deletion.
4 changes: 4 additions & 0 deletions docs/source/api/distributions/mixture.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,7 @@ Mixture
ZeroInflatedBinomial
ZeroInflatedNegativeBinomial
ZeroInflatedPoisson
HurdlePoisson
HurdleNegativeBinomial
HurdleGamma
HurdleLogNormal
8 changes: 8 additions & 0 deletions pymc/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@
SymbolicRandomVariable,
)
from pymc.distributions.mixture import (
HurdleGamma,
HurdleLogNormal,
HurdleNegativeBinomial,
HurdlePoisson,
Mixture,
NormalMixture,
ZeroInflatedBinomial,
Expand Down Expand Up @@ -195,4 +199,8 @@
"Censored",
"CAR",
"PolyaGamma",
"HurdleGamma",
"HurdleLogNormal",
"HurdleNegativeBinomial",
"HurdlePoisson",
]
245 changes: 244 additions & 1 deletion pymc/distributions/mixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from pytensor.tensor.random.op import RandomVariable

from pymc.distributions import transforms
from pymc.distributions.continuous import Normal, get_tau_sigma
from pymc.distributions.continuous import Gamma, LogNormal, Normal, get_tau_sigma
from pymc.distributions.discrete import Binomial, NegativeBinomial, Poisson
from pymc.distributions.dist_math import check_parameters
from pymc.distributions.distribution import (
Expand All @@ -34,6 +34,7 @@
)
from pymc.distributions.shape_utils import _change_dist_size, change_dist_size
from pymc.distributions.transforms import _default_transform
from pymc.distributions.truncated import Truncated
from pymc.logprob.abstract import _logcdf, _logcdf_helper, _logprob, _logprob_helper
from pymc.logprob.transforms import IntervalTransform
from pymc.logprob.utils import ignore_logprob
Expand All @@ -42,6 +43,10 @@
from pymc.vartypes import continuous_types, discrete_types

__all__ = [
"HurdleGamma",
"HurdleLogNormal",
"HurdleNegativeBinomial",
"HurdlePoisson",
"Mixture",
"NormalMixture",
"ZeroInflatedBinomial",
Expand Down Expand Up @@ -794,3 +799,241 @@ def dist(cls, psi, mu=None, alpha=None, p=None, n=None, **kwargs):
nonzero_dist=NegativeBinomial.dist(mu=mu, alpha=alpha, p=p, n=n),
**kwargs,
)


def _hurdle_mixture(*, name, nonzero_p, nonzero_dist, dtype, **kwargs):
"""Helper function to create a hurdle mixtures
If name is `None`, this function returns an unregistered variable
In hurdle models, the zeros come from a completely different process than the rest of the data.
In other words, the zeros are not inflated, they come from a different process.
"""
if dtype == "float":
zero = 0.0
lower = np.finfo(pytensor.config.floatX).eps
elif dtype == "int":
zero = 0
lower = 1
else:
raise ValueError("dtype must be 'float' or 'int'")

nonzero_p = pt.as_tensor_variable(floatX(nonzero_p))
weights = pt.stack([1 - nonzero_p, nonzero_p], axis=-1)
comp_dists = [
DiracDelta.dist(zero),
Truncated.dist(nonzero_dist, lower=lower),
]

if name is not None:
return Mixture(name, weights, comp_dists, **kwargs)
else:
return Mixture.dist(weights, comp_dists, **kwargs)


class HurdlePoisson:
R"""
Hurdle Poisson log-likelihood.
The Poisson distribution is often used to model the number of events occurring
in a fixed period of time or space when the times or locations
at which events occur are independent.
The difference with ZeroInflatedPoisson is that the zeros are not inflated,
they come from a completely independent process.
The pmf of this distribution is
.. math::
f(x \mid \psi, \mu) =
\left\{
\begin{array}{l}
(1 - \psi) \ \text{if } x = 0 \\
\psi
\frac{\text{PoissonPDF}(x \mid \mu))}
{1 - \text{PoissonCDF}(0 \mid \mu)} \ \text{if } x=1,2,3,\ldots
\end{array}
\right.
Parameters
----------
psi : tensor_like of float
Expected proportion of Poisson variates (0 < psi < 1)
mu : tensor_like of float
Expected number of occurrences (mu >= 0).
"""

def __new__(cls, name, psi, mu, **kwargs):
return _hurdle_mixture(
name=name, nonzero_p=psi, nonzero_dist=Poisson.dist(mu=mu), dtype="int", **kwargs
)

@classmethod
def dist(cls, psi, mu, **kwargs):
return _hurdle_mixture(
name=None, nonzero_p=psi, nonzero_dist=Poisson.dist(mu=mu), dtype="int", **kwargs
)


class HurdleNegativeBinomial:
R"""
Hurdle Negative Binomial log-likelihood.
The negative binomial distribution describes a Poisson random variable
whose rate parameter is gamma distributed.
The difference with ZeroInflatedNegativeBinomial is that the zeros are not inflated,
they come from a completely independent process.
The pmf of this distribution is
.. math::
f(x \mid \psi, \mu, \alpha) =
\left\{
\begin{array}{l}
(1 - \psi) \ \text{if } x = 0 \\
\psi
\frac{\text{NegativeBinomialPDF}(x \mid \mu, \alpha))}
{1 - \text{NegativeBinomialCDF}(0 \mid \mu, \alpha)} \ \text{if } x=1,2,3,\ldots
\end{array}
\right.
Parameters
----------
psi : tensor_like of float
Expected proportion of Negative Binomial variates (0 < psi < 1)
alpha : tensor_like of float
Gamma distribution shape parameter (alpha > 0).
mu : tensor_like of float
Gamma distribution mean (mu > 0).
p : tensor_like of float
Alternative probability of success in each trial (0 < p < 1).
n : tensor_like of float
Alternative number of target success trials (n > 0)
"""

def __new__(cls, name, psi, mu=None, alpha=None, p=None, n=None, **kwargs):
return _hurdle_mixture(
name=name,
nonzero_p=psi,
nonzero_dist=NegativeBinomial.dist(mu=mu, alpha=alpha, p=p, n=n),
dtype="int",
**kwargs,
)

@classmethod
def dist(cls, psi, mu=None, alpha=None, p=None, n=None, **kwargs):
return _hurdle_mixture(
name=None,
nonzero_p=psi,
nonzero_dist=NegativeBinomial.dist(mu=mu, alpha=alpha, p=p, n=n),
dtype="int",
**kwargs,
)


class HurdleGamma:
R"""
Hurdle Gamma log-likelihood.
.. math::
f(x \mid \psi, \alpha, \beta) =
\left\{
\begin{array}{l}
(1 - \psi) \ \text{if } x = 0 \\
\psi
\frac{\text{GammaPDF}(x \mid \alpha, \beta))}
{1 - \text{GammaCDF}(\epsilon \mid \alpha, \beta)} \ \text{if } x=1,2,3,\ldots
\end{array}
\right.
where :math:`\epsilon` is the machine precision.
Parameters
----------
psi : tensor_like of float
Expected proportion of Gamma variates (0 < psi < 1)
alpha : tensor_like of float, optional
Shape parameter (alpha > 0).
beta : tensor_like of float, optional
Rate parameter (beta > 0).
mu : tensor_like of float, optional
Alternative shape parameter (mu > 0).
sigma : tensor_like of float, optional
Alternative scale parameter (sigma > 0).
"""

def __new__(cls, name, psi, alpha=None, beta=None, mu=None, sigma=None, **kwargs):
return _hurdle_mixture(
name=name,
nonzero_p=psi,
nonzero_dist=Gamma.dist(alpha=alpha, beta=beta, mu=mu, sigma=sigma),
dtype="float",
**kwargs,
)

@classmethod
def dist(cls, psi, alpha=None, beta=None, mu=None, sigma=None, **kwargs):
return _hurdle_mixture(
name=None,
nonzero_p=psi,
nonzero_dist=Gamma.dist(alpha=alpha, beta=beta, mu=mu, sigma=sigma),
dtype="float",
**kwargs,
)


class HurdleLogNormal:
R"""
Hurdle LogNormal log-likelihood.
.. math::
f(x \mid \psi, \mu, \sigma) =
\left\{
\begin{array}{l}
(1 - \psi) \ \text{if } x = 0 \\
\psi
\frac{\text{LogNormalPDF}(x \mid \mu, \sigma))}
{1 - \text{LogNormalCDF}(\epsilon \mid \mu, \sigma)} \ \text{if } x=1,2,3,\ldots
\end{array}
\right.
where :math:`\epsilon` is the machine precision.
Parameters
----------
psi : tensor_like of float
Expected proportion of Gamma variates (0 < psi < 1)
mu : tensor_like of float, default 0
Location parameter.
sigma : tensor_like of float, optional
Standard deviation. (sigma > 0). (only required if tau is not specified).
Defaults to 1.
tau : tensor_like of float, optional
Scale parameter (tau > 0). (only required if sigma is not specified).
Defaults to 1.
"""

def __new__(cls, name, psi, mu=0, sigma=None, tau=None, **kwargs):
return _hurdle_mixture(
name=name,
nonzero_p=psi,
nonzero_dist=LogNormal.dist(mu=mu, sigma=sigma, tau=tau),
dtype="float",
**kwargs,
)

@classmethod
def dist(cls, psi, mu=0, sigma=None, tau=None, **kwargs):
return _hurdle_mixture(
name=None,
nonzero_p=psi,
nonzero_dist=LogNormal.dist(mu=mu, sigma=sigma, tau=tau),
dtype="float",
**kwargs,
)
Loading

0 comments on commit a53e865

Please sign in to comment.