From 4ece871a677c6d8b9daf392ed4406aaeb156ee27 Mon Sep 17 00:00:00 2001 From: warren Date: Sun, 5 Mar 2023 02:13:27 -0500 Subject: [PATCH 1/6] ENH: Add the negative binomial distribution. --- rand_distr/README.md | 8 +- rand_distr/src/lib.rs | 3 + rand_distr/src/negative_binomial.rs | 238 ++++++++++++++++++++++++++++ 3 files changed, 245 insertions(+), 4 deletions(-) create mode 100644 rand_distr/src/negative_binomial.rs diff --git a/rand_distr/README.md b/rand_distr/README.md index d11a3744c41..59e7e44771c 100644 --- a/rand_distr/README.md +++ b/rand_distr/README.md @@ -11,10 +11,10 @@ Implements a full suite of random number distribution sampling routines. This crate is a superset of the [rand::distributions] module, including support for sampling from Beta, Binomial, Cauchy, ChiSquared, Dirichlet, Exponential, -FisherF, Gamma, Geometric, Hypergeometric, InverseGaussian, LogNormal, Normal, -Pareto, PERT, Poisson, StudentT, Triangular and Weibull distributions. Sampling -from the unit ball, unit circle, unit disc and unit sphere surfaces is also -supported. +FisherF, Gamma, Geometric, Hypergeometric, InverseGaussian, LogNormal, +Negative Binomial, Normal, Pareto, PERT, Poisson, StudentT, Triangular and +Weibull distributions. Sampling from the unit ball, unit circle, unit disc and +unit sphere surfaces is also supported. It is worth mentioning the [statrs] crate which provides similar functionality along with various support functions, including PDF and CDF computation. In diff --git a/rand_distr/src/lib.rs b/rand_distr/src/lib.rs index c8fd298171d..697232dc08b 100644 --- a/rand_distr/src/lib.rs +++ b/rand_distr/src/lib.rs @@ -48,6 +48,7 @@ //! - [`Cauchy`] distribution //! - Related to Bernoulli trials (yes/no events, with a given probability): //! - [`Binomial`] distribution +//! - [`NegativeBinomial`] distribution //! - [`Geometric`] distribution //! - [`Hypergeometric`] distribution //! - Related to positive real-valued quantities that grow exponentially @@ -112,6 +113,7 @@ pub use self::geometric::{Error as GeoError, Geometric, StandardGeometric}; pub use self::gumbel::{Error as GumbelError, Gumbel}; pub use self::hypergeometric::{Error as HyperGeoError, Hypergeometric}; pub use self::inverse_gaussian::{Error as InverseGaussianError, InverseGaussian}; +pub use self::negative_binomial::{Error as NegativeBinomialError, NegativeBinomial}; pub use self::normal::{Error as NormalError, LogNormal, Normal, StandardNormal}; pub use self::normal_inverse_gaussian::{ Error as NormalInverseGaussianError, NormalInverseGaussian, @@ -197,6 +199,7 @@ mod geometric; mod gumbel; mod hypergeometric; mod inverse_gaussian; +mod negative_binomial; mod normal; mod normal_inverse_gaussian; mod pareto; diff --git a/rand_distr/src/negative_binomial.rs b/rand_distr/src/negative_binomial.rs new file mode 100644 index 00000000000..f6ec5fc20b5 --- /dev/null +++ b/rand_distr/src/negative_binomial.rs @@ -0,0 +1,238 @@ +//! The negative binomial distribution. + +use crate::{Distribution, Exp1, Gamma, Open01, Poisson, Standard, StandardNormal}; +use core::fmt; +#[allow(unused_imports)] +use num_traits::{Float, FloatConst}; +use rand::Rng; + +// +// This enum exists because we handle p=1 for NegativeBinomial(r, p). +// When p is 1, the distribution is trivial--the PMF is 1 for k=0 and 0 +// elsewhere. In that case, we don't need to store the Gamma distribution +// instance. +// +#[derive(Copy, Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +enum NegativeBinomialRepr +where + F: Float, + StandardNormal: Distribution, + Exp1: Distribution, + Open01: Distribution, +{ + /// Trivial distribution that always generates 0, for the case p=1. + POne, + + /// The stored Gamma distribution is Gamma(r, (1 - p)/p), where `r` is the + /// number of successes and `p` is the probability of success. + PLessThanOne(Gamma), +} + +/// The negative binomial distribution `NegativeBinomial(r, p)`. +/// +/// The negative binomial distribution is a discrete distribution with two +/// parameters, `r` and `p`. When `r` is an integer, the negative binomial +/// distribution can be interpreted as the distribution of the number of +/// failures in a sequence of Bernoulli trials that continue until `r` +/// successes occur. `p` is the probability of success in a single Bernoulli +/// trial. +/// +/// `NegativeBinomial` accepts floating point types for `r`. This is a +/// generalization of the more common case where `r` is an integer. +/// +/// The density function of this distribution is +/// +/// ```text +/// Γ(k + r) +/// f(k; r, p) = ------------- (1 - p)^k p^r +/// Γ(k + 1) Γ(r) +/// ``` +/// +/// where Γ(x) is the gamma function. +/// +/// When `r` is an integer, the density function is +/// +/// ```text +/// f(k; r, p) = C(k + r - 1, k) (1 - p)^k p^r +/// ``` +/// +/// where `C(m, k)` is the binomial coefficient, read "m choose k". +/// +/// Despite being a discrete distribution, the `sample` method of +/// `NegativeBinomial` returns a floating point type. This is consistent +/// with several other discrete distributions in the `rand_distr` crate. +/// +/// # Example +/// +/// ``` +/// use rand_distr::{Distribution, NegativeBinomial}; +/// +/// let nb = NegativeBinomial::::new(10.0, 0.02).unwrap(); +/// let v = nb.sample(&mut rand::thread_rng()); +/// println!("{} is from a NegativeBinomial(10, 0.02) distribution", v); +/// ``` +#[derive(Copy, Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] +pub struct NegativeBinomial +where + F: Float + FloatConst, + Standard: Distribution, + StandardNormal: Distribution, + Exp1: Distribution, + Open01: Distribution, +{ + repr: NegativeBinomialRepr, +} + +/// Error type returned from `NegativeBinomial::new`. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Error { + /// `r < 0` or `r` is `nan` + InvalidNumberOfSuccesses, + + /// `p <= 0 || p > 1` or `p` is `nan` + InvalidProbability, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(match self { + Error::InvalidNumberOfSuccesses => "r is NaN or not strictly positive", + Error::InvalidProbability => "p is NaN or outside the interval (0, 1]", + }) + } +} + +#[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] +impl std::error::Error for Error {} + +impl NegativeBinomial +where + F: Float + FloatConst, + Standard: Distribution, + StandardNormal: Distribution, + Exp1: Distribution, + Open01: Distribution, +{ + /// Construct a new `NegativeBinomial` with the given parameters `r` + /// (number of successes) and `p` (probability of success on each trial). + pub fn new(r: F, p: F) -> Result { + if !r.is_finite() || r <= F::zero() { + Err(Error::InvalidNumberOfSuccesses) + } else if !p.is_finite() || p <= F::zero() || p > F::one() { + Err(Error::InvalidProbability) + } else if p == F::one() { + Ok(NegativeBinomial { + repr: NegativeBinomialRepr::POne, + }) + } else { + Ok(NegativeBinomial { + repr: NegativeBinomialRepr::PLessThanOne( + Gamma::::new(r, (F::one() - p) / p).unwrap(), + ), + }) + } + } +} + +impl Distribution for NegativeBinomial +where + F: Float + FloatConst, + Standard: Distribution, + StandardNormal: Distribution, + Exp1: Distribution, + Open01: Distribution, +{ + fn sample(&self, rng: &mut R) -> F { + match &self.repr { + NegativeBinomialRepr::POne => F::zero(), + NegativeBinomialRepr::PLessThanOne(gamma) => { + Poisson::::new(gamma.sample(rng)).unwrap().sample(rng) + } + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_nb_invalid_p() { + assert!(NegativeBinomial::::new(1.0, core::f64::NAN).is_err()); + assert!(NegativeBinomial::::new(1.0, core::f64::INFINITY).is_err()); + assert!(NegativeBinomial::::new(1.0, core::f64::NEG_INFINITY).is_err()); + + assert!(NegativeBinomial::::new(1.0, -0.5).is_err()); + assert!(NegativeBinomial::::new(1.0, 0.0).is_err()); + assert!(NegativeBinomial::::new(1.0, 0.2).is_ok()); + assert!(NegativeBinomial::::new(1.0, 1.0).is_ok()); + assert!(NegativeBinomial::::new(1.0, 2.0).is_err()); + } + + fn test_nb_mean_and_variance(r: f64, p: f64, rng: &mut R) { + let distr = NegativeBinomial::::new(r, p).unwrap(); + + let expected_mean = r * (1.0 - p) / p; + let expected_variance = r * (1.0 - p) / (p * p); + + let mut results = [0.0; 10000]; + for i in results.iter_mut() { + *i = distr.sample(rng); + } + + let mean = results.iter().sum::() / results.len() as f64; + // This test is equivalent to requiring that the relative error + // is less than 0.01. + assert!((mean - expected_mean).abs() < 0.01 * expected_mean); + + let variance = + results.iter().map(|x| (x - mean) * (x - mean)).sum::() / results.len() as f64; + assert!((variance - expected_variance).abs() < 0.05 * expected_variance); + } + + #[test] + fn test_negative_binomial() { + let mut rng = crate::test::rng(1894736523008208032); + + test_nb_mean_and_variance(8.0, 0.10, &mut rng); + test_nb_mean_and_variance(200.0, 0.10, &mut rng); + test_nb_mean_and_variance(8.0, 0.25, &mut rng); + test_nb_mean_and_variance(4.0, 0.50, &mut rng); + test_nb_mean_and_variance(3.0, 0.75, &mut rng); + test_nb_mean_and_variance(10.0, 0.90, &mut rng); + test_nb_mean_and_variance(1000.0, 0.90, &mut rng); + } + + fn check_p_one_returns_zero() + where + F: Float + FloatConst + core::fmt::Debug, + Standard: Distribution, + StandardNormal: Distribution, + Exp1: Distribution, + Open01: Distribution, + { + // When p is 1, the samples are all 0. + let mut rng = crate::test::rng(1894736523008208032); + let nb = NegativeBinomial::::new(F::one(), F::one()).unwrap(); + for _i in 0..10 { + assert_eq!(nb.sample(&mut rng), F::zero()); + } + } + + #[test] + fn test_p_one_returns_zero() { + check_p_one_returns_zero::(); + check_p_one_returns_zero::(); + } + + #[test] + fn negative_binomial_distributions_can_be_compared() { + assert_eq!( + NegativeBinomial::::new(3.0, 0.25), + NegativeBinomial::::new(3.0, 0.25) + ); + } +} From 93632f2cbb7803d72f10de07504baac6a9dc183f Mon Sep 17 00:00:00 2001 From: warren Date: Sun, 5 Mar 2023 11:36:27 -0500 Subject: [PATCH 2/6] Add value stability tests for NegativeBinomial. --- rand_distr/tests/value_stability.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/rand_distr/tests/value_stability.rs b/rand_distr/tests/value_stability.rs index d3754705db5..18c0bfe5291 100644 --- a/rand_distr/tests/value_stability.rs +++ b/rand_distr/tests/value_stability.rs @@ -91,6 +91,30 @@ fn hypergeometric_stability() { test_samples(7221, Hypergeometric::new(100, 50, 50).unwrap(), &[23, 27, 26, 27, 22, 24, 31, 22]); // Algorithm H2PE } +#[test] +fn negative_binomial_stability() { + test_samples( + 314159265, + NegativeBinomial::::new(25.0, 0.125).unwrap(), + &[212.0, 168.0, 129.0, 86.0, 246.0, 194.0, 218.0, 234.0], + ); + test_samples( + 314159265, + NegativeBinomial::::new(18.0, 1.0).unwrap(), + &[0.0; 8] + ); + test_samples( + 314159265, + NegativeBinomial::::new(25.0, 0.125).unwrap(), + &[224.0, 156.0, 285.0, 171.0, 179.0, 88.0, 176.0, 170.0], + ); + test_samples( + 314159265, + NegativeBinomial::::new(18.0, 1.0).unwrap(), + &[0.0; 8] + ); +} + #[test] fn unit_ball_stability() { test_samples(2, UnitBall, &[ From c4599918f67da2ab8115fb69a76c840659c9f06d Mon Sep 17 00:00:00 2001 From: warren Date: Sun, 5 Mar 2023 23:53:28 -0500 Subject: [PATCH 3/6] Add a comment about the generation of negative binomial variates. --- rand_distr/src/negative_binomial.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/rand_distr/src/negative_binomial.rs b/rand_distr/src/negative_binomial.rs index f6ec5fc20b5..bdc387f0fad 100644 --- a/rand_distr/src/negative_binomial.rs +++ b/rand_distr/src/negative_binomial.rs @@ -149,6 +149,14 @@ where match &self.repr { NegativeBinomialRepr::POne => F::zero(), NegativeBinomialRepr::PLessThanOne(gamma) => { + // The method used here is to generate a Gamma(r, (1-p)/p) + // variate, and use it as the parameter of Poisson. See, + // for example, section X.4.7 of the text *Non-Uniform Random + // Variate Generation* by Luc Devroye (Springer-Verlag, 1986). + // The gamma distribution was created in the `new()` method + // and saved in the NegativeBinomial instance, because it + // depends on just the parameters `r` and `p`. We have to + // create a new Poisson instance for each variate generated. Poisson::::new(gamma.sample(rng)).unwrap().sample(rng) } } From 43f3d66b9128b8bd0193defad69eed8f9ca919a6 Mon Sep 17 00:00:00 2001 From: Warren Weckesser Date: Wed, 3 May 2023 12:00:40 -0400 Subject: [PATCH 4/6] Simplify expression that checks for invalid p. Co-authored-by: Vinzent Steinberg --- rand_distr/src/negative_binomial.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rand_distr/src/negative_binomial.rs b/rand_distr/src/negative_binomial.rs index bdc387f0fad..db0e3c4f22f 100644 --- a/rand_distr/src/negative_binomial.rs +++ b/rand_distr/src/negative_binomial.rs @@ -121,7 +121,7 @@ where pub fn new(r: F, p: F) -> Result { if !r.is_finite() || r <= F::zero() { Err(Error::InvalidNumberOfSuccesses) - } else if !p.is_finite() || p <= F::zero() || p > F::one() { + } else if !(p > F::zero() && p <= F::one()) { Err(Error::InvalidProbability) } else if p == F::one() { Ok(NegativeBinomial { From cedbd8343f425217058e82c15126ef51f9d511ea Mon Sep 17 00:00:00 2001 From: warren Date: Wed, 3 May 2023 12:09:16 -0400 Subject: [PATCH 5/6] Add a comment that a validation expression also catches nan. --- rand_distr/src/negative_binomial.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rand_distr/src/negative_binomial.rs b/rand_distr/src/negative_binomial.rs index db0e3c4f22f..52964b6f48e 100644 --- a/rand_distr/src/negative_binomial.rs +++ b/rand_distr/src/negative_binomial.rs @@ -122,6 +122,7 @@ where if !r.is_finite() || r <= F::zero() { Err(Error::InvalidNumberOfSuccesses) } else if !(p > F::zero() && p <= F::one()) { + // This also catches p = nan implicitly. Err(Error::InvalidProbability) } else if p == F::one() { Ok(NegativeBinomial { From 764394394be42fb3d08b808ed6e40e365a908644 Mon Sep 17 00:00:00 2001 From: warren Date: Wed, 3 May 2023 12:15:11 -0400 Subject: [PATCH 6/6] rand_distr: Update CHANGELOG.md: new NegativeBinomial distribution. --- rand_distr/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/rand_distr/CHANGELOG.md b/rand_distr/CHANGELOG.md index d1da24a5a74..af08c3bb4fe 100644 --- a/rand_distr/CHANGELOG.md +++ b/rand_distr/CHANGELOG.md @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [0.5.0] - unreleased +- New `NegativeBinomial` distribution (#1296) - Remove unused fields from `Gamma`, `NormalInverseGaussian` and `Zipf` distributions (#1184) This breaks serialization compatibility with older versions. - Upgrade Rand