From ed06ec1ab851544234138952d357facb32eba6c5 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 13 Jul 2022 09:46:04 -0500 Subject: [PATCH] GH-81620: Add random.binomialvariate() (GH-94719) --- Doc/library/random.rst | 31 ++++-- Lib/random.py | 95 ++++++++++++++++++- Lib/test/test_random.py | 56 +++++++++++ ...2-07-09-15-17-02.gh-issue-81620.L0O_bV.rst | 1 + 4 files changed, 175 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-07-09-15-17-02.gh-issue-81620.L0O_bV.rst diff --git a/Doc/library/random.rst b/Doc/library/random.rst index 613fbce0fdf20d..78c2b030d6095f 100644 --- a/Doc/library/random.rst +++ b/Doc/library/random.rst @@ -258,6 +258,28 @@ Functions for sequences The *population* must be a sequence. Automatic conversion of sets to lists is no longer supported. +Discrete distributions +---------------------- + +The following function generates a discrete distribution. + +.. function:: binomialvariate(n=1, p=0.5) + + `Binomial distribution + `_. + Return the number of successes for *n* independent trials with the + probability of success in each trial being *p*: + + Mathematically equivalent to:: + + sum(random() < p for i in range(n)) + + The number of trials *n* should be a non-negative integer. + The probability of success *p* should be between ``0.0 <= p <= 1.0``. + The result is an integer in the range ``0 <= X <= n``. + + .. versionadded:: 3.12 + .. _real-valued-distributions: @@ -452,16 +474,13 @@ Simulations:: >>> # Deal 20 cards without replacement from a deck >>> # of 52 playing cards, and determine the proportion of cards >>> # with a ten-value: ten, jack, queen, or king. - >>> dealt = sample(['tens', 'low cards'], counts=[16, 36], k=20) - >>> dealt.count('tens') / 20 + >>> deal = sample(['tens', 'low cards'], counts=[16, 36], k=20) + >>> deal.count('tens') / 20 0.15 >>> # Estimate the probability of getting 5 or more heads from 7 spins >>> # of a biased coin that settles on heads 60% of the time. - >>> def trial(): - ... return choices('HT', cum_weights=(0.60, 1.00), k=7).count('H') >= 5 - ... - >>> sum(trial() for i in range(10_000)) / 10_000 + >>> sum(binomialvariate(n=7, p=0.6) >= 5 for i in range(10_000)) / 10_000 0.4169 >>> # Probability of the median of 5 samples being in middle two quartiles diff --git a/Lib/random.py b/Lib/random.py index 2166474af0554b..00849bd7e732fb 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -24,6 +24,7 @@ negative exponential gamma beta + binomial pareto Weibull @@ -49,6 +50,7 @@ from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin from math import tau as TWOPI, floor as _floor, isfinite as _isfinite +from math import lgamma as _lgamma, fabs as _fabs from os import urandom as _urandom from _collections_abc import Sequence as _Sequence from operator import index as _index @@ -68,6 +70,7 @@ "Random", "SystemRandom", "betavariate", + "binomialvariate", "choice", "choices", "expovariate", @@ -725,6 +728,91 @@ def betavariate(self, alpha, beta): return y / (y + self.gammavariate(beta, 1.0)) return 0.0 + + def binomialvariate(self, n=1, p=0.5): + """Binomial random variable. + + Gives the number of successes for *n* independent trials + with the probability of success in each trial being *p*: + + sum(random() < p for i in range(n)) + + Returns an integer in the range: 0 <= X <= n + + """ + # Error check inputs and handle edge cases + if n < 0: + raise ValueError("n must be non-negative") + if p <= 0.0 or p >= 1.0: + if p == 0.0: + return 0 + if p == 1.0: + return n + raise ValueError("p must be in the range 0.0 <= p <= 1.0") + + random = self.random + + # Fast path for a common case + if n == 1: + return _index(random() < p) + + # Exploit symmetry to establish: p <= 0.5 + if p > 0.5: + return n - self.binomialvariate(n, 1.0 - p) + + if n * p < 10.0: + # BG: Geometric method by Devroye with running time of O(np). + # https://dl.acm.org/doi/pdf/10.1145/42372.42381 + x = y = 0 + c = _log(1.0 - p) + if not c: + return x + while True: + y += _floor(_log(random()) / c) + 1 + if y > n: + return x + x += 1 + + # BTRS: Transformed rejection with squeeze method by Wolfgang Hörmann + # https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.47.8407&rep=rep1&type=pdf + assert n*p >= 10.0 and p <= 0.5 + setup_complete = False + + spq = _sqrt(n * p * (1.0 - p)) # Standard deviation of the distribution + b = 1.15 + 2.53 * spq + a = -0.0873 + 0.0248 * b + 0.01 * p + c = n * p + 0.5 + vr = 0.92 - 4.2 / b + + while True: + + u = random() + v = random() + u -= 0.5 + us = 0.5 - _fabs(u) + k = _floor((2.0 * a / us + b) * u + c) + if k < 0 or k > n: + continue + + # The early-out "squeeze" test substantially reduces + # the number of acceptance condition evaluations. + if us >= 0.07 and v <= vr: + return k + + # Acceptance-rejection test. + # Note, the original paper errorneously omits the call to log(v) + # when comparing to the log of the rescaled binomial distribution. + if not setup_complete: + alpha = (2.83 + 5.1 / b) * spq + lpq = _log(p / (1.0 - p)) + m = _floor((n + 1) * p) # Mode of the distribution + h = _lgamma(m + 1) + _lgamma(n - m + 1) + setup_complete = True # Only needs to be done once + v *= alpha / (a / (us * us) + b) + if _log(v) <= h - _lgamma(k + 1) - _lgamma(n - k + 1) + (k - m) * lpq: + return k + + def paretovariate(self, alpha): """Pareto distribution. alpha is the shape parameter.""" # Jain, pg. 495 @@ -810,6 +898,7 @@ def _notimplemented(self, *args, **kwds): gammavariate = _inst.gammavariate gauss = _inst.gauss betavariate = _inst.betavariate +binomialvariate = _inst.binomialvariate paretovariate = _inst.paretovariate weibullvariate = _inst.weibullvariate getstate = _inst.getstate @@ -834,15 +923,17 @@ def _test_generator(n, func, args): low = min(data) high = max(data) - print(f'{t1 - t0:.3f} sec, {n} times {func.__name__}') + print(f'{t1 - t0:.3f} sec, {n} times {func.__name__}{args!r}') print('avg %g, stddev %g, min %g, max %g\n' % (xbar, sigma, low, high)) -def _test(N=2000): +def _test(N=10_000): _test_generator(N, random, ()) _test_generator(N, normalvariate, (0.0, 1.0)) _test_generator(N, lognormvariate, (0.0, 1.0)) _test_generator(N, vonmisesvariate, (0.0, 1.0)) + _test_generator(N, binomialvariate, (15, 0.60)) + _test_generator(N, binomialvariate, (100, 0.75)) _test_generator(N, gammavariate, (0.01, 1.0)) _test_generator(N, gammavariate, (0.1, 1.0)) _test_generator(N, gammavariate, (0.1, 2.0)) diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index fcf17a949c2a62..1e825c3572d20a 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -1045,6 +1045,9 @@ def test_constant(self): (g.lognormvariate, (0.0, 0.0), 1.0), (g.lognormvariate, (-float('inf'), 0.0), 0.0), (g.normalvariate, (10.0, 0.0), 10.0), + (g.binomialvariate, (0, 0.5), 0), + (g.binomialvariate, (10, 0.0), 0), + (g.binomialvariate, (10, 1.0), 10), (g.paretovariate, (float('inf'),), 1.0), (g.weibullvariate, (10.0, float('inf')), 10.0), (g.weibullvariate, (0.0, 10.0), 0.0), @@ -1052,6 +1055,59 @@ def test_constant(self): for i in range(N): self.assertEqual(variate(*args), expected) + def test_binomialvariate(self): + B = random.binomialvariate + + # Cover all the code paths + with self.assertRaises(ValueError): + B(n=-1) # Negative n + with self.assertRaises(ValueError): + B(n=1, p=-0.5) # Negative p + with self.assertRaises(ValueError): + B(n=1, p=1.5) # p > 1.0 + self.assertEqual(B(10, 0.0), 0) # p == 0.0 + self.assertEqual(B(10, 1.0), 10) # p == 1.0 + self.assertTrue(B(1, 0.3) in {0, 1}) # n == 1 fast path + self.assertTrue(B(1, 0.9) in {0, 1}) # n == 1 fast path + self.assertTrue(B(1, 0.0) in {0}) # n == 1 fast path + self.assertTrue(B(1, 1.0) in {1}) # n == 1 fast path + + # BG method p <= 0.5 and n*p=1.25 + self.assertTrue(B(5, 0.25) in set(range(6))) + + # BG method p >= 0.5 and n*(1-p)=1.25 + self.assertTrue(B(5, 0.75) in set(range(6))) + + # BTRS method p <= 0.5 and n*p=25 + self.assertTrue(B(100, 0.25) in set(range(101))) + + # BTRS method p > 0.5 and n*(1-p)=25 + self.assertTrue(B(100, 0.75) in set(range(101))) + + # Statistical tests chosen such that they are + # exceedingly unlikely to ever fail for correct code. + + # BG code path + # Expected dist: [31641, 42188, 21094, 4688, 391] + c = Counter(B(4, 0.25) for i in range(100_000)) + self.assertTrue(29_641 <= c[0] <= 33_641, c) + self.assertTrue(40_188 <= c[1] <= 44_188) + self.assertTrue(19_094 <= c[2] <= 23_094) + self.assertTrue(2_688 <= c[3] <= 6_688) + self.assertEqual(set(c), {0, 1, 2, 3, 4}) + + # BTRS code path + # Sum of c[20], c[21], c[22], c[23], c[24] expected to be 36,214 + c = Counter(B(100, 0.25) for i in range(100_000)) + self.assertTrue(34_214 <= c[20]+c[21]+c[22]+c[23]+c[24] <= 38_214) + self.assertTrue(set(c) <= set(range(101))) + self.assertEqual(c.total(), 100_000) + + # Demonstrate the BTRS works for huge values of n + self.assertTrue(19_000_000 <= B(100_000_000, 0.2) <= 21_000_000) + self.assertTrue(89_000_000 <= B(100_000_000, 0.9) <= 91_000_000) + + def test_von_mises_range(self): # Issue 17149: von mises variates were not consistently in the # range [0, 2*PI]. diff --git a/Misc/NEWS.d/next/Library/2022-07-09-15-17-02.gh-issue-81620.L0O_bV.rst b/Misc/NEWS.d/next/Library/2022-07-09-15-17-02.gh-issue-81620.L0O_bV.rst new file mode 100644 index 00000000000000..b4ccea4924ff67 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-07-09-15-17-02.gh-issue-81620.L0O_bV.rst @@ -0,0 +1 @@ +Add random.binomialvariate().