From 0459ecf765bfe66d210b311af591a26a700a6495 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 25 Jan 2020 19:25:53 -0800 Subject: [PATCH 1/2] bpo-36018: Add another example for NormalDist() --- Doc/library/statistics.rst | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst index 4c7239c1895fbf..86f166c90db017 100644 --- a/Doc/library/statistics.rst +++ b/Doc/library/statistics.rst @@ -772,6 +772,42 @@ Carlo simulation <https://en.wikipedia.org/wiki/Monte_Carlo_method>`_: >>> quantiles(map(model, X, Y, Z)) # doctest: +SKIP [1.4591308524824727, 1.8035946855390597, 2.175091447274739] +Normal distributions can be used to approximate `Binomial +distributions <http://mathworld.wolfram.com/BinomialDistribution.html>` +when the sample size is large and when the probability of a successful +trial is near 50%. + +For example, an open source conference has 750 attendees and two rooms with a +500 person capacity. There is a talk about Python and another about Ruby. +In previous conferences, 65% of the attendees preferred to listen to Python +talks. Assuming the population preferences haven't changed, what is the +probability that the rooms will stay within their capacity limits? + +.. doctest:: + + >>> n = 750 # Sample size + >>> p = 0.65 # Preference for Python + >>> q = 1.0 - p # Preference for Ruby + >>> k = 500 # Room capacity + + >>> # Approximation using the cumulative normal distribution + >>> from math import sqrt + >>> round(NormalDist(mu=n*p, sigma=sqrt(n*p*q)).cdf(k + 0.5), 4) + 0.8402 + + >>> # Solution using the cumulative binomial distribution + >>> from math import comb, fsum + >>> round(fsum(comb(n, r) * p**r * q**(n-r) for r in range(k+1)), 4) + 0.8402 + + >>> # Approximation using a simulation + >>> from random import seed, choices + >>> seed(8675309) + >>> def trial(): + ... return choices(('Python', 'Ruby'), (p, q), k=n).count('Python') + >>> mean(trial() <= k for i in range(10_000)) + 0.8398 + Normal distributions commonly arise in machine learning problems. 
Wikipedia has a `nice example of a Naive Bayesian Classifier From 2c959620f44f6ba4fd5163baea6a915365dc2db2 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 25 Jan 2020 19:44:21 -0800 Subject: [PATCH 2/2] Fix markup --- Doc/library/statistics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst index 86f166c90db017..09b02cabf21f8e 100644 --- a/Doc/library/statistics.rst +++ b/Doc/library/statistics.rst @@ -773,7 +773,7 @@ Carlo simulation <https://en.wikipedia.org/wiki/Monte_Carlo_method>`_: [1.4591308524824727, 1.8035946855390597, 2.175091447274739] Normal distributions can be used to approximate `Binomial -distributions <http://mathworld.wolfram.com/BinomialDistribution.html>` +distributions <http://mathworld.wolfram.com/BinomialDistribution.html>`_ when the sample size is large and when the probability of a successful trial is near 50%.