From 042aa88bcc6541cb8b312f1119452f7a58a5b4df Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 27 Aug 2023 08:59:40 -0500 Subject: [PATCH] gh-108322: Optimize statistics.NormalDist.samples() (gh-108324) --- Doc/library/statistics.rst | 5 +++++ Lib/statistics.py | 12 +++++++----- .../2023-08-22-12-05-47.gh-issue-108322.kf3NJX.rst | 2 ++ 3 files changed, 14 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-08-22-12-05-47.gh-issue-108322.kf3NJX.rst diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst index 483ebea67f0c6d..368b2a17cef997 100644 --- a/Doc/library/statistics.rst +++ b/Doc/library/statistics.rst @@ -828,6 +828,11 @@ of applications in statistics. number generator. This is useful for creating reproducible results, even in a multi-threading context. + .. versionchanged:: 3.13 + + Switched to a faster algorithm. To reproduce samples from previous + versions, use :func:`random.seed` and :func:`random.gauss`. + .. method:: NormalDist.pdf(x) Using a `probability density function (pdf) diff --git a/Lib/statistics.py b/Lib/statistics.py index a8036e9928c464..96c803483057e7 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -1135,7 +1135,7 @@ def linear_regression(x, y, /, *, proportional=False): >>> noise = NormalDist().samples(5, seed=42) >>> y = [3 * x[i] + 2 + noise[i] for i in range(5)] >>> linear_regression(x, y) #doctest: +ELLIPSIS - LinearRegression(slope=3.09078914170..., intercept=1.75684970486...) + LinearRegression(slope=3.17495..., intercept=1.00925...) If *proportional* is true, the independent variable *x* and the dependent variable *y* are assumed to be directly proportional. 
@@ -1148,7 +1148,7 @@ def linear_regression(x, y, /, *, proportional=False): >>> y = [3 * x[i] + noise[i] for i in range(5)] >>> linear_regression(x, y, proportional=True) #doctest: +ELLIPSIS - LinearRegression(slope=3.02447542484..., intercept=0.0) + LinearRegression(slope=2.90475..., intercept=0.0) """ n = len(x) @@ -1279,9 +1279,11 @@ def from_samples(cls, data): def samples(self, n, *, seed=None): "Generate *n* samples for a given mean and standard deviation." - gauss = random.gauss if seed is None else random.Random(seed).gauss - mu, sigma = self._mu, self._sigma - return [gauss(mu, sigma) for _ in repeat(None, n)] + rnd = random.random if seed is None else random.Random(seed).random + inv_cdf = _normal_dist_inv_cdf + mu = self._mu + sigma = self._sigma + return [inv_cdf(rnd(), mu, sigma) for _ in repeat(None, n)] def pdf(self, x): "Probability density function. P(x <= X < x+dx) / dx" diff --git a/Misc/NEWS.d/next/Library/2023-08-22-12-05-47.gh-issue-108322.kf3NJX.rst b/Misc/NEWS.d/next/Library/2023-08-22-12-05-47.gh-issue-108322.kf3NJX.rst new file mode 100644 index 00000000000000..5416c01a43f113 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-22-12-05-47.gh-issue-108322.kf3NJX.rst @@ -0,0 +1,2 @@ +Speed up NormalDist.samples() by using the inverse CDF method instead of +calling random.gauss().