gh-108322: Preserve backwards compatibility in NormalDist.samples() when a seed is given #108658

Closed
8 changes: 6 additions & 2 deletions Doc/library/statistics.rst
@@ -830,8 +830,12 @@ of applications in statistics.

.. versionchanged:: 3.13
Member Author

The existing documentation above this says of seed: "this is useful for creating reproducible results, even in a multi-threading context." The PR this one modifies also mentions that random.gauss() is problematic, describing the non-gauss version as better for being "more self contained and less vulnerable to concurrency issues (because gauss() is stateful between successive calls)."

I presume we might want to call that implementation detail out with a sentence added to the above paragraph, recommending the new use_gauss=False argument to people for whom that caveat matters? @rhettinger
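A minimal sketch of the statefulness mentioned in that quote, assuming CPython's current behavior where random.gauss() generates values in pairs and caches the spare between calls (the seed value is arbitrary):

```python
import random

rng = random.Random(42)
rng.gauss(0, 1)       # generates a pair of values, returns one, caches the other
a = rng.gauss(0, 1)   # returns the cached value without advancing the underlying state

rng = random.Random(42)
b = rng.gauss(0, 1)   # same seed, but no earlier gauss() call in this history

print(a == b)         # False: gauss() output depends on prior call history, not just the seed
```

The inverse-CDF path instead draws one plain random.random() value per sample, so a seeded sequence depends only on the seed and how many samples are drawn.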


Switched to a faster algorithm. To reproduce samples from previous
versions, use :func:`random.seed` and :func:`random.gauss`.
The *use_gauss* keyword argument was added to facilitate a switch to a
faster algorithm. The faster algorithm is used by default when no
*seed* is supplied. The previous, slower algorithm based on
:func:`random.gauss` is used when a *seed* is provided, in order to
preserve reproducibility between Python versions. To always use the
faster algorithm, even when supplying *seed*, pass ``use_gauss=False``.

.. method:: NormalDist.pdf(x)

13 changes: 9 additions & 4 deletions Lib/statistics.py
@@ -1135,7 +1135,7 @@ def linear_regression(x, y, /, *, proportional=False):
>>> noise = NormalDist().samples(5, seed=42)
>>> y = [3 * x[i] + 2 + noise[i] for i in range(5)]
>>> linear_regression(x, y) #doctest: +ELLIPSIS
LinearRegression(slope=3.17495..., intercept=1.00925...)
LinearRegression(slope=3.09078914170..., intercept=1.75684970486...)

If *proportional* is true, the independent variable *x* and the
dependent variable *y* are assumed to be directly proportional.
@@ -1148,7 +1148,7 @@ def linear_regression(x, y, /, *, proportional=False):

>>> y = [3 * x[i] + noise[i] for i in range(5)]
>>> linear_regression(x, y, proportional=True) #doctest: +ELLIPSIS
LinearRegression(slope=2.90475..., intercept=0.0)
LinearRegression(slope=3.02447542484..., intercept=0.0)

"""
n = len(x)
@@ -1277,8 +1277,13 @@ def from_samples(cls, data):
"Make a normal distribution instance from sample data."
return cls(*_mean_stdev(data))

def samples(self, n, *, seed=None):
"Generate *n* samples for a given mean and standard deviation."
def samples(self, n, *, seed=None, use_gauss=None):
"""Generate *n* samples for a given mean and standard deviation."""
if (seed is not None and use_gauss is None) or use_gauss:
# This is the Python <= 3.12 behavior (slower, different results).
gauss = random.gauss if seed is None else random.Random(seed).gauss
mu, sigma = self._mu, self._sigma
return [gauss(mu, sigma) for _ in repeat(None, n)]
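# Faster path (the default when no seed is given, or when use_gauss=False):
# draw uniform variates and map them through the normal inverse CDF.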
rnd = random.random if seed is None else random.Random(seed).random
inv_cdf = _normal_dist_inv_cdf
mu = self._mu
19 changes: 19 additions & 0 deletions Lib/test/test_statistics.py
@@ -2769,6 +2769,14 @@ def test_sample_generation(self):
xbar = self.module.mean(data)
self.assertTrue(mu - sigma*8 <= xbar <= mu + sigma*8)

# Ensure the <=3.12 legacy implementation continues working as well.
data = X.samples(n, use_gauss=True)
self.assertEqual(len(data), n)
self.assertEqual(set(map(type, data)), {float})
# mean(data) expected to fall within 8 standard deviations
xbar = self.module.mean(data)
self.assertTrue(mu - sigma*8 <= xbar <= mu + sigma*8)

# verify that seeding makes reproducible sequences
n = 100
data1 = X.samples(n, seed='happiness and joy')
@@ -2779,6 +2787,17 @@
self.assertEqual(data2, data4)
self.assertNotEqual(data1, data2)

# Verify that seeding makes reproducible sequences with the faster
# 3.13+ implementation as well.
n = 100
data1 = X.samples(n, seed='happiness and joy', use_gauss=False)
data2 = X.samples(n, seed='trouble and despair', use_gauss=False)
data3 = X.samples(n, seed='happiness and joy', use_gauss=False)
data4 = X.samples(n, seed='trouble and despair', use_gauss=False)
self.assertEqual(data1, data3)
self.assertEqual(data2, data4)
self.assertNotEqual(data1, data2)

def test_pdf(self):
NormalDist = self.module.NormalDist
X = NormalDist(100, 15)
@@ -1,2 +1,7 @@
Speed-up NormalDist.samples() by using the inverse CDF method instead of
calling random.gauss().
Speed up :meth:`statistics.NormalDist.samples` by using the inverse CDF method
instead of calling :func:`random.gauss`. When an explicit ``seed=`` is
specified, the original, slower :func:`random.gauss`-based results remain the
default in order to avoid introducing behavior differences between Python
versions for users who expect a consistent, unchanging set of results. Users
can pass the new ``use_gauss=False`` parameter along with ``seed=`` for better
performance when using a fixed seed.
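
A quick sketch of the resulting call patterns, assuming the ``use_gauss`` parameter proposed in this PR (the seed value is arbitrary):

```python
from statistics import NormalDist

X = NormalDist(100, 15)

X.samples(1000)                                  # no seed: fast inverse-CDF path
X.samples(1000, seed=8675309)                    # seeded: legacy gauss() path, matching
                                                 # the output of Python <= 3.12
X.samples(1000, seed=8675309, use_gauss=False)   # seeded, but opt into the faster
                                                 # inverse-CDF path
X.samples(1000, use_gauss=True)                  # force the legacy gauss() path even
                                                 # without a seed
```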