From 035120eeab6ef25e44db2b043a2ae64a44fc5055 Mon Sep 17 00:00:00 2001 From: Hans Dembinski Date: Tue, 1 Mar 2022 20:54:22 +0100 Subject: [PATCH] fix docs, remove precision keyword from tests --- README.rst | 5 +-- doc/bootstrap.rst | 2 -- doc/changelog.rst | 15 ++++----- doc/empirical.rst | 2 -- doc/index.rst | 8 ++--- doc/jackknife.rst | 2 -- doc/permutation.rst | 2 -- doc/reference.rst | 26 +++++++++++++++ src/resample/permutation.py | 63 ++++++++++++------------------------- tests/test_permutation.py | 57 +++++++++------------------------ 10 files changed, 74 insertions(+), 108 deletions(-) delete mode 100644 doc/bootstrap.rst delete mode 100644 doc/empirical.rst delete mode 100644 doc/jackknife.rst delete mode 100644 doc/permutation.rst create mode 100644 doc/reference.rst diff --git a/README.rst b/README.rst index 0ed2874e..7a58bda3 100644 --- a/README.rst +++ b/README.rst @@ -23,8 +23,9 @@ Randomisation-based inference in Python based on data resampling and permutation Features -------- -- Bootstrap samples (ordinary or balanced with optional stratification) from N-D arrays -- Apply parametric bootstrap (Gaussian, Poisson, gamma, etc.) on samples +- Bootstrap samples (ordinary or balanced with optional stratification) +- Support for parametric (Gaussian, Poisson, gamma, etc.) and extended + bootstrapping (also varies sample size) - Compute bootstrap confidence intervals (percentile or BCa) for any estimator - Jackknife estimates of bias and variance of any estimator - Permutation-based variants of traditional statistical tests (**USP test of independence** and others) diff --git a/doc/bootstrap.rst b/doc/bootstrap.rst deleted file mode 100644 index 29a48a95..00000000 --- a/doc/bootstrap.rst +++ /dev/null @@ -1,2 +0,0 @@ -.. 
automodule:: resample.bootstrap - :members: diff --git a/doc/changelog.rst b/doc/changelog.rst index b08a337c..e28b02f8 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -1,21 +1,22 @@ Changelog ========= -1.5.0 (February 25, 2022) +1.5.0 (March 1, 2022) ------------------------- This is an API breaking release. The backward-incompatible changes are limited to the ``resample.permutation`` submodule. Other modules are not affected. -Warning: It was discovered that most tests implemented in ``resample.permutation`` had +Warning: It was discovered that the tests implemented in ``resample.permutation`` had various issues and gave wrong results, so any results obtained with these tests should be revised. Since the old implementations were broken anyway, the API of -``resample.permutation`` was altered to fix some design issues. +``resample.permutation`` was altered to fix some design issues as well. -This package now requires compiling a C extension. This is needed for the computation -of the new USP test. This makes the installation of this package less convenient, since -now a C compiler is required on the target machine. The plan is to move the compiled -code to SciPy, which would allows us to drop the C extension again in the future. +Installing resample now requires compiling a C extension. This is needed for the +computation of the new USP test. This makes the installation of this package less +convenient, since now a C compiler is required on the target machine (or we have to +start building binary wheels). The plan is to move the compiled code to SciPy, which +would allow us to drop the C extension again in the future. New features ~~~~~~~~~~~~ diff --git a/doc/empirical.rst b/doc/empirical.rst deleted file mode 100644 index b528802e..00000000 --- a/doc/empirical.rst +++ /dev/null @@ -1,2 +0,0 @@ -.. 
automodule:: resample.empirical - :members: diff --git a/doc/index.rst b/doc/index.rst index 4b9b8a74..d0a9f44c 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -7,11 +7,7 @@ :start-after: skip-marker-do-not-remove .. toctree:: - :hidden: - :maxdepth: 1 + :maxdepth: 2 - bootstrap.rst - jackknife.rst - permutation.rst - empirical.rst + reference.rst changelog.rst diff --git a/doc/jackknife.rst b/doc/jackknife.rst deleted file mode 100644 index 3a9b4223..00000000 --- a/doc/jackknife.rst +++ /dev/null @@ -1,2 +0,0 @@ -.. automodule:: resample.jackknife - :members: diff --git a/doc/permutation.rst b/doc/permutation.rst deleted file mode 100644 index e587da5d..00000000 --- a/doc/permutation.rst +++ /dev/null @@ -1,2 +0,0 @@ -.. automodule:: resample.permutation - :members: diff --git a/doc/reference.rst b/doc/reference.rst new file mode 100644 index 00000000..e4bf8036 --- /dev/null +++ b/doc/reference.rst @@ -0,0 +1,26 @@ +Reference +========= + +bootstrap +--------- + +.. automodule:: resample.bootstrap + :members: + +jackknife +--------- + +.. automodule:: resample.jackknife + :members: + +permutation +----------- + +.. automodule:: resample.permutation + :members: + +empirical +--------- + +.. automodule:: resample.empirical + :members: diff --git a/src/resample/permutation.py b/src/resample/permutation.py index 444fda18..a9de8e84 100644 --- a/src/resample/permutation.py +++ b/src/resample/permutation.py @@ -91,8 +91,7 @@ def __getitem__(self, idx): def usp( w: _ArrayLike, *, - precision: float = 0.01, - max_size: int = 9999, + size: int = 9999, method: str = "auto", random_state: _tp.Optional[_tp.Union[np.random.Generator, int]] = None, ): @@ -113,36 +112,28 @@ def usp( w : array-like Two-dimensional array which represents the counts in a histogram. The counts can be of floating point type, but must have integral values. - precision : float, optional - Target precision (statistical) for the p-value. 
Used to compute the minimum - number of permutations needed to reach the target precision. The minimum of this - estimate and max_size is used. If precision is zero, max_size permutations are - used. Default 0.01. - max_size : int, optional - Maximum number of permutations. Default 9999. + size : int, optional + Number of permutations. Default 9999. method : str, optional Method used to generate random tables under the null hypothesis. 'auto': Uses a heuristic to select the fastest algorithm for the given table. 'shuffle': A shuffling algorithm, which requires extra space to store N + 1 - integers for N entries in total and has O(N) time complexity. It performs - poorly when N is large, but does not depend on the number of table cells. + integers for N entries in total and has O(N) time complexity. It performs + poorly when N is large, but does not depend on the number K of table cells. 'patefield': Patefield's algorithm, which does not require extra space and - has O(K log(N)) time complexity. It performs well even if N is huge. For - small N and large K, the shuffling algorithm is faster. + has O(K log(N)) time complexity. It performs well even if N is huge. For + small N and large K, the shuffling algorithm is faster. Default is 'auto'. random_state : numpy.random.Generator or int, optional Random number generator instance. If an integer is passed, seed the numpy - default generator with it. Default is to use `numpy.random.default_rng()`. + default generator with it. Default is to use ``numpy.random.default_rng()``. 
Returns ------- TestResult """ - if precision < 0: - raise ValueError("precision cannot be negative") - - if max_size <= 0: - raise ValueError("max_size must be positive") + if size <= 0: + raise ValueError("size must be positive") methods = {"auto": 0, "shuffle": 1, "patefield": 2} imethod = methods.get(method, -1) @@ -173,19 +164,15 @@ def usp( t = _usp(f1, f2, w, m) - # For Type I error probabilities to hold theoretically, the number of permutation - # samples drawn may not depend on the data (comment by Richard Samworth). - # So we compute the required number of samples with the worst-case p=0.5. - n = min(max_size, int(0.25 / precision**2)) if precision > 0 else max_size - ts = np.empty(n) - for b, w in enumerate(_util.rcont(n, r, c, imethod - 1, rng)): + ts = np.empty(size) + for b, w in enumerate(_util.rcont(size, r, c, imethod - 1, rng)): # m stays the same, since r and c remain unchanged ts[b] = _usp(f1, f2, w, m) # Thomas B. Berrett, Ioannis Kontoyiannis, Richard J. Samworth # Ann. Statist. 49(5): 2457-2490 (October 2021). DOI: 10.1214/20-AOS2041 # Eq. 5 says we need to add 1 to n_pass and n_total - pvalue = (np.sum(t <= ts) + 1) / (n + 1) + pvalue = (np.sum(t <= ts) + 1) / (size + 1) return TestResult(t, pvalue, ts) @@ -201,8 +188,7 @@ def same_population( y: _ArrayLike, *args: _ArrayLike, transform: _tp.Optional[_tp.Callable] = None, - precision: float = 0.01, - max_size: int = 9999, + size: int = 9999, random_state: _tp.Optional[_tp.Union[np.random.Generator, int]] = None, ) -> np.ndarray: """ @@ -233,13 +219,8 @@ def same_population( transform : Callable, optional Function with signature f(x) for the test statistic to turn it into a measure of deviation. Must be vectorised. - precision : float, optional - Target precision (statistical) for the p-value. Used to compute the minimum - number of permutations needed to reach the target precision. The minimum of this - estimate and max_size is used. If precision is zero, max_size permutations are - used. 
Default 0.01. - max_size : int, optional - Maximum number of permutations. Default 9999. + size : int, optional + Number of permutations. Default 9999. random_state : numpy.random.Generator or int, optional Random number generator instance. If an integer is passed, seed the numpy default generator with it. Default is to use `numpy.random.default_rng()`. @@ -248,10 +229,7 @@ ------- TestResult """ - if precision < 0: - raise ValueError("precision cannot be negative") - - if max_size <= 0: - raise ValueError("max_size must be positive") + if size <= 0: + raise ValueError("size must be positive") rng = _util.normalize_rng(random_state) @@ -283,9 +261,8 @@ joined_sample = np.concatenate(args) # For algorithm below, see comment in usp function. - n = min(max_size, int(0.25 / precision**2)) if precision > 0 else max_size - ts = np.empty(n) - for b in range(n): + ts = np.empty(size) + for b in range(size): rng.shuffle(joined_sample) ts[b] = fn(*(joined_sample[sl] for sl in slices)) if transform is None: @@ -295,7 +272,7 @@ u = transform(t) us = transform(ts) # see usp for why we need to add 1 to both numerator and denominator - pvalue = (np.sum(u <= us) + 1) / (n + 1) + pvalue = (np.sum(u <= us) + 1) / (size + 1) return TestResult(t, pvalue, ts) diff --git a/tests/test_permutation.py b/tests/test_permutation.py index bfb37171..5b131615 100644 --- a/tests/test_permutation.py +++ b/tests/test_permutation.py @@ -64,7 +64,7 @@ def test_two_sample_same_size(test_name, size, rng): for a, b in ((x, y), (y, x)): expected = scipy_test(a, b) - got = test(a, b, max_size=500, random_state=1) + got = test(a, b, size=999, random_state=1) assert_allclose(expected[0], got[0]) assert_allclose(expected[1], got[1], atol={10: 0.2, 100: 0.02}[size]) @@ -94,7 +94,7 @@ def test_two_sample_different_size(test_name, size, rng): for a, b in ((x, y), (y, x)): expected = scipy_test(a, b) - got = test(a, b, max_size=500, random_state=1) + got = test(a, b, size=999, random_state=1) 
assert_allclose(expected[0], got[0]) assert_allclose(expected[1], got[1], atol=5e-2) @@ -117,7 +117,7 @@ def test_three_sample_same_size(test_name, size, rng): for a, b, c in ((x, y, z), (z, y, x)): expected = scipy_test(a, b, c) - got = test(a, b, c, max_size=500, random_state=1) + got = test(a, b, c, size=999, random_state=1) assert_allclose(expected[0], got[0]) assert_allclose(expected[1], got[1], atol=5e-2) @@ -140,7 +140,7 @@ def test_three_sample_different_size(test_name, size, rng): for a, b, c in ((x, y, z), (z, y, x)): expected = scipy_test(a, b, c) - got = test(a, b, c, max_size=500, random_state=1) + got = test(a, b, c, size=500, random_state=1) assert_allclose(expected[0], got[0]) assert_allclose(expected[1], got[1], atol=5e-2) @@ -156,7 +156,7 @@ def test_usp_1(method, rng): y = rng.normal(1, 3, size=100) w = np.histogram2d(x, y, bins=(5, 10))[0] - r = perm.usp(w, max_size=100, random_state=1) + r = perm.usp(w, size=100, random_state=1) assert r.pvalue > 0.05 @@ -166,7 +166,7 @@ def test_usp_2(method, rng): w = np.histogram2d(x, x, range=((-5, 5), (-5, 5)))[0] - r = perm.usp(w, method=method, max_size=99, random_state=1) + r = perm.usp(w, method=method, size=99, random_state=1) assert r.pvalue == 0.01 @@ -191,8 +191,8 @@ def test_usp_3(method, rng): def test_usp_4(method): # table1 from https://doi.org/10.1098/rspa.2021.0549 w = [[18, 36, 21, 9, 6], [12, 36, 45, 36, 21], [6, 9, 9, 3, 3], [3, 9, 9, 6, 3]] - r1 = perm.usp(w, precision=0, method=method, max_size=10000, random_state=1) - r2 = perm.usp(np.transpose(w), method=method, max_size=1, random_state=1) + r1 = perm.usp(w, method=method, size=9999, random_state=1) + r2 = perm.usp(np.transpose(w), method=method, size=1, random_state=1) assert_allclose(r1.statistic, r2.statistic) expected = 0.004106 # checked against USP R package assert_allclose(r1.statistic, expected, atol=1e-6) @@ -207,16 +207,16 @@ def test_usp_5(method, rng): for i in range(100): for j in range(100): w[i, j] = (i + j) % 2 - r = 
perm.usp(w, method=method, max_size=100, random_state=1) + r = perm.usp(w, method=method, size=99, random_state=1) assert r.pvalue > 0.1 def test_usp_bias(rng): # We compute the p-value as an upper limit to the type I error rate. - # Therefore, the p-value is not unbiased. For max_size=1, we expect + # Therefore, the p-value is not unbiased. For size=1, we expect # an average p-value = (1 + 0.5) / (1 + 1) = 0.75 got = [ - perm.usp(rng.poisson(1000, size=(2, 2)), max_size=1, random_state=i).pvalue + perm.usp(rng.poisson(1000, size=(2, 2)), size=1, random_state=i).pvalue for i in range(1000) ] assert_allclose(np.mean(got), 0.75, atol=0.05) @@ -224,13 +224,10 @@ def test_usp_bias(rng): def test_usp_bad_input(): with pytest.raises(ValueError): - perm.usp([[1, 2], [3, 4]], precision=-1) + perm.usp([[1, 2], [3, 4]], size=0) with pytest.raises(ValueError): - perm.usp([[1, 2], [3, 4]], max_size=0) - - with pytest.raises(ValueError): - perm.usp([[1, 2], [3, 4]], max_size=-1) + perm.usp([[1, 2], [3, 4]], size=-1) with pytest.raises(ValueError): perm.usp([1, 2]) @@ -241,37 +238,13 @@ def test_usp_bad_input(): def test_ttest_bad_input(): with pytest.raises(ValueError): - perm.ttest([1, 2], [3, 4], precision=-1) - - with pytest.raises(ValueError): - perm.ttest([1, 2], [3, 4], max_size=0) + perm.ttest([1, 2], [3, 4], size=0) with pytest.raises(ValueError): - perm.ttest([1, 2], [3, 4], max_size=-1) + perm.ttest([1, 2], [3, 4], size=-1) with pytest.raises(ValueError): perm.ttest(1, 2) with pytest.raises(ValueError): perm.ttest([1], [2]) - - -@pytest.mark.parametrize("test", (perm.ttest, perm.usp)) -@pytest.mark.parametrize("prec", (0, 0.05, 0.005)) -def test_precision(test, prec, rng): - x = rng.normal(0, 1, size=100) - y = rng.normal(0, 2, size=100) - if test is perm.ttest: - args = (x, y) - else: - w = np.histogram2d(x, y)[0] - args = (w,) - - r = test(*args, precision=prec, max_size=10000 if prec > 0 else 123, random_state=1) - if prec == 0: - assert len(r.samples) == 123 - 
else: - n = len(r.samples) - _, interval = _util.wilson_score_interval(r.pvalue * n, n, 1) - actual_precision = (interval[1] - interval[0]) / 2 - assert_allclose(actual_precision, prec, atol=0.5 * prec)