Add bin splitting to rank.percentile_mean

The algorithm works by summing bins of a histogram of the surrounding neighborhood together while skipping bins outside the defined percentile. However, the previous implementation would only deal with complete bins leading to unexpected behavior when according to the given percentile, a part of a bin should factor into the mean. The new implementation fixes this problem, while increasing performance such that it is on par with rank.mean for most tested scenarios.
scikit-image · Aug 28, 2023 · e5e62bb · e5e62bb
1 parent 5408455
commit e5e62bb
Show file tree

Hide file tree

Showing 3 changed files with 90 additions and 19 deletions.
diff --git a/skimage/filters/rank/_percentile.py b/skimage/filters/rank/_percentile.py
@@ -149,8 +149,33 @@ def mean_percentile(image, footprint, out=None, mask=None, shift_x=False,
     out : 2-D array (same dtype as input image)
         Output image.
 
+    Notes
+    -----
+    The algorithm is defined as follows:
+
+    1. Determine the local neighborhood and sort by value.
+    2. Drop values below percentile given by p0 and above percentile given by p1.
+    3. Calculate the arithmetic mean and drop the remainder (for backwards compability).
+
+    Note that the actual implementation differs for performance reasons.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import skimage as ski
+    >>> image = np.array([[0, 1, 2],
+    ...                   [3, 4, 5],
+    ...                   [6, 7, 8]], dtype=np.uint8)
+    >>> fp = np.ones((3, 3), dtype=bool)
+    >>> ski.filters.rank.mean_percentile(image, footprint=fp, p0=0.25, p1=0.75)
+    array([[2, 2, 3],
+           [3, 4, 4],
+           [5, 5, 6]], dtype=uint8)
     """
-
+    if not 0. <= p0 < p1 <= 1.:
+        raise ValueError(
+            f"Percentile interval doesn't satisfy 0 <= p0 < p1 <= 1, was [{p0}, {p1}]"
+        )
     return _apply(percentile_cy._mean,
                   image, footprint, out=out, mask=mask, shift_x=shift_x,
                   shift_y=shift_y, p0=p0, p1=p1)

diff --git a/skimage/filters/rank/percentile_cy.pyx b/skimage/filters/rank/percentile_cy.pyx
@@ -5,6 +5,7 @@
 
 cimport numpy as cnp
 from .core_cy cimport dtype_t, dtype_t_out, _core, _min, _max
+from libc.math cimport floor, ceil
 cnp.import_array()
 
 cdef inline void _kernel_autolevel(dtype_t_out* out, Py_ssize_t odepth,
@@ -76,25 +77,39 @@ cdef inline void _kernel_mean(dtype_t_out* out, Py_ssize_t odepth,
                               Py_ssize_t n_bins, Py_ssize_t mid_bin,
                               cnp.float64_t p0, cnp.float64_t p1,
                               Py_ssize_t s0, Py_ssize_t s1) noexcept nogil:
+    cdef:
+        Py_ssize_t i
+        # Decreasing counter for lower excluded pixels
+        Py_ssize_t lower = <Py_ssize_t>floor(pop * p0)
+        # Decreasing counter for valid included pixels
+        Py_ssize_t valid = <Py_ssize_t>ceil(pop * p1) - lower
+        cnp.uint64_t total = 0
+        cnp.float64_t valid_ = valid
+
+    if pop <= 0 or valid_ <= 0:
+        out[0] = <dtype_t_out> 0
+        return  # Return early
+
+    i = 0
+    # Deplete counter `lower` for excluded pixels in lower percentile
+    while 0 < lower:
+        lower -= histo[i]
+        i += 1
+    # If `lower` is negative, percentile border is inside bin
+    # so add as many pixel values as `lower` underflowed
+    total += -lower * (i - 1)
+    valid += lower
+    # Deplete counter `valid` for included pixels until upper excluded percentile
+    while 0 < valid:
+        valid -= histo[i]
+        total += histo[i] * i
+        i += 1
+    # If `upper` is negative, percentile border is inside bin, and we added too
+    # much, so subtract as many pixel values as `upper` underflowed
+    total += valid * (i - 1)
+    # Assign mean while dropping remainder, don't round for backwards compatibility
+    out[0] = <dtype_t_out>(total / valid_)
 
-    cdef Py_ssize_t i, sum, mean, n
-
-    if pop:
-        sum = 0
-        mean = 0
-        n = 0
-        for i in range(n_bins):
-            sum += histo[i]
-            if (sum >= p0 * pop) and (sum <= p1 * pop):
-                n += histo[i]
-                mean += histo[i] * i
-
-        if n > 0:
-            out[0] = <dtype_t_out>(mean / n)
-        else:
-            out[0] = <dtype_t_out>0
-    else:
-        out[0] = <dtype_t_out>0
 
 cdef inline void _kernel_sum(dtype_t_out* out, Py_ssize_t odepth,
                              Py_ssize_t[::1] histo,

diff --git a/skimage/filters/rank/tests/test_rank.py b/skimage/filters/rank/tests/test_rank.py
@@ -887,3 +887,34 @@ def test_input_boolean_dtype(self):
         elem = np.ones((3, 3), dtype=bool)
         with pytest.raises(ValueError):
             rank.maximum(image=image, footprint=elem)
+
+    @pytest.mark.parametrize("p0,p1", [(0.5, 0.5), (0.6, 0.4), (-1, 1), (0, 2)])
+    def test_percentile_mean_invalid(self, p0, p1):
+        image = np.ones((9, 9), dtype=np.uint8)
+        fp = np.ones((3, 3), dtype=bool)
+        with pytest.raises(ValueError, match="Percentile interval doesn't satisfy"):
+            rank.mean_percentile(image, footprint=fp, p0=p0, p1=p1)
+
+    @pytest.mark.parametrize("dtype", [np.uint8, np.uint16])
+    def test_percentile_mean_handcrafted(self, dtype):
+        image = np.array([[0, 1, 2],
+                          [3, 4, 5],
+                          [6, 7, 8]], dtype=dtype)
+        fp = np.ones((3, 3), dtype=bool)
+        result = rank.mean_percentile(image, footprint=fp, p0=0.25, p1=0.75)
+        desired = np.array([[2, 2, 3],
+                            [3, 4, 4],
+                            [5, 5, 6]], dtype=dtype)
+        np.testing.assert_equal(result, desired)
+
+    @pytest.mark.parametrize("dtype", [np.uint8, np.uint16])
+    def test_percentile_mean_pr7096(self, dtype):
+        image = np.array([[0, 0, 0],
+                          [0, 1, 1],
+                          [1, 1, 1]], dtype=dtype)
+        fp = np.ones((3, 3), dtype=bool)
+        result = rank.mean_percentile(image, footprint=fp, p0=0.25, p1=0.75)
+        desired = np.array([[0, 0, 0],
+                            [0, 0, 0],
+                            [1, 1, 1]], dtype=dtype)
+        np.testing.assert_equal(result, desired)