Merge pull request #20 from svank/averaging

Add percentile averaging mode
punch-mission · Apr 6, 2023 · ded2c7b · ded2c7b
2 parents 6fdcba1 + f744a35
commit ded2c7b
Show file tree

Hide file tree

Showing 2 changed files with 74 additions and 20 deletions.
diff --git a/regularizepsf/fitter.py b/regularizepsf/fitter.py
@@ -270,6 +270,7 @@ def find_stars_and_average(cls,
                                patch_size: int,
                                interpolation_scale: int = 1,
                                average_mode: str = "median",
+                               percentile: float = 10,
                                star_threshold: int = 3, hdu_choice: int=0) -> CoordinatePatchCollection:
         """Loads a series of images, finds stars in each, 
             and builds a CoordinatePatchCollection with averaged stars
@@ -288,7 +289,11 @@ def find_stars_and_average(cls,
             if >1, the image are first scaled by this factor. 
                 This results in stars being aligned at a subpixel scale
         average_mode : str
-            "median" or "mean" determines how patches are combined
+            "median", "percentile", or "mean": determines how patches are
+            combined
+        percentile : float
+            If `average_mode` is `"percentile"`, use this percentile value
+            (from 0 to 100)
         star_threshold : int
             SEP's threshold for finding stars. See `threshold`
                 in https://sep.readthedocs.io/en/v1.1.x/api/sep.extract.html#sep-extract
@@ -385,7 +390,7 @@ def generator():
                                       patch_size * interpolation_scale)
         averaged = this_collection.average(corners, 
                                            patch_size * interpolation_scale, psf_size * interpolation_scale,
-                                           mode=average_mode)
+                                           mode=average_mode, percentile=percentile)
 
         if interpolation_scale != 1:
             for coordinate, _ in averaged.items():
@@ -403,15 +408,20 @@ def generator():
         return output
 
     def average(self, corners: np.ndarray, patch_size: int, psf_size: int,  # noqa: ARG002, kept for consistency
-                mode: str = "median") -> PatchCollectionABC:
-        CoordinatePatchCollection._validate_average_mode(mode)
+                mode: str = "median", percentile: float = 10) -> PatchCollectionABC:
+        CoordinatePatchCollection._validate_average_mode(mode, percentile)
 
         if mode == "mean":
             mean_stack = {tuple(corner): np.zeros((psf_size, psf_size))
                           for corner in corners}
-            counts = {tuple(corner): 0 for corner in corners}
-        elif mode == "median":
-            median_stack = {tuple(corner): [] for corner in corners}
+            counts = {tuple(corner): np.zeros((psf_size, psf_size))
+                          for corner in corners}
+        else:
+            # n.b. If mode is 'median', we could set mode='percentile'
+            # and percentile=50 to simplify parts of this function, but
+            # np.nanpercentile(x, 50) seems to be about half as fast as
+            # np.nanmedian(x), so let's keep a speedy special case for medians.
+            stack = {tuple(corner): [] for corner in corners}
 
         corners_x, corners_y = corners[:, 0], corners[:, 1]
         x_bounds = np.stack([corners_x, corners_x + patch_size], axis=-1)
@@ -434,33 +444,43 @@ def average(self, corners: np.ndarray, patch_size: int, psf_size: int,  # noqa:
                 if mode == "mean":
                     mean_stack[match_corner] = np.nansum([mean_stack[match_corner], 
                                                           patch], axis=0)
-                    counts[match_corner] += 1
-                elif mode == "median":
-                    median_stack[match_corner].append(patch)
+                    counts[match_corner] += np.isfinite(patch)
+                else:
+                    stack[match_corner].append(patch)
 
         if mode == "mean":
             averages = {CoordinateIdentifier(None, corner[0], corner[1]): 
                         mean_stack[corner]/counts[corner]
                         for corner in mean_stack}
         elif mode == "median":
             averages = {CoordinateIdentifier(None, corner[0], corner[1]):
-                            np.nanmedian(median_stack[corner], axis=0)
-                                if len(median_stack[corner]) > 0 else 
+                            np.nanmedian(stack[corner], axis=0)
+                                if len(stack[corner]) > 0 else
+                                np.zeros((psf_size, psf_size))
+                        for corner in stack}
+        elif mode == "percentile":
+            averages = {CoordinateIdentifier(None, corner[0], corner[1]):
+                            np.nanpercentile(stack[corner],
+                                             percentile,
+                                             axis=0)
+                                if len(stack[corner]) > 0 else
                                 np.zeros((psf_size, psf_size))
-                        for corner in median_stack}
+                        for corner in stack}
         # Now that we have our combined patches, pad them as appropriate
         pad_shape = self._calculate_pad_shape(patch_size)
         for key, patch in averages.items():
             averages[key] = np.pad(patch, pad_shape, mode="constant")
         return CoordinatePatchCollection(averages)
 
     @staticmethod
-    def _validate_average_mode(mode: str) -> None:
+    def _validate_average_mode(mode: str, percentile: float) -> None:
         """Determine if the average_mode is a valid kind"""
-        valid_modes = ["median", "mean"]
+        valid_modes = ["median", "mean", "percentile"]
         if mode not in valid_modes:
             msg = f"Found a mode of {mode} but it must be in the list {valid_modes}."
             raise ValueError(msg)
+        if mode == "percentile" and not (0 <= percentile <= 100):
+            raise ValueError("`percentile` must be between 0 and 100, inclusive")
 
     def _calculate_pad_shape(self, size: int) -> Tuple[int, int]:
         pad_amount = size - self.size

diff --git a/tests/test_fitter.py b/tests/test_fitter.py
@@ -72,10 +72,31 @@ def test_saving_and_loading():
 
 
 def test_coordinate_patch_average():
-    collection = CoordinatePatchCollection({CoordinateIdentifier(0, 0, 0): np.zeros((10, 10)),
-                                            CoordinateIdentifier(0, 0, 0): np.ones((10, 10))*2})
-    averaged_collection = collection.average(np.array([[0, 0]]), 10, 10, mode='median')
-    assert averaged_collection[CoordinateIdentifier(None, 0, 0)][1, 1] == 1
+    collection = CoordinatePatchCollection(
+            {CoordinateIdentifier(0, 0, 0): np.full((10, 10), .3),
+             CoordinateIdentifier(1, 0, 0): np.full((10, 10), .5),
+             CoordinateIdentifier(2, 0, 0): np.full((10, 10), .9),
+             # Exercise the nan-rejection in CoordinatePatchCollection.average()
+             CoordinateIdentifier(3, 0, 0): np.full((10, 10), np.nan),
+             })
+    for patch in collection.values():
+        # Make the normalization of each patch a no-op
+        patch[-1, -1] = 1
+
+    averaged_collection = collection.average(
+            np.array([[0, 0]]), 10, 10, mode='median')
+    expected = np.nanmedian([.3, .5, .9])
+    assert averaged_collection[CoordinateIdentifier(None, 0, 0)][1, 1] == expected
+
+    averaged_collection = collection.average(
+            np.array([[0, 0]]), 10, 10, mode='mean')
+    expected = np.nanmean([.3, .5, .9])
+    assert averaged_collection[CoordinateIdentifier(None, 0, 0)][1, 1] == expected
+
+    averaged_collection = collection.average(
+            np.array([[0, 0]]), 10, 10, mode='percentile', percentile=20)
+    expected = np.nanpercentile([.3, .5, .9], 20)
+    assert averaged_collection[CoordinateIdentifier(None, 0, 0)][1, 1] == expected
 
 
 def test_calculate_pad_shape():
@@ -98,7 +119,20 @@ def test_odd_pad_shape_errors():
 
 def test_validate_average_mode():
     with pytest.raises(ValueError):
-        CoordinatePatchCollection._validate_average_mode("nonexistent_method")
+        CoordinatePatchCollection._validate_average_mode(
+                "nonexistent_method", 1)
+
+    # Ensure valid modes are accepted
+    for mode in ('mean', 'median', 'percentile'):
+        CoordinatePatchCollection._validate_average_mode(mode, 1)
+
+    # Check invalid percentile values
+    with pytest.raises(ValueError):
+        CoordinatePatchCollection._validate_average_mode(
+                "percentile", -1)
+    with pytest.raises(ValueError):
+        CoordinatePatchCollection._validate_average_mode(
+                "percentile", 101)
 
 
 def test_find_stars_and_average():