lsst · plazas · May 13, 2020 · May 4, 2020 · May 4, 2020 · May 6, 2020
diff --git a/python/lsst/meas/algorithms/defects.py b/python/lsst/meas/algorithms/defects.py
@@ -27,6 +27,7 @@
 import logging
 import itertools
 import collections.abc
+import contextlib
 import numpy as np
 import copy
 import datetime
@@ -59,26 +60,44 @@ class Defects(collections.abc.MutableSequence):
     defectList : iterable of `lsst.meas.algorithms.Defect`
                  or `lsst.geom.BoxI`, optional
         Collections of defects to apply to the image.
+    metadata : `lsst.daf.base.PropertyList`, optional
+        Metadata to associate with the defects.  Will be copied and
+        overwrite existing metadata, if any. If not supplied the existing
+        metadata will be reset.
+    normalize_on_init : `bool`
+        If True, normalization is applied to the defects in ``defectList`` to
+        remove duplicates, eliminate overlaps, etc.
+
+    Notes
+    -----
+    Defects are stored within this collection in a "reduced" or "normalized"
+    form: rather than simply storing the bounding boxes which are added to the
+    collection, we eliminate overlaps and duplicates. This normalization
+    procedure may introduce overhead when adding many new defects; it may be
+    temporarily disabled using the `Defects.bulk_update` context manager if
+    necessary.
     """
 
     _OBSTYPE = "defects"
     """The calibration type used for ingest."""
 
-    def __init__(self, defectList=None, metadata=None):
+    def __init__(self, defectList=None, metadata=None, *, normalize_on_init=True):
         self._defects = []
 
+        if defectList is not None:
+            self._bulk_update = True
+            for d in defectList:
+                self.append(d)
+        self._bulk_update = False
+
+        if normalize_on_init:
+            self._normalize()
+
         if metadata is not None:
             self._metadata = metadata
         else:
             self.setMetadata()
 
-        if defectList is None:
-            return
-
-        # Ensure that type checking
-        for d in defectList:
-            self.append(d)
-
     def _check_value(self, value):
         """Check that the supplied value is a `~lsst.meas.algorithms.Defect`
         or can be converted to one.
@@ -121,6 +140,7 @@ def __setitem__(self, index, value):
         """Can be given a `~lsst.meas.algorithms.Defect` or a `lsst.geom.BoxI`
         """
         self._defects[index] = self._check_value(value)
+        self._normalize()
 
     def __iter__(self):
         return iter(self._defects)
@@ -151,8 +171,50 @@ def __eq__(self, other):
     def __str__(self):
         return "Defects(" + ",".join(str(d.getBBox()) for d in self) + ")"
 
+    def _normalize(self):
+        """Recalculate defect bounding boxes for efficiency.
+
+        Notes
+        -----
+        Ideally, this would generate the provably-minimal set of bounding
+        boxes necessary to represent the defects. At present, however, that
+        doesn't happen: see DM-24781. In the cases of substantial overlaps or
+        duplication, though, this will produce a much reduced set.
+        """
+        # In bulk-update mode, normalization is a no-op.
+        if self._bulk_update:
+            return
+
+        # work out the minimum and maximum bounds from all defect regions.
+        minX, minY, maxX, maxY = float('inf'), float('inf'), float('-inf'), float('-inf')
+        for defect in self:
+            bbox = defect.getBBox()
+            minX = min(minX, bbox.getMinX())
+            minY = min(minY, bbox.getMinY())
+            maxX = max(maxX, bbox.getMaxX())
+            maxY = max(maxY, bbox.getMaxY())
+
+        region = lsst.geom.Box2I(lsst.geom.Point2I(minX, minY),
+                                 lsst.geom.Point2I(maxX, maxY))
+
+        mi = lsst.afw.image.MaskedImageF(region)
+        self.maskPixels(mi, maskName="BAD")
+        self._defects = Defects.fromMask(mi, "BAD")._defects
+
+    @contextlib.contextmanager
+    def bulk_update(self):
+        """Temporarily suspend normalization of the defect list.
+        """
+        self._bulk_update = True
+        try:
+            yield
+        finally:
+            self._bulk_update = False
+            self._normalize()
+
     def insert(self, index, value):
         self._defects.insert(index, self._check_value(value))
+        self._normalize()
 
     def getMetadata(self):
         """Retrieve metadata associated with these `Defects`.
@@ -702,8 +764,10 @@ def fromFootprintList(cls, fpList):
         defects : `Defects`
             List of defects.
         """
+        # normalize_on_init is set to False to avoid recursively calling
+        # fromMask/fromFootprintList in Defects.__init__.
         return cls(itertools.chain.from_iterable(lsst.afw.detection.footprintToBBoxList(fp)
-                                                 for fp in fpList))
+                                                 for fp in fpList), normalize_on_init=False)
 
     @classmethod
     def fromMask(cls, maskedImage, maskName):

diff --git a/tests/test_interp.py b/tests/test_interp.py
@@ -74,7 +74,7 @@ def test_defects(self):
 
         defects.append(lsst.geom.Box2I(lsst.geom.Point2I(0, 0),
                                        lsst.geom.Point2I(4, 5)))
-        defects.append(lsst.geom.Point2I(10, 12))
+        defects.append(lsst.geom.Point2I(50, 50))
         defects.append(afwImage.DefectBase(lsst.geom.Box2I(lsst.geom.Point2I(100, 200),
                                                            lsst.geom.Extent2I(5, 5))))
         self.assertEqual(len(defects), 4)
@@ -85,9 +85,15 @@ def test_defects(self):
         # Transposition
         transposed = defects.transpose()
         self.assertEqual(len(transposed), len(defects))
-        self.assertEqual(transposed[0].getBBox(),
-                         lsst.geom.Box2I(lsst.geom.Point2I(6, 5),
-                                         lsst.geom.Extent2I(45, 37)))
+
+        # Check that an individual defect is found properly transposed within
+        # the outputs.
+        found = False
+        for defect in transposed:
+            if defect.getBBox() == lsst.geom.Box2I(lsst.geom.Point2I(6, 5), lsst.geom.Extent2I(45, 37)):
+                found = True
+                break
+        self.assertTrue(found)
 
         # Serialization round trip
         meta = PropertyList()
@@ -125,12 +131,18 @@ def test_defects(self):
 
     def testAstropyRegion(self):
         """Read a FITS region file created by Astropy regions."""
+        # The file contains three regions:
+        #
+        # - Point2I(340, 344)
+        # - Point2I(340, 344)
+        # - Box2I(minimum=Point2I(5, -5), dimensions=Extent2I(10, 20))
+        #
+        # The two coincident points are combined on read, so we end up with two defects.
 
         with self.assertLogs():
             defects = algorithms.Defects.readFits(os.path.join(TESTDIR, "data", "fits_region.fits"))
 
-        # Should be able to read 3 regions from the file
-        self.assertEqual(len(defects), 3)
+        self.assertEqual(len(defects), 2)
 
     def testLsstTextfile(self):
         """Read legacy LSST text file format"""
@@ -151,9 +163,85 @@ def testLsstTextfile(self):
 
             defects = algorithms.Defects.readLsstDefectsFile(tmpFile)
 
-            self.assertEqual(len(defects), 9)
-            self.assertEqual(defects[3].getBBox(), lsst.geom.Box2I(lsst.geom.Point2I(1998, 4035),
-                                                                   lsst.geom.Extent2I(50, 141)))
+        # Although there are 9 defects listed above, we record 11 after
+        # normalization. This is due to non-optimal behaviour in
+        # Defects.fromMask; see DM-24781.
+        self.assertEqual(len(defects), 11)
+
+    def test_normalize_defects(self):
+        """A test for the lsst.meas.algorithms.Defect.normalize() method.
+        """
+        defects = algorithms.Defects()
+
+        # First series of 1-pixel contiguous defects
+        for yPix in range(1, 6):
+            defects.append(lsst.geom.Box2I(corner=lsst.geom.Point2I(15, yPix),
+                           dimensions=lsst.geom.Extent2I(1, 1)))
+
+        # Defects are normalized as they are added; check that the above have
+        # been merged into a single bounding box.
+        self.assertEqual(len(defects), 1)
+
+        # Second series of 1-pixel contiguous defects in bulk mode
+        with defects.bulk_update():
+            for yPix in range(11, 16):
+                defects.append(lsst.geom.Box2I(corner=lsst.geom.Point2I(20, yPix),
+                                               dimensions=lsst.geom.Extent2I(1, 1)))
+            # In bulk mode, defects are not normalized.
+            self.assertEqual(len(defects), 6)
+
+        # Normalization applied on exiting bulk mode.
+        self.assertEqual(len(defects), 2)
+
+        boxesMeasured = []
+        for defect in defects:
+            boxesMeasured.append(defect.getBBox())
+
+        # The normalizing function should have created the following two boxes out
+        # of the individual 1-pixel defects from above
+        expectedDefects = [lsst.geom.Box2I(corner=lsst.geom.Point2I(15, 1),
+                                           dimensions=lsst.geom.Extent2I(1, 5)),
+                           lsst.geom.Box2I(corner=lsst.geom.Point2I(20, 11),
+                                           dimensions=lsst.geom.Extent2I(1, 5))]
+
+        self.assertEqual(len(expectedDefects), len(boxesMeasured))
+        for expDef, measDef in zip(expectedDefects, boxesMeasured):
+            self.assertEqual(expDef, measDef)
+
+        # Normalize two distinct sets of Defects and ensure they compare to the same thing
+        defects = algorithms.Defects()
+        # Set 1
+        defects.append(lsst.geom.Box2I(corner=lsst.geom.Point2I(25, 1), dimensions=lsst.geom.Extent2I(1, 1)))
+        defects.append(lsst.geom.Box2I(corner=lsst.geom.Point2I(25, 2), dimensions=lsst.geom.Extent2I(1, 1)))
+        defects.append(lsst.geom.Box2I(corner=lsst.geom.Point2I(25, 3), dimensions=lsst.geom.Extent2I(1, 1)))
+        defects.append(lsst.geom.Box2I(corner=lsst.geom.Point2I(25, 4), dimensions=lsst.geom.Extent2I(1, 1)))
+        defects.append(lsst.geom.Box2I(corner=lsst.geom.Point2I(25, 5), dimensions=lsst.geom.Extent2I(1, 1)))
+        defects.append(lsst.geom.Box2I(corner=lsst.geom.Point2I(25, 6), dimensions=lsst.geom.Extent2I(1, 1)))
+        defects.append(lsst.geom.Box2I(corner=lsst.geom.Point2I(25, 7), dimensions=lsst.geom.Extent2I(1, 1)))
+        defects.append(lsst.geom.Box2I(corner=lsst.geom.Point2I(25, 8), dimensions=lsst.geom.Extent2I(1, 1)))
+
+        # Set 2
+        defects2 = algorithms.Defects()
+        defects2.append(lsst.geom.Box2I(corner=lsst.geom.Point2I(25, 1), dimensions=lsst.geom.Extent2I(1, 5)))
+        defects2.append(lsst.geom.Box2I(corner=lsst.geom.Point2I(25, 5), dimensions=lsst.geom.Extent2I(1, 4)))
+
+        self.assertEqual(defects, defects2)
+
+        boxesMeasured, boxesMeasured2 = [], []
+        for defect, defect2 in zip(defects, defects2):
+            boxesMeasured.append(defect.getBBox())
+            boxesMeasured2.append(defect2.getBBox())
+
+        expectedDefects = [lsst.geom.Box2I(corner=lsst.geom.Point2I(25, 1),
+                                           dimensions=lsst.geom.Extent2I(1, 8))]
+
+        self.assertEqual(len(expectedDefects), len(boxesMeasured))
+        for expDef, measDef in zip(expectedDefects, boxesMeasured):
+            self.assertEqual(expDef, measDef)
+
+        self.assertEqual(len(expectedDefects), len(boxesMeasured2))
+        for expDef, measDef in zip(expectedDefects, boxesMeasured2):
+            self.assertEqual(expDef, measDef)
 
 
 class InterpolationTestCase(lsst.utils.tests.TestCase):