Refactor reduceSources
Split function into more atomic operations.
Also rename goodMatches/safeMatches to matchesFaint/matchesBright.
taranu committed Mar 24, 2020
1 parent b25f106 commit 709c306
Showing 7 changed files with 115 additions and 71 deletions.
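In the diffs below, lines prefixed "-" are removed and lines prefixed "+" are added; unprefixed lines are unchanged context. The net effect of the commit, as the matchreduce.py diff shows, is that the single call

reduceSources(blob, blob._matchedCatalog, safeSnr)

is split into a filtering step and a summary step,

summarizeSources(blob, filterSources(blob._matchedCatalog, brightSnr=safeSnr))

with getKeysFilter available to resolve the schema keys once and pass them to filterSources via its keys argument. The unserialized attributes are renamed accordingly: blob.goodMatches becomes blob.matchesFaint and blob.safeMatches becomes blob.matchesBright.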
2 changes: 1 addition & 1 deletion python/lsst/validate/drp/astromerrmodel.py
@@ -152,7 +152,7 @@ def build_astrometric_error_model(matchedMultiVisitDataset, brightSnr=100,
_compute(blob,
matchedMultiVisitDataset['snr'].quantity,
matchedMultiVisitDataset['dist'].quantity,
- len(matchedMultiVisitDataset.goodMatches),
+ len(matchedMultiVisitDataset.matchesFaint),
brightSnr, medianRef, matchRef)
return blob

17 changes: 11 additions & 6 deletions python/lsst/validate/drp/calcnonsrd/model_phot_rep.py
@@ -19,7 +19,7 @@
# see <https://www.lsstcorp.org/LegalNotices/>.

from lsst.validate.drp.repeatability import measurePhotRepeat
- from lsst.validate.drp.matchreduce import reduceSources
+ from lsst.validate.drp.matchreduce import getKeysFilter, filterSources


def measure_model_phot_rep(metrics, filterName, matchedDataset, snr_bins=None):
@@ -60,8 +60,10 @@ def measure_model_phot_rep(metrics, filterName, matchedDataset, snr_bins=None):
snr_bins = [((5, 10), (10, 20)), ((20, 40), (40, 80))]
name_flux_all = ["base_PsfFlux", 'slot_ModelFlux']
measurements = []
+ schema = matchedDataset._matchedCatalog.schema
for name_flux in name_flux_all:
- key_model_mag = matchedDataset._matchedCatalog.schema.find(f"{name_flux}_mag").key
+ keys = getKeysFilter(schema, nameFluxKey=name_flux)
+ key_model_mag = schema.find(f"{name_flux}_mag").key
if name_flux == 'slot_ModelFlux':
name_sources = ['Gal', 'Star']
prefix_metric = 'model'
@@ -71,14 +73,17 @@ def measure_model_phot_rep(metrics, filterName, matchedDataset, snr_bins=None):
for idx_bins, ((snr_one_min, snr_one_max), (snr_two_min, snr_two_max)) in enumerate(snr_bins):
bin_base = 1 + 2*idx_bins
for source in name_sources:
- reduceSources(matchedDataset, matchedDataset._matchedCatalog, extended=source == 'Gal',
- nameFluxKey=name_flux, goodSnr=snr_one_min, goodSnrMax=snr_one_max,
- safeSnr=snr_two_min, safeSnrMax=snr_two_max)
+ matches = filterSources(
+ matchedDataset._matchedCatalog, keys=keys, extended=source == 'Gal',
+ faintSnr=snr_one_min, faintSnrMax=snr_one_max,
+ brightSnr=snr_two_min, brightSnrMax=snr_two_max,
+ )
for bin_offset in [0, 1]:
model_phot_rep = measurePhotRepeat(
metrics[f'validate_drp.{prefix_metric}PhotRep{source}{bin_base + bin_offset}'],
filterName,
- matchedDataset.goodMatches if bin_offset == 0 else matchedDataset.safeMatches,
+ matches.matchesFaint if bin_offset == 0 else matches.matchesBright,
key_model_mag)
measurements.append(model_phot_rep)

return measurements
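The value of the split is visible in this file: the schema keys depend only on the flux field, so getKeysFilter now runs once per flux field while filterSources runs once per SNR bin and source type. A minimal sketch of the pattern, using the default bins above (matchedCatalog stands in for the real matched catalog; the loop variable names are illustrative):

keys = getKeysFilter(matchedCatalog.schema, nameFluxKey="base_PsfFlux")  # resolve schema keys once
for (faint_lo, faint_hi), (bright_lo, bright_hi) in [((5, 10), (10, 20)), ((20, 40), (40, 80))]:
    matches = filterSources(matchedCatalog, keys=keys,
                            faintSnr=faint_lo, faintSnrMax=faint_hi,
                            brightSnr=bright_lo, brightSnrMax=bright_hi)
    # matches.matchesFaint covers the first SNR range and matches.matchesBright the second;
    # the bright range is not a subset of the faint one here, so each gets its own SNR cut

measurePhotRepeat is then called once per bin with the appropriate GroupView, exactly as in the loop above.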
2 changes: 1 addition & 1 deletion python/lsst/validate/drp/calcsrd/amx.py
@@ -95,7 +95,7 @@ def measureAMx(metric, matchedDataset, D, width=2., magRange=None, verbose=False
and to astrometric measurements performed in the r and i bands.
"""

- matches = matchedDataset.safeMatches
+ matches = matchedDataset.matchesBright

datums = {}

2 changes: 1 addition & 1 deletion python/lsst/validate/drp/calcsrd/tex.py
@@ -93,7 +93,7 @@ def measureTEx(metric, matchedDataset, D, bin_range_operator, verbose=False):
Table 27: These residual PSF ellipticity correlations apply to the r and i bands.
"""

- matches = matchedDataset.safeMatches
+ matches = matchedDataset.matchesBright

datums = {}
datums['D'] = Datum(quantity=D, description="Separation distance")
159 changes: 99 additions & 60 deletions python/lsst/validate/drp/matchreduce.py
@@ -21,7 +21,7 @@
for measurement classes, plotting functions, and JSON persistence.
"""

- __all__ = ['build_matched_dataset', 'reduceSources']
+ __all__ = ['build_matched_dataset', 'getKeysFilter', 'filterSources', 'summarizeSources']

import numpy as np
import astropy.units as u
@@ -35,6 +35,7 @@
SOURCE_IO_NO_FOOTPRINTS)
import lsst.afw.table as afwTable
from lsst.afw.fits import FitsError
+ import lsst.pipe.base as pipeBase
from lsst.verify import Blob, Datum

from .util import (getCcdKeyName, raftSensorToInt, positionRmsFromCat,
@@ -96,25 +97,22 @@ def build_matched_dataset(repo, dataIds, matchRadius=None, safeSnr=50.,
RMS of sky coordinates of stars over multiple visits (milliarcseconds).
*Not serialized.*
- goodMatches
- all good matches, as an afw.table.GroupView;
- good matches contain only objects whose detections all have
+ matchesFaint : `afw.table.GroupView`
+ Faint matches containing only objects that have:
- 1. a PSF Flux measurement with S/N > 1
- 2. a finite (non-nan) PSF magnitude. This separate check is largely
+ 1. A PSF Flux measurement with sufficient S/N.
+ 2. A finite (non-nan) PSF magnitude. This separate check is largely
to reject failed zeropoints.
- 3. and do not have flags set for bad, cosmic ray, edge or saturated
+ 3. No flags set for bad, cosmic ray, edge or saturated.
+ 4. Extendedness consistent with a point source.
*Not serialized.*
- safeMatches
- safe matches, as an afw.table.GroupView. Safe matches
- are good matches that are sufficiently bright and sufficiently
- compact.
+ matchesBright : `afw.table.GroupView`
+ Bright matches matching a higher S/N threshold than matchesFaint.
*Not serialized.*
magKey
- Key for `"base_PsfFlux_mag"` in the `goodMatches` and `safeMatches`
+ Key for `"base_PsfFlux_mag"` in the `matchesFaint` and `matchesBright`
catalog tables.
*Not serialized.*
@@ -163,7 +161,7 @@ def build_matched_dataset(repo, dataIds, matchRadius=None, safeSnr=50.,
blob.magKey = blob._matchedCatalog.schema.find("base_PsfFlux_mag").key
# Reduce catalogs into summary statistics.
# These are the serialiable attributes of this class.
- reduceSources(blob, blob._matchedCatalog, safeSnr)
+ summarizeSources(blob, filterSources(blob._matchedCatalog, brightSnr=safeSnr))
return blob


@@ -345,101 +343,142 @@ def _loadAndMatchCatalogs(repo, dataIds, matchRadius,
return srcVis, allMatches


- def reduceSources(blob, allMatches, goodSnr=5.0, safeSnr=50.0, safeExtendedness=1.0, extended=False,
- nameFluxKey=None, goodSnrMax=np.Inf, safeSnrMax=np.Inf):
- """Calculate summary statistics for each star. These are persisted
- as object attributes.
+ def getKeysFilter(schema, nameFluxKey=None):
+ """ Get schema keys for filtering sources.
+ schema : `lsst.afw.table.Schema`
+ A table schema to retrieve keys from.
+ nameFluxKey : `str`
+ The name of a flux field to retrieve
+ Returns
+ -------
+ keys : `lsst.pipe.base.Struct`
+ A struct storing schema keys to aggregate over.
+ """
+ if nameFluxKey is None:
+ nameFluxKey = "base_PsfFlux"
+ # Filter down to matches with at least 2 sources and good flags

+ return pipeBase.Struct(
+ flags=[schema.find("base_PixelFlags_flag_%s" % flag).key
+ for flag in ("saturated", "cr", "bad", "edge")],
+ snr=schema.find(f"{nameFluxKey}_snr").key,
+ mag=schema.find(f"{nameFluxKey}_mag").key,
+ magErr=schema.find(f"{nameFluxKey}_magErr").key,
+ extended=schema.find("base_ClassificationExtendedness_value").key,
+ )


+ def filterSources(allMatches, keys=None, faintSnr=5.0, brightSnr=50.0, safeExtendedness=1.0, extended=False,
+ faintSnrMax=np.Inf, brightSnrMax=np.Inf):
+ """Filter matched sources on flags and SNR.
Parameters
----------
- blob : `lsst.verify.blob.Blob`
- A verification blob to store Datums in.
allMatches : `lsst.afw.table.GroupView`
GroupView object with matches.
- goodSnr : float, optional
- Minimum median SNR for a match to be considered "good"; default 3.
- safeSnr : float, optional
- Minimum median SNR for a match to be considered "safe"; default 50.
+ keys : `lsst.pipe.base.Struct`
+ A struct storing schema keys to aggregate over.
+ faintSnr : float, optional
+ Minimum median SNR for a faint source match; default 5.
+ brightSnr : float, optional
+ Minimum median SNR for a bright source match; default 50.
safeExtendedness: float, optional
Maximum (exclusive) extendedness for sources or minimum (inclusive) if extended==True.
extended: bool, optional
Whether to select extended sources, i.e. galaxies.
- goodSnrMax : float, optional
- Maximum median SNR for a match to be considered "good"; default np.Inf.
- safeSnrMax : float, optional
- Maximum median SNR for a match to be considered "safe"; default np.Inf.
+ faintSnrMax : float, optional
+ Maximum median SNR for a faint source match; default np.Inf.
+ brightSnrMax : float, optional
+ Maximum median SNR for a bright source match; default np.Inf.
+ Returns
+ -------
+ filterResult : `lsst.pipe.base.Struct`
+ A struct containing good and safe matches and the necessary keys to use them.
"""
- if nameFluxKey is None:
- nameFluxKey = "slot_ModelFlux" if extended else "base_PsfFlux"
# Filter down to matches with at least 2 sources and good flags
- flagKeys = [allMatches.schema.find("base_PixelFlags_flag_%s" % flag).key
- for flag in ("saturated", "cr", "bad", "edge")]
+ if keys is None:
+ keys = getKeysFilter(allMatches.schema, "slot_ModelFlux" if extended else "base_PsfFlux")
nMatchesRequired = 2

- snrKey = allMatches.schema.find(f"{nameFluxKey}_snr").key
- magKey = allMatches.schema.find(f"{nameFluxKey}_mag").key
- magErrKey = allMatches.schema.find(f"{nameFluxKey}_magErr").key
- extendedKey = allMatches.schema.find("base_ClassificationExtendedness_value").key

- snrMin, snrMax = goodSnr, goodSnrMax
+ snrMin, snrMax = faintSnr, faintSnrMax

def extendedFilter(cat):
if len(cat) < nMatchesRequired:
return False
- for flagKey in flagKeys:
+ for flagKey in keys.flags:
if cat.get(flagKey).any():
return False
- if not np.isfinite(cat.get(magKey)).all():
+ if not np.isfinite(cat.get(keys.mag)).all():
return False
- extendedness = cat.get(extendedKey)
+ extendedness = cat.get(keys.extended)
return np.min(extendedness) >= safeExtendedness if extended else \
np.max(extendedness) < safeExtendedness

def snrFilter(cat):
# Note that this also implicitly checks for psfSnr being non-nan.
- snr = np.median(cat.get(snrKey))
+ snr = np.median(cat.get(keys.snr))
return snrMax >= snr >= snrMin

def fullFilter(cat):
return extendedFilter(cat) and snrFilter(cat)

- # If safeSnr range is a subset of goodSnr, it's safe to only filter on snr again
+ # If brightSnr range is a subset of faintSnr, it's safe to only filter on snr again
# Otherwise, filter on flags/extendedness first, then snr
- isSafeSubset = goodSnrMax >= safeSnrMax and goodSnr <= safeSnr
- goodMatches = allMatches.where(fullFilter) if isSafeSubset else allMatches.where(extendedFilter)
- snrMin, snrMax = safeSnr, safeSnrMax
- safeMatches = goodMatches.where(snrFilter)
+ isSafeSubset = faintSnrMax >= brightSnrMax and faintSnr <= brightSnr
+ matchesFaint = allMatches.where(fullFilter) if isSafeSubset else allMatches.where(extendedFilter)
+ snrMin, snrMax = brightSnr, brightSnrMax
+ matchesBright = matchesFaint.where(snrFilter)
+ # This means that matchesFaint has had extendedFilter but not snrFilter applied
if not isSafeSubset:
- snrMin, snrMax = goodSnr, goodSnrMax
- goodMatches = goodMatches.where(snrFilter)
+ snrMin, snrMax = faintSnr, faintSnrMax
+ matchesFaint = matchesFaint.where(snrFilter)

+ return pipeBase.Struct(
+ extended=extended, keys=keys, matchesFaint=matchesFaint, matchesBright=matchesBright,
+ )


+ def summarizeSources(blob, filterResult):
+ """Calculate summary statistics for each source. These are persisted
+ as object attributes.
+ Parameters
+ ----------
+ blob : `lsst.verify.blob.Blob`
+ A verification blob to store Datums in.
+ filterResult : `lsst.pipe.base.Struct`
+ A struct containing bright and faint filter matches, as returned by `filterSources`.
+ """
# Pass field=psfMagKey so np.mean just gets that as its input
- typeMag = "model" if extended else "PSF"
+ typeMag = "model" if filterResult.extended else "PSF"
filter_name = blob['filterName']
- source_type = f'{"extended" if extended else "point"} sources"'
- blob['snr'] = Datum(quantity=goodMatches.aggregate(np.median, field=snrKey) * u.Unit(''),
+ source_type = f'{"extended" if filterResult.extended else "point"} sources"'
+ matches = filterResult.matchesFaint
+ keys = filterResult.keys
+ blob['snr'] = Datum(quantity=matches.aggregate(np.median, field=keys.snr) * u.Unit(''),
label='SNR({band})'.format(band=filter_name),
description=f'Median signal-to-noise ratio of {typeMag} magnitudes for {source_type}'
f' over multiple visits')
- blob['mag'] = Datum(quantity=goodMatches.aggregate(np.mean, field=magKey) * u.mag,
+ blob['mag'] = Datum(quantity=matches.aggregate(np.mean, field=keys.mag) * u.mag,
label='{band}'.format(band=filter_name),
description=f'Mean of {typeMag} magnitudes for {source_type} over multiple visits')
- blob['magrms'] = Datum(quantity=goodMatches.aggregate(np.std, field=magKey) * u.mag,
+ blob['magrms'] = Datum(quantity=matches.aggregate(np.std, field=keys.mag) * u.mag,
label='RMS({band})'.format(band=filter_name),
description=f'RMS of {typeMag} magnitudes for {source_type} over multiple visits')
- blob['magerr'] = Datum(quantity=goodMatches.aggregate(np.median, field=magErrKey) * u.mag,
+ blob['magerr'] = Datum(quantity=matches.aggregate(np.median, field=keys.magErr) * u.mag,
label='sigma({band})'.format(band=filter_name),
description=f'Median 1-sigma uncertainty of {typeMag} magnitudes for {source_type}'
f' over multiple visits')
# positionRmsFromCat knows how to query a group
# so we give it the whole thing by going with the default `field=None`.
- blob['dist'] = Datum(quantity=goodMatches.aggregate(positionRmsFromCat) * u.milliarcsecond,
+ blob['dist'] = Datum(quantity=matches.aggregate(positionRmsFromCat) * u.milliarcsecond,
label='d',
description=f'RMS of sky coordinates of {source_type} over multiple visits')

# These attributes are not serialized
- blob.goodMatches = goodMatches
- blob.safeMatches = safeMatches
+ blob.matchesFaint = filterResult.matchesFaint
+ blob.matchesBright = filterResult.matchesBright


def _loadPhotoCalib(butler, dataId, doApplyExternalPhotoCalib, externalPhotoCalibName):
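A subtlety in filterSources above: when the bright SNR range is a subset of the faint one (true for the defaults, faintSnr=5.0 and brightSnr=50.0 with both maxima at np.Inf), matchesFaint is built with the full flag, extendedness, and SNR filter, and matchesBright can then be derived by re-filtering matchesFaint on SNR alone. A short sketch of consuming the returned Struct (allMatches stands in for a real matched GroupView):

result = filterSources(allMatches)  # defaults: faintSnr=5.0, brightSnr=50.0
faint, bright = result.matchesFaint, result.matchesBright
assert len(bright) <= len(faint)  # with default limits, bright matches are a subset of faint ones
medianSnr = faint.aggregate(np.median, field=result.keys.snr)  # the same pattern summarizeSources uses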
2 changes: 1 addition & 1 deletion python/lsst/validate/drp/photerrmodel.py
@@ -205,7 +205,7 @@ def build_photometric_error_model(matchedMultiVisitDataset, brightSnr=100, media
matchedMultiVisitDataset['magerr'].quantity,
matchedMultiVisitDataset['magrms'].quantity,
matchedMultiVisitDataset['dist'].quantity,
- len(matchedMultiVisitDataset.goodMatches),
+ len(matchedMultiVisitDataset.matchesFaint),
brightSnr,
medianRef,
matchRef)
2 changes: 1 addition & 1 deletion python/lsst/validate/drp/validate.py
@@ -362,7 +362,7 @@ def add_measurement(measurement):
add_measurement(afx)

pa1 = measurePA1(
- metrics['validate_drp.PA1'], filterName, matchedDataset.safeMatches, matchedDataset.magKey)
+ metrics['validate_drp.PA1'], filterName, matchedDataset.matchesBright, matchedDataset.magKey)
add_measurement(pa1)

if not skipNonSrd:
