Refactor reduceSources
Split function into more atomic operations.
Also rename goodMatches/safeMatches to matchesFaint/matchesBright.
taranu committed Mar 24, 2020
1 parent b25f106 commit 709c306
Showing 7 changed files with 115 additions and 71 deletions.
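In the diffs below, lines prefixed "-" are removed and lines prefixed "+" are added; unprefixed lines are unchanged context. The net effect of the commit, as the matchreduce.py diff shows, is that the single call

reduceSources(blob, blob._matchedCatalog, safeSnr)

is split into a filtering step and a summary step,

summarizeSources(blob, filterSources(blob._matchedCatalog, brightSnr=safeSnr))

with getKeysFilter available to resolve the schema keys once and pass them to filterSources via its keys argument. The unserialized attributes are renamed accordingly: blob.goodMatches becomes blob.matchesFaint and blob.safeMatches becomes blob.matchesBright.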
2 changes: 1 addition & 1 deletion python/lsst/validate/drp/astromerrmodel.py
@@ -152,7 +152,7 @@ def build_astrometric_error_model(matchedMultiVisitDataset, brightSnr=100,
_compute(blob,
matchedMultiVisitDataset['snr'].quantity,
matchedMultiVisitDataset['dist'].quantity,
- len(matchedMultiVisitDataset.goodMatches),
+ len(matchedMultiVisitDataset.matchesFaint),
brightSnr, medianRef, matchRef)
return blob

17 changes: 11 additions & 6 deletions python/lsst/validate/drp/calcnonsrd/model_phot_rep.py
@@ -19,7 +19,7 @@
# see <https://www.lsstcorp.org/LegalNotices/>.

from lsst.validate.drp.repeatability import measurePhotRepeat
- from lsst.validate.drp.matchreduce import reduceSources
+ from lsst.validate.drp.matchreduce import getKeysFilter, filterSources


def measure_model_phot_rep(metrics, filterName, matchedDataset, snr_bins=None):
@@ -60,8 +60,10 @@ def measure_model_phot_rep(metrics, filterName, matchedDataset, snr_bins=None):
snr_bins = [((5, 10), (10, 20)), ((20, 40), (40, 80))]
name_flux_all = ["base_PsfFlux", 'slot_ModelFlux']
measurements = []
+ schema = matchedDataset._matchedCatalog.schema
for name_flux in name_flux_all:
- key_model_mag = matchedDataset._matchedCatalog.schema.find(f"{name_flux}_mag").key
+ keys = getKeysFilter(schema, nameFluxKey=name_flux)
+ key_model_mag = schema.find(f"{name_flux}_mag").key
if name_flux == 'slot_ModelFlux':
name_sources = ['Gal', 'Star']
prefix_metric = 'model'
@@ -71,14 +73,17 @@ def measure_model_phot_rep(metrics, filterName, matchedDataset, snr_bins=None):
for idx_bins, ((snr_one_min, snr_one_max), (snr_two_min, snr_two_max)) in enumerate(snr_bins):
bin_base = 1 + 2*idx_bins
for source in name_sources:
- reduceSources(matchedDataset, matchedDataset._matchedCatalog, extended=source == 'Gal',
- nameFluxKey=name_flux, goodSnr=snr_one_min, goodSnrMax=snr_one_max,
- safeSnr=snr_two_min, safeSnrMax=snr_two_max)
+ matches = filterSources(
+ matchedDataset._matchedCatalog, keys=keys, extended=source == 'Gal',
+ faintSnr=snr_one_min, faintSnrMax=snr_one_max,
+ brightSnr=snr_two_min, brightSnrMax=snr_two_max,
+ )
for bin_offset in [0, 1]:
model_phot_rep = measurePhotRepeat(
metrics[f'validate_drp.{prefix_metric}PhotRep{source}{bin_base + bin_offset}'],
filterName,
- matchedDataset.goodMatches if bin_offset == 0 else matchedDataset.safeMatches,
+ matches.matchesFaint if bin_offset == 0 else matches.matchesBright,
key_model_mag)
measurements.append(model_phot_rep)

return measurements
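The value of the split is visible in this file: the schema keys depend only on the flux field, so getKeysFilter now runs once per flux field while filterSources runs once per SNR bin and source type. A minimal sketch of the pattern, using the default bins above (matchedCatalog stands in for the real matched catalog; the loop variable names are illustrative):

keys = getKeysFilter(matchedCatalog.schema, nameFluxKey="base_PsfFlux")  # resolve schema keys once
for (faint_lo, faint_hi), (bright_lo, bright_hi) in [((5, 10), (10, 20)), ((20, 40), (40, 80))]:
    matches = filterSources(matchedCatalog, keys=keys,
                            faintSnr=faint_lo, faintSnrMax=faint_hi,
                            brightSnr=bright_lo, brightSnrMax=bright_hi)
    # matches.matchesFaint covers the first SNR range and matches.matchesBright the second;
    # the bright range is not a subset of the faint one here, so each gets its own SNR cut

measurePhotRepeat is then called once per bin with the appropriate GroupView, exactly as in the loop above.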
2 changes: 1 addition & 1 deletion python/lsst/validate/drp/calcsrd/amx.py
@@ -95,7 +95,7 @@ def measureAMx(metric, matchedDataset, D, width=2., magRange=None, verbose=False
and to astrometric measurements performed in the r and i bands.
"""

- matches = matchedDataset.safeMatches
+ matches = matchedDataset.matchesBright

datums = {}

2 changes: 1 addition & 1 deletion python/lsst/validate/drp/calcsrd/tex.py
@@ -93,7 +93,7 @@ def measureTEx(metric, matchedDataset, D, bin_range_operator, verbose=False):
Table 27: These residual PSF ellipticity correlations apply to the r and i bands.
"""

- matches = matchedDataset.safeMatches
+ matches = matchedDataset.matchesBright

datums = {}
datums['D'] = Datum(quantity=D, description="Separation distance")
159 changes: 99 additions & 60 deletions python/lsst/validate/drp/matchreduce.py
@@ -21,7 +21,7 @@
for measurement classes, plotting functions, and JSON persistence.
"""

- __all__ = ['build_matched_dataset', 'reduceSources']
+ __all__ = ['build_matched_dataset', 'getKeysFilter', 'filterSources', 'summarizeSources']

import numpy as np
import astropy.units as u
@@ -35,6 +35,7 @@
SOURCE_IO_NO_FOOTPRINTS)
import lsst.afw.table as afwTable
from lsst.afw.fits import FitsError
+ import lsst.pipe.base as pipeBase
from lsst.verify import Blob, Datum

from .util import (getCcdKeyName, raftSensorToInt, positionRmsFromCat,
@@ -96,25 +97,22 @@ def build_matched_dataset(repo, dataIds, matchRadius=None, safeSnr=50.,
RMS of sky coordinates of stars over multiple visits (milliarcseconds).
*Not serialized.*
- goodMatches
- all good matches, as an afw.table.GroupView;
- good matches contain only objects whose detections all have
+ matchesFaint : `afw.table.GroupView`
+ Faint matches containing only objects that have:
- 1. a PSF Flux measurement with S/N > 1
- 2. a finite (non-nan) PSF magnitude. This separate check is largely
+ 1. A PSF Flux measurement with sufficient S/N.
+ 2. A finite (non-nan) PSF magnitude. This separate check is largely
to reject failed zeropoints.
- 3. and do not have flags set for bad, cosmic ray, edge or saturated
+ 3. No flags set for bad, cosmic ray, edge or saturated.
+ 4. Extendedness consistent with a point source.
*Not serialized.*
- safeMatches
- safe matches, as an afw.table.GroupView. Safe matches
- are good matches that are sufficiently bright and sufficiently
- compact.
+ matchesBright : `afw.table.GroupView`
+ Bright matches matching a higher S/N threshold than matchesFaint.
*Not serialized.*
magKey
- Key for `"base_PsfFlux_mag"` in the `goodMatches` and `safeMatches`
+ Key for `"base_PsfFlux_mag"` in the `matchesFaint` and `matchesBright`
catalog tables.
*Not serialized.*
@@ -163,7 +161,7 @@ def build_matched_dataset(repo, dataIds, matchRadius=None, safeSnr=50.,
blob.magKey = blob._matchedCatalog.schema.find("base_PsfFlux_mag").key
# Reduce catalogs into summary statistics.
# These are the serialiable attributes of this class.
- reduceSources(blob, blob._matchedCatalog, safeSnr)
+ summarizeSources(blob, filterSources(blob._matchedCatalog, brightSnr=safeSnr))
return blob


@@ -345,101 +343,142 @@ def _loadAndMatchCatalogs(repo, dataIds, matchRadius,
return srcVis, allMatches


- def reduceSources(blob, allMatches, goodSnr=5.0, safeSnr=50.0, safeExtendedness=1.0, extended=False,
- nameFluxKey=None, goodSnrMax=np.Inf, safeSnrMax=np.Inf):
- """Calculate summary statistics for each star. These are persisted
- as object attributes.
+ def getKeysFilter(schema, nameFluxKey=None):
+ """ Get schema keys for filtering sources.
+ schema : `lsst.afw.table.Schema`
+ A table schema to retrieve keys from.
+ nameFluxKey : `str`
+ The name of a flux field to retrieve
+ Returns
+ -------
+ keys : `lsst.pipe.base.Struct`
+ A struct storing schema keys to aggregate over.
+ """
+ if nameFluxKey is None:
+ nameFluxKey = "base_PsfFlux"
+ # Filter down to matches with at least 2 sources and good flags

+ return pipeBase.Struct(
+ flags=[schema.find("base_PixelFlags_flag_%s" % flag).key
+ for flag in ("saturated", "cr", "bad", "edge")],
+ snr=schema.find(f"{nameFluxKey}_snr").key,
+ mag=schema.find(f"{nameFluxKey}_mag").key,
+ magErr=schema.find(f"{nameFluxKey}_magErr").key,
+ extended=schema.find("base_ClassificationExtendedness_value").key,
+ )


+ def filterSources(allMatches, keys=None, faintSnr=5.0, brightSnr=50.0, safeExtendedness=1.0, extended=False,
+ faintSnrMax=np.Inf, brightSnrMax=np.Inf):
+ """Filter matched sources on flags and SNR.
Parameters
----------
- blob : `lsst.verify.blob.Blob`
- A verification blob to store Datums in.
allMatches : `lsst.afw.table.GroupView`
GroupView object with matches.
- goodSnr : float, optional
- Minimum median SNR for a match to be considered "good"; default 3.
- safeSnr : float, optional
- Minimum median SNR for a match to be considered "safe"; default 50.
+ keys : `lsst.pipe.base.Struct`
+ A struct storing schema keys to aggregate over.
+ faintSnr : float, optional
+ Minimum median SNR for a faint source match; default 5.
+ brightSnr : float, optional
+ Minimum median SNR for a bright source match; default 50.
safeExtendedness: float, optional
Maximum (exclusive) extendedness for sources or minimum (inclusive) if extended==True.
extended: bool, optional
Whether to select extended sources, i.e. galaxies.
- goodSnrMax : float, optional
- Maximum median SNR for a match to be considered "good"; default np.Inf.
- safeSnrMax : float, optional
- Maximum median SNR for a match to be considered "safe"; default np.Inf.
+ faintSnrMax : float, optional
+ Maximum median SNR for a faint source match; default np.Inf.
+ brightSnrMax : float, optional
+ Maximum median SNR for a bright source match; default np.Inf.
+ Returns
+ -------
+ filterResult : `lsst.pipe.base.Struct`
+ A struct containing good and safe matches and the necessary keys to use them.
"""
- if nameFluxKey is None:
- nameFluxKey = "slot_ModelFlux" if extended else "base_PsfFlux"
# Filter down to matches with at least 2 sources and good flags
- flagKeys = [allMatches.schema.find("base_PixelFlags_flag_%s" % flag).key
- for flag in ("saturated", "cr", "bad", "edge")]
+ if keys is None:
+ keys = getKeysFilter(allMatches.schema, "slot_ModelFlux" if extended else "base_PsfFlux")
nMatchesRequired = 2

- snrKey = allMatches.schema.find(f"{nameFluxKey}_snr").key
- magKey = allMatches.schema.find(f"{nameFluxKey}_mag").key
- magErrKey = allMatches.schema.find(f"{nameFluxKey}_magErr").key
- extendedKey = allMatches.schema.find("base_ClassificationExtendedness_value").key

- snrMin, snrMax = goodSnr, goodSnrMax
+ snrMin, snrMax = faintSnr, faintSnrMax

def extendedFilter(cat):
if len(cat) < nMatchesRequired:
return False
- for flagKey in flagKeys:
+ for flagKey in keys.flags:
if cat.get(flagKey).any():
return False
- if not np.isfinite(cat.get(magKey)).all():
+ if not np.isfinite(cat.get(keys.mag)).all():
return False
- extendedness = cat.get(extendedKey)
+ extendedness = cat.get(keys.extended)
return np.min(extendedness) >= safeExtendedness if extended else \
np.max(extendedness) < safeExtendedness

def snrFilter(cat):
# Note that this also implicitly checks for psfSnr being non-nan.
- snr = np.median(cat.get(snrKey))
+ snr = np.median(cat.get(keys.snr))
return snrMax >= snr >= snrMin

def fullFilter(cat):
return extendedFilter(cat) and snrFilter(cat)

- # If safeSnr range is a subset of goodSnr, it's safe to only filter on snr again
+ # If brightSnr range is a subset of faintSnr, it's safe to only filter on snr again
# Otherwise, filter on flags/extendedness first, then snr
- isSafeSubset = goodSnrMax >= safeSnrMax and goodSnr <= safeSnr
- goodMatches = allMatches.where(fullFilter) if isSafeSubset else allMatches.where(extendedFilter)
- snrMin, snrMax = safeSnr, safeSnrMax
- safeMatches = goodMatches.where(snrFilter)
+ isSafeSubset = faintSnrMax >= brightSnrMax and faintSnr <= brightSnr
+ matchesFaint = allMatches.where(fullFilter) if isSafeSubset else allMatches.where(extendedFilter)
+ snrMin, snrMax = brightSnr, brightSnrMax
+ matchesBright = matchesFaint.where(snrFilter)
+ # This means that matchesFaint has had extendedFilter but not snrFilter applied
if not isSafeSubset:
- snrMin, snrMax = goodSnr, goodSnrMax
- goodMatches = goodMatches.where(snrFilter)
+ snrMin, snrMax = faintSnr, faintSnrMax
+ matchesFaint = matchesFaint.where(snrFilter)

+ return pipeBase.Struct(
+ extended=extended, keys=keys, matchesFaint=matchesFaint, matchesBright=matchesBright,
+ )


+ def summarizeSources(blob, filterResult):
+ """Calculate summary statistics for each source. These are persisted
+ as object attributes.
+ Parameters
+ ----------
+ blob : `lsst.verify.blob.Blob`
+ A verification blob to store Datums in.
+ filterResult : `lsst.pipe.base.Struct`
+ A struct containing bright and faint filter matches, as returned by `filterSources`.
+ """
# Pass field=psfMagKey so np.mean just gets that as its input
- typeMag = "model" if extended else "PSF"
+ typeMag = "model" if filterResult.extended else "PSF"
filter_name = blob['filterName']
- source_type = f'{"extended" if extended else "point"} sources"'
- blob['snr'] = Datum(quantity=goodMatches.aggregate(np.median, field=snrKey) * u.Unit(''),
+ source_type = f'{"extended" if filterResult.extended else "point"} sources"'
+ matches = filterResult.matchesFaint
+ keys = filterResult.keys
+ blob['snr'] = Datum(quantity=matches.aggregate(np.median, field=keys.snr) * u.Unit(''),
label='SNR({band})'.format(band=filter_name),
description=f'Median signal-to-noise ratio of {typeMag} magnitudes for {source_type}'
f' over multiple visits')
- blob['mag'] = Datum(quantity=goodMatches.aggregate(np.mean, field=magKey) * u.mag,
+ blob['mag'] = Datum(quantity=matches.aggregate(np.mean, field=keys.mag) * u.mag,
label='{band}'.format(band=filter_name),
description=f'Mean of {typeMag} magnitudes for {source_type} over multiple visits')
- blob['magrms'] = Datum(quantity=goodMatches.aggregate(np.std, field=magKey) * u.mag,
+ blob['magrms'] = Datum(quantity=matches.aggregate(np.std, field=keys.mag) * u.mag,
label='RMS({band})'.format(band=filter_name),
description=f'RMS of {typeMag} magnitudes for {source_type} over multiple visits')
- blob['magerr'] = Datum(quantity=goodMatches.aggregate(np.median, field=magErrKey) * u.mag,
+ blob['magerr'] = Datum(quantity=matches.aggregate(np.median, field=keys.magErr) * u.mag,
label='sigma({band})'.format(band=filter_name),
description=f'Median 1-sigma uncertainty of {typeMag} magnitudes for {source_type}'
f' over multiple visits')
# positionRmsFromCat knows how to query a group
# so we give it the whole thing by going with the default `field=None`.
- blob['dist'] = Datum(quantity=goodMatches.aggregate(positionRmsFromCat) * u.milliarcsecond,
+ blob['dist'] = Datum(quantity=matches.aggregate(positionRmsFromCat) * u.milliarcsecond,
label='d',
description=f'RMS of sky coordinates of {source_type} over multiple visits')

# These attributes are not serialized
- blob.goodMatches = goodMatches
- blob.safeMatches = safeMatches
+ blob.matchesFaint = filterResult.matchesFaint
+ blob.matchesBright = filterResult.matchesBright


def _loadPhotoCalib(butler, dataId, doApplyExternalPhotoCalib, externalPhotoCalibName):
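A subtlety in filterSources above: when the bright SNR range is a subset of the faint one (true for the defaults, faintSnr=5.0 and brightSnr=50.0 with both maxima at np.Inf), matchesFaint is built with the full flag, extendedness, and SNR filter, and matchesBright can then be derived by re-filtering matchesFaint on SNR alone. A short sketch of consuming the returned Struct (allMatches stands in for a real matched GroupView):

result = filterSources(allMatches)  # defaults: faintSnr=5.0, brightSnr=50.0
faint, bright = result.matchesFaint, result.matchesBright
assert len(bright) <= len(faint)  # with default limits, bright matches are a subset of faint ones
medianSnr = faint.aggregate(np.median, field=result.keys.snr)  # the same pattern summarizeSources uses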
2 changes: 1 addition & 1 deletion python/lsst/validate/drp/photerrmodel.py
@@ -205,7 +205,7 @@ def build_photometric_error_model(matchedMultiVisitDataset, brightSnr=100, media
matchedMultiVisitDataset['magerr'].quantity,
matchedMultiVisitDataset['magrms'].quantity,
matchedMultiVisitDataset['dist'].quantity,
- len(matchedMultiVisitDataset.goodMatches),
+ len(matchedMultiVisitDataset.matchesFaint),
brightSnr,
medianRef,
matchRef)
2 changes: 1 addition & 1 deletion python/lsst/validate/drp/validate.py
@@ -362,7 +362,7 @@ def add_measurement(measurement):
add_measurement(afx)

pa1 = measurePA1(
- metrics['validate_drp.PA1'], filterName, matchedDataset.safeMatches, matchedDataset.magKey)
+ metrics['validate_drp.PA1'], filterName, matchedDataset.matchesBright, matchedDataset.magKey)
add_measurement(pa1)

if not skipNonSrd:
