Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-25782: Add an "detect_isPrimary" flag to single frame processing schema #400

Merged
merged 4 commits into from
Aug 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 9 additions & 0 deletions python/lsst/pipe/tasks/calibrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
ApplyApCorrTask,
CatalogCalculationTask)
from lsst.meas.deblender import SourceDeblendTask
from lsst.pipe.tasks.setPrimaryFlags import SetPrimaryFlagsTask
from .fakes import BaseFakeSourcesTask
from .photoCal import PhotoCalTask

Expand Down Expand Up @@ -254,6 +255,11 @@ class CalibrateConfig(pipeBase.PipelineTaskConfig, pipelineConnections=Calibrate
target=SingleFrameMeasurementTask,
doc="Measure sources"
)
setPrimaryFlags = pexConfig.ConfigurableField(
target=SetPrimaryFlagsTask,
doc=("Set flags for primary source classification in single frame "
"processing. True if sources are not sky sources and not a parent.")
)
doApCorr = pexConfig.Field(
dtype=bool,
default=True,
Expand Down Expand Up @@ -499,6 +505,7 @@ def __init__(self, butler=None, astromRefObjLoader=None,
self.skySourceKey = self.schema.addField("sky_source", type="Flag", doc="Sky objects.")
self.makeSubtask('measurement', schema=self.schema,
algMetadata=self.algMetadata)
self.makeSubtask("setPrimaryFlags", schema=self.schema, isSingleFrame=True)
if self.config.doApCorr:
self.makeSubtask('applyApCorr', schema=self.schema)
self.makeSubtask('catalogCalculation', schema=self.schema)
Expand Down Expand Up @@ -698,6 +705,8 @@ def run(self, exposure, exposureIdInfo=None, background=None,
)
self.catalogCalculation.run(sourceCat)

self.setPrimaryFlags.run(sourceCat, includeDeblend=self.config.doDeblend)

if icSourceCat is not None and \
len(self.config.icSourceFieldsToCopy) > 0:
self.copyIcSourceFields(icSourceCat=icSourceCat,
Expand Down
4 changes: 2 additions & 2 deletions python/lsst/pipe/tasks/multiBand.py
Original file line number Diff line number Diff line change
Expand Up @@ -1093,8 +1093,8 @@ def run(self, exposure, sources, skyInfo, exposureId, ccdInputs=None, visitCatal
if self.config.doRunCatalogCalculation:
self.catalogCalculation.run(sources)

self.setPrimaryFlags.run(sources, skyInfo.skyMap, skyInfo.tractInfo, skyInfo.patchInfo,
includeDeblend=self.deblended)
self.setPrimaryFlags.run(sources, skyMap=skyInfo.skyMap, tractInfo=skyInfo.tractInfo,
patchInfo=skyInfo.patchInfo, includeDeblend=self.deblended)
if self.config.doPropagateFlags:
self.propagateFlags.run(butler, sources, ccdInputs, exposure.getWcs(), visitCatalogs, wcsUpdates)

Expand Down
169 changes: 106 additions & 63 deletions python/lsst/pipe/tasks/setPrimaryFlags.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,82 +34,125 @@ class SetPrimaryFlagsConfig(Config):


class SetPrimaryFlagsTask(Task):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added this brief class docstring:

"""Add isPrimaryKey to a given schema.

Parameters
----------
schema :
    The input schema.
isSingleFrame : `bool`
    Flag specifying if task is operating with single frame imaging.
kwargs :
    Keyword arguments passed to the task.
"""

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! The only missing piece is the type for schema. I don't know it offhand... and that's exactly why having it in the docs is useful :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, thanks! Added (it's lsst.afw.table.Schema!)

"""Add isPrimaryKey to a given schema.

Parameters
----------
schema : `lsst.afw.table.Schema`
The input schema.
isSingleFrame : `bool`
Flag specifying if task is operating with single frame imaging.
kwargs :
Keyword arguments passed to the task.
"""

ConfigClass = SetPrimaryFlagsConfig

def __init__(self, schema, **kwargs):
def __init__(self, schema, isSingleFrame=False, **kwargs):
Task.__init__(self, **kwargs)
self.schema = schema
self.isPatchInnerKey = self.schema.addField(
"detect_isPatchInner", type="Flag",
doc="true if source is in the inner region of a coadd patch",
)
self.isTractInnerKey = self.schema.addField(
"detect_isTractInner", type="Flag",
doc="true if source is in the inner region of a coadd tract",
)
self.isSingleFrame = isSingleFrame
if not self.isSingleFrame:
primaryDoc = ("true if source has no children and is in the inner region of a coadd patch "
"and is in the inner region of a coadd tract "
"and is not \"detected\" in a pseudo-filter (see config.pseudoFilterList)")
self.isPatchInnerKey = self.schema.addField(
"detect_isPatchInner", type="Flag",
doc="true if source is in the inner region of a coadd patch",
)
self.isTractInnerKey = self.schema.addField(
"detect_isTractInner", type="Flag",
doc="true if source is in the inner region of a coadd tract",
)
else:
primaryDoc = "true if source has no children and is not a sky source"
self.isPrimaryKey = self.schema.addField(
"detect_isPrimary", type="Flag",
doc="true if source has no children and is in the inner region of a coadd patch "
"and is in the inner region of a coadd tract "
"and is not \"detected\" in a pseudo-filter (see config.pseudoFilterList)",
doc=primaryDoc,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you need both "doc" and "primaryDoc"? Where do these each appear? I also noticed the coadd primary doc does not specify anything about sky sources, while the single frame one does. Is that intentional?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case, the doc string for SetPrimaryFlagsTask is different depending on whether or not self.isSingleFrame is True (line 55) or False (line 43). If true, the doc string refers only to children and sky sources (or pseudo-filters, see below), however, if false, the doc string additionally refers to coadd patch and tract selections (which would not make sense in the single frame output). We use the if logic above to define primaryDoc, which is then subsequently placed into the main self.isPrimaryKey doc string. I think that's the best way to do it, but happy to consider another approach if you have something in mind?

With regards not mentioning sky sources in the coadd primary doc, the coadd primary doc refers to "pseduo-filters", of which sky sources are included. I agree that the language here may not be immediately obvious, but this was a decision made prior to this ticket and so we were reluctant to change it. I think the logic is that other additional 'pseudo-filters' can be added in addition to sky sources which would then be picked up by this task at the coadd level, but currently, only sky sources are implemented.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, that is subtle! The order in which they're defined threw me off. For coadds, you define primaryDoc after the two docs because it includes them both. Can you follow this order for single frame even though it's not necessary, for consistency?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I don't quite follow. The two extra docs being defined for coadds are isPatchInnerKey and isTractInnerKey. These docs don't apply in the single frame case, so they're not being set there (so there's nothing to move primaryDoc above or below of). The single frame __init__ doc string is just the primaryDoc, which then gets added to isPrimaryKey. Sorry if I've missed something here, please let me know! (Edit: we could move the primaryDoc to the top of the coadd if block so that it's the first thing read, if that helps?)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry :) I'm suggesting delete line 56, explicitly set doc = "true if source has no children and is not a sky source" on line 59, and then explicitly define primaryDoc as the last thing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks again for the feedback here. Marking the issue as resolved following our earlier discussion, cheers!

)

def run(self, sources, skyMap, tractInfo, patchInfo, includeDeblend=True):
"""Set is-primary and related flags on sources
def run(self, sources, skyMap=None, tractInfo=None, patchInfo=None,
includeDeblend=True):
"""Set is-patch-inner, is-tract-inner and is-primary flags on sources.
For coadded imaging, the is-primary flag returns True when an object
has no children, is in the inner region of a coadd patch, is in the
inner region of a coadd trach, and is not detected in a pseudo-filter
(e.g., a sky_object).
For single frame imaging, the is-primary flag returns True when a
source has no children and is not a sky source.

@param[in,out] sources a SourceTable
- reads centroid fields and an nChild field
- writes is-patch-inner, is-tract-inner and is-primary flags
@param[in] skyMap sky tessellation object (subclass of lsst.skymap.BaseSkyMap)
@param[in] tractInfo tract object (subclass of lsst.skymap.TractInfo)
@param[in] patchInfo patch object (subclass of lsst.skymap.PatchInfo)
@param[in] includeDeblend include deblend information in isPrimary?
Parameters
----------
sources : `lsst.afw.table.SourceCatalog`
A sourceTable. Reads in centroid fields and an nChild field.
Writes is-patch-inner, is-tract-inner, and is-primary flags.
skyMap : `lsst.skymap.BaseSkyMap`
Sky tessellation object
tractInfo : `lsst.skymap.TractInfo`
Tract object
patchInfo : `lsst.skymap.PatchInfo`
Patch object
includeDeblend : `bool`
Include deblend information in isPrimary?
"""
nChildKey = None
if includeDeblend:
nChildKey = self.schema.find(self.config.nChildKeyName).key

# set inner flags for each source and set primary flags for sources with no children
# (or all sources if deblend info not available)
innerFloatBBox = Box2D(patchInfo.getInnerBBox())

# When the centroider fails, we can still fall back to the peak, but we don't trust
# that quite as much - so we use a slightly smaller box for the patch comparison.
# That's trickier for the tract comparison, so we just use the peak without extra
# care there.
shrunkInnerFloatBBox = Box2D(innerFloatBBox)
shrunkInnerFloatBBox.grow(-1)

pseudoFilterKeys = []
for filt in self.config.pseudoFilterList:
try:
pseudoFilterKeys.append(self.schema.find("merge_peak_%s" % filt).getKey())
except Exception:
self.log.warn("merge_peak is not set for pseudo-filter %s" % filt)

tractId = tractInfo.getId()
for source in sources:
centroidPos = source.getCentroid()
if numpy.any(numpy.isnan(centroidPos)):
continue
if source.getCentroidFlag():
# Use a slightly smaller box to guard against bad centroids (see above)
isPatchInner = shrunkInnerFloatBBox.contains(centroidPos)
else:
isPatchInner = innerFloatBBox.contains(centroidPos)
source.setFlag(self.isPatchInnerKey, isPatchInner)

skyPos = source.getCoord()
sourceInnerTractId = skyMap.findTract(skyPos).getId()
isTractInner = sourceInnerTractId == tractId
source.setFlag(self.isTractInnerKey, isTractInner)

if nChildKey is None or source.get(nChildKey) == 0:
for pseudoFilterKey in pseudoFilterKeys:
if source.get(pseudoFilterKey):
isPseudo = True
break
# coadd case
if not self.isSingleFrame:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add comment explicitly stating this is the coadd case

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment added.

# set inner flags for each source and set primary flags for sources with no children
# (or all sources if deblend info not available)
innerFloatBBox = Box2D(patchInfo.getInnerBBox())

# When the centroider fails, we can still fall back to the peak, but we don't trust
# that quite as much - so we use a slightly smaller box for the patch comparison.
# That's trickier for the tract comparison, so we just use the peak without extra
# care there.
shrunkInnerFloatBBox = Box2D(innerFloatBBox)
shrunkInnerFloatBBox.grow(-1)

pseudoFilterKeys = []
for filt in self.config.pseudoFilterList:
try:
pseudoFilterKeys.append(self.schema.find("merge_peak_%s" % filt).getKey())
except Exception:
self.log.warn("merge_peak is not set for pseudo-filter %s" % filt)

tractId = tractInfo.getId()
for source in sources:
centroidPos = source.getCentroid()
if numpy.any(numpy.isnan(centroidPos)):
continue
if source.getCentroidFlag():
# Use a slightly smaller box to guard against bad centroids (see above)
isPatchInner = shrunkInnerFloatBBox.contains(centroidPos)
else:
isPseudo = False
isPatchInner = innerFloatBBox.contains(centroidPos)
source.setFlag(self.isPatchInnerKey, isPatchInner)

skyPos = source.getCoord()
sourceInnerTractId = skyMap.findTract(skyPos).getId()
isTractInner = sourceInnerTractId == tractId
source.setFlag(self.isTractInnerKey, isTractInner)

if nChildKey is None or source.get(nChildKey) == 0:
for pseudoFilterKey in pseudoFilterKeys:
if source.get(pseudoFilterKey):
isPseudo = True
break
else:
isPseudo = False

source.setFlag(self.isPrimaryKey, isPatchInner and isTractInner and not isPseudo)

source.setFlag(self.isPrimaryKey, isPatchInner and isTractInner and not isPseudo)
# single frame case
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add comment indicating this is the single frame case, especially since the if was quite long

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comments added prior to the start of both if and else.

hasSkySources = True if "sky_source" in sources.schema else False
for source in sources:
hasNoChildren = True if nChildKey is None or source.get(nChildKey) == 0 else False
isSkySource = False
if hasSkySources:
if source["sky_source"]:
isSkySource = True
source.setFlag(self.isPrimaryKey, hasNoChildren and not isSkySource)
75 changes: 75 additions & 0 deletions tests/test_isPrimaryFlag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# This file is part of pipe_tasks.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import os
import unittest

import lsst.afw.image as afwImage
import lsst.utils.tests
from lsst.utils import getPackageDir
from lsst.log import Log
from lsst.pipe.tasks.characterizeImage import CharacterizeImageTask, CharacterizeImageConfig
from lsst.pipe.tasks.calibrate import CalibrateTask, CalibrateConfig


class IsPrimaryTestCase(lsst.utils.tests.TestCase):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it would be quick to add a parallel test for the coadd case, that would be lovely, but it's not essential.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the feedback on this. On balance, as this ticket is focussed on the single frame case, we think it best to push the coadd unit test to a future ticket and leave this one as-is, but this is certainly something worth coming back to once this has been merged.


def setUp(self):
# Load sample input from disk
expPath = os.path.join(getPackageDir("pipe_tasks"), "tests", "data", "v695833-e0-c000-a00.sci.fits")
self.exposure = afwImage.ExposureF(expPath)
# set log level so that warnings do not display
Log.getLogger("calibrate").setLevel(Log.ERROR)

def tearDown(self):
del self.exposure

def testIsPrimaryFlag(self):
"""Tests detect_isPrimary column gets added when run, and that sources
labelled as detect_isPrimary are not sky sources and have no children.
"""
charImConfig = CharacterizeImageConfig()
charImTask = CharacterizeImageTask(config=charImConfig)
charImResults = charImTask.run(self.exposure)
calibConfig = CalibrateConfig()
calibConfig.doAstrometry = False
calibConfig.doPhotoCal = False
calibTask = CalibrateTask(config=calibConfig)
calibResults = calibTask.run(charImResults.exposure)
outputCat = calibResults.outputCat
self.assertTrue("detect_isPrimary" in outputCat.schema.getNames())
# make sure all sky sources are flagged as not primary
self.assertEqual(sum((outputCat["detect_isPrimary"]) & (outputCat["sky_source"])), 0)
# make sure all parent sources are flagged as not primary
self.assertEqual(sum((outputCat["detect_isPrimary"]) & (outputCat["deblend_nChild"] > 0)), 0)


class MemoryTester(lsst.utils.tests.MemoryTestCase):
pass


def setup_module(module):
lsst.utils.tests.init()


if __name__ == "__main__":
lsst.utils.tests.init()
unittest.main()
2 changes: 1 addition & 1 deletion tests/test_skySources.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# This file is part of package_name.
# This file is part of pipe_tasks.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
Expand Down