Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-23026: Make sure ic source fields are being propagated #334

Merged
merged 3 commits into from
Feb 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 0 additions & 2 deletions python/lsst/pipe/tasks/calibrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,12 @@ class CalibrateConnections(pipeBase.PipelineTaskConnections, dimensions=("instru
doc="Schema produced by characterize image task, used to initialize this task",
name="icSrc_schema",
storageClass="SourceCatalog",
multiple=True
)

outputSchema = cT.InitOutput(
doc="Schema after CalibrateTask has been initialized",
name="src_schema",
storageClass="SourceCatalog",
multiple=True
)

exposure = cT.Input(
Expand Down
111 changes: 106 additions & 5 deletions python/lsst/pipe/tasks/processCcdWithFakes.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from lsst.obs.base import ExposureIdInfo
from lsst.pipe.base import PipelineTask, PipelineTaskConfig, CmdLineTask, PipelineTaskConnections
import lsst.pipe.base.connectionTypes as cT
import lsst.afw.table as afwTable


__all__ = ["ProcessCcdWithFakesConfig", "ProcessCcdWithFakesTask"]
Expand Down Expand Up @@ -109,6 +110,20 @@ class ProcessCcdWithFakesConfig(PipelineTaskConfig,
default="deep",
)

calibrationFieldsToCopy = pexConfig.ListField(
dtype=str,
default=("calib_psf_candidate", "calib_psf_used", "calib_psf_reserved"),
doc=("Fields to copy from the icSource catalog to the output catalog "
"for matching sources Any missing fields will trigger a "
"RuntimeError exception.")
)

matchRadiusPix = pexConfig.Field(
dtype=float,
default=3,
doc=("Match radius for matching icSourceCat objects to sourceCat objects (pixels)"),
)

insertFakes = pexConfig.ConfigurableField(target=InsertFakesTask,
doc="Configuration for the fake sources")

Expand All @@ -124,7 +139,7 @@ class ProcessCcdWithFakesConfig(PipelineTaskConfig,
doc="The apply aperture correction task to use.")

catalogCalculation = pexConfig.ConfigurableField(target=CatalogCalculationTask,
doc="The catalog calculation ask to use.")
doc="The catalog calculation task to use.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This belongs in a separate commit.


def setDefaults(self):
self.detection.reEstimateBackground = False
Expand Down Expand Up @@ -167,8 +182,8 @@ class ProcessCcdWithFakesTask(PipelineTask, CmdLineTask):
_DefaultName = "processCcdWithFakes"
ConfigClass = ProcessCcdWithFakesConfig

def __init__(self, schema=None, **kwargs):
"""Initalize tings! This should go above in the class docstring
def __init__(self, schema=None, butler=None, **kwargs):
"""Initalize things! This should go above in the class docstring
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These two lines belong in separate commits.

"""

super().__init__(**kwargs)
Expand Down Expand Up @@ -221,8 +236,10 @@ def runDataRef(self, dataRef):
wcs = calexp.getWcs()
photoCalib = calexp.getPhotoCalib()

icSourceCat = dataRef.get("icSrc", immediate=True)

resultStruct = self.run(fakeCat, calexp, wcs=wcs, photoCalib=photoCalib,
exposureIdInfo=exposureIdInfo)
exposureIdInfo=exposureIdInfo, icSourceCat=icSourceCat)

dataRef.put(resultStruct.outputExposure, "fakes_calexp")
dataRef.put(resultStruct.outputCat, "fakes_src")
Expand All @@ -249,7 +266,7 @@ def _makeArgumentParser(cls):
ContainerClass=PerTractCcdDataIdContainer)
return parser

def run(self, fakeCat, exposure, wcs=None, photoCalib=None, exposureIdInfo=None):
def run(self, fakeCat, exposure, wcs=None, photoCalib=None, exposureIdInfo=None, icSourceCat=None):
"""Add fake sources to a calexp and then run detection, deblending and measurement.

Parameters
Expand Down Expand Up @@ -309,6 +326,90 @@ def run(self, fakeCat, exposure, wcs=None, photoCalib=None, exposureIdInfo=None)
self.measurement.run(measCat=sourceCat, exposure=exposure, exposureId=exposureIdInfo.expId)
self.applyApCorr.run(catalog=sourceCat, apCorrMap=exposure.getInfo().getApCorrMap())
self.catalogCalculation.run(sourceCat)
sourceCat = self.copyCalibrationFields(icSourceCat, sourceCat)

resultStruct = pipeBase.Struct(outputExposure=exposure, outputCat=sourceCat)
return resultStruct

def copyCalibrationFields(self, calibCat, sourceCat):
"""Match sources in calibCat and sourceCat and copy the specified fields

Parameters
----------
calibCat : `lsst.afw.table.SourceCatalog`
Catalog from which to copy fields.
sourceCat : `lsst.afw.table.SourceCatalog`
Catalog to which to copy fields.

Returns
-------
newCat : `lsst.afw.table.SourceCatalog`
Catalog which includes the copied fields.

The fields copied are those specified by `config.calibrationFieldsToCopy` that actually exist
in the schema of `calibCat`.

This version was based on and adapted from the one in calibrateTask.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How much code is shared? Would it be possible to factor it out of both places?
I've also wondered if it's worth making this operation a Task in its own right, to allow more extensive re-use.

"""

# Make a new SourceCatalog with the data from sourceCat so that we can add the new columns to it
sourceSchemaMapper = afwTable.SchemaMapper(sourceCat.schema)
sourceSchemaMapper.addMinimalSchema(sourceCat.schema, True)

calibSchemaMapper = afwTable.SchemaMapper(calibCat.schema, sourceCat.schema)

# Add the desired columns from the config option calibrationFieldsToCopy
missingFieldNames = []
for fieldName in self.config.calibrationFieldsToCopy:
if fieldName in calibCat.schema:
schemaItem = calibCat.schema.find(fieldName)
calibSchemaMapper.editOutputSchema().addField(schemaItem.getField())
schema = calibSchemaMapper.editOutputSchema()
calibSchemaMapper.addMapping(schemaItem.getKey(), schema.find(fieldName).getField())
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is prolly a matter of personal preference, but since everything you're doing is embedded in a for loop, I'd put a continue in the except block, get rid of the else and move its contents to the left --- I'd find that easier to read. Totally up to you.

missingFieldNames.append(fieldName)
if missingFieldNames:
raise RuntimeError(f"calibCat is missing fields {missingFieldNames} specified in "
"calibrationFieldsToCopy")

self.calibSourceKey = calibSchemaMapper.addOutputField(afwTable.Field["Flag"]("calib_detected",
"Source was detected as an icSource"))

schema = calibSchemaMapper.getOutputSchema()
newCat = afwTable.SourceCatalog(schema)
newCat.reserve(len(sourceCat))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's prolly not necessary (I think extend does it for you), but glad to see you're thinking about memory allocation issues.

newCat.extend(sourceCat, sourceSchemaMapper)

# Set the aliases so it doesn't complain.
for k, v in sourceCat.schema.getAliasMap().items():
newCat.schema.getAliasMap().set(k, v)

select = newCat["deblend_nChild"] == 0
matches = afwTable.matchXy(newCat[select], calibCat, self.config.matchRadiusPix)
# Check that no sourceCat sources are listed twice (we already know
# that each match has a unique calibCat source ID, due to using
# that ID as the key in bestMatches)
numMatches = len(matches)
numUniqueSources = len(set(m[1].getId() for m in matches))
if numUniqueSources != numMatches:
self.log.warn("%d calibCat sources matched only %d sourceCat sources", numMatches,
numUniqueSources)

self.log.info("Copying flags from calibCat to sourceCat for %s sources", numMatches)

# For each match: set the calibSourceKey flag and copy the desired
# fields
for src, calibSrc, d in matches:
src.setFlag(self.calibSourceKey, True)
# src.assign copies the footprint from calibSrc, which we don't want
# (DM-407)
# so set calibSrc's footprint to src's footprint before src.assign,
# then restore it
calibSrcFootprint = calibSrc.getFootprint()
try:
calibSrc.setFootprint(src.getFootprint())
src.assign(calibSrc, calibSchemaMapper)
finally:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

calibSrc.setFootprint(calibSrcFootprint)

return newCat