Allow DatasetType to be marked for later load
Although DatasetTypes need to be known prior to task execution in order to
build a QuantumGraph, not all data products need to be loaded before task
execution. This change introduces a way to indicate that a DatasetType will be
read in and handled manually by a task.
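As a hedged illustration of how a task configuration might opt in (the task,
dataset, and dimension names below are hypothetical, and the exact
InputDatasetField spelling is assumed from this era of pipe_base):

    from lsst.pipe.base import InputDatasetField, PipelineTaskConfig

    class ExampleConfig(PipelineTaskConfig):
        # The DatasetType is still declared so the QuantumGraph can be built,
        # but manualLoad=True asks the framework not to pre-load the data;
        # the task's runQuantum will fetch it from the butler itself.
        coadd = InputDatasetField(
            doc="Input coadd; read manually inside runQuantum",
            name="deepCoadd",
            storageClass="ExposureF",
            dimensions=("Tract", "Patch", "AbstractFilter"),
            manualLoad=True,
        )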
natelust committed Jan 16, 2019
1 parent 2c6bf4a commit 308f1ea
Showing 3 changed files with 29 additions and 6 deletions.
18 changes: 15 additions & 3 deletions python/lsst/pipe/base/config.py
@@ -129,7 +129,8 @@ def wrappedFunc(*, doc, storageClass, check=None, name="", nameTemplate=''):
extraFields = ""
elif issubclass(dtype, _DatasetTypeConfig):
# Handle dataset types like InputDatasetConfig, note these take a dimensions argument
-def wrappedFunc(*, doc, dimensions, storageClass, name="", scalar=False, check=None, nameTemplate=''):
+def wrappedFunc(*, doc, dimensions, storageClass, name="", scalar=False, check=None, nameTemplate='',
+                manualLoad=False):
return factory(**{k: v for k, v in locals().items() if k != 'factory'})
# Set the string corresponding to the dimensions parameter documentation;
# formatting is to support final output of the docstring variable
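The locals() trick in wrappedFunc deserves a note: because factory is a free
(closure) variable referenced inside the function, it appears in locals() and
must be filtered out before the keyword arguments are forwarded. A minimal
standalone illustration of the pattern (names here are invented for the
example):

    def make_wrapper(factory):
        def wrapped(*, a, b=1, c=2):
            # 'factory' shows up in locals() because it is a referenced
            # free variable, so exclude it before forwarding.
            return factory(**{k: v for k, v in locals().items() if k != 'factory'})
        return wrapped

    print(make_wrapper(dict)(a=0))  # {'a': 0, 'b': 1, 'c': 2}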
@@ -145,11 +146,15 @@ def wrappedFunc(*, doc, dimensions, storageClass, name="", scalar=False, check=N
Template for the `name` field which is specified as a python formattable
string. The template is formatted during the configuration of a Config
class with a user defined string. Defaults to empty string, in which
-case no formatting is done."""
+case no formatting is done.
+manualLoad : `bool`
+    Indicates runQuantum will not load the data from the butler, and that
+    the task intends to do the loading itself. Defaults to `False`.
+"""
# Set a string to add the dimensions argument to the list of arguments in the
# docstring explanation section; formatting is to support final output
# of the docstring variable
-extraFields = ", dimensions, scalar, nameTemplate"
+extraFields = ", dimensions, scalar, nameTemplate, manualLoad"
else:
# if someone tries to create a config factory for a type that is not
# handled, raise an exception
@@ -245,6 +250,13 @@ class should not be used directly, instead one of `InputDatasetConfig` or
"objects/DataIds will be unpacked before calling task "
"methods, returned data is expected to contain single "
"objects as well."))
+manualLoad = pexConfig.Field(dtype=bool,
+                             default=False,
+                             optional=True,
+                             doc=("If this is set to True, the class intends to load "
+                                  "the data associated with this Configurable Field "
+                                  "manually, and runQuantum should not load it. Should "
+                                  "not be set by a configuration override."))


class InputDatasetConfig(_DatasetTypeConfig):
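Note that the new field is declared optional=True with a False default, and
its doc string warns against setting it via configuration overrides: the flag
presumably expresses a contract of the task's own code (whether its
runQuantum does its own loading) rather than a user-tunable option.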
1 change: 1 addition & 0 deletions python/lsst/pipe/base/graphBuilder.py
@@ -337,6 +337,7 @@ def _dataRefKey(dataRef):
for key in dataRefs.keys():
if dataRefs[key].id is not None:
dataRefs[key] = self.registry.getDataset(dataRefs[key].id)
+self.registry.expandDataId(dataRefs[key].dataId, region=True)

# all nodes for this task
quanta = []
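The one-line graphBuilder change expands each resolved dataset's data ID,
including its spatial region; presumably this is so that tasks which load
their inputs manually receive fully populated data IDs to query with, rather
than the minimal IDs that suffice when the framework does the loading.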
16 changes: 13 additions & 3 deletions python/lsst/pipe/base/pipelineTask.py
@@ -61,9 +61,11 @@ class DatasetTypeDescriptor:
`True` if this is a scalar dataset.
"""

-def __init__(self, datasetType, scalar):
+def __init__(self, datasetType, scalar, manualLoad):
self._datasetType = datasetType
self._scalar = scalar
+self._manualLoad = manualLoad
+

@classmethod
def fromConfig(cls, datasetConfig):
@@ -84,7 +86,8 @@ def fromConfig(cls, datasetConfig):
storageClass=datasetConfig.storageClass)
# Use scalar=True for Init dataset types
scalar = getattr(datasetConfig, 'scalar', True)
-return cls(datasetType=datasetType, scalar=scalar)
+manualLoad = getattr(datasetConfig, 'manualLoad', False)
+return cls(datasetType=datasetType, scalar=scalar, manualLoad=manualLoad)

@property
def datasetType(self):
@@ -98,6 +101,12 @@ def scalar(self):
"""
return self._scalar

+@property
+def manualLoad(self):
+    """`True` if the task will handle loading the data.
+    """
+    return self._manualLoad


class PipelineTask(Task):
"""Base class for all pipeline tasks.
@@ -478,7 +487,8 @@ def makeDataRefs(descriptors, refMap):
keyDataRefs = keyDataRefs[0]
keyDataIds = keyDataIds[0]
dataIds[key] = keyDataIds
-dataRefs[key] = keyDataRefs
+if not descriptor.manualLoad:
+    dataRefs[key] = keyDataRefs
return dataIds, dataRefs

# lists of DataRefs/DataIds for input datasets
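The effect of the makeDataRefs change is that manual-load dataset types still
contribute their dataIds, but no DataRefs are handed to the task, so the
default data-loading path skips them. Putting it together, a task opting in
might look like the following hedged sketch; the Quantum attributes and
butler call reflect my reading of the Gen3 API of this period, and the class
names are hypothetical:

    from lsst.pipe.base import PipelineTask

    class ManualLoadTask(PipelineTask):
        ConfigClass = ExampleConfig  # the hypothetical config sketched above

        def runQuantum(self, quantum, butler):
            # With manualLoad=True the framework pre-loads nothing for
            # "deepCoadd"; fetch only what is actually needed.
            for ref in quantum.predictedInputs["deepCoadd"]:
                exposure = butler.get(ref)
                # ... process exposure ...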
