Allow DatasetType to be marked for later load
Although DatasetTypes need to be known prior to task execution in order to
build a QuantumGraph, not all data products need to be loaded before task
execution. This change introduces a way to indicate that a DatasetType will be
read in and handled manually by a task.
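As a hedged illustration of how a task configuration might opt in (the task,
dataset, and dimension names below are hypothetical, and the exact
InputDatasetField spelling is assumed from this era of pipe_base):

    from lsst.pipe.base import InputDatasetField, PipelineTaskConfig

    class ExampleConfig(PipelineTaskConfig):
        # The DatasetType is still declared so the QuantumGraph can be built,
        # but manualLoad=True asks the framework not to pre-load the data;
        # the task's runQuantum will fetch it from the butler itself.
        coadd = InputDatasetField(
            doc="Input coadd; read manually inside runQuantum",
            name="deepCoadd",
            storageClass="ExposureF",
            dimensions=("Tract", "Patch", "AbstractFilter"),
            manualLoad=True,
        )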
natelust committed Jan 16, 2019
1 parent 2c6bf4a commit 308f1ea
Showing 3 changed files with 29 additions and 6 deletions.
18 changes: 15 additions & 3 deletions python/lsst/pipe/base/config.py
@@ -129,7 +129,8 @@ def wrappedFunc(*, doc, storageClass, check=None, name="", nameTemplate=''):
extraFields = ""
elif issubclass(dtype, _DatasetTypeConfig):
# Handle dataset types like InputDatasetConfig, note these take a dimensions argument
-def wrappedFunc(*, doc, dimensions, storageClass, name="", scalar=False, check=None, nameTemplate=''):
+def wrappedFunc(*, doc, dimensions, storageClass, name="", scalar=False, check=None, nameTemplate='',
+                manualLoad=False):
return factory(**{k: v for k, v in locals().items() if k != 'factory'})
# Set the string corresponding to the dimensions parameter documentation;
# formatting is to support final output of the docstring variable
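The locals() trick in wrappedFunc deserves a note: because factory is a free
(closure) variable referenced inside the function, it appears in locals() and
must be filtered out before the keyword arguments are forwarded. A minimal
standalone illustration of the pattern (names here are invented for the
example):

    def make_wrapper(factory):
        def wrapped(*, a, b=1, c=2):
            # 'factory' shows up in locals() because it is a referenced
            # free variable, so exclude it before forwarding.
            return factory(**{k: v for k, v in locals().items() if k != 'factory'})
        return wrapped

    print(make_wrapper(dict)(a=0))  # {'a': 0, 'b': 1, 'c': 2}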
@@ -145,11 +146,15 @@ def wrappedFunc(*, doc, dimensions, storageClass, name="", scalar=False, check=N
Template for the `name` field which is specified as a python formattable
string. The template is formatted during the configuration of a Config
class with a user defined string. Defaults to empty string, in which
-case no formatting is done."""
+case no formatting is done.
+manualLoad : `bool`
+    Indicates runQuantum will not load the data from the butler, and that
+    the task intends to do the loading itself. Defaults to `False`.
+"""
# Set a string to add the dimensions argument to the list of arguments in the
# docstring explanation section; formatting is to support final output
# of the docstring variable
-extraFields = ", dimensions, scalar, nameTemplate"
+extraFields = ", dimensions, scalar, nameTemplate, manualLoad"
else:
# if someone tries to create a config factory for a type that is not
# handled, raise an exception
@@ -245,6 +250,13 @@ class should not be used directly, instead one of `InputDatasetConfig` or
"objects/DataIds will be unpacked before calling task "
"methods, returned data is expected to contain single "
"objects as well."))
+manualLoad = pexConfig.Field(dtype=bool,
+                             default=False,
+                             optional=True,
+                             doc=("If this is set to True, the class intends to load "
+                                  "the data associated with this Configurable Field "
+                                  "manually, and runQuantum should not load it. Should "
+                                  "not be set by a configuration override."))


class InputDatasetConfig(_DatasetTypeConfig):
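Note that the new field is declared optional=True with a False default, and
its doc string warns against setting it via configuration overrides: the flag
presumably expresses a contract of the task's own code (whether its
runQuantum does its own loading) rather than a user-tunable option.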
1 change: 1 addition & 0 deletions python/lsst/pipe/base/graphBuilder.py
@@ -337,6 +337,7 @@ def _dataRefKey(dataRef):
for key in dataRefs.keys():
if dataRefs[key].id is not None:
dataRefs[key] = self.registry.getDataset(dataRefs[key].id)
+self.registry.expandDataId(dataRefs[key].dataId, region=True)

# all nodes for this task
quanta = []
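The one-line graphBuilder change expands each resolved dataset's data ID,
including its spatial region; presumably this is so that tasks which load
their inputs manually receive fully populated data IDs to query with, rather
than the minimal IDs that suffice when the framework does the loading.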
16 changes: 13 additions & 3 deletions python/lsst/pipe/base/pipelineTask.py
@@ -61,9 +61,11 @@ class DatasetTypeDescriptor:
`True` if this is a scalar dataset.
"""

-def __init__(self, datasetType, scalar):
+def __init__(self, datasetType, scalar, manualLoad):
self._datasetType = datasetType
self._scalar = scalar
+self._manualLoad = manualLoad
+

@classmethod
def fromConfig(cls, datasetConfig):
@@ -84,7 +86,8 @@ def fromConfig(cls, datasetConfig):
storageClass=datasetConfig.storageClass)
# Use scalar=True for Init dataset types
scalar = getattr(datasetConfig, 'scalar', True)
-return cls(datasetType=datasetType, scalar=scalar)
+manualLoad = getattr(datasetConfig, 'manualLoad', False)
+return cls(datasetType=datasetType, scalar=scalar, manualLoad=manualLoad)

@property
def datasetType(self):
@@ -98,6 +101,12 @@ def scalar(self):
"""
return self._scalar

+@property
+def manualLoad(self):
+    """`True` if the task will handle loading the data.
+    """
+    return self._manualLoad


class PipelineTask(Task):
"""Base class for all pipeline tasks.
@@ -478,7 +487,8 @@ def makeDataRefs(descriptors, refMap):
keyDataRefs = keyDataRefs[0]
keyDataIds = keyDataIds[0]
dataIds[key] = keyDataIds
-dataRefs[key] = keyDataRefs
+if not descriptor.manualLoad:
+    dataRefs[key] = keyDataRefs
return dataIds, dataRefs

# lists of DataRefs/DataIds for input datasets
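The effect of the makeDataRefs change is that manual-load dataset types still
contribute their dataIds, but no DataRefs are handed to the task, so the
default data-loading path skips them. Putting it together, a task opting in
might look like the following hedged sketch; the Quantum attributes and
butler call reflect my reading of the Gen3 API of this period, and the class
names are hypothetical:

    from lsst.pipe.base import PipelineTask

    class ManualLoadTask(PipelineTask):
        ConfigClass = ExampleConfig  # the hypothetical config sketched above

        def runQuantum(self, quantum, butler):
            # With manualLoad=True the framework pre-loads nothing for
            # "deepCoadd"; fetch only what is actually needed.
            for ref in quantum.predictedInputs["deepCoadd"]:
                exposure = butler.get(ref)
                # ... process exposure ...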
