Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-26629: switch to calibration collections instead of the calibration_label dimension #148

Merged
merged 5 commits into from
Sep 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/lsst/pipe/base/butlerQuantumContext.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,11 @@ def __init__(self, butler: Butler, quantum: Quantum):
def _get(self, ref):
if isinstance(ref, DeferredDatasetRef):
self._checkMembership(ref.datasetRef, self.allInputs)
return butler.getDeferred(ref.datasetRef)
return butler.getDirectDeferred(ref.datasetRef)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not super comfortable linking this to something that MUST have been done by some other class. At minimum you should throw an exception here if ref.id is None (or in the constructor), and add something to the documentation.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getDirectDeferred will raise an exception if the ref.id is None.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, thanks to Tim's comment on the daf_butler PR. As we discussed out-of-band, I'm also uncomfortable with this relying on logic in ctrl_mpexec, but I think the problem is where that code lives, not what it does or guarantees. But in addition to that check for not-None ID (which we can defer to butler), I'll go add some documentation to the ButlerQuantumContext class docs and anywhere else I can find that resolved DatasetRefs are a precondition.


else:
self._checkMembership(ref, self.allInputs)
return butler.get(ref)
return butler.getDirect(ref)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as above

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getDirect already gets upset if it's not a resolved ref.


def _put(self, value, ref):
self._checkMembership(ref, self.allOutputs)
Expand Down
36 changes: 34 additions & 2 deletions python/lsst/pipe/base/connectionTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
DatasetType,
DimensionUniverse,
Registry,
StorageClass,
)


Expand Down Expand Up @@ -92,6 +93,27 @@ def __get__(self, inst, klass):
# information provided by the connection class instance
return inst._connectionCache.setdefault(idSelf, self.__class__(**params))

def makeDatasetType(self, universe: DimensionUniverse,
                    parentStorageClass: Optional[StorageClass] = None):
    """Construct a true `DatasetType` instance with normalized dimensions.

    Parameters
    ----------
    universe : `lsst.daf.butler.DimensionUniverse`
        Set of all known dimensions to be used to normalize the dimension
        names specified in config.
    parentStorageClass : `lsst.daf.butler.StorageClass`, optional
        Parent storage class for component datasets; `None` otherwise.

    Returns
    -------
    datasetType : `DatasetType`
        The `DatasetType` defined by this connection.
    """
    # Base connections carry no dimensions of their own, so the dataset
    # type is built on the empty dimension graph of ``universe``.
    return DatasetType(self.name,
                       universe.empty,
                       self.storageClass,
                       parentStorageClass=parentStorageClass)


@dataclasses.dataclass(frozen=True)
class DimensionedConnection(BaseConnection):
Expand All @@ -110,8 +132,13 @@ class DimensionedConnection(BaseConnection):
dimensions : iterable of `str`
The `lsst.daf.butler.Butler` `lsst.daf.butler.Registry` dimensions used
to identify the dataset type identified by the specified name
isCalibration: `bool`, optional
`True` if this dataset type may be included in CALIBRATION-type
collections to associate it with a validity range, `False` (default)
otherwise.
"""
dimensions: typing.Iterable[str] = ()
isCalibration: bool = False

def __post_init__(self):
if isinstance(self.dimensions, str):
Expand All @@ -120,21 +147,26 @@ def __post_init__(self):
if not isinstance(self.dimensions, typing.Iterable):
raise TypeError("Dimensions must be iterable of dimensions")

def makeDatasetType(self, universe: DimensionUniverse):
def makeDatasetType(self, universe: DimensionUniverse,
parentStorageClass: Optional[StorageClass] = None):
"""Construct a true `DatasetType` instance with normalized dimensions.
Parameters
----------
universe : `lsst.daf.butler.DimensionUniverse`
Set of all known dimensions to be used to normalize the dimension
names specified in config.
parentStorageClass : `lsst.daf.butler.StorageClass`, optional
Parent storage class for component datasets; `None` otherwise.

Returns
-------
datasetType : `DatasetType`
The `DatasetType` defined by this connection.
"""
return DatasetType(self.name,
universe.extract(self.dimensions),
self.storageClass)
self.storageClass, isCalibration=self.isCalibration,
parentStorageClass=parentStorageClass)


@dataclasses.dataclass(frozen=True)
Expand Down
25 changes: 20 additions & 5 deletions python/lsst/pipe/base/graphBuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -707,22 +707,37 @@ def resolveDatasetRefs(self, registry, collections, run, commonDataIds, *, skipE
# These may have dimensions that extend beyond those we queried
# for originally, because we want to permit those data ID
# values to differ across quanta and dataset types.
# For example, the same quantum may have a flat and bias with
# a different calibration_label, or a refcat with a skypix
# value that overlaps the quantum's data ID's region, but not
# the user expression used for the initial query.
for datasetType in task.prerequisites:
lookupFunction = lookupFunctions.get(datasetType.name)
if lookupFunction is not None:
# PipelineTask has provided its own function to do the
# lookup. This always takes precedence.
refs = list(
lookupFunction(datasetType, registry, quantum.dataId, collections)
)
elif (datasetType.isCalibration()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My only comment about this block is that I had always intended to go back and make the else a free function so it was easier to use in a custom lookupFunction that wanted to extend that behavior. Perhaps you could do that and the same for the calibration bits if you think it is worth it. It's not too much code to copy for someone, and its use will be small, so if you don't think it's a good use of time I am fine with you not.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that's a good idea, but this ticket is too big and cumbersome for any more scope, and I definitely plan to revisit this block in the next month anyway.

and datasetType.dimensions <= quantum.dataId.graph
and quantum.dataId.graph.temporal):
# This is a master calibration lookup, which we have to
# handle specially because the query system can't do a
# temporal join on a non-dimension-based timespan yet.
timespan = quantum.dataId.timespan
try:
refs = [registry.findDataset(datasetType, quantum.dataId,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you be sure that this will only ever return one, is an exception raised if more than one could be found for a timespan? Does the certification process prevent this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, findDataset will raise in that case. The certification process is supposed to guarantee that is impossible for any calibration I can think of, but it's not something we can guarantee at the level of database constraints. So the exception basically says, "someone probably put together a malformed calibration repo".

collections=collections,
timespan=timespan)]
except KeyError:
# This dataset type is not present in the registry,
# which just means there are no datasets here.
refs = []
else:
# Most general case.
refs = list(registry.queryDatasets(datasetType,
collections=collections,
dataId=quantum.dataId,
deduplicate=True).expanded())
quantum.prerequisites[datasetType].update({ref.dataId: ref for ref in refs})
quantum.prerequisites[datasetType].update({ref.dataId: ref for ref in refs
if ref is not None})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If my ticket lands first, this is broken; in fact, looking at this makes me feel I need to go look back at my ticket...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nevermind, this is a QuantumScaffolding

# Actually remove any quanta that we decided to skip above.
if dataIdsToSkip:
_LOG.debug("Pruning %d quanta for task with label '%s' because all of their outputs exist.",
Expand Down
14 changes: 8 additions & 6 deletions python/lsst/pipe/base/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,14 +508,16 @@ def makeDatasetTypesSet(connectionType, freeze=True):
compositeName, componentName = DatasetType.splitDatasetTypeName(c.name)
parentStorageClass = DatasetType.PlaceholderParentStorageClass \
if componentName else None
datasetType = DatasetType(c.name, registry.dimensions.extract(dimensions),
c.storageClass,
parentStorageClass=parentStorageClass)
datasetType = c.makeDatasetType(
registry.dimensions,
parentStorageClass=parentStorageClass
)
registryDatasetType = datasetType
else:
datasetType = DatasetType(c.name, registry.dimensions.extract(dimensions),
c.storageClass,
parentStorageClass=registryDatasetType.parentStorageClass)
datasetType = c.makeDatasetType(
registry.dimensions,
parentStorageClass=registryDatasetType.parentStorageClass
)

if registryDatasetType and datasetType != registryDatasetType:
raise ValueError(f"Supplied dataset type ({datasetType}) inconsistent with "
Expand Down
36 changes: 36 additions & 0 deletions python/lsst/pipe/base/testUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,41 @@ def _refFromConnection(butler, connection, dataId, **kwargs):
from e


def _resolveTestQuantumInputs(butler, quantum):
"""Look up all input datasets a test quantum in the `Registry` to resolve
all `DatasetRef` objects (i.e. ensure they have not-`None` ``id`` and
``run`` attributes).

Parameters
----------
quantum : `~lsst.daf.butler.Quantum`
Single Quantum instance.
butler : `~lsst.daf.butler.Butler`
Data butler.
"""
# TODO (DM-26819): This function is a direct copy of
# `lsst.ctrl.mpexec.SingleQuantumExecutor.updateQuantumInputs`, but the
# `runTestQuantum` function that calls it is essentially duplicating logic
# in that class as well (albeit not verbatim). We should probably move
# `SingleQuantumExecutor` to ``pipe_base`` and see if it is directly usable
# in test code instead of having these classes at all.
for refsForDatasetType in quantum.predictedInputs.values():
newRefsForDatasetType = []
for ref in refsForDatasetType:
if ref.id is None:
resolvedRef = butler.registry.findDataset(ref.datasetType, ref.dataId,
collections=butler.collections)
if resolvedRef is None:
raise ValueError(
f"Cannot find {ref.datasetType.name} with id {ref.dataId} "
f"in collections {butler.collections}."
)
newRefsForDatasetType.append(resolvedRef)
else:
newRefsForDatasetType.append(ref)
refsForDatasetType[:] = newRefsForDatasetType
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This changes too with my ticket, the race is on, maybe I should have held back these comments? :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least (I imagine) it'll be an easy change in either case.



def runTestQuantum(task, butler, quantum, mockRun=True):
"""Run a PipelineTask on a Quantum.

Expand All @@ -185,6 +220,7 @@ def runTestQuantum(task, butler, quantum, mockRun=True):
If ``mockRun`` is set, the mock that replaced ``run``. This object can
be queried for the arguments ``runQuantum`` passed to ``run``.
"""
_resolveTestQuantumInputs(butler, quantum)
butlerQc = ButlerQuantumContext(butler, quantum)
connections = task.config.ConnectionsClass(config=task.config)
inputRefs, outputRefs = connections.buildDatasetRefs(quantum)
Expand Down
12 changes: 3 additions & 9 deletions tests/test_pipelineTask.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,10 @@ def __init__(self):
self.datasets = {}
self.registry = SimpleNamespace(dimensions=DimensionUniverse())

def get(self, datasetRefOrType, dataId=None):
if isinstance(datasetRefOrType, DatasetRef):
dataId = datasetRefOrType.dataId
dsTypeName = datasetRefOrType.datasetType.name
else:
dsTypeName = datasetRefOrType
key = dataId
dsdata = self.datasets.get(dsTypeName)
def getDirect(self, ref):
    """Return the in-memory dataset stored for a resolved ``ref``.

    Looks up the dataset-type name, then the data ID, in the nested
    ``self.datasets`` mapping; returns `None` when either level is missing.
    """
    perType = self.datasets.get(ref.datasetType.name)
    return perType.get(ref.dataId) if perType else None

def put(self, inMemoryDataset, dsRef, producer=None):
Expand Down