Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-32454: Separate dataset dimensions from requested dimensions in queries. #593

Merged
merged 1 commit into the base branch on Nov 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/changes/DM-32454.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Make it possible to run `queryDimensionRecords` while constraining on the existence of a dataset whose dimensions are not a subset of the record element's dependencies (e.g. `raw` and `exposure`).
6 changes: 3 additions & 3 deletions python/lsst/daf/butler/registries/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -923,6 +923,7 @@ def queryDatasets(self, datasetType: Any, *,
bind=bind,
defaults=self.defaults.dataId,
check=check,
datasets=[datasetType],
)
builder = self._makeQueryBuilder(summary)
# Add the dataset subquery to the query, telling the QueryBuilder to
Expand All @@ -948,7 +949,6 @@ def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimens
standardizedDataId = self.expandDataId(dataId, **kwargs)
standardizedDatasetTypes = set()
requestedDimensions = self.dimensions.extract(dimensions)
queryDimensionNames = set(requestedDimensions.names)
missing: List[str] = []
if datasets is not None:
if not collections:
Expand All @@ -961,7 +961,6 @@ def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimens
# times below).
collections = CollectionQuery.fromExpression(collections)
for datasetType in self.queryDatasetTypes(datasets, components=components, missing=missing):
queryDimensionNames.update(datasetType.dimensions.names)
# If any matched dataset type is a component, just operate on
# its parent instead, because Registry doesn't know anything
# about what components exist, and here (unlike queryDatasets)
Expand All @@ -974,12 +973,13 @@ def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimens
raise TypeError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")

summary = queries.QuerySummary(
requested=DimensionGraph(self.dimensions, names=queryDimensionNames),
requested=requestedDimensions,
dataId=standardizedDataId,
expression=where,
bind=bind,
defaults=self.defaults.dataId,
check=check,
datasets=standardizedDatasetTypes,
)
builder = self._makeQueryBuilder(
summary,
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/registry/queries/_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def joinDataset(self, datasetType: DatasetType, collections: Any, *,
the inner join that would normally be present), the full query will
return no results.
"""
assert datasetType.dimensions.issubset(self.summary.requested)
assert datasetType in self.summary.datasets
if isResult and findFirst:
collections = CollectionSearch.fromExpression(collections)
else:
Expand Down
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/registry/queries/_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
)
from ..interfaces import Database
from ._query import Query
from ._structs import QuerySummary


class DataCoordinateQueryResults(DataCoordinateIterable):
Expand Down Expand Up @@ -295,7 +296,6 @@ def findDatasets(self, datasetType: Union[DatasetType, str], collections: Any, *
f"the DataCoordinateQueryResult used as input to the search, but "
f"{datasetType.name} has dimensions {datasetType.dimensions}, while the input "
f"dimensions are {self.graph}.")
builder = self._query.makeBuilder()
if datasetType.isComponent():
# We were given a true DatasetType instance, but it's a component.
parentName, componentName = datasetType.nameAndComponent()
Expand All @@ -304,6 +304,8 @@ def findDatasets(self, datasetType: Union[DatasetType, str], collections: Any, *
components = [componentName]
else:
components = [None]
summary = QuerySummary(self.graph, whereRegion=self._query.whereRegion, datasets=[datasetType])
builder = self._query.makeBuilder(summary)
builder.joinDataset(datasetType, collections=collections, findFirst=findFirst)
query = builder.finish(joinMissing=False)
return ParentDatasetQueryResults(db=self._db, query=query, components=components,
Expand Down
17 changes: 16 additions & 1 deletion python/lsst/daf/butler/registry/queries/_structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
__all__ = ["QuerySummary", "RegistryManagers"] # other classes here are local to subpackage

from dataclasses import dataclass
from typing import AbstractSet, Any, Iterator, List, Mapping, Optional, Type, Union
from typing import AbstractSet, Any, Iterable, Iterator, List, Mapping, Optional, Type, Union

from sqlalchemy.sql import ColumnElement

Expand Down Expand Up @@ -266,6 +266,12 @@ class QuerySummary:
query expression, keyed by the identifiers they replace.
defaults : `DataCoordinate`, optional
A data ID containing default for governor dimensions.
datasets : `Iterable` [ `DatasetType` ], optional
Dataset types whose searches may be joined into the query. Callers
must still call `QueryBuilder.joinDataset` explicitly to control how
that join happens (e.g. which collections are searched), but by
declaring them here first we can ensure that the query includes the
right dimensions for those joins.
check : `bool`
If `True` (default) check the query for consistency. This may reject
some valid queries that resemble common mistakes (e.g. queries for
Expand All @@ -277,6 +283,7 @@ def __init__(self, requested: DimensionGraph, *,
whereRegion: Optional[Region] = None,
bind: Optional[Mapping[str, Any]] = None,
defaults: Optional[DataCoordinate] = None,
datasets: Iterable[DatasetType] = (),
check: bool = True):
self.requested = requested
if expression is None:
Expand All @@ -287,6 +294,7 @@ def __init__(self, requested: DimensionGraph, *,
raise TypeError("New bind parameters passed, but expression is already a QueryWhereExpression.")
self.where = expression.attach(self.requested, dataId=dataId, region=whereRegion, defaults=defaults,
check=check)
self.datasets = NamedValueSet(datasets).freeze()

requested: DimensionGraph
"""Dimensions whose primary keys should be included in the result rows of
Expand All @@ -298,6 +306,11 @@ def __init__(self, requested: DimensionGraph, *,
query (`QueryWhereClause`).
"""

datasets: NamedValueAbstractSet[DatasetType]
"""Dataset types whose searches may be joined into the query
(`NamedValueAbstractSet` [ `DatasetType` ]).
"""

@property
def universe(self) -> DimensionUniverse:
"""All known dimensions (`DimensionUniverse`).
Expand Down Expand Up @@ -364,6 +377,8 @@ def mustHaveKeysJoined(self) -> DimensionGraph:
dataset.
"""
names = set(self.requested.names | self.where.dimensions.names)
for dataset_type in self.datasets:
names.update(dataset_type.dimensions.names)
return DimensionGraph(self.universe, names=names)

@property # type: ignore
Expand Down
17 changes: 17 additions & 0 deletions python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2474,3 +2474,20 @@ def testQueryResultSummaries(self):
for message in messages
)
)

def testDatasetConstrainedDimensionRecordQueries(self):
    """Check that queryDimensionRecords succeeds when the dataset
    constraint's dimensions extend beyond those of the requested
    dimension element.
    """
    registry = self.makeRegistry()
    for data_file in ("base.yaml", "datasets.yaml"):
        self.loadData(registry, data_file)
    # Constrain a physical_filter record query on the existence of
    # "flat" datasets; flat's dimensions are not a subset of
    # physical_filter's (NOTE(review): presumably they also include
    # detector — confirm against the test data), which is exactly the
    # case this query used to reject.
    results = registry.queryDimensionRecords(
        "physical_filter",
        datasets=["flat"],
        collections="imported_r",
    )
    found_names = {record.name for record in results}
    self.assertEqual(found_names, {"Cam1-R1", "Cam1-R2"})