Skip to content

Commit

Permalink
Separate dataset dimensions from requested dimensions in queries.
Browse files Browse the repository at this point in the history
The dimensions passed directly to queryDataIds were always intended to
be the dimensions the caller wanted back, which may not be the same as
the dimensions used to constrain the query.  We'd partially handled
that distinction in the past, but we had been rolling dataset
constraint dimensions into the requested set.  This was most obvious
in queryDimensionRecords, which didn't quite handle this properly, and
hence couldn't handle constraints on datasets that have dimensions
beyond the dimension records' own.  Fixing queryDataIds to track
dataset-constraint dimensions separately solves that problem, too.
  • Loading branch information
TallJimbo committed Nov 4, 2021
1 parent 1393936 commit 1f0a257
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-32454.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Make it possible to run `queryDimensionRecords` while constraining on the existence of a dataset whose dimensions are not a subset of the record element's dependencies (e.g. `raw` and `exposure`).
6 changes: 3 additions & 3 deletions python/lsst/daf/butler/registries/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,7 @@ def queryDatasets(self, datasetType: Any, *,
bind=bind,
defaults=self.defaults.dataId,
check=check,
datasets=[datasetType],
)
builder = self._makeQueryBuilder(summary)
# Add the dataset subquery to the query, telling the QueryBuilder to
Expand All @@ -936,7 +937,6 @@ def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimens
standardizedDataId = self.expandDataId(dataId, **kwargs)
standardizedDatasetTypes = set()
requestedDimensions = self.dimensions.extract(dimensions)
queryDimensionNames = set(requestedDimensions.names)
if datasets is not None:
if not collections:
if not self.defaults.collections:
Expand All @@ -948,7 +948,6 @@ def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimens
# times below).
collections = CollectionQuery.fromExpression(collections)
for datasetType in self.queryDatasetTypes(datasets, components=components):
queryDimensionNames.update(datasetType.dimensions.names)
# If any matched dataset type is a component, just operate on
# its parent instead, because Registry doesn't know anything
# about what components exist, and here (unlike queryDatasets)
Expand All @@ -961,12 +960,13 @@ def queryDataIds(self, dimensions: Union[Iterable[Union[Dimension, str]], Dimens
raise TypeError(f"Cannot pass 'collections' (='{collections}') without 'datasets'.")

summary = queries.QuerySummary(
requested=DimensionGraph(self.dimensions, names=queryDimensionNames),
requested=requestedDimensions,
dataId=standardizedDataId,
expression=where,
bind=bind,
defaults=self.defaults.dataId,
check=check,
datasets=standardizedDatasetTypes,
)
builder = self._makeQueryBuilder(summary)
for datasetType in standardizedDatasetTypes:
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/registry/queries/_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def joinDataset(self, datasetType: DatasetType, collections: Any, *,
the inner join that would normally be present), the full query will
return no results.
"""
assert datasetType.dimensions.issubset(self.summary.requested)
assert datasetType in self.summary.datasets
if isResult and findFirst:
collections = CollectionSearch.fromExpression(collections)
else:
Expand Down
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/registry/queries/_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
)
from ..interfaces import Database
from ._query import Query
from ._structs import QuerySummary


class DataCoordinateQueryResults(DataCoordinateIterable):
Expand Down Expand Up @@ -295,7 +296,8 @@ def findDatasets(self, datasetType: Union[DatasetType, str], collections: Any, *
f"the DataCoordinateQueryResult used as input to the search, but "
f"{datasetType.name} has dimensions {datasetType.dimensions}, while the input "
f"dimensions are {self.graph}.")
builder = self._query.makeBuilder()
summary = QuerySummary(self.graph, whereRegion=self._query.whereRegion, datasets=[datasetType])
builder = self._query.makeBuilder(summary)
if datasetType.isComponent():
# We were given a true DatasetType instance, but it's a component.
parentName, componentName = datasetType.nameAndComponent()
Expand Down
17 changes: 16 additions & 1 deletion python/lsst/daf/butler/registry/queries/_structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
__all__ = ["QuerySummary", "RegistryManagers"] # other classes here are local to subpackage

from dataclasses import dataclass
from typing import AbstractSet, Any, Iterator, List, Mapping, Optional, Type, Union
from typing import AbstractSet, Any, Iterable, Iterator, List, Mapping, Optional, Type, Union

from sqlalchemy.sql import ColumnElement

Expand Down Expand Up @@ -266,6 +266,12 @@ class QuerySummary:
query expression, keyed by the identifiers they replace.
defaults : `DataCoordinate`, optional
A data ID containing default for governor dimensions.
datasets : `Iterable` [ `DatasetType` ], optional
Dataset types whose searches may be joined into the query. Callers
must still call `QueryBuilder.joinDataset` explicitly to control how
that join happens (e.g. which collections are searched), but by
declaring them here first we can ensure that the query includes the
right dimensions for those joins.
check : `bool`
If `True` (default) check the query for consistency. This may reject
some valid queries that resemble common mistakes (e.g. queries for
Expand All @@ -277,6 +283,7 @@ def __init__(self, requested: DimensionGraph, *,
whereRegion: Optional[Region] = None,
bind: Optional[Mapping[str, Any]] = None,
defaults: Optional[DataCoordinate] = None,
datasets: Iterable[DatasetType] = (),
check: bool = True):
self.requested = requested
if expression is None:
Expand All @@ -287,6 +294,7 @@ def __init__(self, requested: DimensionGraph, *,
raise TypeError("New bind parameters passed, but expression is already a QueryWhereExpression.")
self.where = expression.attach(self.requested, dataId=dataId, region=whereRegion, defaults=defaults,
check=check)
self.datasets = NamedValueSet(datasets).freeze()

requested: DimensionGraph
"""Dimensions whose primary keys should be included in the result rows of
Expand All @@ -298,6 +306,11 @@ def __init__(self, requested: DimensionGraph, *,
query (`QueryWhereClause`).
"""

datasets: NamedValueAbstractSet[DatasetType]
"""Dataset types whose searches may be joined into the query
(`NamedValueAbstractSet` [ `DatasetType` ]).
"""

@property
def universe(self) -> DimensionUniverse:
"""All known dimensions (`DimensionUniverse`).
Expand Down Expand Up @@ -364,6 +377,8 @@ def mustHaveKeysJoined(self) -> DimensionGraph:
dataset.
"""
names = set(self.requested.names | self.where.dimensions.names)
for dataset_type in self.datasets:
names.update(dataset_type.dimensions.names)
return DimensionGraph(self.universe, names=names)

@property # type: ignore
Expand Down
17 changes: 17 additions & 0 deletions python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2467,3 +2467,20 @@ def testQueryResultSummaries(self):
for message in messages
)
)

def testDatasetConstrainedDimensionRecordQueries(self):
    """Check that queryDimensionRecords succeeds when the dataset used
    as a constraint has dimensions that extend beyond those of the
    requested dimension element.
    """
    registry = self.makeRegistry()
    for data_file in ("base.yaml", "datasets.yaml"):
        self.loadData(registry, data_file)
    # Request physical_filter records while constraining on the
    # existence of 'flat' datasets; 'flat' carries dimensions beyond
    # physical_filter's own (presumably detector — confirm against
    # datasets.yaml), which is exactly the case this commit fixes.
    results = registry.queryDimensionRecords(
        "physical_filter",
        datasets=["flat"],
        collections="imported_r",
    )
    found_names = {rec.name for rec in results}
    self.assertEqual(found_names, {"Cam1-R1", "Cam1-R2"})

0 comments on commit 1f0a257

Please sign in to comment.