Skip to content

Commit

Permalink
Merge pull request #736 from lsst/tickets/DM-36312
Browse files Browse the repository at this point in the history
DM-36312: Deprecate support for component datasets in Registry
  • Loading branch information
TallJimbo committed Oct 5, 2022
2 parents 738e63c + c7f82e6 commit a21ee8f
Show file tree
Hide file tree
Showing 12 changed files with 164 additions and 92 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-36312.api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Deprecate support for components in `Registry.query*` methods, per RFC-879.
20 changes: 16 additions & 4 deletions python/lsst/daf/butler/registries/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,12 @@ def removeDatasetType(self, name: str) -> None:

def getDatasetType(self, name: str) -> DatasetType:
    # Docstring inherited from lsst.daf.butler.registry.Registry
    #
    # Accept both parent and component dataset type names.  The stale
    # pre-change lookup (`self._managers.datasets[name].datasetType`) has
    # been removed: it made the component-handling code below unreachable
    # and raised for component names such as "calexp.wcs".
    parent_name, component = DatasetType.splitDatasetTypeName(name)
    # Raises MissingDatasetTypeError (a KeyError subclass) if the parent
    # dataset type is not registered.
    storage = self._managers.datasets[parent_name]
    if component is None:
        return storage.datasetType
    # Derive the component dataset type from its registered parent.
    return storage.datasetType.makeComponentDatasetType(component)

def supportsIdGenerationMode(self, mode: DatasetIdGenEnum) -> bool:
# Docstring inherited from lsst.daf.butler.registry.Registry
Expand All @@ -435,9 +440,10 @@ def findDataset(
) -> Optional[DatasetRef]:
# Docstring inherited from lsst.daf.butler.registry.Registry
if isinstance(datasetType, DatasetType):
storage = self._managers.datasets[datasetType.name]
parent_name, component = datasetType.nameAndComponent()
else:
storage = self._managers.datasets[datasetType]
parent_name, component = DatasetType.splitDatasetTypeName(datasetType)
storage = self._managers.datasets[parent_name]
dataId = DataCoordinate.standardize(
dataId,
graph=storage.datasetType.dimensions,
Expand All @@ -460,6 +466,8 @@ def findDataset(
continue
result = storage.find(collectionRecord, dataId, timespan=timespan)
if result is not None:
if component is not None:
return result.makeComponentRef(component)
return result

return None
Expand Down Expand Up @@ -929,6 +937,10 @@ def _standardize_query_dataset_args(
parent datasets were not matched by the expression.
Fully-specified component datasets (`str` or `DatasetType`
instances) are always included.
Values other than `False` are deprecated, and only `False` will be
supported after v26. After v27 this argument will be removed
entirely.
mode : `str`, optional
The way in which datasets are being used in this query; one of:
Expand Down Expand Up @@ -1026,7 +1038,7 @@ def queryDatasets(
check=check,
datasets=[parent_dataset_type],
)
builder = self._makeQueryBuilder(summary, doomed_by=doomed_by)
builder = self._makeQueryBuilder(summary)
# Add the dataset subquery to the query, telling the QueryBuilder
# to include the rank of the selected collection in the results
# only if we need to findFirst. Note that if any of the
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/registry/_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class DatasetTypeError(RegistryError):
"""Exception raised for problems with dataset types."""


class MissingDatasetTypeError(DatasetTypeError, KeyError):
    """Exception raised when a dataset type does not exist.

    Notes
    -----
    This also inherits from `KeyError` so that existing callers that catch
    `KeyError` when a dataset type lookup fails continue to work after
    registry internals switched to raising this more specific exception.
    """
Expand Down
30 changes: 27 additions & 3 deletions python/lsst/daf/butler/registry/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,8 +639,13 @@ def getDatasetType(self, name: str) -> DatasetType:
Raises
------
KeyError
Requested named DatasetType could not be found in registry.
MissingDatasetTypeError
Raised if the requested dataset type has not been registered.
Notes
-----
This method handles component dataset types automatically, though most
other registry operations do not.
"""
raise NotImplementedError()

Expand Down Expand Up @@ -712,7 +717,7 @@ def findDataset(
``self.defaults.collections`` is `None`.
LookupError
Raised if one or more data ID keys are missing.
KeyError
MissingDatasetTypeError
Raised if the dataset type does not exist.
MissingCollectionError
Raised if any of ``collections`` does not exist in the registry.
Expand All @@ -728,6 +733,9 @@ def findDataset(
reported consistently, regardless of the reason, and that adding
additional collections that do not contain a match to the search path
never changes the behavior.
This method handles component dataset types automatically, though most
other registry operations do not.
"""
raise NotImplementedError()

Expand Down Expand Up @@ -1213,6 +1221,10 @@ def queryDatasetTypes(
parent datasets were not matched by the expression.
Fully-specified component datasets (`str` or `DatasetType`
instances) are always included.
Values other than `False` are deprecated, and only `False` will be
supported after v26. After v27 this argument will be removed
entirely.
missing : `list` of `str`, optional
String dataset type names that were explicitly given (i.e. not
regular expression patterns) but not found will be appended to this
Expand Down Expand Up @@ -1348,6 +1360,10 @@ def queryDatasets(
if their parent datasets were not matched by the expression.
Fully-specified component datasets (`str` or `DatasetType`
instances) are always included.
Values other than `False` are deprecated, and only `False` will be
supported after v26. After v27 this argument will be removed
entirely.
bind : `Mapping`, optional
Mapping containing literal values that should be injected into the
``where`` expression, keyed by the identifiers they replace.
Expand Down Expand Up @@ -1459,6 +1475,10 @@ def queryDataIds(
if their parent datasets were not matched by the expression.
Fully-specified component datasets (`str` or `DatasetType`
instances) are always included.
Values other than `False` are deprecated, and only `False` will be
supported after v26. After v27 this argument will be removed
entirely.
bind : `Mapping`, optional
Mapping containing literal values that should be injected into the
``where`` expression, keyed by the identifiers they replace.
Expand Down Expand Up @@ -1547,6 +1567,10 @@ def queryDimensionRecords(
components : `bool`, optional
Whether to apply dataset expressions to components as well.
See `queryDataIds` for more information.
Values other than `False` are deprecated, and only `False` will be
supported after v26. After v27 this argument will be removed
entirely.
bind : `Mapping`, optional
Mapping containing literal values that should be injected into the
``where`` expression, keyed by the identifiers they replace.
Expand Down
27 changes: 18 additions & 9 deletions python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
"ByDimensionsDatasetRecordStorageManagerUUID",
)

import copy
import logging
import warnings
from collections import defaultdict
Expand Down Expand Up @@ -254,14 +253,7 @@ def remove(self, name: str) -> None:

def find(self, name: str) -> DatasetRecordStorage | None:
    # Docstring inherited from DatasetRecordStorageManager.
    #
    # Lookup is by exact (parent) dataset type name only; the old
    # component-splitting logic that copied the parent storage and swapped
    # in a component dataset type was removed here — component handling
    # now lives in the callers (e.g. ``resolve_wildcard``).  The stale
    # removed-body residue (which returned before reaching this line) has
    # been dropped.
    return self._byName.get(name)

def register(self, datasetType: DatasetType) -> tuple[DatasetRecordStorage, bool]:
# Docstring inherited from DatasetRecordStorageManager.
Expand Down Expand Up @@ -342,8 +334,18 @@ def resolve_wildcard(
) -> dict[DatasetType, list[str | None]]:
wildcard = DatasetTypeWildcard.from_expression(expression)
result: defaultdict[DatasetType, set[str | None]] = defaultdict(set)
# This message can be transformed into an error on DM-36303 after v26,
# and the components argument here (and in all callers) can be removed
# entirely on DM-36457 after v27.
deprecation_message = (
"Querying for component datasets via Registry query methods is deprecated in favor of using "
"DatasetRef and DatasetType methods on parent datasets. Only components=False will be supported "
"after v26, and the components argument will be removed after v27."
)
for name, dataset_type in wildcard.values.items():
parent_name, component_name = DatasetType.splitDatasetTypeName(name)
if component_name is not None:
warnings.warn(deprecation_message, FutureWarning)
if (found_storage := self.find(parent_name)) is not None:
found_parent = found_storage.datasetType
if component_name is not None:
Expand All @@ -366,6 +368,7 @@ def resolve_wildcard(
result[found_parent].add(component_name)
elif missing is not None:
missing.append(name)
already_warned = False
if wildcard.patterns is Ellipsis:
if explicit_only:
raise TypeError(
Expand All @@ -378,6 +381,9 @@ def resolve_wildcard(
result[storage.datasetType].update(
storage.datasetType.storageClass.allComponents().keys()
)
if storage.datasetType.storageClass.allComponents() and not already_warned:
warnings.warn(deprecation_message, FutureWarning)
already_warned = True
except KeyError as err:
_LOG.warning(
f"Could not load storage class {err} for {storage.datasetType.name}; "
Expand Down Expand Up @@ -414,6 +420,9 @@ def resolve_wildcard(
for p in wildcard.patterns
):
result[storage.datasetType].add(component_name)
if not already_warned:
warnings.warn(deprecation_message, FutureWarning)
already_warned = True
return {k: list(v) for k, v in result.items()}

def getDatasetRef(self, id: DatasetId) -> DatasetRef | None:
Expand Down
7 changes: 6 additions & 1 deletion python/lsst/daf/butler/registry/interfaces/_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import sqlalchemy.sql

from ...core import DataCoordinate, DatasetId, DatasetRef, DatasetType, SimpleQuery, Timespan, ddl
from .._exceptions import MissingDatasetTypeError
from ._versioning import VersionedExtension

if TYPE_CHECKING:
Expand Down Expand Up @@ -596,7 +597,7 @@ def __getitem__(self, name: str) -> DatasetRecordStorage:
"""
result = self.find(name)
if result is None:
raise KeyError(f"Dataset type with name '{name}' not found.")
raise MissingDatasetTypeError(f"Dataset type with name '{name}' not found.")
return result

@abstractmethod
Expand Down Expand Up @@ -680,6 +681,10 @@ def resolve_wildcard(
datasets were not matched by the expression. Fully-specified
component datasets (`str` or `DatasetType` instances) are always
included.
Values other than `False` are deprecated, and only `False` will be
supported after v26. After v27 this argument will be removed
entirely.
missing : `list` of `str`, optional
String dataset type names that were explicitly given (i.e. not
regular expression patterns) but not found will be appended to this
Expand Down
4 changes: 4 additions & 0 deletions python/lsst/daf/butler/registry/queries/_query_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ def resolve_single_dataset_type_wildcard(
datasets were not matched by the expression. Fully-specified
component datasets (`str` or `DatasetType` instances) are always
included.
Values other than `False` are deprecated, and only `False` will be
supported after v26. After v27 this argument will be removed
entirely.
explicit_only : `bool`, optional
If `True`, require explicit `DatasetType` instances or `str` names,
with `re.Pattern` instances deprecated and ``...`` prohibited.
Expand Down
24 changes: 20 additions & 4 deletions python/lsst/daf/butler/registry/queries/_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,12 @@ def constrain(self, query: SimpleQuery, columns: Callable[[str], sqlalchemy.sql.
)

def findDatasets(
self, datasetType: DatasetType | str, collections: Any, *, findFirst: bool = True
self,
datasetType: DatasetType | str,
collections: Any,
*,
findFirst: bool = True,
components: bool | None = None,
) -> ParentDatasetQueryResults:
"""Find datasets using the data IDs identified by this query.
Expand All @@ -380,6 +385,17 @@ def findDatasets(
dataset type appears (according to the order of ``collections``
passed in). If `True`, ``collections`` must not contain regular
expressions and may not be ``...``.
components : `bool`, optional
If `True`, apply all expression patterns to component dataset type
names as well. If `False`, never apply patterns to components. If
`None` (default), apply patterns to components only if their parent
datasets were not matched by the expression. Fully-specified
component datasets (`str` or `DatasetType` instances) are always
included.
Values other than `False` are deprecated, and only `False` will be
supported after v26. After v27 this argument will be removed
entirely.
Returns
-------
Expand All @@ -396,8 +412,8 @@ def findDatasets(
MissingDatasetTypeError
Raised if the given dataset type is not registered.
"""
parent_dataset_type, components = self._query.backend.resolve_single_dataset_type_wildcard(
datasetType, explicit_only=True
parent_dataset_type, components_found = self._query.backend.resolve_single_dataset_type_wildcard(
datasetType, components=components, explicit_only=True
)
if not parent_dataset_type.dimensions.issubset(self.graph):
raise ValueError(
Expand All @@ -415,7 +431,7 @@ def findDatasets(
return ParentDatasetQueryResults(
db=self._db,
query=query,
components=components,
components=components_found,
records=self._records,
datasetType=parent_dataset_type,
)
Expand Down

0 comments on commit a21ee8f

Please sign in to comment.