Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-43027: Make RemoteButler.find_dataset consistent with DirectButler #968

Merged
merged 5 commits into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 13 additions & 4 deletions python/lsst/daf/butler/_timespan.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,20 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("Timespan",)
__all__ = (
"SerializedTimespan",
"Timespan",
)

import enum
import warnings
from collections.abc import Generator
from typing import TYPE_CHECKING, Any, ClassVar, Union
from typing import TYPE_CHECKING, Annotated, Any, ClassVar, Union

import astropy.time
import astropy.utils.exceptions
import yaml
from pydantic import Field

# As of astropy 4.2, the erfa interface is shipped independently and
# ErfaWarning is no longer an AstropyWarning
Expand Down Expand Up @@ -69,6 +73,11 @@ class _SpecialTimespanBound(enum.Enum):

TimespanBound = Union[astropy.time.Time, _SpecialTimespanBound, None]

SerializedTimespan = Annotated[list[int], Field(min_length=2, max_length=2)]
"""JSON-serializable representation of the Timespan class, as a list of two
integers ``[begin, end]`` in nanoseconds since the epoch.
"""


class Timespan:
"""A half-open time interval with nanosecond precision.
Expand Down Expand Up @@ -483,7 +492,7 @@ def difference(self, other: Timespan) -> Generator[Timespan, None, None]:
if intersection._nsec[1] < self._nsec[1]:
yield Timespan(None, None, _nsec=(intersection._nsec[1], self._nsec[1]))

def to_simple(self, minimal: bool = False) -> list[int]:
def to_simple(self, minimal: bool = False) -> SerializedTimespan:
"""Return simple python type form suitable for serialization.

Parameters
Expand All @@ -502,7 +511,7 @@ def to_simple(self, minimal: bool = False) -> list[int]:
@classmethod
def from_simple(
cls,
simple: list[int] | None,
simple: SerializedTimespan | None,
universe: DimensionUniverse | None = None,
registry: Registry | None = None,
) -> Timespan | None:
Expand Down
18 changes: 1 addition & 17 deletions python/lsst/daf/butler/datastores/fileDatastoreClient.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Any, Literal

import pydantic
from lsst.daf.butler import DatasetRef, Location, StorageClass
from lsst.daf.butler import DatasetRef, Location
from lsst.daf.butler.datastore.cache_manager import DatastoreDisabledCacheManager
from lsst.daf.butler.datastore.stored_file_info import SerializedStoredFileInfo, StoredFileInfo
from lsst.daf.butler.datastores.file_datastore.get import (
Expand Down Expand Up @@ -47,8 +47,6 @@ def get_dataset_as_python_object(
payload: FileDatastoreGetPayload,
*,
parameters: Mapping[str, Any] | None,
storageClass: StorageClass | str | None,
component: str | None,
) -> Any:
"""Retrieve an artifact from storage and return it as a Python object.

Expand All @@ -62,12 +60,6 @@ def get_dataset_as_python_object(
parameters : `Mapping`[`str`, `typing.Any`]
`StorageClass` and `Formatter` parameters to be used when converting
the artifact to a Python object.
storageClass : `StorageClass` | `str` | `None`
Overrides the `StorageClass` to be used when converting the artifact to
a Python object. If `None`, uses the `StorageClass` specified by
``payload``.
component : `str` | `None`
Selects which component of the artifact to retrieve.

Returns
-------
Expand All @@ -79,14 +71,6 @@ def get_dataset_as_python_object(
for file_info in payload.file_info
]

# If we have both a component override and a storage class override, the
# component override has to be applied first. DatasetRef cares because it
# is checking compatibility of the storage class with its DatasetType.
if component is not None:
ref = ref.makeComponentRef(component)
if storageClass is not None:
ref = ref.overrideStorageClass(storageClass)

datastore_file_info = generate_datastore_get_information(
fileLocations,
ref=ref,
Expand Down
76 changes: 76 additions & 0 deletions python/lsst/daf/butler/remote_butler/_collection_args.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import re
from typing import cast

from ..registry import CollectionArgType
from ..registry.wildcards import CollectionWildcard
from .server_models import CollectionList


def convert_collection_arg_to_glob_string_list(arg: CollectionArgType) -> CollectionList:
    """Normalize the collections argument accepted by many Butler/registry
    methods into the format expected by Butler server.

    Parameters
    ----------
    arg : `CollectionArgType`
        Collection search pattern in any of the supported formats.

    Returns
    -------
    glob_list : `CollectionList`
        Collection search patterns normalized to a list of glob strings.

    Raises
    ------
    TypeError
        If the search pattern provided by the user cannot be converted to a
        glob string.
    """
    # ``...`` is the registry shorthand for "search every collection".
    if arg is ...:
        return CollectionList(["*"])
    if isinstance(arg, str):
        return CollectionList([arg])
    if isinstance(arg, re.Pattern):
        raise TypeError("RemoteButler does not support regular expressions as search patterns")
    if isinstance(arg, CollectionWildcard):
        # In theory this could work, but CollectionWildcard eagerly converts
        # any glob strings to regexes.  So it would need some rework to
        # preserve the original string inputs.  CollectionWildcard has likely
        # never been used in the wild by an end-user so this probably isn't
        # worth figuring out.
        raise TypeError("RemoteButler does not accept CollectionWildcard instances as search patterns")
    # Anything else must be an iterable of plain strings.
    patterns = list(arg)
    for pattern in patterns:
        if not isinstance(pattern, str):
            raise TypeError("RemoteButler only accepts strings and lists of strings as search patterns")
    return CollectionList(cast(list[str], patterns))
48 changes: 43 additions & 5 deletions python/lsst/daf/butler/remote_butler/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,17 @@
DimensionRecord,
DimensionUniverse,
)
from ..registry import CollectionArgType, CollectionSummary, CollectionType, Registry, RegistryDefaults
from ..registry import (
CollectionArgType,
CollectionSummary,
CollectionType,
DatasetTypeError,
Registry,
RegistryDefaults,
)
from ..registry.queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults
from ..remote_butler import RemoteButler
from ._collection_args import convert_collection_arg_to_glob_string_list


class RemoteButlerRegistry(Registry):
Expand Down Expand Up @@ -146,10 +154,29 @@
datastore_records: bool = False,
**kwargs: Any,
) -> DatasetRef | None:
# There is an implementation of find_dataset on RemoteButler, but the
# definition of the collections parameter is incompatible and timespans
# aren't supported yet.
raise NotImplementedError()
# Components are supported by Butler.find_dataset, but are required to
# raise an exception in registry.findDataset (see
# RegistryTests.testComponentLookups). Apparently the registry version
# used to support components, but at some point a decision was made to
# draw a hard architectural boundary between registry and datastore so
# that registry is no longer allowed to know about components.
if _is_component_dataset_type(datasetType):
raise DatasetTypeError(
"Component dataset types are not supported;"
" use DatasetRef or DatasetType methods to obtain components from parents instead."
)

if collections is not None:
collections = convert_collection_arg_to_glob_string_list(collections)

return self._butler.find_dataset(
datasetType,
dataId,
collections=collections,
timespan=timespan,
datastore_records=datastore_records,
**kwargs,
)

def insertDatasets(
self,
Expand Down Expand Up @@ -310,3 +337,14 @@
@storageClasses.setter
def storageClasses(self, value: StorageClassFactory) -> None:
raise NotImplementedError()


def _is_component_dataset_type(dataset_type: DatasetType | str) -> bool:
    """Return true if the given dataset type refers to a component."""
    if isinstance(dataset_type, str):
        # Component names are encoded into the string form of the dataset
        # type name; split it apart and check for a component portion.
        _, component_name = DatasetType.splitDatasetTypeName(dataset_type)
        return component_name is not None
    if isinstance(dataset_type, DatasetType):
        return dataset_type.component() is not None
    raise TypeError(f"Expected DatasetType or str, got {dataset_type!r}")

Check warning on line 350 in python/lsst/daf/butler/remote_butler/_registry.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_registry.py#L350

Added line #L350 was not covered by tests