Factor out ref utilities from RemoteButler
Pull out some standalone functions from RemoteButler to a new file to cut down clutter.
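
For context, here is a brief usage sketch of the extracted helpers. It is illustrative only and not part of this commit; the example dataset type name and data-ID values are made up.

# Hypothetical usage sketch of the new module-level helpers; assumes the
# lsst.daf.butler package is installed and uses arbitrary example values.
from lsst.daf.butler.remote_butler._ref_utils import (
    normalize_dataset_type_name,
    simplify_dataId,
)

# Accepts either a DatasetType or a plain string; returns the standardized
# name used in REST API paths and request bodies.
name = normalize_dataset_type_name("raw")

# Merges a data-ID mapping with keyword overrides and validates the result
# into the serializable form sent to the server.
data_id = simplify_dataId({"instrument": "HSC"}, {"exposure": 903334})

# apply_storage_class_override(ref, dataset_type_or_name, storage_class)
# re-applies any user-requested StorageClass to a DatasetRef returned by
# the server; omitted here because it needs a real DatasetRef.
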
dhirving committed Apr 8, 2024
1 parent c9e8f6e commit e444325
Showing 2 changed files with 144 additions and 87 deletions.
136 changes: 136 additions & 0 deletions python/lsst/daf/butler/remote_butler/_ref_utils.py
@@ -0,0 +1,136 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
"apply_storage_class_override",
"normalize_dataset_type_name",
"simplify_dataId",
)

from pydantic import TypeAdapter

from .._dataset_ref import DatasetRef
from .._dataset_type import DatasetType
from .._storage_class import StorageClass
from ..dimensions import DataCoordinate, DataId, DataIdValue, SerializedDataId
from .server_models import DatasetTypeName

_SERIALIZED_DATA_ID_TYPE_ADAPTER = TypeAdapter(SerializedDataId)


def apply_storage_class_override(
ref: DatasetRef,
original_dataset_ref_or_type: DatasetRef | DatasetType | str,
explicit_storage_class: StorageClass | str | None,
) -> DatasetRef:
"""Return a DatasetRef with its storage class overridden to match the
StorageClass supplied by the user as input to one of the search functions.

Parameters
----------
ref : `DatasetRef`
The ref to which we will apply the StorageClass override.
original_dataset_ref_or_type : `DatasetRef` | `DatasetType` | `str`
The ref or type that was input to the search, which may contain a
storage class override.
explicit_storage_class : `StorageClass` | `str` | `None`
A storage class that the user explicitly requested as an override.
"""
if explicit_storage_class is not None:
return ref.overrideStorageClass(explicit_storage_class)

# If the caller provided a DatasetRef or DatasetType, they may have
# overridden the storage class on it, and we need to propagate that to the
# output.
dataset_type = _extract_dataset_type(original_dataset_ref_or_type)
if dataset_type is not None:
return ref.overrideStorageClass(dataset_type.storageClass)

return ref


def normalize_dataset_type_name(datasetTypeOrName: DatasetType | str) -> DatasetTypeName:
"""Convert DatasetType parameters in the format used by Butler methods
to a standardized string name for the REST API.

Parameters
----------
datasetTypeOrName : `DatasetType` | `str`
A DatasetType, or the name of a DatasetType. This union is a common
parameter in many `Butler` methods.
"""
if isinstance(datasetTypeOrName, DatasetType):
return DatasetTypeName(datasetTypeOrName.name)
elif isinstance(datasetTypeOrName, str):
return DatasetTypeName(datasetTypeOrName)
else:
raise TypeError(f"Got unexpected object for DatasetType: {datasetTypeOrName}")



def simplify_dataId(dataId: DataId | None, kwargs: dict[str, DataIdValue]) -> SerializedDataId:
"""Take a generic Data ID and convert it to a serializable form.
Parameters
----------
dataId : `dict`, `None`, `DataCoordinate`
The data ID to serialize.
kwargs : `dict`
Additional entries to augment or replace the values in ``dataId``.

Returns
-------
data_id : `SerializedDataId`
A serializable form.
"""
if dataId is None:
dataId = {}
elif isinstance(dataId, DataCoordinate):
dataId = dataId.to_simple(minimal=True).dataId
else:
dataId = dict(dataId)

return _SERIALIZED_DATA_ID_TYPE_ADAPTER.validate_python(dataId | kwargs)


def _extract_dataset_type(datasetRefOrType: DatasetRef | DatasetType | str) -> DatasetType | None:
"""Return the DatasetType associated with the argument, or None if the
argument is not an object that contains a DatasetType object.

Parameters
----------
datasetRefOrType : `DatasetRef` | `DatasetType` | `str`
A DatasetRef, DatasetType, or name of a DatasetType. This union is a
common parameter in many `Butler` methods.
"""
if isinstance(datasetRefOrType, DatasetType):
return datasetRefOrType
elif isinstance(datasetRefOrType, DatasetRef):
return datasetRefOrType.datasetType
else:
return None
95 changes: 8 additions & 87 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
@@ -42,7 +42,6 @@
get_dataset_as_python_object,
)
from lsst.resources import ResourcePath, ResourcePathExpression
from pydantic import TypeAdapter

from .._butler import Butler
from .._butler_instance_options import ButlerInstanceOptions
@@ -54,7 +53,7 @@
from .._storage_class import StorageClass, StorageClassFactory
from .._utilities.locked_object import LockedObject
from ..datastore import DatasetRefURIs
from ..dimensions import DataCoordinate, DataIdValue, DimensionConfig, DimensionUniverse, SerializedDataId
from ..dimensions import DataIdValue, DimensionConfig, DimensionUniverse
from ..registry import (
CollectionArgType,
CollectionSummary,
@@ -63,9 +62,9 @@
RegistryDefaults,
)
from ._collection_args import convert_collection_arg_to_glob_string_list
from ._ref_utils import apply_storage_class_override, normalize_dataset_type_name, simplify_dataId
from .server_models import (
CollectionList,
DatasetTypeName,
FindDatasetRequestModel,
FindDatasetResponseModel,
GetCollectionInfoResponseModel,
@@ -86,8 +85,6 @@

from ._http_connection import RemoteButlerHttpConnection, parse_model

_SERIALIZED_DATA_ID_TYPE_ADAPTER = TypeAdapter(SerializedDataId)


class RemoteButler(Butler): # numpydoc ignore=PR02
"""A `Butler` that can be used to connect through a remote server.
@@ -172,30 +169,6 @@ def dimensions(self) -> DimensionUniverse:
cache.dimensions = universe
return cache.dimensions

def _simplify_dataId(self, dataId: DataId | None, kwargs: dict[str, DataIdValue]) -> SerializedDataId:
"""Take a generic Data ID and convert it to a serializable form.
Parameters
----------
dataId : `dict`, `None`, `DataCoordinate`
The data ID to serialize.
kwargs : `dict`
Additional entries to augment or replace the values in ``dataId``.

Returns
-------
data_id : `SerializedDataId`
A serializable form.
"""
if dataId is None:
dataId = {}
elif isinstance(dataId, DataCoordinate):
dataId = dataId.to_simple(minimal=True).dataId
else:
dataId = dict(dataId)

return _SERIALIZED_DATA_ID_TYPE_ADAPTER.validate_python(dataId | kwargs)

def _caching_context(self) -> AbstractContextManager[None]:
# Docstring inherited.
# Not implemented for now, will have to think whether this needs to
@@ -262,7 +235,7 @@ def get(
componentOverride = datasetRefOrType.datasetType.component()
if componentOverride:
ref = ref.makeComponentRef(componentOverride)
ref = _apply_storage_class_override(ref, datasetRefOrType, storageClass)
ref = apply_storage_class_override(ref, datasetRefOrType, storageClass)

return self._get_dataset_as_python_object(ref, model, parameters)

@@ -297,9 +270,9 @@ def _get_file_info(
return self._get_file_info_for_ref(datasetRefOrType)
else:
request = GetFileByDataIdRequestModel(
dataset_type_name=self._normalize_dataset_type_name(datasetRefOrType),
dataset_type_name=normalize_dataset_type_name(datasetRefOrType),
collections=self._normalize_collections(collections),
data_id=self._simplify_dataId(dataId, kwargs),
data_id=simplify_dataId(dataId, kwargs),
timespan=timespan.to_simple() if timespan is not None else None,
)
response = self._connection.post("get_file_by_data_id", request)
@@ -386,14 +359,14 @@ def find_dataset(
raise ValueError("Datastore records can not yet be returned in client/server butler.")

query = FindDatasetRequestModel(
data_id=self._simplify_dataId(data_id, kwargs),
data_id=simplify_dataId(data_id, kwargs),
collections=self._normalize_collections(collections),
timespan=timespan.to_simple() if timespan is not None else None,
dimension_records=dimension_records,
datastore_records=datastore_records,
)

dataset_type_name = self._normalize_dataset_type_name(dataset_type)
dataset_type_name = normalize_dataset_type_name(dataset_type)
path = f"find_dataset/{dataset_type_name}"
response = self._connection.post(path, query)

@@ -402,7 +375,7 @@
return None

ref = DatasetRef.from_simple(model.dataset_ref, universe=self.dimensions)
return _apply_storage_class_override(ref, dataset_type, storage_class)
return apply_storage_class_override(ref, dataset_type, storage_class)

def retrieveArtifacts(
self,
@@ -586,15 +559,6 @@ def _normalize_collections(self, collections: CollectionArgType | None) -> Colle
collections = self.collections
return convert_collection_arg_to_glob_string_list(collections)

def _normalize_dataset_type_name(self, datasetTypeOrName: DatasetType | str) -> DatasetTypeName:
"""Convert DatasetType parameters in the format used by Butler methods
to a standardized string name for the REST API.
"""
if isinstance(datasetTypeOrName, DatasetType):
return DatasetTypeName(datasetTypeOrName.name)
else:
return DatasetTypeName(datasetTypeOrName)

def _clone(
self,
*,
@@ -637,49 +601,6 @@ def _query_collections(self, query: QueryCollectionsRequestModel) -> QueryCollec
return parse_model(response, QueryCollectionsResponseModel)


def _extract_dataset_type(datasetRefOrType: DatasetRef | DatasetType | str) -> DatasetType | None:
"""Return the DatasetType associated with the argument, or None if the
argument is not an object that contains a DatasetType object.
"""
if isinstance(datasetRefOrType, DatasetType):
return datasetRefOrType
elif isinstance(datasetRefOrType, DatasetRef):
return datasetRefOrType.datasetType
else:
return None


def _apply_storage_class_override(
ref: DatasetRef,
original_dataset_ref_or_type: DatasetRef | DatasetType | str,
explicit_storage_class: StorageClass | str | None,
) -> DatasetRef:
"""Return a DatasetRef with its storage class overridden to match the
StorageClass supplied by the user as input to one of the search functions.

Parameters
----------
ref : `DatasetRef`
The ref to which we will apply the StorageClass override.
original_dataset_ref_or_type : `DatasetRef` | `DatasetType` | `str`
The ref or type that was input to the search, which may contain a
storage class override.
explicit_storage_class : `StorageClass` | `str` | `None`
A storage class that the user explicitly requested as an override.
"""
if explicit_storage_class is not None:
return ref.overrideStorageClass(explicit_storage_class)

# If the caller provided a DatasetRef or DatasetType, they may have
# overridden the storage class on it, and we need to propagate that to the
# output.
dataset_type = _extract_dataset_type(original_dataset_ref_or_type)
if dataset_type is not None:
return ref.overrideStorageClass(dataset_type.storageClass)

return ref


def _to_file_payload(get_file_response: GetFileResponseModel) -> FileDatastoreGetPayload:
if get_file_response.artifact is None:
ref = get_file_response.dataset_ref
