Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-42188: Make RemoteButler usable from services #930

Merged
merged 15 commits into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/changes/DM-42188.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added ``LabeledButlerFactory``, a factory class for constructing Butler instances. This is intended for use in long-lived services that need to be able to create a Butler instance for each incoming client request.
1 change: 1 addition & 0 deletions python/lsst/daf/butler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
from ._file_dataset import *
from ._file_descriptor import *
from ._formatter import *
from ._labeled_butler_factory import *

# Do not import 'instrument' or 'json' at all by default.
from ._limited_butler import *
Expand Down
136 changes: 77 additions & 59 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
from lsst.utils import doImportType
from lsst.utils.logging import getLogger

from ._butler_config import ButlerConfig
from ._butler_config import ButlerConfig, ButlerType
from ._butler_instance_options import ButlerInstanceOptions
from ._butler_repo_index import ButlerRepoIndex
from ._config import Config, ConfigSubset
from ._limited_butler import LimitedButler
Expand Down Expand Up @@ -105,6 +106,9 @@
the default for that dimension. Nonexistent collections are ignored.
If a default value is provided explicitly for a governor dimension via
``**kwargs``, no default will be inferred for that dimension.
without_datastore : `bool`, optional
If `True` do not attach a datastore to this butler. Any attempts
to use a datastore will fail.
**kwargs : `Any`
Additional keyword arguments passed to a constructor of actual butler
class.
Expand All @@ -125,60 +129,25 @@
searchPaths: Sequence[ResourcePathExpression] | None = None,
writeable: bool | None = None,
inferDefaults: bool = True,
without_datastore: bool = False,
**kwargs: Any,
) -> Butler:
if cls is Butler:
cls = cls._find_butler_class(config, searchPaths)
return Butler.from_config(
config=config,
collections=collections,
run=run,
searchPaths=searchPaths,
writeable=writeable,
inferDefaults=inferDefaults,
without_datastore=without_datastore,
**kwargs,
)

# Note: we do not pass any parameters to __new__, Python will pass them
# to __init__ after __new__ returns sub-class instance.
return super().__new__(cls)

@staticmethod
def _find_butler_class(
config: Config | ResourcePathExpression | None = None,
searchPaths: Sequence[ResourcePathExpression] | None = None,
) -> type[Butler]:
"""Find actual class to instantiate.

Parameters
----------
config : `ButlerConfig`, `Config` or `str`, optional
Configuration. Anything acceptable to the `ButlerConfig`
constructor. If a directory path is given the configuration will be
read from a ``butler.yaml`` file in that location. If `None` is
given default values will be used. If ``config`` contains "cls"
key then its value is used as a name of butler class and it must be
a sub-class of this class, otherwise `DirectButler` is
instantiated.
searchPaths : `list` of `str`, optional
Directory paths to search when calculating the full Butler
configuration. Not used if the supplied config is already a
`ButlerConfig`.

Returns
-------
butler_class : `type`
The type of `Butler` to instantiate.
"""
butler_class_name: str | None = None
if config is not None:
# Check for optional "cls" key in config.
if not isinstance(config, Config):
config = ButlerConfig(config, searchPaths=searchPaths)
butler_class_name = config.get("cls")

# Make DirectButler if class is not specified.
butler_class: type[Butler]
if butler_class_name is None:
from .direct_butler import DirectButler

butler_class = DirectButler
else:
butler_class = doImportType(butler_class_name)
if not issubclass(butler_class, Butler):
raise TypeError(f"{butler_class_name} is not a subclass of Butler")
return butler_class

@classmethod
def from_config(
cls,
Expand All @@ -189,6 +158,7 @@
searchPaths: Sequence[ResourcePathExpression] | None = None,
writeable: bool | None = None,
inferDefaults: bool = True,
without_datastore: bool = False,
**kwargs: Any,
) -> Butler:
"""Create butler instance from configuration.
Expand Down Expand Up @@ -233,9 +203,12 @@
are ignored. If a default value is provided explicitly for a
governor dimension via ``**kwargs``, no default will be inferred
for that dimension.
without_datastore : `bool`, optional
If `True` do not attach a datastore to this butler. Any attempts
to use a datastore will fail.
**kwargs : `Any`
Additional keyword arguments passed to a constructor of actual
butler class.
Default data ID key-value pairs. These may only identify
"governor" dimensions like ``instrument`` and ``skymap``.

Returns
-------
Expand Down Expand Up @@ -297,17 +270,47 @@
arguments provided, but it defaults to `False` when there are not
collection arguments.
"""
cls = cls._find_butler_class(config, searchPaths)
return cls(
config,
collections=collections,
run=run,
searchPaths=searchPaths,
writeable=writeable,
inferDefaults=inferDefaults,
**kwargs,
# DirectButler used to have a way to specify a "copy constructor" by
# passing the "butler" parameter to its constructor. This
# functionality has been moved out of the constructor into
# Butler._clone(), but the new interface is not public yet.
butler = kwargs.pop("butler", None)
if butler is not None:
if not isinstance(butler, Butler):
raise TypeError("'butler' parameter must be a Butler instance")

Check warning on line 280 in python/lsst/daf/butler/_butler.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/_butler.py#L280

Added line #L280 was not covered by tests
if config is not None or searchPaths is not None or writeable is not None:
raise TypeError(

Check warning on line 282 in python/lsst/daf/butler/_butler.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/_butler.py#L282

Added line #L282 was not covered by tests
"Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
)
return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs)

options = ButlerInstanceOptions(
collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs
)

# Load the Butler configuration. This may involve searching the
# environment to locate a configuration file.
butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore)
butler_type = butler_config.get_butler_type()

# Make DirectButler if class is not specified.
match butler_type:
case ButlerType.DIRECT:
from .direct_butler import DirectButler

return DirectButler.create_from_config(
butler_config,
options=options,
without_datastore=without_datastore,
)
case ButlerType.REMOTE:
from .remote_butler import RemoteButlerFactory

factory = RemoteButlerFactory.create_factory_from_config(butler_config)
return factory.create_butler_with_credentials_from_environment(butler_options=options)
case _:
raise TypeError(f"Unknown Butler type '{butler_type}'")

Check warning on line 312 in python/lsst/daf/butler/_butler.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/_butler.py#L311-L312

Added lines #L311 - L312 were not covered by tests

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a raise here for clarity if there is no match to REMOTE or DIRECT? I know that there are currently only two values in the enum but this ties it up nicely. Consider using match here since that is designed for enums.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because mypy's exhaustiveness checking will ensure that all the cases are handled, there's some value in not having the raise. Without the raise, when someone adds an enum value, mypy will tell them all the places that need to do something with it.

I don't feel that strongly about it one way or the other though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

match naturally comes with a fall through clause that can raise. I feel better if the code looks like it's dealing with all the options without relying on mypy.

@staticmethod
def makeRepo(
root: ResourcePathExpression,
Expand Down Expand Up @@ -1691,3 +1694,18 @@
not defined.
"""
raise NotImplementedError()

@abstractmethod
def _clone(
self,
*,
collections: Any = None,
run: str | None = None,
inferDefaults: bool = True,
**kwargs: Any,
) -> Butler:
"""Return a new Butler instance connected to the same repository
as this one, but overriding ``collections``, ``run``,
``inferDefaults``, and default data ID.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When this method becomes public I think it will trigger the numpydoc warning. Might want to add the public docs here now rather than later.

"""
raise NotImplementedError()
20 changes: 20 additions & 0 deletions python/lsst/daf/butler/_butler_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import copy
import os
from collections.abc import Sequence
from enum import Enum

from lsst.resources import ResourcePath, ResourcePathExpression

Expand All @@ -47,6 +48,8 @@

CONFIG_COMPONENT_CLASSES = (RegistryConfig, StorageClassConfig, DatastoreConfig, RepoTransferFormatConfig)

ButlerType = Enum("ButlerType", ["DIRECT", "REMOTE"])


class ButlerConfig(Config):
"""Contains the configuration for a `Butler`.
Expand Down Expand Up @@ -191,3 +194,20 @@
# Not needed if there is never information in a butler config file
# not present in component configurations
self.update(butlerConfig)

def get_butler_type(self) -> ButlerType:
# Configuration optionally includes a class name specifying which
# implementation to use, DirectButler or RemoteButler.
butler_class_name = self.get("cls")
if butler_class_name is None:
# There are many existing DirectButler configurations that are
# missing the ``cls`` property.
return ButlerType.DIRECT
elif butler_class_name == "lsst.daf.butler.direct_butler.DirectButler":
return ButlerType.DIRECT
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add a cls entry to one of the direct butler config files in tests/config/basic so we can trigger this code path?

elif butler_class_name == "lsst.daf.butler.remote_butler.RemoteButler":
return ButlerType.REMOTE
else:
raise ValueError(

Check warning on line 211 in python/lsst/daf/butler/_butler_config.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/_butler_config.py#L211

Added line #L211 was not covered by tests
f"Butler configuration requests to load unknown Butler class {butler_class_name}"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
f"Butler configuration requests to load unknown Butler class {butler_class_name}"
f"Butler configuration requests to load unknown Butler class {butler_class_name!r}"

so the class name is quoted.

)
46 changes: 46 additions & 0 deletions python/lsst/daf/butler/_butler_instance_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("ButlerInstanceOptions",)

import dataclasses
from typing import Any


@dataclasses.dataclass(frozen=True)
class ButlerInstanceOptions:
"""The parameters passed to `Butler.from_config` or the Butler convenience
constructor. These configure defaults and other settings for a Butler
instance. These settings are common to all Butler subclasses. See `Butler`
for the documentation of these properties.
"""

collections: Any = None
run: str | None = None
writeable: bool | None = None
inferDefaults: bool = True
kwargs: dict[str, Any] = dataclasses.field(default_factory=dict)
10 changes: 6 additions & 4 deletions python/lsst/daf/butler/_butler_repo_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from lsst.resources import ResourcePath

from ._config import Config
from ._utilities.thread_safe_cache import ThreadSafeCache


class ButlerRepoIndex:
Expand All @@ -57,7 +58,7 @@ class ButlerRepoIndex:
index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX"
"""The name of the environment variable to read to locate the index."""

_cache: ClassVar[dict[ResourcePath, Config]] = {}
_cache: ClassVar[ThreadSafeCache[ResourcePath, Config]] = ThreadSafeCache()
"""Cache of indexes. In most scenarios only one index will be found
and the environment will not change. In tests this may not be true."""

Expand Down Expand Up @@ -88,8 +89,9 @@ def _read_repository_index(cls, index_uri: ResourcePath) -> Config:
-----
Does check the cache before reading the file.
"""
if index_uri in cls._cache:
return cls._cache[index_uri]
config = cls._cache.get(index_uri)
if config is not None:
return config

try:
repo_index = Config(index_uri)
Expand All @@ -100,7 +102,7 @@ def _read_repository_index(cls, index_uri: ResourcePath) -> Config:
raise RuntimeError(
f"Butler repository index file at {index_uri} could not be read: {type(e).__qualname__} {e}"
) from e
cls._cache[index_uri] = repo_index
repo_index = cls._cache.set_or_get(index_uri, repo_index)

return repo_index

Expand Down