-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DM-42188: Make RemoteButler usable from services #930
Changes from all commits
26a33c5
6a92041
4bf0f46
3356cf6
20307c5
0f109eb
ca9e267
23acf07
2e22c8d
7bf94ed
203b6e6
eb87f3b
12dc951
e2c3f13
2d24a30
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Added ``LabeledButlerFactory``, a factory class for constructing Butler instances. This is intended for use in long-lived services that need to be able to create a Butler instance for each incoming client request. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,7 +38,8 @@ | |
from lsst.utils import doImportType | ||
from lsst.utils.logging import getLogger | ||
|
||
from ._butler_config import ButlerConfig | ||
from ._butler_config import ButlerConfig, ButlerType | ||
from ._butler_instance_options import ButlerInstanceOptions | ||
from ._butler_repo_index import ButlerRepoIndex | ||
from ._config import Config, ConfigSubset | ||
from ._limited_butler import LimitedButler | ||
|
@@ -105,6 +106,9 @@ | |
the default for that dimension. Nonexistent collections are ignored. | ||
If a default value is provided explicitly for a governor dimension via | ||
``**kwargs``, no default will be inferred for that dimension. | ||
without_datastore : `bool`, optional | ||
If `True` do not attach a datastore to this butler. Any attempts | ||
to use a datastore will fail. | ||
**kwargs : `Any` | ||
Additional keyword arguments passed to a constructor of actual butler | ||
class. | ||
|
@@ -125,60 +129,25 @@ | |
searchPaths: Sequence[ResourcePathExpression] | None = None, | ||
writeable: bool | None = None, | ||
inferDefaults: bool = True, | ||
without_datastore: bool = False, | ||
**kwargs: Any, | ||
) -> Butler: | ||
if cls is Butler: | ||
cls = cls._find_butler_class(config, searchPaths) | ||
return Butler.from_config( | ||
config=config, | ||
collections=collections, | ||
run=run, | ||
searchPaths=searchPaths, | ||
writeable=writeable, | ||
inferDefaults=inferDefaults, | ||
without_datastore=without_datastore, | ||
**kwargs, | ||
) | ||
|
||
# Note: we do not pass any parameters to __new__, Python will pass them | ||
# to __init__ after __new__ returns sub-class instance. | ||
return super().__new__(cls) | ||
|
||
@staticmethod | ||
def _find_butler_class( | ||
config: Config | ResourcePathExpression | None = None, | ||
searchPaths: Sequence[ResourcePathExpression] | None = None, | ||
) -> type[Butler]: | ||
"""Find actual class to instantiate. | ||
|
||
Parameters | ||
---------- | ||
config : `ButlerConfig`, `Config` or `str`, optional | ||
Configuration. Anything acceptable to the `ButlerConfig` | ||
constructor. If a directory path is given the configuration will be | ||
read from a ``butler.yaml`` file in that location. If `None` is | ||
given default values will be used. If ``config`` contains "cls" | ||
key then its value is used as a name of butler class and it must be | ||
a sub-class of this class, otherwise `DirectButler` is | ||
instantiated. | ||
searchPaths : `list` of `str`, optional | ||
Directory paths to search when calculating the full Butler | ||
configuration. Not used if the supplied config is already a | ||
`ButlerConfig`. | ||
|
||
Returns | ||
------- | ||
butler_class : `type` | ||
The type of `Butler` to instantiate. | ||
""" | ||
butler_class_name: str | None = None | ||
if config is not None: | ||
# Check for optional "cls" key in config. | ||
if not isinstance(config, Config): | ||
config = ButlerConfig(config, searchPaths=searchPaths) | ||
butler_class_name = config.get("cls") | ||
|
||
# Make DirectButler if class is not specified. | ||
butler_class: type[Butler] | ||
if butler_class_name is None: | ||
from .direct_butler import DirectButler | ||
|
||
butler_class = DirectButler | ||
else: | ||
butler_class = doImportType(butler_class_name) | ||
if not issubclass(butler_class, Butler): | ||
raise TypeError(f"{butler_class_name} is not a subclass of Butler") | ||
return butler_class | ||
|
||
@classmethod | ||
def from_config( | ||
cls, | ||
|
@@ -189,6 +158,7 @@ | |
searchPaths: Sequence[ResourcePathExpression] | None = None, | ||
writeable: bool | None = None, | ||
inferDefaults: bool = True, | ||
without_datastore: bool = False, | ||
**kwargs: Any, | ||
) -> Butler: | ||
"""Create butler instance from configuration. | ||
|
@@ -233,9 +203,12 @@ | |
are ignored. If a default value is provided explicitly for a | ||
governor dimension via ``**kwargs``, no default will be inferred | ||
for that dimension. | ||
without_datastore : `bool`, optional | ||
If `True` do not attach a datastore to this butler. Any attempts | ||
to use a datastore will fail. | ||
**kwargs : `Any` | ||
Additional keyword arguments passed to a constructor of actual | ||
butler class. | ||
Default data ID key-value pairs. These may only identify | ||
"governor" dimensions like ``instrument`` and ``skymap``. | ||
|
||
Returns | ||
------- | ||
|
@@ -297,17 +270,47 @@ | |
arguments provided, but it defaults to `False` when there are not | ||
collection arguments. | ||
""" | ||
cls = cls._find_butler_class(config, searchPaths) | ||
return cls( | ||
config, | ||
collections=collections, | ||
run=run, | ||
searchPaths=searchPaths, | ||
writeable=writeable, | ||
inferDefaults=inferDefaults, | ||
**kwargs, | ||
# DirectButler used to have a way to specify a "copy constructor" by | ||
# passing the "butler" parameter to its constructor. This | ||
# functionality has been moved out of the constructor into | ||
# Butler._clone(), but the new interface is not public yet. | ||
butler = kwargs.pop("butler", None) | ||
if butler is not None: | ||
if not isinstance(butler, Butler): | ||
raise TypeError("'butler' parameter must be a Butler instance") | ||
if config is not None or searchPaths is not None or writeable is not None: | ||
raise TypeError( | ||
"Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument." | ||
) | ||
return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs) | ||
|
||
options = ButlerInstanceOptions( | ||
collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs | ||
) | ||
|
||
# Load the Butler configuration. This may involve searching the | ||
# environment to locate a configuration file. | ||
butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore) | ||
butler_type = butler_config.get_butler_type() | ||
|
||
# Make DirectButler if class is not specified. | ||
match butler_type: | ||
case ButlerType.DIRECT: | ||
from .direct_butler import DirectButler | ||
|
||
return DirectButler.create_from_config( | ||
butler_config, | ||
options=options, | ||
without_datastore=without_datastore, | ||
) | ||
case ButlerType.REMOTE: | ||
from .remote_butler import RemoteButlerFactory | ||
|
||
factory = RemoteButlerFactory.create_factory_from_config(butler_config) | ||
return factory.create_butler_with_credentials_from_environment(butler_options=options) | ||
case _: | ||
raise TypeError(f"Unknown Butler type '{butler_type}'") | ||
|
||
@staticmethod | ||
def makeRepo( | ||
root: ResourcePathExpression, | ||
|
@@ -1691,3 +1694,18 @@ | |
not defined. | ||
""" | ||
raise NotImplementedError() | ||
|
||
@abstractmethod | ||
def _clone( | ||
self, | ||
*, | ||
collections: Any = None, | ||
run: str | None = None, | ||
inferDefaults: bool = True, | ||
**kwargs: Any, | ||
) -> Butler: | ||
"""Return a new Butler instance connected to the same repository | ||
as this one, but overriding ``collections``, ``run``, | ||
``inferDefaults``, and default data ID. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When this method becomes public I think it will trigger the numpydoc warning. Might want to add the public docs here now rather than later. |
||
""" | ||
raise NotImplementedError() |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -35,6 +35,7 @@ | |||||
import copy | ||||||
import os | ||||||
from collections.abc import Sequence | ||||||
from enum import Enum | ||||||
|
||||||
from lsst.resources import ResourcePath, ResourcePathExpression | ||||||
|
||||||
|
@@ -47,6 +48,8 @@ | |||||
|
||||||
CONFIG_COMPONENT_CLASSES = (RegistryConfig, StorageClassConfig, DatastoreConfig, RepoTransferFormatConfig) | ||||||
|
||||||
ButlerType = Enum("ButlerType", ["DIRECT", "REMOTE"]) | ||||||
|
||||||
|
||||||
class ButlerConfig(Config): | ||||||
"""Contains the configuration for a `Butler`. | ||||||
|
@@ -191,3 +194,20 @@ | |||||
# Not needed if there is never information in a butler config file | ||||||
# not present in component configurations | ||||||
self.update(butlerConfig) | ||||||
|
||||||
def get_butler_type(self) -> ButlerType: | ||||||
# Configuration optionally includes a class name specifying which | ||||||
# implementation to use, DirectButler or RemoteButler. | ||||||
butler_class_name = self.get("cls") | ||||||
if butler_class_name is None: | ||||||
# There are many existing DirectButler configurations that are | ||||||
# missing the ``cls`` property. | ||||||
return ButlerType.DIRECT | ||||||
elif butler_class_name == "lsst.daf.butler.direct_butler.DirectButler": | ||||||
return ButlerType.DIRECT | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you please add a |
||||||
elif butler_class_name == "lsst.daf.butler.remote_butler.RemoteButler": | ||||||
return ButlerType.REMOTE | ||||||
else: | ||||||
raise ValueError( | ||||||
f"Butler configuration requests to load unknown Butler class {butler_class_name}" | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
so the class name is quoted. |
||||||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# This file is part of daf_butler. | ||
# | ||
# Developed for the LSST Data Management System. | ||
# This product includes software developed by the LSST Project | ||
# (http://www.lsst.org). | ||
# See the COPYRIGHT file at the top-level directory of this distribution | ||
# for details of code ownership. | ||
# | ||
# This software is dual licensed under the GNU General Public License and also | ||
# under a 3-clause BSD license. Recipients may choose which of these licenses | ||
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, | ||
# respectively. If you choose the GPL option then the following text applies | ||
# (but note that there is still no warranty even if you opt for BSD instead): | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
__all__ = ("ButlerInstanceOptions",) | ||
|
||
import dataclasses | ||
from typing import Any | ||
|
||
|
||
@dataclasses.dataclass(frozen=True) | ||
class ButlerInstanceOptions: | ||
"""The parameters passed to `Butler.from_config` or the Butler convenience | ||
constructor. These configure defaults and other settings for a Butler | ||
instance. These settings are common to all Butler subclasses. See `Butler` | ||
for the documentation of these properties. | ||
""" | ||
|
||
collections: Any = None | ||
run: str | None = None | ||
writeable: bool | None = None | ||
inferDefaults: bool = True | ||
kwargs: dict[str, Any] = dataclasses.field(default_factory=dict) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a `raise` here for clarity if there is no match to REMOTE or DIRECT? I know that there are currently only two values in the enum but this ties it up nicely. Consider using `match` here since that is designed for enums.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because mypy's exhaustiveness checking will ensure that all the cases are handled, there's some value in not having the raise. Without the raise, when someone adds an enum value, mypy will tell them all the places that need to do something with it.
I don't feel that strongly about it one way or the other though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`match` naturally comes with a fall-through clause that can raise. I feel better if the code looks like it's dealing with all the options without relying on mypy.