Skip to content

Commit

Permalink
Merge pull request #930 from lsst/tickets/DM-42188
Browse files Browse the repository at this point in the history
DM-42188: Make RemoteButler usable from services
  • Loading branch information
dhirving committed Jan 3, 2024
2 parents ae0d0ea + 2d24a30 commit e6ab75b
Show file tree
Hide file tree
Showing 23 changed files with 1,116 additions and 317 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-42188.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added ``LabeledButlerFactory``, a factory class for constructing Butler instances. This is intended for use in long-lived services that need to be able to create a Butler instance for each incoming client request.
1 change: 1 addition & 0 deletions python/lsst/daf/butler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
from ._file_dataset import *
from ._file_descriptor import *
from ._formatter import *
from ._labeled_butler_factory import *

# Do not import 'instrument' or 'json' at all by default.
from ._limited_butler import *
Expand Down
136 changes: 77 additions & 59 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
from lsst.utils import doImportType
from lsst.utils.logging import getLogger

from ._butler_config import ButlerConfig
from ._butler_config import ButlerConfig, ButlerType
from ._butler_instance_options import ButlerInstanceOptions
from ._butler_repo_index import ButlerRepoIndex
from ._config import Config, ConfigSubset
from ._limited_butler import LimitedButler
Expand Down Expand Up @@ -105,6 +106,9 @@ class Butler(LimitedButler): # numpydoc ignore=PR02
the default for that dimension. Nonexistent collections are ignored.
If a default value is provided explicitly for a governor dimension via
``**kwargs``, no default will be inferred for that dimension.
without_datastore : `bool`, optional
If `True` do not attach a datastore to this butler. Any attempts
to use a datastore will fail.
**kwargs : `Any`
Additional keyword arguments passed to a constructor of actual butler
class.
Expand All @@ -125,60 +129,25 @@ def __new__(
searchPaths: Sequence[ResourcePathExpression] | None = None,
writeable: bool | None = None,
inferDefaults: bool = True,
without_datastore: bool = False,
**kwargs: Any,
) -> Butler:
if cls is Butler:
cls = cls._find_butler_class(config, searchPaths)
return Butler.from_config(
config=config,
collections=collections,
run=run,
searchPaths=searchPaths,
writeable=writeable,
inferDefaults=inferDefaults,
without_datastore=without_datastore,
**kwargs,
)

# Note: we do not pass any parameters to __new__, Python will pass them
# to __init__ after __new__ returns sub-class instance.
return super().__new__(cls)

@staticmethod
def _find_butler_class(
config: Config | ResourcePathExpression | None = None,
searchPaths: Sequence[ResourcePathExpression] | None = None,
) -> type[Butler]:
"""Find actual class to instantiate.
Parameters
----------
config : `ButlerConfig`, `Config` or `str`, optional
Configuration. Anything acceptable to the `ButlerConfig`
constructor. If a directory path is given the configuration will be
read from a ``butler.yaml`` file in that location. If `None` is
given default values will be used. If ``config`` contains "cls"
key then its value is used as a name of butler class and it must be
a sub-class of this class, otherwise `DirectButler` is
instantiated.
searchPaths : `list` of `str`, optional
Directory paths to search when calculating the full Butler
configuration. Not used if the supplied config is already a
`ButlerConfig`.
Returns
-------
butler_class : `type`
The type of `Butler` to instantiate.
"""
butler_class_name: str | None = None
if config is not None:
# Check for optional "cls" key in config.
if not isinstance(config, Config):
config = ButlerConfig(config, searchPaths=searchPaths)
butler_class_name = config.get("cls")

# Make DirectButler if class is not specified.
butler_class: type[Butler]
if butler_class_name is None:
from .direct_butler import DirectButler

butler_class = DirectButler
else:
butler_class = doImportType(butler_class_name)
if not issubclass(butler_class, Butler):
raise TypeError(f"{butler_class_name} is not a subclass of Butler")
return butler_class

@classmethod
def from_config(
cls,
Expand All @@ -189,6 +158,7 @@ def from_config(
searchPaths: Sequence[ResourcePathExpression] | None = None,
writeable: bool | None = None,
inferDefaults: bool = True,
without_datastore: bool = False,
**kwargs: Any,
) -> Butler:
"""Create butler instance from configuration.
Expand Down Expand Up @@ -233,9 +203,12 @@ def from_config(
are ignored. If a default value is provided explicitly for a
governor dimension via ``**kwargs``, no default will be inferred
for that dimension.
without_datastore : `bool`, optional
If `True` do not attach a datastore to this butler. Any attempts
to use a datastore will fail.
**kwargs : `Any`
Additional keyword arguments passed to a constructor of actual
butler class.
Default data ID key-value pairs. These may only identify
"governor" dimensions like ``instrument`` and ``skymap``.
Returns
-------
Expand Down Expand Up @@ -297,17 +270,47 @@ def from_config(
arguments provided, but it defaults to `False` when there are not
collection arguments.
"""
cls = cls._find_butler_class(config, searchPaths)
return cls(
config,
collections=collections,
run=run,
searchPaths=searchPaths,
writeable=writeable,
inferDefaults=inferDefaults,
**kwargs,
# DirectButler used to have a way to specify a "copy constructor" by
# passing the "butler" parameter to its constructor. This
# functionality has been moved out of the constructor into
# Butler._clone(), but the new interface is not public yet.
butler = kwargs.pop("butler", None)
if butler is not None:
if not isinstance(butler, Butler):
raise TypeError("'butler' parameter must be a Butler instance")
if config is not None or searchPaths is not None or writeable is not None:
raise TypeError(
"Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
)
return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs)

options = ButlerInstanceOptions(
collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs
)

# Load the Butler configuration. This may involve searching the
# environment to locate a configuration file.
butler_config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore)
butler_type = butler_config.get_butler_type()

# Make DirectButler if class is not specified.
match butler_type:
case ButlerType.DIRECT:
from .direct_butler import DirectButler

return DirectButler.create_from_config(
butler_config,
options=options,
without_datastore=without_datastore,
)
case ButlerType.REMOTE:
from .remote_butler import RemoteButlerFactory

factory = RemoteButlerFactory.create_factory_from_config(butler_config)
return factory.create_butler_with_credentials_from_environment(butler_options=options)
case _:
raise TypeError(f"Unknown Butler type '{butler_type}'")

@staticmethod
def makeRepo(
root: ResourcePathExpression,
Expand Down Expand Up @@ -1691,3 +1694,18 @@ def _query_dimension_records(
not defined.
"""
raise NotImplementedError()

@abstractmethod
def _clone(
self,
*,
collections: Any = None,
run: str | None = None,
inferDefaults: bool = True,
**kwargs: Any,
) -> Butler:
"""Return a new Butler instance connected to the same repository
as this one, but overriding ``collections``, ``run``,
``inferDefaults``, and default data ID.
"""
raise NotImplementedError()
20 changes: 20 additions & 0 deletions python/lsst/daf/butler/_butler_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import copy
import os
from collections.abc import Sequence
from enum import Enum

from lsst.resources import ResourcePath, ResourcePathExpression

Expand All @@ -47,6 +48,8 @@

CONFIG_COMPONENT_CLASSES = (RegistryConfig, StorageClassConfig, DatastoreConfig, RepoTransferFormatConfig)

ButlerType = Enum("ButlerType", ["DIRECT", "REMOTE"])


class ButlerConfig(Config):
"""Contains the configuration for a `Butler`.
Expand Down Expand Up @@ -191,3 +194,20 @@ def __init__(
# Not needed if there is never information in a butler config file
# not present in component configurations
self.update(butlerConfig)

def get_butler_type(self) -> ButlerType:
# Configuration optionally includes a class name specifying which
# implementation to use, DirectButler or RemoteButler.
butler_class_name = self.get("cls")
if butler_class_name is None:
# There are many existing DirectButler configurations that are
# missing the ``cls`` property.
return ButlerType.DIRECT
elif butler_class_name == "lsst.daf.butler.direct_butler.DirectButler":
return ButlerType.DIRECT
elif butler_class_name == "lsst.daf.butler.remote_butler.RemoteButler":
return ButlerType.REMOTE
else:
raise ValueError(
f"Butler configuration requests to load unknown Butler class {butler_class_name}"
)
46 changes: 46 additions & 0 deletions python/lsst/daf/butler/_butler_instance_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("ButlerInstanceOptions",)

import dataclasses
from typing import Any


@dataclasses.dataclass(frozen=True)
class ButlerInstanceOptions:
"""The parameters passed to `Butler.from_config` or the Butler convenience
constructor. These configure defaults and other settings for a Butler
instance. These settings are common to all Butler subclasses. See `Butler`
for the documentation of these properties.
"""

collections: Any = None
run: str | None = None
writeable: bool | None = None
inferDefaults: bool = True
kwargs: dict[str, Any] = dataclasses.field(default_factory=dict)
10 changes: 6 additions & 4 deletions python/lsst/daf/butler/_butler_repo_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from lsst.resources import ResourcePath

from ._config import Config
from ._utilities.thread_safe_cache import ThreadSafeCache


class ButlerRepoIndex:
Expand All @@ -57,7 +58,7 @@ class ButlerRepoIndex:
index_env_var: ClassVar[str] = "DAF_BUTLER_REPOSITORY_INDEX"
"""The name of the environment variable to read to locate the index."""

_cache: ClassVar[dict[ResourcePath, Config]] = {}
_cache: ClassVar[ThreadSafeCache[ResourcePath, Config]] = ThreadSafeCache()
"""Cache of indexes. In most scenarios only one index will be found
and the environment will not change. In tests this may not be true."""

Expand Down Expand Up @@ -88,8 +89,9 @@ def _read_repository_index(cls, index_uri: ResourcePath) -> Config:
-----
Does check the cache before reading the file.
"""
if index_uri in cls._cache:
return cls._cache[index_uri]
config = cls._cache.get(index_uri)
if config is not None:
return config

try:
repo_index = Config(index_uri)
Expand All @@ -100,7 +102,7 @@ def _read_repository_index(cls, index_uri: ResourcePath) -> Config:
raise RuntimeError(
f"Butler repository index file at {index_uri} could not be read: {type(e).__qualname__} {e}"
) from e
cls._cache[index_uri] = repo_index
repo_index = cls._cache.set_or_get(index_uri, repo_index)

return repo_index

Expand Down

0 comments on commit e6ab75b

Please sign in to comment.