Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-39563: Make clearer error message when repo alias has failed to find something #849

Merged
merged 11 commits into from
Jun 8, 2023
1 change: 1 addition & 0 deletions doc/changes/DM-39563.misc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Moved Butler repository aliasing resolution into `~lsst.daf.butler.ButlerConfig` so that it is available everywhere without having to do the resolving each time.
14 changes: 1 addition & 13 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,19 +231,7 @@ def __init__(
self.storageClasses = butler.storageClasses
self._config: ButlerConfig = butler._config
else:
# Can only look for strings in the known repos list.
if isinstance(config, str):
# Somehow ButlerConfig fails in some cases if config is a
# ResourcePath, force it back to string here.
config = str(self.get_repo_uri(config, True))
try:
self._config = ButlerConfig(config, searchPaths=searchPaths)
except FileNotFoundError as e:
if known := self.get_known_repos():
aliases = f"(known aliases: {', '.join(known)})"
else:
aliases = "(no known aliases)"
raise FileNotFoundError(f"{e} {aliases}") from e
self._config = ButlerConfig(config, searchPaths=searchPaths)
try:
if "root" in self._config:
butlerRoot = self._config["root"]
Expand Down
40 changes: 39 additions & 1 deletion python/lsst/daf/butler/_butlerConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

from lsst.resources import ResourcePath, ResourcePathExpression

from ._butlerRepoIndex import ButlerRepoIndex
from .core import Config, DatastoreConfig, StorageClassConfig
from .registry import RegistryConfig
from .transfers import RepoTransferFormatConfig
Expand Down Expand Up @@ -78,6 +79,21 @@
self.configDir = copy.copy(other.configDir)
return

# If a string is given it *could* be an alias that should be
# expanded by the repository index system.
original_other = other
resolved_alias = False
if isinstance(other, str):
try:
# Force back to a string because the resolved URI
# might not refer explicitly to a directory and we have
# check below to guess that.
other = str(ButlerRepoIndex.get_repo_uri(other, True))
except Exception:
pass

Check warning on line 93 in python/lsst/daf/butler/_butlerConfig.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/_butlerConfig.py#L92-L93

Added lines #L92 - L93 were not covered by tests
if other != original_other:
resolved_alias = True

# Include ResourcePath here in case it refers to a directory.
# Creating a ResourcePath from a ResourcePath is a no-op.
if isinstance(other, (str, os.PathLike, ResourcePath)):
Expand Down Expand Up @@ -106,7 +122,29 @@

# Read the supplied config so that we can work out which other
# defaults to use.
butlerConfig = Config(other)
try:
butlerConfig = Config(other)
except FileNotFoundError as e:
# No reason to talk about aliases unless we were given a
# string and the alias was not resolved.
if isinstance(original_other, str):
if not resolved_alias:
# No alias was resolved. List known aliases if we have
# them or else explain a reason why aliasing might not
# have happened.
if known := ButlerRepoIndex.get_known_repos():
aliases = f"(given {original_other!r} and known aliases: {', '.join(known)})"
else:
failure_reason = ButlerRepoIndex.get_failure_reason()
if failure_reason:
failure_reason = f": {failure_reason}"
aliases = f"(given {original_other!r} and no known aliases{failure_reason})"
else:
aliases = f"(resolved from alias {original_other!r})"
errmsg = f"{e} {aliases}"
else:
errmsg = str(e)
raise FileNotFoundError(errmsg) from e

configFile = butlerConfig.configFile
if configFile is not None:
Expand Down
69 changes: 53 additions & 16 deletions python/lsst/daf/butler/_butlerRepoIndex.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import os
from typing import ClassVar, Dict, Set

from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.resources import ResourcePath

from .core import Config

Expand Down Expand Up @@ -55,13 +55,17 @@ class ButlerRepoIndex:
"""Cache of indexes. In most scenarios only one index will be found
and the environment will not change. In tests this may not be true."""

_most_recent_failure: ClassVar[str] = ""
"""Cache of the most recent failure when reading an index. Reset on
every read."""

@classmethod
def _read_repository_index(cls, index_uri: ResourcePathExpression) -> Config:
def _read_repository_index(cls, index_uri: ResourcePath) -> Config:
"""Read the repository index from the supplied URI.

Parameters
----------
index_uri : `lsst.resources.ResourcePathExpression`
index_uri : `lsst.resources.ResourcePath`
URI of the repository index.

Returns
Expand All @@ -78,15 +82,19 @@ def _read_repository_index(cls, index_uri: ResourcePathExpression) -> Config:
-----
Does check the cache before reading the file.
"""
# Force the given value to a ResourcePath so that it can be used
# as an index into the cache consistently.
uri = ResourcePath(index_uri)

if index_uri in cls._cache:
return cls._cache[uri]
return cls._cache[index_uri]

repo_index = Config(uri)
cls._cache[uri] = repo_index
try:
repo_index = Config(index_uri)
except FileNotFoundError as e:
# More explicit error message.
raise FileNotFoundError(f"Butler repository index file not found at {index_uri}.") from e
except Exception as e:
raise RuntimeError(
f"Butler repository index file at {index_uri} could not be read: {type(e).__qualname__} {e}"
) from e
cls._cache[index_uri] = repo_index

return repo_index

Expand Down Expand Up @@ -118,8 +126,18 @@ def _read_repository_index_from_environment(cls) -> Config:
repo_index : `Config`
The index found in the environment.
"""
index_uri = cls._get_index_uri()
return cls._read_repository_index(index_uri)
cls._most_recent_failure = ""
try:
index_uri = cls._get_index_uri()
except KeyError as e:
cls._most_recent_failure = str(e)
raise
try:
repo_index = cls._read_repository_index(index_uri)
except Exception as e:
cls._most_recent_failure = str(e)
raise
return repo_index

@classmethod
def get_known_repos(cls) -> Set[str]:
Expand All @@ -132,10 +150,29 @@ def get_known_repos(cls) -> Set[str]:
"""
try:
repo_index = cls._read_repository_index_from_environment()
except (FileNotFoundError, KeyError):
except Exception:
return set()
return set(repo_index)

@classmethod
def get_failure_reason(cls) -> str:
"""Return possible reason for failure to return repository index.

Returns
-------
reason : `str`
If there is a problem reading the repository index, this will
contain a string with an explanation. Empty string if everything
worked.

Notes
-----
The value returned is only reliable if called immediately after a
failure. The most recent failure reason is reset every time an attempt
is made to request a label and so the reason can be out of date.
"""
return cls._most_recent_failure

@classmethod
def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
"""Look up the label in a butler repository index.
Expand Down Expand Up @@ -168,15 +205,15 @@ def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath:
"""
try:
repo_index = cls._read_repository_index_from_environment()
except KeyError:
except Exception:
if return_label:
return ResourcePath(label)
return ResourcePath(label, forceAbsolute=False)
raise

repo_uri = repo_index.get(label)
if repo_uri is None:
if return_label:
return ResourcePath(label)
return ResourcePath(label, forceAbsolute=False)
# This should not raise since it worked earlier.
try:
index_uri = str(cls._get_index_uri())
Expand Down
3 changes: 0 additions & 3 deletions python/lsst/daf/butler/_quantum_backed.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
from pydantic import BaseModel

from ._butlerConfig import ButlerConfig
from ._butlerRepoIndex import ButlerRepoIndex
from ._deferredDatasetHandle import DeferredDatasetHandle
from ._limited_butler import LimitedButler
from .core import (
Expand Down Expand Up @@ -332,8 +331,6 @@ def _initialize(
dataset_types: `Mapping` [`str`, `DatasetType`]
Mapping of the dataset type name to its registry definition.
"""
if isinstance(config, str):
config = ButlerRepoIndex.get_repo_uri(config, True)
butler_config = ButlerConfig(config, searchPaths=search_paths)
if "root" in butler_config:
butler_root = butler_config["root"]
Expand Down
4 changes: 1 addition & 3 deletions python/lsst/daf/butler/script/configDump.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from typing import IO

from .._butlerConfig import ButlerConfig
from .._butlerRepoIndex import ButlerRepoIndex


def configDump(repo: str, subset: str, searchpath: str, outfile: IO) -> None:
Expand All @@ -50,8 +49,7 @@ def configDump(repo: str, subset: str, searchpath: str, outfile: IO) -> None:
AttributeError
If there is an issue dumping the configuration.
"""
repo_path = ButlerRepoIndex.get_repo_uri(repo, True)
config = ButlerConfig(repo_path, searchPaths=searchpath)
config = ButlerConfig(repo, searchPaths=searchpath)
if subset is not None:
try:
config = config[subset]
Expand Down
56 changes: 52 additions & 4 deletions tests/test_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def mock_s3(cls):
from lsst.daf.butler import (
Butler,
ButlerConfig,
ButlerRepoIndex,
CollectionType,
Config,
DataCoordinate,
Expand Down Expand Up @@ -102,6 +103,18 @@ def mock_s3(cls):
TESTDIR = os.path.abspath(os.path.dirname(__file__))


def clean_environment() -> None:
"""Remove external environment variables that affect the tests."""
for k in (
"DAF_BUTLER_REPOSITORY_INDEX",
"S3_ENDPOINT_URL",
"AWS_ACCESS_KEY_ID",
"AWS_SECRET_ACCESS_KEY",
"AWS_SHARED_CREDENTIALS_FILE",
):
os.environ.pop(k, None)
Comment on lines +106 to +115
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to make this more reusable and move to python/lsst/daf/butler/tests/utils.py?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm half inclined to remove the S3 code from here completely but we need one remote datastore for testing and we don't use the webdav one by default. There is lsst.resources.s3utils that tries to have something but it's not quite what we need. I probably should be trying to make that work for the S3 use case at USDF (where pytest doesn't work without this change). The DAF_BUTLER_REPOSITORY_INDEX clean up is really only needed for this one test of testConstructor and a general utility for the S3 tests in butler is only relevant for this one file.



def makeExampleMetrics():
return MetricsExample(
{"AM1": 5.2, "AM2": 30.6},
Expand Down Expand Up @@ -571,7 +584,7 @@ def testConstructor(self) -> None:
for suffix in (".yaml", ".json"):
# Ensure that the content differs so that we know that
# we aren't reusing the cache.
bad_label = f"s3://bucket/not_real{suffix}"
bad_label = f"file://bucket/not_real{suffix}"
butler_index["bad_label"] = bad_label
with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
butler_index.dumpToUri(temp_file)
Expand All @@ -586,20 +599,50 @@ def testConstructor(self) -> None:
self.assertIsInstance(butler, Butler)
with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"):
Butler("not_there", writeable=False)
with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"):
Butler("bad_label")
with self.assertRaises(FileNotFoundError):
# Should ignore aliases.
Butler(ResourcePath("label", forceAbsolute=False))
with self.assertRaises(KeyError) as cm:
Butler.get_repo_uri("missing")
self.assertEqual(Butler.get_repo_uri("missing", True), ResourcePath("missing"))
self.assertEqual(
Butler.get_repo_uri("missing", True), ResourcePath("missing", forceAbsolute=False)
)
self.assertIn("not known to", str(cm.exception))
# Should report no failure.
self.assertEqual(ButlerRepoIndex.get_failure_reason(), "")
with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
# Now with empty configuration.
butler_index = Config()
butler_index.dumpToUri(temp_file)
with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"):
Butler("label")
with ResourcePath.temporary_uri(suffix=suffix) as temp_file:
# Now with bad contents.
with open(temp_file.ospath, "w") as fh:
print("'", file=fh)
with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}):
with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"):
Butler("label")
with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}):
with self.assertRaises(FileNotFoundError):
Butler.get_repo_uri("label")
self.assertEqual(Butler.get_known_repos(), set())

with self.assertRaisesRegex(FileNotFoundError, "index file not found"):
Butler("label")

# Check that we can create Butler when the alias file is not found.
butler = Butler(self.tmpConfigFile, writeable=False)
self.assertIsInstance(butler, Butler)
with self.assertRaises(KeyError) as cm:
# No environment variable set.
Butler.get_repo_uri("label")
self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label"))
self.assertEqual(Butler.get_repo_uri("label", True), ResourcePath("label", forceAbsolute=False))
self.assertIn("No repository index defined", str(cm.exception))
with self.assertRaisesRegex(FileNotFoundError, "no known aliases"):
with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"):
# No aliases registered.
Butler("not_there")
self.assertEqual(Butler.get_known_repos(), set())
Expand Down Expand Up @@ -2271,5 +2314,10 @@ class ChainedDatastoreTransfers(PosixDatastoreTransfers):
configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")


def setup_module(module) -> None:
clean_environment()


if __name__ == "__main__":
clean_environment()
unittest.main()