Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

DM-40297: Stop checking dataset existence when creating deferred dataset handle #873

Merged
merged 4 commits into from
Aug 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "lsst-daf-butler"
requires-python = ">=3.10.0"
description = "An abstraction layer for reading and writing astronomical data to datastores."
license = {text = "GPLv3+ License"}
readme = "README.md"
Expand Down
18 changes: 11 additions & 7 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1279,9 +1279,9 @@
LookupError
Raised if no matching dataset exists in the `Registry`.
"""
# Check that dataset actually exists.
if not self._datastore.exists(ref):
raise LookupError(f"Dataset reference {ref} does not exist.")
# Check that dataset is known to the datastore.
if not self._datastore.knows(ref):
raise LookupError(f"Dataset reference {ref} is not known to datastore.")

Check warning on line 1284 in python/lsst/daf/butler/_butler.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/_butler.py#L1284

Added line #L1284 was not covered by tests
return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

def getDeferred(
Expand Down Expand Up @@ -1332,16 +1332,20 @@
Raises
------
LookupError
Raised if no matching dataset exists in the `Registry`.
Raised if no matching dataset exists in the `Registry` or
datastore.
ValueError
Raised if a resolved `DatasetRef` was passed as an input, but it
differs from the one found in the registry.
TypeError
Raised if no collections were provided.
"""
if isinstance(datasetRefOrType, DatasetRef) and not self._datastore.exists(datasetRefOrType):
raise LookupError(f"Dataset reference {datasetRefOrType} does not exist.")
ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs)
if isinstance(datasetRefOrType, DatasetRef):
if not self._datastore.knows(datasetRefOrType):
raise LookupError(f"Dataset reference {datasetRefOrType} does not exist.")
ref = datasetRefOrType
else:
ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs)
return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)

def get(
Expand Down
10 changes: 6 additions & 4 deletions python/lsst/daf/butler/datastores/fileDatastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -1332,8 +1332,9 @@ def _read_artifact_into_memory(
formatter.name(),
)
try:
with formatter._updateLocation(newLocation):
with time_this(
with (
formatter._updateLocation(newLocation),
time_this(
log,
msg="Reading%s from location %s %s with formatter %s",
args=(
Expand All @@ -1342,8 +1343,9 @@ def _read_artifact_into_memory(
msg,
formatter.name(),
),
):
result = formatter.read(component=getInfo.component if isComponent else None)
),
):
result = formatter.read(component=getInfo.component if isComponent else None)
except Exception as e:
raise ValueError(
f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
Expand Down
4 changes: 4 additions & 0 deletions tests/test_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,10 @@ def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> But
# Create DatasetRef for put using default run.
refIn = DatasetRef(datasetType, dataId, id=uuid.UUID(int=1), run=butler.run)

# Check that getDeferred fails with standalone ref.
with self.assertRaises(LookupError):
butler.getDeferred(refIn)

# Put the dataset again, since the last thing we did was remove it
# and we want to use the default collection.
ref = butler.put(metric, refIn)
Expand Down