Few fixes from code review
andy-slac committed May 25, 2023
1 parent 408ec68 commit d3031a7
Showing 3 changed files with 44 additions and 49 deletions.
33 changes: 15 additions & 18 deletions python/lsst/daf/butler/_butler.py
@@ -1050,7 +1050,7 @@ def _findDatasetRef(
         ------
         LookupError
             Raised if no matching dataset exists in the `Registry` (and
-            ``predict is False``).
+            ``predict`` is `False`).
         ValueError
             Raised if a resolved `DatasetRef` was passed as an input, but it
             differs from the one found in the registry.
@@ -1170,14 +1170,14 @@ def put(
         if isinstance(datasetRefOrType, DatasetRef):
             # This is a direct put of predefined DatasetRef.
             log.debug("Butler put direct: %s", datasetRefOrType)
-            # _importDatasets ignores existing dataset ref and just returns an
-            # original ref.
+            # If the registry already has a dataset with the same dataset ID,
+            # dataset type, and data ID, then _importDatasets will do nothing
+            # and just return the original ref. We have to raise in this case;
+            # the datastore check below takes care of that.
             (imported_ref,) = self.registry._importDatasets(
                 [datasetRefOrType],
                 expand=True,
             )
+            if imported_ref.id != datasetRefOrType.id:
+                raise RuntimeError("This registry configuration does not support direct put of ref.")
             # Before trying to write to the datastore check that it does not
             # know this dataset. This is prone to races, of course.
             if self.datastore.knows(datasetRefOrType):
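For orientation, the direct-put path now behaves roughly like this. A minimal sketch, assuming a writeable butler and pre-built dataset_type and data_id objects (all names here are hypothetical, not part of the diff):

    import uuid
    # Hypothetical setup; DatasetRef signature as changed in this commit.
    ref = DatasetRef(dataset_type, data_id, run="my_run", id=uuid.uuid4())
    butler.put(my_object, ref)  # RuntimeError if the registry maps this
                                # ref to a different dataset ID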
@@ -1276,7 +1276,15 @@ def getDirectDeferred(
         -------
         obj : `DeferredDatasetHandle`
             A handle which can be used to retrieve a dataset at a later time.
+
+        Raises
+        ------
+        LookupError
+            Raised if no matching dataset exists in the `Registry`.
         """
+        # Check that the dataset actually exists.
+        if not self.datastore.exists(ref):
+            raise LookupError(f"Dataset reference {ref} does not exist.")
         return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
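With the new check, a missing dataset now fails when the handle is created rather than at first read. A rough sketch (butler and ref assumed to exist):

    handle = butler.getDirectDeferred(ref)  # LookupError if the datastore
                                            # does not know this ref
    obj = handle.get()                      # actual I/O happens here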

     def getDeferred(

@@ -1334,6 +1342,8 @@ def getDeferred(
         TypeError
             Raised if no collections were provided.
         """
+        if isinstance(datasetRefOrType, DatasetRef) and not self.datastore.exists(datasetRefOrType):
+            raise LookupError(f"Dataset reference {datasetRefOrType} does not exist.")
         ref = self._findDatasetRef(datasetRefOrType, dataId, collections=collections, **kwargs)
         return DeferredDatasetHandle(butler=self, ref=ref, parameters=parameters, storageClass=storageClass)
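The unresolved path is unchanged; only a pre-resolved DatasetRef gets the early existence check. A sketch with made-up names:

    # Unresolved: still goes through _findDatasetRef.
    handle = butler.getDeferred("metric", data_id, collections="my_run")
    # Resolved: now raises LookupError up front if the datastore lacks it.
    handle = butler.getDeferred(ref)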

@@ -1832,9 +1842,6 @@ def ingest(
                 # execution butler.
                 existingRefs: List[DatasetRef] = []

-                # Any newly-resolved refs.
-                resolvedRefs: list[DatasetRef] = []
-
                 for ref in dataset.refs:
                     assert ref.run is not None  # For mypy
                     group_key = (ref.datasetType, ref.run)

@@ -1861,16 +1868,6 @@
                     # Store expanded form in the original FileDataset.
                     dataset.refs = existingRefs
-                elif resolvedRefs:
-                    if len(dataset.refs) != len(resolvedRefs):
-                        raise ConflictingDefinitionError(
-                            f"For dataset {dataset.path} some DatasetRef were "
-                            "resolved and others were not. This is not supported."
-                        )
-                    dataset.refs = resolvedRefs
-
-                    # These datasets have to be registered.
-                    self.registry._importDatasets(resolvedRefs)
                 else:
                     groupedData[group_key].append(dataset)
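With the resolvedRefs branch removed, ingest appears to assume every FileDataset arrives with already-resolved refs. A hedged sketch (path and ref invented):

    from lsst.daf.butler import FileDataset
    # Each ref must already carry its dataset ID and run.
    butler.ingest(FileDataset(path="data/metric.yaml", refs=[ref]), transfer="auto")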
54 changes: 26 additions & 28 deletions python/lsst/daf/butler/core/datasets/ref.py
@@ -149,7 +149,6 @@ def makeDatasetId(
 class SerializedDatasetRef(BaseModel):
     """Simplified model of a `DatasetRef` suitable for serialization."""

-    # DO NOT change order in the Union, pydantic is sensitive to that!
     id: uuid.UUID
     datasetType: Optional[SerializedDatasetType] = None
     dataId: Optional[SerializedDataCoordinate] = None

@@ -187,10 +186,15 @@ def direct(
     ) -> SerializedDatasetRef:
         """Construct a `SerializedDatasetRef` directly without validators.

+        Notes
+        -----
         This differs from the pydantic "construct" method in that the arguments
         are explicitly what the model requires, and it will recurse through
         members, constructing them from their corresponding `direct` methods.

+        The ``id`` parameter is a string representation of the dataset ID; it
+        is converted to a UUID by this method.
+
         This method should only be called when the inputs are trusted.
         """
         node = SerializedDatasetRef.__new__(cls)
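Per the new note, direct() coerces a string id into a UUID. A trusted-input sketch; the UUID string and run name are invented, and it assumes the optional fields may be omitted:

    sref = SerializedDatasetRef.direct(
        id="12345678-1234-5678-1234-567812345678",  # string, converted to UUID
        run="my_run",
    )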
@@ -226,12 +230,12 @@ class DatasetRef:
         The `DatasetType` for this Dataset.
     dataId : `DataCoordinate`
         A mapping of dimensions that labels the Dataset within a Collection.
-    id : `DatasetId`, optional
-        The unique identifier assigned when the dataset is created. If ``id``
-        is not specified, a new unique ID will be created.
     run : `str`
         The name of the run this dataset was associated with when it was
         created.
+    id : `DatasetId`, optional
+        The unique identifier assigned when the dataset is created. If ``id``
+        is not specified, a new unique ID will be created.
     conform : `bool`, optional
         If `True` (default), call `DataCoordinate.standardize` to ensure that
         the data ID's dimensions are consistent with the dataset type's.
@@ -270,8 +274,8 @@ def __init__(
         self,
         datasetType: DatasetType,
         dataId: DataCoordinate,
-        *,
         run: str,
+        *,
         id: Optional[DatasetId] = None,
         conform: bool = True,
         id_generation_mode: DatasetIdGenEnum = DatasetIdGenEnum.UNIQUE,
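Moving run ahead of the keyword-only marker means it can now also be passed positionally. Sketch with assumed dataset_type and data_id objects:

    ref = DatasetRef(dataset_type, data_id, "my_run")      # run positional
    ref = DatasetRef(dataset_type, data_id, run="my_run")  # keyword still works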
@@ -308,12 +312,12 @@ def __repr__(self) -> str:
         # DataCoordinate's __repr__ - while adhering to the guidelines for
         # __repr__ - is much harder for users to read, while its __str__ just
         # produces a dict that can also be passed to DatasetRef's constructor.
-        return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, id={self.id}, run={self.run!r})"
+        return f"DatasetRef({self.datasetType!r}, {self.dataId!s}, run={self.run!r}, id={self.id})"

     def __str__(self) -> str:
         s = (
-            f"{self.datasetType.name}@{self.dataId!s}, sc={self.datasetType.storageClass_name}]"
-            f" (id={self.id})"
+            f"{self.datasetType.name}@{self.dataId!s} [sc={self.datasetType.storageClass_name}]"
+            f" (run={self.run} id={self.id})"
         )
         return s
@@ -349,30 +353,24 @@ def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
             The object converted to a dictionary.
         """
         if minimal:
-            # The only thing needed to uniquely define a DatasetRef
-            # is its id so that can be used directly if it is
-            # resolved and if it is not a component DatasetRef.
-            # Store is in a dict to allow us to easily add the planned
-            # origin information later without having to support
-            # an int and dict in simple form.
+            # The only thing needed to uniquely define a DatasetRef is its id,
+            # so that can be used directly if it is not a component DatasetRef.
+            # Store it in a dict to allow us to easily add the planned origin
+            # information later without having to support an int and dict in
+            # simple form.
             simple: Dict[str, Any] = {"id": self.id}
             if self.isComponent():
                 # We can still be a little minimalist with a component
                 # but we will also need to record the datasetType component
                 simple["component"] = self.datasetType.component()
             return SerializedDatasetRef(**simple)

-        # Convert to a dict form
-        as_dict: Dict[str, Any] = {
-            "datasetType": self.datasetType.to_simple(minimal=minimal),
-            "dataId": self.dataId.to_simple(),
-        }
-
-        # Only include the id entry if it is defined
-        as_dict["run"] = self.run
-        as_dict["id"] = self.id
-
-        return SerializedDatasetRef(**as_dict)
+        return SerializedDatasetRef(
+            datasetType=self.datasetType.to_simple(minimal=minimal),
+            dataId=self.dataId.to_simple(),
+            run=self.run,
+            id=self.id,
+        )
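A rough round-trip sketch of the two serialized forms (ref and universe assumed to exist; the minimal form needs a registry to rehydrate, so only the full form is shown round-tripping):

    minimal = ref.to_simple(minimal=True)   # just {"id": ...} for non-components
    full = ref.to_simple()                  # datasetType, dataId, run, id
    same_ref = DatasetRef.from_simple(full, universe=universe)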

     @classmethod
     def from_simple(

@@ -446,13 +444,13 @@ def from_simple(
         dataId = DataCoordinate.from_simple(simple.dataId, universe=universe)

         # Check that simple ref is resolved.
-        if simple.id is None or simple.run is None:
+        if simple.run is None:
             dstr = ""
             if simple.datasetType is None:
                 dstr = f" (datasetType={datasetType.name!r})"
             raise ValueError(
-                "Attempting to create an unresolved ref from simple form is deprecated. "
-                f"Encountered with {simple!r}{dstr}.",
+                "Run collection name is missing from serialized representation. "
+                f"Encountered with {simple!r}{dstr}."
             )

         return cls(datasetType, dataId, id=simple.id, run=simple.run)
6 changes: 3 additions & 3 deletions tests/test_butler.py
@@ -2070,7 +2070,7 @@ def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "St
             if index < 2:
                 source_refs.append(ref)
             if ref not in deleted:
-                new_metric = butler.get(ref, collections=run)
+                new_metric = butler.get(ref)
                 self.assertEqual(new_metric, metric)

         # Create some bad dataset types to ensure we check for inconsistent
@@ -2144,8 +2144,8 @@ def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "St
         # Now try to get the same refs from the new butler.
         for ref in source_refs:
             if ref not in deleted:
-                new_metric = self.target_butler.get(ref, collections=ref.run)
-                old_metric = self.source_butler.get(ref, collections=ref.run)
+                new_metric = self.target_butler.get(ref)
+                old_metric = self.source_butler.get(ref)
                 self.assertEqual(new_metric, old_metric)

         # Now prune run2 collection and create instead a CHAINED collection.
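These test edits reflect that a resolved ref carries its own run, so get() no longer needs a collections argument. Sketch:

    metric = butler.get(ref)  # run comes from the resolved ref itself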
