Skip to content

Commit

Permalink
Merge pull request #838 from lsst/tickets/DM-39198
Browse files Browse the repository at this point in the history
DM-39198: fix storage class handling in datastore, as used by QuantumBackedButler
  • Loading branch information
TallJimbo committed May 17, 2023
2 parents 5b85eaa + 412971c commit 98bc225
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-39198.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix bugs in storage class conversion in `FileDatastore`, as used by `QuantumBackedButler`.
22 changes: 16 additions & 6 deletions python/lsst/daf/butler/datastores/fileDatastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -1108,7 +1108,9 @@ def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) ->
Information describing the artifact written to the datastore.
"""
# May need to coerce the in memory dataset to the correct
- # python type.
+ # python type, but first we need to make sure the storage class
+ # reflects the one defined in the data repository.
+ ref = self._cast_storage_class(ref)
inMemoryDataset = ref.datasetType.storageClass.coerce_type(inMemoryDataset)

location, formatter = self._prepare_for_put(inMemoryDataset, ref)
Expand Down Expand Up @@ -1561,15 +1563,20 @@ def _mexists(
existence : `dict` of [`DatasetRef`, `bool`]
Mapping from dataset to boolean indicating existence.
"""
- # Need a mapping of dataset_id to dataset ref since the API
- # works with dataset_id
- id_to_ref = {ref.getCheckedId(): ref for ref in refs}
+ # Make a mapping from refs with the internal storage class to the given
+ # refs that may have a different one. We'll use the internal refs
+ # throughout this method and convert back at the very end.
+ internal_ref_to_input_ref = {self._cast_storage_class(ref): ref for ref in refs}
+
+ # Need a mapping of dataset_id to (internal) dataset ref since some
+ # internal APIs work with dataset_id.
+ id_to_ref = {ref.getCheckedId(): ref for ref in internal_ref_to_input_ref}

# Set of all IDs we are checking for.
requested_ids = set(id_to_ref.keys())

# The records themselves. Could be missing some entries.
- records = self._get_stored_records_associated_with_refs(refs)
+ records = self._get_stored_records_associated_with_refs(id_to_ref.values())

dataset_existence = self._process_mexists_records(
id_to_ref, records, True, artifact_existence=artifact_existence
Expand All @@ -1586,7 +1593,10 @@ def _mexists(
)
)

- return dataset_existence
+ return {
+ internal_ref_to_input_ref[internal_ref]: existence
+ for internal_ref, existence in dataset_existence.items()
+ }

def _mexists_check_expected(
self, refs: Sequence[DatasetRef], artifact_existence: Optional[Dict[ResourcePath, bool]] = None
Expand Down

0 comments on commit 98bc225

Please sign in to comment.