Skip to content

Commit

Permalink
Merge pull request #269 from lsst/tickets/DM-24288
Browse files Browse the repository at this point in the history
DM-24288: Do disassembly inside datastore
  • Loading branch information
timj committed Apr 30, 2020
2 parents c84fccd + ee8593b commit 0d85109
Show file tree
Hide file tree
Showing 20 changed files with 500 additions and 234 deletions.
1 change: 1 addition & 0 deletions config/datastores/posixDatastore.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ datastore:
# Gen2 has.
default: "{run:/}/{datasetType}.{component:?}/{tract:?}/{patch:?}/{label:?}/{abstract_filter:?}/{subfilter:?}/{physical_filter:?}/{visit:?}/{datasetType}_{component:?}_{tract:?}_{patch:?}_{label:?}_{abstract_filter:?}_{physical_filter:?}_{calibration_label:?}_{visit:?}_{exposure:?}_{detector:?}_{instrument:?}_{skymap:?}_{skypix:?}_{run}"
formatters: !include formatters.yaml
composites: !include ../composites.yaml
1 change: 1 addition & 0 deletions config/datastores/s3Datastore.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ datastore:
# Gen2 has.
default: "{collection:/}/{datasetType}.{component:?}/{tract:?}/{patch:?}/{label:?}/{abstract_filter:?}/{subfilter:?}/{physical_filter:?}/{visit:?}/{datasetType}_{component:?}_{tract:?}_{patch:?}_{label:?}_{abstract_filter:?}_{physical_filter:?}_{calibration_label:?}_{visit:?}_{exposure:?}_{detector:?}_{instrument:?}_{skymap:?}_{skypix:?}_{run}"
formatters: !include formatters.yaml
composites: !include ../composites.yaml
19 changes: 16 additions & 3 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,13 +644,17 @@ def put(self, obj: Any, datasetRefOrType: Union[DatasetRef, DatasetType, str],
raise TypeError(f"Cannot associate into collection '{tag}' of non-TAGGED type "
f"{collectionType.name}.")

isVirtualComposite = self._composites.shouldBeDisassembled(datasetType)
# Disable all disassembly at the registry level for now
isVirtualComposite = False

# Add Registry Dataset entry. If not a virtual composite, add
# and attach components at the same time.
dataId = self.registry.expandDataId(dataId, graph=datasetType.dimensions, **kwds)
ref, = self.registry.insertDatasets(datasetType, run=run, dataIds=[dataId],
producer=producer, recursive=not isVirtualComposite)
producer=producer,
# Never write components into
# registry
recursive=False)

# Check to see if this datasetType requires disassembly
if isVirtualComposite:
Expand Down Expand Up @@ -694,7 +698,9 @@ def getDirect(self, ref: DatasetRef, *, parameters: Optional[Dict[str, Any]] = N
# if the ref exists in the store we return it directly
if self.datastore.exists(ref):
return self.datastore.get(ref, parameters=parameters)
elif ref.isComposite():
elif ref.isComposite() and ref.components:
# The presence of components indicates that this dataset
# was disassembled at the registry level.
# Check that we haven't got any unknown parameters
ref.datasetType.storageClass.validateParameters(parameters)
# Reconstruct the composite
Expand Down Expand Up @@ -1047,6 +1053,13 @@ def pruneDatasets(self, refs: Iterable[DatasetRef], *,
if collectionType is not CollectionType.TAGGED:
raise TypeError(f"Cannot disassociate from collection '{tag}' "
f"of non-TAGGED type {collectionType.name}.")
# Pruning a component of a DatasetRef makes no sense, since registry
# doesn't always know about components and datastore might not store
# components in a separate file.
for ref in refs:
if ref.datasetType.component():
raise ValueError(f"Can not prune a component of a dataset (ref={ref})")

if recursive:
refs = list(DatasetRef.flatten(refs))
# We don't need an unreliable Datastore transaction for this, because
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/composites.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class CompositesMap:
"""

def __init__(self, config, *, universe):
if not isinstance(config, type(self)):
if not isinstance(config, CompositesConfig):
config = CompositesConfig(config)
assert isinstance(config, CompositesConfig)
self.config = config
Expand Down
10 changes: 8 additions & 2 deletions python/lsst/daf/butler/core/datasets/ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ class DatasetRef:
not be created in new code, but are still supported for backwards
compatibility. New code should only pass `False` if it can guarantee
that the dimensions are already consistent.
hasParentId : `bool`, optional
If `True`, this `DatasetRef` is a component that has the ``id``
of the composite parent. This is set if the registry does not
know about individual components but does know about the composite.
Raises
------
Expand All @@ -80,16 +84,18 @@ class DatasetRef:
``id`` is provided but ``run`` is not.
"""

__slots__ = ("id", "datasetType", "dataId", "run", "_hash", "_components")
__slots__ = ("id", "datasetType", "dataId", "run", "_hash", "_components", "hasParentId")

def __new__(cls, datasetType: DatasetType, dataId: DataCoordinate, *,
id: Optional[int] = None,
run: Optional[str] = None, hash: Optional[bytes] = None,
components: Optional[Mapping[str, DatasetRef]] = None, conform: bool = True) -> DatasetRef:
components: Optional[Mapping[str, DatasetRef]] = None, conform: bool = True,
hasParentId: bool = False) -> DatasetRef:
self = super().__new__(cls)
assert isinstance(datasetType, DatasetType)
self.id = id
self.datasetType = datasetType
self.hasParentId = hasParentId
if conform:
self.dataId = DataCoordinate.standardize(dataId, graph=datasetType.dimensions)
else:
Expand Down
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/core/datasets/type.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ def __repr__(self):
return "DatasetType({}, {}, {})".format(self.name, self.dimensions, self._storageClassName)

def __eq__(self, other):
if not isinstance(other, type(self)):
return False
if self._name != other._name:
return False
if self._dimensions != other._dimensions:
Expand Down
7 changes: 6 additions & 1 deletion python/lsst/daf/butler/core/storedFileInfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ class StoredDatastoreItemInfo:
class StoredFileInfo(StoredDatastoreItemInfo):
"""Datastore-private metadata associated with a file stored in a Datastore.
"""
__slots__ = {"formatter", "path", "storageClass", "checksum", "file_size"}
__slots__ = {"formatter", "path", "storageClass", "component",
"checksum", "file_size"}

formatter: str
"""Fully-qualified name of Formatter."""
Expand All @@ -53,6 +54,10 @@ class StoredFileInfo(StoredDatastoreItemInfo):
storageClass: StorageClass
"""StorageClass associated with Dataset."""

component: Optional[str]
"""Component associated with this file. Can be None if the file does
not refer to a component of a composite."""

checksum: Optional[str]
"""Checksum of the serialized dataset."""

Expand Down

0 comments on commit 0d85109

Please sign in to comment.