-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DM-13840: Prepare Butler for composite work to begin #21
Changes from all commits
fe11932
d788988
1436363
561338f
641b32a
d2b8b9c
943757a
ad240be
e69715e
2ef019c
7ea2f1c
682ffa7
70c9296
db65867
cee46b1
1bcb3d1
e0f1370
790e3c8
9c368df
5e9696c
f9bdfb4
3d5067f
4c2afd3
e3cebe3
3661840
b99012f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,104 +69,66 @@ def __init__(self, config): | |
if self.run is None: | ||
self.run = self.registry.makeRun(self.config['run']) | ||
|
||
def getDirect(self, ref, parameters=None): | ||
"""Load a `Dataset` or a slice thereof from a `DatasetRef`. | ||
|
||
Unlike `Butler.get`, this method allows `Datasets` outside the Butler's `Collection` to be read as | ||
long as the `DatasetRef` that identifies them can be obtained separately. | ||
def put(self, obj, datasetType, dataId, producer=None): | ||
"""Store and register a dataset. | ||
|
||
Parameters | ||
---------- | ||
ref : `DatasetRef` | ||
A pointer to the `Dataset` to load. | ||
parameters : `dict` | ||
`StorageClass`-specific parameters that can be used to obtain a slice of the `Dataset`. | ||
obj : `object` | ||
The dataset. | ||
datasetType : `DatasetType` instance or `str` | ||
The `DatasetType`. | ||
dataId : `dict` | ||
An identifier with `DataUnit` names and values. | ||
producer : `Quantum`, optional | ||
The producer. | ||
|
||
Returns | ||
------- | ||
inMemoryDataset : `InMemoryDataset` | ||
The requested `Dataset`. | ||
ref : `DatasetRef` | ||
A reference to the stored dataset. | ||
""" | ||
parent = self.datastore.get(ref.uri, ref.datasetType.storageClass, parameters) if ref.uri else None | ||
children = {name: self.datastore.get(childRef, parameters) | ||
for name, childRef in ref.components.items()} | ||
return ref.datasetType.storageClass.assemble(parent, children) | ||
datasetType = self.registry.getDatasetType(datasetType) | ||
ref = self.registry.addDataset(datasetType, dataId, run=self.run, producer=producer) | ||
# self.datastore.put(obj, ref) | ||
return ref | ||
|
||
def get(self, ref, parameters=None): | ||
"""Load a `Dataset` or a slice thereof from the Butler's `Collection`. | ||
def getDirect(self, ref): | ||
"""Retrieve a stored dataset. | ||
|
||
Unlike `Butler.get`, this method allows datasets outside the Butler's collection to be read as | ||
long as the `DatasetRef` that identifies them can be obtained separately. | ||
|
||
Parameters | ||
---------- | ||
ref : `DatasetRef` | ||
The `Dataset` to retrieve. | ||
parameters : `dict` | ||
A dictionary of `StorageClass`-specific parameters that can be | ||
used to obtain a slice of the `Dataset`. | ||
Reference to an already stored dataset. | ||
|
||
Returns | ||
------- | ||
dataset : `InMemoryDataset` | ||
The requested `Dataset`. | ||
obj : `object` | ||
The dataset. | ||
""" | ||
ref = self.registry.find(self.run.collection, ref) | ||
if ref: | ||
return self.getDirect(ref, parameters) | ||
else: | ||
return None # No Dataset found | ||
# Currently a direct pass-through to `Datastore.get` but this should | ||
# change for composites. | ||
return self.datastore.get(ref) | ||
|
||
def put(self, ref, inMemoryDataset, producer=None): | ||
"""Write a `Dataset`. | ||
def get(self, datasetType, dataId): | ||
"""Retrieve a stored dataset. | ||
|
||
Parameters | ||
---------- | ||
ref : `DatasetRef` | ||
The `Dataset` being stored. | ||
inMemoryDataset : `InMemoryDataset` | ||
The `Dataset` to store. | ||
producer : `Quantum` | ||
The producer of this `Dataset`. May be ``None`` for some | ||
`Registry` instances. | ||
``producer.run`` must match ``self.config['run']``. | ||
datasetType : `DatasetType` instance or `str` | ||
The `DatasetType`. | ||
dataId : `dict` | ||
A `dict` of `DataUnit` name, value pairs that label the `DatasetRef` | ||
within a Collection. | ||
|
||
Returns | ||
------- | ||
datasetRef : `DatasetRef` | ||
The registered (and stored) dataset. | ||
""" | ||
ref = self.registry.expand(ref) | ||
run = self.run | ||
assert(producer is None or run == producer.run) | ||
storageHint = ref.makeStorageHint(run) | ||
uri, components = self.datastore.put(inMemoryDataset, ref.datasetType.storageClass, | ||
storageHint, ref.datasetType.name) | ||
return self.registry.addDataset(ref, uri, components, producer=producer, run=run) | ||
|
||
def markInputUsed(self, quantum, ref): | ||
"""Mark a `Dataset` as having been "actually" (not just | ||
predicted-to-be) used by a `Quantum`. | ||
|
||
Parameters | ||
---------- | ||
quantum : `Quantum` | ||
The dependent `Quantum`. | ||
ref : `DatasetRef` | ||
The `Dataset` that is a true dependency of ``quantum``. | ||
""" | ||
ref = self.registry.find(self.run.collection, ref) | ||
self.registry.markInputUsed(ref, quantum) | ||
|
||
def unlink(self, *refs): | ||
"""Remove dataset from collection. | ||
|
||
Remove the `Dataset`\ s associated with the given `DatasetRef`\ s | ||
from the `Butler`\ 's collection, and signal that they may be deleted | ||
from storage if they are not referenced by any other collection. | ||
|
||
Parameters | ||
---------- | ||
refs : `list` of `DatasetRef` | ||
List of refs for `Dataset`\ s to unlink. | ||
obj : `object` | ||
The dataset. | ||
""" | ||
refs = [self.registry.find(self.run.collection, ref) for ref in refs] | ||
for ref in self.registry.disassociate(self.run.collection, refs, remove=True): | ||
self.datastore.remove(ref.uri) | ||
datasetType = self.registry.getDatasetType(datasetType) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It should probably be pass-through (or perhaps fill in details) for […] |
||
ref = self.registry.find(datasetType, dataId) | ||
return self.getDirect(ref) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -96,51 +96,50 @@ class DatasetRef(object): | |
---------- | ||
datasetType : `DatasetType` | ||
The `DatasetType` for this `Dataset`. | ||
units : `dict` | ||
dataId : `dict` | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This documentation for a […] |
||
Dictionary where the keys are `DataUnit` names and the values are | ||
`DataUnit` instances. | ||
`DataUnit` values. | ||
id : `int`, optional | ||
A unique identifier. | ||
Normally set to `None` and assigned by `Registry`. | |
""" | ||
|
||
__slots__ = ("_type", "_producer", "_predictedConsumers", "_actualConsumers") | ||
_currentId = -1 | ||
__slots__ = ("_id", "_datasetType", "_dataId", "_producer", | ||
"_predictedConsumers", "_actualConsumers", "_components", | ||
"_assembler") | ||
|
||
@classmethod | ||
def getNewId(cls): | ||
"""Generate a new Dataset ID number. | ||
|
||
.. todo:: | |
This is a temporary workaround that will probably disappear in | |
the future, when a solution is found to the problem of | ||
autoincrement compound primary keys in SQLite. | ||
""" | ||
cls._currentId += 1 | ||
return cls._currentId | ||
|
||
def __init__(self, datasetType, units): | ||
units = datasetType.units.conform(units) | ||
super().__init__( | ||
datasetType.name, | ||
**{unit.__class__.__name__: unit.value for unit in units} | ||
) | ||
def __init__(self, datasetType, dataId, id=None): | ||
assert isinstance(datasetType, DatasetType) | ||
self._id = id | ||
self._datasetType = datasetType | ||
self._units = units | ||
self._dataId = dataId | ||
self._producer = None | ||
self._predictedConsumers = dict() | ||
self._actualConsumers = dict() | ||
self._components = dict() | ||
self._assembler = None | ||
|
||
@property | ||
def id(self): | ||
"""Primary key of the dataset (`int`) | ||
|
||
Typically assigned by `Registry`. | ||
""" | ||
return self._id | ||
|
||
@property | ||
def datasetType(self): | ||
"""The `DatasetType` associated with the `Dataset` the `DatasetRef` | ||
points to. | ||
""" | ||
return self._type | ||
return self._datasetType | ||
|
||
@property | ||
def units(self): | ||
"""A `tuple` of `DataUnit` instances that label the `DatasetRef` | ||
def dataId(self): | ||
"""A `dict` of `DataUnit` name, value pairs that label the `DatasetRef` | ||
within a Collection. | ||
""" | ||
return self._units | ||
return self._dataId | ||
|
||
@property | ||
def producer(self): | ||
|
@@ -173,14 +172,20 @@ def actualConsumers(self): | |
""" | ||
return _safeMakeMappingProxyType(self._actualConsumers) | ||
|
||
def makeStorageHint(self, run, template=None): | ||
"""Construct a storage hint by filling in template with the Collection | ||
collection and the values in the units tuple. | ||
@property | ||
def components(self): | ||
"""Named `DatasetRef` components. | ||
|
||
Read-only; update via `Registry.attachComponent()`. | ||
""" | ||
return _safeMakeMappingProxyType(self._components) | ||
|
||
@property | ||
def assembler(self): | ||
"""Fully-qualified name of an importable Assembler object that can be | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remember that assemblers are classes with assemble and disassemble methods. You have to store the assembler class name, then to assemble you create an instance and run the […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not according to https://confluence.lsstcorp.org/display/DM/Gen3+Butler+Composites+Design There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When you asked me to combine free functions into classes the code got significantly cleaner. I'll be surprised if we gain by pulling everything apart again. I was really happy with the way assembler/disassembler turned out. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I must admit that I hadn't taken that confluence page as gospel. I thought it was guiding principles so I haven't gone in to edit it with my thoughts. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The code on the confluence page is absolutely intended as just pseudocode. You're both very much encouraged to actively rethink all of it (the code parts, that is; I hope the conceptual stuff will actually stick this time around). |
||
used to construct this Dataset from its components. | ||
|
||
Although a `Dataset` may belong to multiple Collections, only the one | ||
corresponding to its `Run` is used. | ||
`None` for datasets that are not virtual composites. | ||
Read-only; update via `Registry.setAssembler()`. | ||
""" | ||
if template is None: | ||
template = self.datasetType.template | ||
units = {unit.__class__.__name__: unit.value for unit in self.units} | ||
return template.format(DatasetType=self.datasetType.name, Run=run.collection, **units) | ||
return self._assembler |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you referring to the class here because the keys are names that convert to instances of `DataUnit` classes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No. They are names of the tables (e.g. `{"camera" : "HSC", "visit" : 3}`).