Remove Registry prototype for redesign
- Reset SqlRegistry to non-implemented and remove associated tests
- Remove DataUnit subclass implementations
- Rename DataUnitTypeSet -> DataUnitSet
- Remove mentions of DataUnitMap
- Merge DatasetLabel and DatasetHandle into DatasetRef
- Rename type -> datasetType where appropriate
- Don't assume anything about StorageClass in Registry
- General style cleanup
Pim Schellart authored and committed on Jan 29, 2018
1 parent 012c14e commit c318747
Showing 23 changed files with 206 additions and 1,756 deletions.
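The core of this change is visible in butler.py and datasets.py below: the old DatasetLabel/DatasetHandle pair is merged into a single DatasetRef that client code passes to Butler.get and Butler.put, with Registry.find/Registry.expand resolving that same ref in place. The following is a toy sketch of the resulting call pattern, using stand-in classes rather than the daf_butler implementation (SqlRegistry is reset to non-implemented in this very commit, so nothing can run against it end to end); the dataset type name and unit values are made up for illustration.

    # Toy stand-ins sketching the post-commit call pattern: one DatasetRef-like
    # object both identifies a Dataset and, once stored, locates it.
    # These classes are illustrative assumptions, not the daf_butler API.
    class ToyRef:
        def __init__(self, datasetType, units):
            self.datasetType = datasetType   # name of the DatasetType, e.g. "calexp"
            self.units = units               # DataUnit name -> value mapping
            self.uri = None                  # filled in once the dataset is stored

    class ToyButler:
        def __init__(self):
            self._store = {}

        def put(self, ref, inMemoryDataset):
            # put() now takes the ref itself; there is no separate DatasetHandle to return.
            ref.uri = "toy://{}/{}".format(ref.datasetType, sorted(ref.units.items()))
            self._store[ref.uri] = inMemoryDataset
            return ref

        def get(self, ref):
            # get() likewise resolves the same ref instead of a DatasetLabel.
            return self._store.get(ref.uri)

    butler = ToyButler()
    ref = ToyRef("calexp", {"Visit": 903334, "Sensor": 22})
    butler.put(ref, {"image": "pixels"})
    print(butler.get(ref))   # -> {'image': 'pixels'}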
70 changes: 33 additions & 37 deletions python/lsst/daf/butler/butler.py
@@ -1,7 +1,7 @@
#
# LSST Data Management System
#
# Copyright 2008-2017 AURA/LSST.
# Copyright 2008-2018 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
@@ -24,7 +24,6 @@
from .core.config import Config
from .core.datastore import Datastore
from .core.registry import Registry
from .core.datasets import DatasetLabel, DatasetHandle


class ButlerConfig(Config):
@@ -68,15 +67,15 @@ def __init__(self, config):
if self.run is None:
self.run = self.registry.makeRun(self.config['run'])

def getDirect(self, handle, parameters=None):
"""Load a `Dataset` or a slice thereof from a `DatasetHandle`.
def getDirect(self, ref, parameters=None):
"""Load a `Dataset` or a slice thereof from a `DatasetRef`.
Unlike `Butler.get`, this method allows `Datasets` outside the Butler's `Collection` to be read as
long as the `DatasetHandle` that identifies them can be obtained separately.
long as the `DatasetRef` that identifies them can be obtained separately.
Parameters
----------
handle : `DatasetHandle`
ref : `DatasetRef`
A pointer to the `Dataset` to load.
parameters : `dict`
`StorageClass`-specific parameters that can be used to obtain a slice of the `Dataset`.
@@ -86,19 +85,18 @@ def getDirect(self, handle, parameters=None):
inMemoryDataset : `InMemoryDataset`
The requested `Dataset`.
"""
assert isinstance(handle, DatasetHandle)
parent = self.datastore.get(handle.uri, handle.type.storageClass, parameters) if handle.uri else None
children = {name: self.datastore.get(childHandle, parameters)
for name, childHandle in handle.components.items()}
return handle.type.storageClass.assemble(parent, children)
parent = self.datastore.get(ref.uri, ref.datasetType.storageClass, parameters) if ref.uri else None
children = {name: self.datastore.get(childRef, parameters)
for name, childRef in ref.components.items()}
return ref.datasetType.storageClass.assemble(parent, children)

def get(self, label, parameters=None):
def get(self, ref, parameters=None):
"""Load a `Dataset` or a slice thereof from the Butler's `Collection`.
Parameters
----------
label : `DatasetLabel`
Identifies the `Dataset` to retrieve.
ref : `DatasetRef`
The `Dataset` to retrieve.
parameters : `dict`
A dictionary of `StorageClass`-specific parameters that can be
used to obtain a slice of the `Dataset`.
@@ -108,37 +106,36 @@ def get(self, label, parameters=None):
dataset : `InMemoryDataset`
The requested `Dataset`.
"""
assert isinstance(label, DatasetLabel)
handle = self.registry.find(self.run.collection, label)
if handle:
return self.getDirect(handle, parameters)
ref = self.registry.find(self.run.collection, ref)
if ref:
return self.getDirect(ref, parameters)
else:
return None # No Dataset found

def put(self, label, inMemoryDataset, producer=None):
def put(self, ref, inMemoryDataset, producer=None):
"""Write a `Dataset`.
Parameters
----------
label : `DatasetLabel`
Identifies the `Dataset` being stored.
ref : `DatasetRef`
The `Dataset` being stored.
inMemoryDataset : `InMemoryDataset`
The `Dataset` to store.
producer : `Quantum`
Identifies the producer of this `Dataset`. May be ``None`` for some `Registries`.
The producer of this `Dataset`. May be ``None`` for some `Registries`.
``producer.run`` must match ``self.config['run']``.
Returns
-------
datasetHandle : `DatasetHandle`
A handle that identifies the registered (and stored) dataset.
datasetRef : `DatasetRef`
The registered (and stored) dataset.
"""
ref = self.registry.expand(label)
ref = self.registry.expand(ref)
run = self.run
assert(producer is None or run == producer.run)
storageHint = ref.makeStorageHint(run)
uri, components = self.datastore.put(inMemoryDataset, ref.type.storageClass,
storageHint, ref.type.name)
uri, components = self.datastore.put(inMemoryDataset, ref.datasetType.storageClass,
storageHint, ref.datasetType.name)
return self.registry.addDataset(ref, uri, components, producer=producer, run=run)

def markInputUsed(self, quantum, ref):
@@ -151,19 +148,18 @@ def markInputUsed(self, quantum, ref):
ref : `DatasetRef`
The `Dataset` that is a true dependency of ``quantum``.
"""
handle = self.registry.find(self.run.collection, ref)
self.registry.markInputUsed(handle, quantum)
ref = self.registry.find(self.run.collection, ref)
self.registry.markInputUsed(ref, quantum)

def unlink(self, *labels):
"""Remove the `Dataset`s associated with the given `DatasetLabel`s from the Butler's `Collection`,
def unlink(self, *refs):
"""Remove the `Dataset`s associated with the given `DatasetRef`s from the Butler's `Collection`,
and signal that they may be deleted from storage if they are not referenced by any other `Collection`.
Parameters
----------
labels : [`DatasetLabel`]
List of labels for `Dataset`s to unlink.
refs : [`DatasetRef`]
List of refs for `Dataset`s to unlink.
"""
handles = [self.registry.find(self.run.collection, label)
for label in labels]
for handle in self.registry.disassociate(self.run.collection, handles, remove=True):
self.datastore.remove(handle.uri)
refs = [self.registry.find(self.run.collection, ref) for ref in refs]
for ref in self.registry.disassociate(self.run.collection, refs, remove=True):
self.datastore.remove(ref.uri)
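For composite datasets, getDirect above reads the parent (when the ref has a URI) and each named component, then delegates reassembly to the DatasetType's StorageClass. Below is a self-contained sketch of that assemble step with a toy storage class; ToyStorageClass and the component names are assumptions for illustration, not the daf_butler StorageClass API.

    # Toy illustration of the assemble(parent, children) pattern used by Butler.getDirect.
    class ToyStorageClass:
        @staticmethod
        def assemble(parent, children):
            # Combine the parent read with the per-component reads into one in-memory dataset.
            result = dict(parent or {})
            result.update(children)
            return result

    parent = {"image": "pixels"}                            # stand-in for datastore.get(ref.uri, ...)
    children = {"wcs": "wcs-object", "psf": "psf-object"}   # stand-ins for per-component reads
    inMemoryDataset = ToyStorageClass.assemble(parent, children)
    print(inMemoryDataset)   # -> {'image': 'pixels', 'wcs': 'wcs-object', 'psf': 'psf-object'}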
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/config.py
@@ -1,7 +1,7 @@
#
# LSST Data Management System
#
# Copyright 2008-2017 AURA/LSST.
# Copyright 2008-2018 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
124 changes: 16 additions & 108 deletions python/lsst/daf/butler/core/datasets.py
@@ -1,7 +1,7 @@
#
# LSST Data Management System
#
# Copyright 2008-2017 AURA/LSST.
# Copyright 2008-2018 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
@@ -23,8 +23,7 @@

from types import MappingProxyType
from .utils import slotValuesAreEqual, slotValuesToHash
from .storageClass import StorageClass
from .units import DataUnitTypeSet
from .units import DataUnitSet


def _safeMakeMappingProxyType(data):
@@ -33,7 +32,7 @@ def _safeMakeMappingProxyType(data):
return MappingProxyType(data)


class DatasetType:
class DatasetType(object):
"""A named category of Datasets that defines how they are organized,
related, and stored.
@@ -69,15 +68,14 @@ def template(self):

@property
def units(self):
"""A `DataUnitTypeSet` that defines the `DatasetRef`s corresponding
"""A `DataUnitSet` that defines the `DatasetRef`s corresponding
to this `DatasetType`.
"""
return self._units

@property
def storageClass(self):
"""A `StorageClass` subclass (not instance) that defines how this
`DatasetType` is persisted.
"""A `StorageClass` that defines how this `DatasetType` is persisted.
"""
return self._storageClass

@@ -86,45 +84,16 @@ def __init__(self, name, template, units, storageClass):
All arguments correspond directly to instance attributes.
"""
assert issubclass(storageClass, StorageClass)
self._name = name
self._template = template
self._units = DataUnitTypeSet(units)
self._units = DataUnitSet(units)
self._storageClass = storageClass


class DatasetLabel:
"""Opaque label that identifies a `Dataset` in a `Collection`.
"""

__slots__ = ("_name", "_units")
__eq__ = slotValuesAreEqual

def __init__(self, name, **units):
self._name = name
self._units = units

@property
def name(self):
"""Name of the `DatasetType` associated with the `Dataset`.
"""
return self._name

@property
def units(self):
"""Dictionary with name, value pairs for `DataUnit`s.
"""
return self._units


class DatasetRef(DatasetLabel):
class DatasetRef(object):
"""Reference to a `Dataset` in a `Registry`.
As opposed to a `DatasetLabel`, `DatasetRef` holds actual `DataUnit`
instances (instead of just their names and primary-key values).
They can typically only be constructed by calling `Registry.expand`.
In contrast to `DatasetLabel`s a `DatasetRef` may point to a `Dataset`s
that currently do not yet exist
A `DatasetRef` may point to a `Dataset` that does not yet exist
(e.g. because it is a predicted input for provenance).
"""

@@ -143,31 +112,31 @@ def getNewId(cls):
cls._currentId += 1
return cls._currentId

def __init__(self, type, units):
def __init__(self, datasetType, units):
"""Construct a DatasetRef from a DatasetType and a complete tuple
of DataUnits.
Parameters
----------
type: `DatasetType`
datasetType: `DatasetType`
The `DatasetType` for this `Dataset`.
units: `dict`
Dictionary where the keys are `DataUnit` names and the values are
`DataUnit` instances.
"""
units = type.units.conform(units)
units = datasetType.units.conform(units)
super().__init__(
type.name,
datasetType.name,
**{unit.__class__.__name__: unit.value for unit in units}
)
self._type = type
self._datasetType = datasetType
self._units = units
self._producer = None
self._predictedConsumers = dict()
self._actualConsumers = dict()

@property
def type(self):
def datasetType(self):
"""The `DatasetType` associated with the `Dataset` the `DatasetRef`
points to.
"""
Expand Down Expand Up @@ -218,67 +187,6 @@ def makeStorageHint(self, run, template=None):
corresponding to its `Run` is used.
"""
if template is None:
template = self.type.template
template = self.datasetType.template
units = {unit.__class__.__name__: unit.value for unit in self.units}
return template.format(DatasetType=self.type.name, Run=run.collection, **units)


class DatasetHandle(DatasetRef):
"""Handle to a stored `Dataset` in a `Registry`.
As opposed to a `DatasetLabel`, and like a `DatasetRef`, `DatasetHandle`
holds actual `DataUnit` instances
(instead of just their names and primary-key values).
In contrast to `DatasetRef`s a `DatasetHandle` only ever points to a
`Dataset` that has been stored in a `Datastore`.
"""

__slots__ = ("_datasetId", "_registryId", "_uri", "_components", "_run")

def __init__(self, datasetId, registryId, ref, uri, components, run):
"""Constructor.
Parameters correspond directly to attributes.
"""
super().__init__(ref.type, ref.units)
self._datasetId = datasetId
self._registryId = registryId
self._producer = ref.producer
self._predictedConsumers.update(ref.predictedConsumers)
self._actualConsumers.update(ref.actualConsumers)
self._uri = uri
self._components = _safeMakeMappingProxyType(components)
self._run = run

@property
def datasetId(self):
"""Primary-key identifier for this `Dataset`.
"""
return self._datasetId

@property
def registryId(self):
"""Id of the `Registry` that was used to create this `Dataset`.
"""
return self._registryId

@property
def uri(self):
"""The URI that holds the location of the `Dataset` in a `Datastore`.
"""
return self._uri

@property
def components(self):
"""A `dict` holding `DatasetHandle` instances that correspond to this
`Dataset`s named components.
Empty if the `Dataset` is not a composite.
"""
return self._components

@property
def run(self):
"""The `Run` the `Dataset` was created with.
"""
return self._run
return template.format(DatasetType=self.datasetType.name, Run=run.collection, **units)
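makeStorageHint above reduces to a plain str.format call: the DatasetType's template is filled with the dataset type name, the Run's collection, and one keyword per DataUnit. Below is a standalone illustration; the template string, run name, and unit values are made up for the example, not repository defaults.

    # Standalone illustration of the storage-hint templating in DatasetRef.makeStorageHint.
    template = "{DatasetType}/{Run}/{Visit}/{Sensor}"
    units = {"Visit": 903334, "Sensor": 22}    # DataUnit class name -> value, as built in makeStorageHint
    storageHint = template.format(DatasetType="calexp", Run="ci_run_1", **units)
    print(storageHint)   # -> calexp/ci_run_1/903334/22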
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/datastore.py
@@ -1,7 +1,7 @@
#
# LSST Data Management System
#
# Copyright 2008-2017 AURA/LSST.
# Copyright 2008-2018 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
5 changes: 1 addition & 4 deletions python/lsst/daf/butler/core/fileDescriptor.py
@@ -1,7 +1,7 @@
#
# LSST Data Management System
#
# Copyright 2008-2017 AURA/LSST.
# Copyright 2008-2018 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
@@ -21,8 +21,6 @@
# see <https://www.lsstcorp.org/LegalNotices/>.
#

from .location import Location


class FileDescriptor(object):
"""Describes a particular file.
@@ -53,7 +51,6 @@ def __init__(self, location, type=None, parameters=None):
parameters : `dict`
Additional parameters that can be used for reading and writing.
"""
assert isinstance(location, Location)
self.location = location
self.type = type
self.parameters = parameters
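With the Location import and the isinstance assertion removed, FileDescriptor accepts any location-like object. Below is a brief sketch, assuming the lsst.daf.butler package at this commit is importable; the string path and parameters dict are illustrative assumptions only.

    from lsst.daf.butler.core.fileDescriptor import FileDescriptor

    # Any location-like value is accepted now that the isinstance(location, Location)
    # assertion is gone; a plain string path is used here purely for illustration.
    fd = FileDescriptor("/tmp/example.fits", type=None, parameters={"extension": 1})
    print(fd.location, fd.type, fd.parameters)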
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/formatter.py
@@ -1,7 +1,7 @@
#
# LSST Data Management System
#
# Copyright 2008-2017 AURA/LSST.
# Copyright 2008-2018 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).