Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-13374: Deconstruct Butler prototype for redesign #10

Merged
merged 1 commit into from Jan 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
70 changes: 33 additions & 37 deletions python/lsst/daf/butler/butler.py
@@ -1,7 +1,7 @@
#
# LSST Data Management System
#
# Copyright 2008-2017 AURA/LSST.
# Copyright 2008-2018 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
Expand All @@ -24,7 +24,6 @@
from .core.config import Config
from .core.datastore import Datastore
from .core.registry import Registry
from .core.datasets import DatasetLabel, DatasetHandle


class ButlerConfig(Config):
Expand Down Expand Up @@ -68,15 +67,15 @@ def __init__(self, config):
if self.run is None:
self.run = self.registry.makeRun(self.config['run'])

def getDirect(self, handle, parameters=None):
"""Load a `Dataset` or a slice thereof from a `DatasetHandle`.
def getDirect(self, ref, parameters=None):
"""Load a `Dataset` or a slice thereof from a `DatasetRef`.

Unlike `Butler.get`, this method allows `Datasets` outside the Butler's `Collection` to be read as
long as the `DatasetHandle` that identifies them can be obtained separately.
long as the `DatasetRef` that identifies them can be obtained separately.

Parameters
----------
handle : `DatasetHandle`
ref : `DatasetRef`
A pointer to the `Dataset` to load.
parameters : `dict`
`StorageClass`-specific parameters that can be used to obtain a slice of the `Dataset`.
Expand All @@ -86,19 +85,18 @@ def getDirect(self, handle, parameters=None):
inMemoryDataset : `InMemoryDataset`
The requested `Dataset`.
"""
assert isinstance(handle, DatasetHandle)
parent = self.datastore.get(handle.uri, handle.type.storageClass, parameters) if handle.uri else None
children = {name: self.datastore.get(childHandle, parameters)
for name, childHandle in handle.components.items()}
return handle.type.storageClass.assemble(parent, children)
parent = self.datastore.get(ref.uri, ref.datasetType.storageClass, parameters) if ref.uri else None
children = {name: self.datastore.get(childRef, parameters)
for name, childRef in ref.components.items()}
return ref.datasetType.storageClass.assemble(parent, children)

def get(self, label, parameters=None):
def get(self, ref, parameters=None):
"""Load a `Dataset` or a slice thereof from the Butler's `Collection`.

Parameters
----------
label : `DatasetLabel`
Identifies the `Dataset` to retrieve.
ref : `DatasetRef`
The `Dataset` to retrieve.
parameters : `dict`
A dictionary of `StorageClass`-specific parameters that can be
used to obtain a slice of the `Dataset`.
Expand All @@ -108,37 +106,36 @@ def get(self, label, parameters=None):
dataset : `InMemoryDataset`
The requested `Dataset`.
"""
assert isinstance(label, DatasetLabel)
handle = self.registry.find(self.run.collection, label)
if handle:
return self.getDirect(handle, parameters)
ref = self.registry.find(self.run.collection, ref)
if ref:
return self.getDirect(ref, parameters)
else:
return None # No Dataset found

def put(self, label, inMemoryDataset, producer=None):
def put(self, ref, inMemoryDataset, producer=None):
"""Write a `Dataset`.

Parameters
----------
label : `DatasetLabel`
Identifies the `Dataset` being stored.
ref : `DatasetRef`
The `Dataset` being stored.
inMemoryDataset : `InMemoryDataset`
The `Dataset` to store.
producer : `Quantum`
Identifies the producer of this `Dataset`. May be ``None`` for some `Registries`.
The producer of this `Dataset`. May be ``None`` for some `Registries`.
``producer.run`` must match ``self.config['run']``.

Returns
-------
datasetHandle : `DatasetHandle`
A handle that identifies the registered (and stored) dataset.
datasetRef : `DatasetRef`
The registered (and stored) dataset.
"""
ref = self.registry.expand(label)
ref = self.registry.expand(ref)
run = self.run
assert(producer is None or run == producer.run)
storageHint = ref.makeStorageHint(run)
uri, components = self.datastore.put(inMemoryDataset, ref.type.storageClass,
storageHint, ref.type.name)
uri, components = self.datastore.put(inMemoryDataset, ref.datasetType.storageClass,
storageHint, ref.datasetType.name)
return self.registry.addDataset(ref, uri, components, producer=producer, run=run)

def markInputUsed(self, quantum, ref):
Expand All @@ -151,19 +148,18 @@ def markInputUsed(self, quantum, ref):
ref : `DatasetRef`
The `Dataset` that is a true dependency of ``quantum``.
"""
handle = self.registry.find(self.run.collection, ref)
self.registry.markInputUsed(handle, quantum)
ref = self.registry.find(self.run.collection, ref)
self.registry.markInputUsed(ref, quantum)

def unlink(self, *labels):
"""Remove the `Dataset`s associated with the given `DatasetLabel`s from the Butler's `Collection`,
def unlink(self, *refs):
"""Remove the `Dataset`s associated with the given `DatasetRef`s from the Butler's `Collection`,
and signal that they may be deleted from storage if they are not referenced by any other `Collection`.

Parameters
----------
labels : [`DatasetLabel`]
List of labels for `Dataset`s to unlink.
refs : [`DatasetRef`]
List of refs for `Dataset`s to unlink.
"""
handles = [self.registry.find(self.run.collection, label)
for label in labels]
for handle in self.registry.disassociate(self.run.collection, handles, remove=True):
self.datastore.remove(handle.uri)
refs = [self.registry.find(self.run.collection, ref) for ref in refs]
for ref in self.registry.disassociate(self.run.collection, refs, remove=True):
self.datastore.remove(ref.uri)
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/config.py
@@ -1,7 +1,7 @@
#
# LSST Data Management System
#
# Copyright 2008-2017 AURA/LSST.
# Copyright 2008-2018 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
Expand Down
124 changes: 16 additions & 108 deletions python/lsst/daf/butler/core/datasets.py
@@ -1,7 +1,7 @@
#
# LSST Data Management System
#
# Copyright 2008-2017 AURA/LSST.
# Copyright 2008-2018 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
Expand All @@ -23,8 +23,7 @@

from types import MappingProxyType
from .utils import slotValuesAreEqual, slotValuesToHash
from .storageClass import StorageClass
from .units import DataUnitTypeSet
from .units import DataUnitSet


def _safeMakeMappingProxyType(data):
Expand All @@ -33,7 +32,7 @@ def _safeMakeMappingProxyType(data):
return MappingProxyType(data)


class DatasetType:
class DatasetType(object):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this needed? Python 3 doesn't need the explicit object inheritance.

"""A named category of Datasets that defines how they are organized,
related, and stored.

Expand Down Expand Up @@ -69,15 +68,14 @@ def template(self):

@property
def units(self):
"""A `DataUnitTypeSet` that defines the `DatasetRef`s corresponding
"""A `DataUnitSet` that defines the `DatasetRef`s corresponding
to this `DatasetType`.
"""
return self._units

@property
def storageClass(self):
"""A `StorageClass` subclass (not instance) that defines how this
`DatasetType` is persisted.
"""A `StorageClass` that defines how this `DatasetType` is persisted.
"""
return self._storageClass

Expand All @@ -86,45 +84,16 @@ def __init__(self, name, template, units, storageClass):

All arguments correspond directly to instance attributes.
"""
assert issubclass(storageClass, StorageClass)
self._name = name
self._template = template
self._units = DataUnitTypeSet(units)
self._units = DataUnitSet(units)
self._storageClass = storageClass


class DatasetLabel:
"""Opaque label that identifies a `Dataset` in a `Collection`.
"""

__slots__ = ("_name", "_units")
__eq__ = slotValuesAreEqual

def __init__(self, name, **units):
self._name = name
self._units = units

@property
def name(self):
"""Name of the `DatasetType` associated with the `Dataset`.
"""
return self._name

@property
def units(self):
"""Dictionary with name, value pairs for `DataUnit`s.
"""
return self._units


class DatasetRef(DatasetLabel):
class DatasetRef(object):
"""Reference to a `Dataset` in a `Registry`.

As opposed to a `DatasetLabel`, `DatasetRef` holds actual `DataUnit`
instances (instead of just their names and primary-key values).
They can typically only be constructed by calling `Registry.expand`.
In contrast to `DatasetLabel`s a `DatasetRef` may point to a `Dataset`s
that currently do not yet exist
A `DatasetRef` may point to a `Dataset` that does not yet exist
(e.g. because it is a predicted input for provenance).
"""

Expand All @@ -143,31 +112,31 @@ def getNewId(cls):
cls._currentId += 1
return cls._currentId

def __init__(self, type, units):
def __init__(self, datasetType, units):
"""Construct a DatasetRef from a DatasetType and a complete tuple
of DataUnits.

Parameters
----------
type: `DatasetType`
datasetType: `DatasetType`
The `DatasetType` for this `Dataset`.
units: `dict`
Dictionary where the keys are `DataUnit` names and the values are
`DataUnit` instances.
"""
units = type.units.conform(units)
units = datasetType.units.conform(units)
super().__init__(
type.name,
datasetType.name,
**{unit.__class__.__name__: unit.value for unit in units}
)
self._type = type
self._datasetType = datasetType
self._units = units
self._producer = None
self._predictedConsumers = dict()
self._actualConsumers = dict()

@property
def type(self):
def datasetType(self):
"""The `DatasetType` associated with the `Dataset` the `DatasetRef`
points to.
"""
Expand Down Expand Up @@ -218,67 +187,6 @@ def makeStorageHint(self, run, template=None):
corresponding to its `Run` is used.
"""
if template is None:
template = self.type.template
template = self.datasetType.template
units = {unit.__class__.__name__: unit.value for unit in self.units}
return template.format(DatasetType=self.type.name, Run=run.collection, **units)


class DatasetHandle(DatasetRef):
"""Handle to a stored `Dataset` in a `Registry`.

As opposed to a `DatasetLabel`, and like a `DatasetRef`, `DatasetHandle`
holds actual `DataUnit` instances
(instead of just their names and primary-key values).
In contrast to `DatasetRef`s a `DatasetHandle` only ever points to a
`Dataset` that has been stored in a `Datastore`.
"""

__slots__ = ("_datasetId", "_registryId", "_uri", "_components", "_run")

def __init__(self, datasetId, registryId, ref, uri, components, run):
"""Constructor.

Parameters correspond directly to attributes.
"""
super().__init__(ref.type, ref.units)
self._datasetId = datasetId
self._registryId = registryId
self._producer = ref.producer
self._predictedConsumers.update(ref.predictedConsumers)
self._actualConsumers.update(ref.actualConsumers)
self._uri = uri
self._components = _safeMakeMappingProxyType(components)
self._run = run

@property
def datasetId(self):
"""Primary-key identifier for this `Dataset`.
"""
return self._datasetId

@property
def registryId(self):
"""Id of the `Registry` that was used to create this `Dataset`.
"""
return self._registryId

@property
def uri(self):
"""The URI that holds the location of the `Dataset` in a `Datastore`.
"""
return self._uri

@property
def components(self):
"""A `dict` holding `DatasetHandle` instances that correspond to this
`Dataset`'s named components.

Empty if the `Dataset` is not a composite.
"""
return self._components

@property
def run(self):
"""The `Run` the `Dataset` was created with.
"""
return self._run
return template.format(DatasetType=self.datasetType.name, Run=run.collection, **units)
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/datastore.py
@@ -1,7 +1,7 @@
#
# LSST Data Management System
#
# Copyright 2008-2017 AURA/LSST.
# Copyright 2008-2018 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
Expand Down
5 changes: 1 addition & 4 deletions python/lsst/daf/butler/core/fileDescriptor.py
@@ -1,7 +1,7 @@
#
# LSST Data Management System
#
# Copyright 2008-2017 AURA/LSST.
# Copyright 2008-2018 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
Expand All @@ -21,8 +21,6 @@
# see <https://www.lsstcorp.org/LegalNotices/>.
#

from .location import Location


class FileDescriptor(object):
"""Describes a particular file.
Expand Down Expand Up @@ -53,7 +51,6 @@ def __init__(self, location, type=None, parameters=None):
parameters : `dict`
Additional parameters that can be used for reading and writing.
"""
assert isinstance(location, Location)
self.location = location
self.type = type
self.parameters = parameters
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/formatter.py
@@ -1,7 +1,7 @@
#
# LSST Data Management System
#
# Copyright 2008-2017 AURA/LSST.
# Copyright 2008-2018 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
Expand Down