Skip to content

Commit

Permalink
Merge pull request #224 from lsst/tickets/DM-21246
Browse files Browse the repository at this point in the history
DM-21246: butler cleanup and support for deferred run/collection passing
  • Loading branch information
TallJimbo committed Jan 30, 2020
2 parents 6d6ca2d + 0634d41 commit 72083d5
Show file tree
Hide file tree
Showing 11 changed files with 495 additions and 234 deletions.
506 changes: 364 additions & 142 deletions python/lsst/daf/butler/_butler.py

Large diffs are not rendered by default.

73 changes: 27 additions & 46 deletions python/lsst/daf/butler/_deferredDatasetHandle.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,63 +22,24 @@
"""
Module containing classes used with deferring dataset loading
"""
from __future__ import annotations

__all__ = ("DeferredDatasetHandle",)

import dataclasses
import typing
import types
from typing import Any, Optional, TYPE_CHECKING

from .core import DataId, DatasetRef, DatasetType

if typing.TYPE_CHECKING:
if TYPE_CHECKING:
from .core import DatasetRef, ExpandedDataCoordinate
from .butler import Butler


@dataclasses.dataclass(frozen=True)
class DeferredDatasetHandle:
"""This is a class to support deferred loading of a dataset from a butler.
Parameters
----------
butler : `Butler`
The butler that will be used to fetch the deferred dataset
datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
When `DatasetRef` the `dataId` should be `None`.
Otherwise the `DatasetType` or name thereof.
dataId : `dict` or `DataCoordinate`, optional
A dictionary of `Dimension` link name, value pairs that label the
`DatasetRef` within a Collection. When `None`, a `DatasetRef`
should be provided as the first argument.
parameters : `dict`
Additional StorageClass-defined options to control reading,
typically used to efficiently read only a subset of the dataset.
kwds : `dict`
Additional keyword arguments used to augment or construct a
`DataId`. See `DataId` construction parameters.
"""Proxy class that provides deferred loading of a dataset from a butler.
"""

datasetRefOrType: typing.Union[DatasetRef, DatasetType, str]
dataId: DataId
parameters: typing.Union[dict, None]
kwds: dict

def __init__(self, butler: 'Butler', datasetRefOrType: typing.Union[DatasetRef, DatasetType, str],
dataId: typing.Union[dict, DataId], parameters: typing.Union[dict, None], kwds: dict):
object.__setattr__(self, 'datasetRefOrType', datasetRefOrType)
object.__setattr__(self, 'dataId', dataId)
object.__setattr__(self, 'parameters', parameters)
object.__setattr__(self, 'kwds', kwds)

# Closure over butler to discourage accessing a raw butler through a
# deferred handle
def _get(self, parameters: typing.Union[None, dict]) -> typing.Any:
return butler.get(self.datasetRefOrType, self.dataId, parameters, **self.kwds)

object.__setattr__(self, '_get', types.MethodType(_get, self))

def get(self, parameters: typing.Union[None, dict] = None, **kwargs: dict) -> typing.Any:
def get(self, *, parameters: Optional = None, **kwargs: dict) -> Any:
""" Retrieves the dataset pointed to by this handle
This handle may be used multiple times, possibly with different
Expand Down Expand Up @@ -110,4 +71,24 @@ def get(self, parameters: typing.Union[None, dict] = None, **kwargs: dict) -> ty
else:
mergedParameters = {}

return self._get(mergedParameters)
return self.butler.getDirect(self.ref, parameters=mergedParameters)

@property
def dataId(self) -> ExpandedDataCoordinate:
"""The full data ID associated with the dataset
(`ExpandedDataCoordinate`).
"""
return self.ref.dataId

butler: Butler
"""The butler that will be used to fetch the dataset (`Butler`).
"""

ref: DatasetRef
"""Reference to the dataset (`DatasetRef`).
"""

parameters: Optional[dict]
"""Optional parameters that may be used to specify a subset of the dataset
to be loaded (`dict` or `None`).
"""
12 changes: 11 additions & 1 deletion python/lsst/daf/butler/datastores/fileLikeDatastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
)

from lsst.daf.butler import ddl
from lsst.daf.butler.registry.interfaces import ReadOnlyDatabaseError

from lsst.daf.butler.core.repoRelocation import replaceRoot
from lsst.daf.butler.core.utils import getInstanceOf, NamedValueSet, getClassOf, transactional
Expand Down Expand Up @@ -216,7 +217,16 @@ def __init__(self, config, registry, butlerRoot=None):

# Storage of paths and formatters, keyed by dataset_id
self._tableName = self.config["records", "table"]
registry.registerOpaqueTable(self._tableName, self.makeTableSpec())
try:
registry.registerOpaqueTable(self._tableName, self.makeTableSpec())
except ReadOnlyDatabaseError:
# If the database is read only and we just tried and failed to
# create a table, it means someone is trying to create a read-only
# butler client for an empty repo. That should be okay, as long
# as they then try to get any datasets before some other client
# creates the table. Chances are they'rejust validating
# configuration.
pass

# Determine whether checksums should be used
self.useChecksum = self.config.get("checksum", True)
Expand Down
12 changes: 10 additions & 2 deletions python/lsst/daf/butler/registry/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ class Registry:

@classmethod
def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], create: bool = False,
butlerRoot: Optional[str] = None) -> Registry:
butlerRoot: Optional[str] = None, writeable: bool = True) -> Registry:
"""Create `Registry` subclass instance from `config`.
Uses ``registry.cls`` from `config` to determine which subclass to
Expand All @@ -168,6 +168,8 @@ def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], cr
Assume empty Registry and create a new one.
butlerRoot : `str`, optional
Path to the repository root this `Registry` will manage.
writeable : `bool`, optional
If `True` (default) create a read-write connection to the database.
Returns
-------
Expand All @@ -182,7 +184,7 @@ def fromConfig(cls, config: Union[ButlerConfig, RegistryConfig, Config, str], cr
config.replaceRoot(butlerRoot)
DatabaseClass = config.getDatabaseClass()
database = DatabaseClass.fromUri(str(config.connectionString), origin=config.get("origin", 0),
namespace=config.get("namespace"))
namespace=config.get("namespace"), writeable=writeable)
dimensions = DimensionUniverse(config)
opaque = doImport(config["managers", "opaque"])
return cls(database=database, dimensions=dimensions, opaque=opaque, create=create)
Expand Down Expand Up @@ -217,6 +219,12 @@ def __str__(self) -> str:
def __repr__(self) -> str:
return f"Registry({self._db!r}, {self.dimensions!r})"

def isWriteable(self) -> bool:
"""Return `True` if this registry allows write operations, and `False`
otherwise.
"""
return self._db.isWriteable()

@contextlib.contextmanager
def transaction(self):
"""Return a context manager that represents a transaction.
Expand Down
1 change: 0 additions & 1 deletion tests/config/basic/butler-chained.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
run: ingest
datastore: !include chainedDatastore.yaml
storageClasses: !include storageClasses.yaml
composites: !include composites.yaml
1 change: 0 additions & 1 deletion tests/config/basic/butler-inmemory.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
run: ingest
datastore: !include inMemoryDatastore.yaml
storageClasses: !include storageClasses.yaml
composites: !include composites.yaml
3 changes: 0 additions & 3 deletions tests/config/basic/butler-norun.yaml

This file was deleted.

1 change: 0 additions & 1 deletion tests/config/basic/butler-s3store.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
run: ingest
registry: !include sqliteRegistry.yaml
datastore: !include s3Datastore.yaml
storageClasses: !include storageClasses.yaml
Expand Down
1 change: 0 additions & 1 deletion tests/config/basic/butler.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
run: ingest
datastore: !include posixDatastore.yaml
storageClasses: !include storageClasses.yaml
composites: !include composites.yaml

0 comments on commit 72083d5

Please sign in to comment.