Merge pull request #9 from lsst/tickets/DM-13371

DM-13371: flake8 compliance

timj committed Jan 26, 2018
2 parents 85b3cc5 + a437c11 commit 012c14e

Showing 22 changed files with 454 additions and 305 deletions.
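The commit title refers to the flake8 linter. For reference, LSST-style packages configure flake8 in setup.cfg; the sketch below is an assumption about what such a section looks like. The 110-character line limit is consistent with which lines this diff wraps, but the ignore list is illustrative, not copied from this repository:

# setup.cfg (hypothetical LSST-style flake8 section, for illustration only)
[flake8]
max-line-length = 110
ignore = E133, E226, E228, N802, N803
exclude = __init__.py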
2 changes: 1 addition & 1 deletion examples/createTables.py
@@ -1,4 +1,4 @@
-from sqlalchemy import create_engine, LargeBinary
+from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker
 
 from lsst.daf.butler.schema import metadata
11 changes: 7 additions & 4 deletions python/lsst/daf/butler/butler.py
@@ -33,12 +33,13 @@ class ButlerConfig(Config):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.validate()
 
     def validate(self):
         for k in ['run', 'datastore.cls', 'registry.cls']:
             if k not in self:
                 raise ValueError("Missing ButlerConfig parameter: {0}".format(k))
 
+
 class Butler(object):
     """Main entry point for the data access system.

@@ -66,7 +67,7 @@ def __init__(self, config):
         self.run = self.registry.getRun(self.config['run'])
         if self.run is None:
             self.run = self.registry.makeRun(self.config['run'])
 
     def getDirect(self, handle, parameters=None):
         """Load a `Dataset` or a slice thereof from a `DatasetHandle`.

@@ -99,7 +100,8 @@ def get(self, label, parameters=None):
         label : `DatasetLabel`
             Identifies the `Dataset` to retrieve.
         parameters : `dict`
-            A dictionary of `StorageClass`-specific parameters that can be used to obtain a slice of the `Dataset`.
+            A dictionary of `StorageClass`-specific parameters that can be
+            used to obtain a slice of the `Dataset`.
 
         Returns
         -------

@@ -135,7 +137,8 @@ def put(self, label, inMemoryDataset, producer=None):
         run = self.run
         assert(producer is None or run == producer.run)
         storageHint = ref.makeStorageHint(run)
-        uri, components = self.datastore.put(inMemoryDataset, ref.type.storageClass, storageHint, ref.type.name)
+        uri, components = self.datastore.put(inMemoryDataset, ref.type.storageClass,
+                                             storageHint, ref.type.name)
         return self.registry.addDataset(ref, uri, components, producer=producer, run=run)
 
     def markInputUsed(self, quantum, ref):
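To ground the API above, here is a minimal sketch of building the ButlerConfig that validate() requires. The nested class paths are hypothetical placeholders, not values from this repository, and it is assumed that the dotted-key lookup ('datastore.cls' in config) traverses nested dicts, as its use in validate() implies:

from lsst.daf.butler.butler import ButlerConfig

# Hypothetical values for the three required keys; the class paths are
# placeholders, not real classes.
config = ButlerConfig({
    'run': 'ingest/run-0001',
    'datastore': {'cls': 'mypackage.PosixDatastore'},
    'registry': {'cls': 'mypackage.SqlRegistry'},
})

# validate() runs inside __init__, so omitting a key raises, e.g.:
#   ButlerConfig({'run': 'ingest/run-0001'})
#   ValueError: Missing ButlerConfig parameter: datastore.cls

# A Butler built from this config would then serve get()/put() calls:
#   butler = Butler(config)
#   dataset = butler.get(label, parameters={'bbox': someBox})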
3 changes: 1 addition & 2 deletions python/lsst/daf/butler/ci_hsc/ingest.py
@@ -1,13 +1,12 @@
-
 import os
 import sqlite3
 
 import numpy as np
 
 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker
 
-from . import hsc
+from . import hsc  # noqa F401
 from lsst.daf.butler.core import units
 from lsst.daf.butler.core.schema import metadata
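One detail worth noting in the hunk above: flake8 only restricts a noqa comment to specific codes when a colon follows noqa. A short sketch of the difference (standard flake8 behavior, not something this diff states):

import os   # noqa: F401  (suppresses only F401, "imported but unused")
import sys  # noqa        (bare form: suppresses every check on this line)

# `# noqa F401` without the colon, as written in the hunk above, is treated
# like the bare form, so it silences all checks on that line, not just F401.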
11 changes: 3 additions & 8 deletions python/lsst/daf/butler/core/config.py
@@ -23,17 +23,13 @@
 
 import collections
 import copy
-import os
-import sys
-import warnings
 import yaml
 import pprint
 
-import lsst.utils
-
 from yaml.representer import Representer
 yaml.add_representer(collections.defaultdict, Representer.represent_dict)
 
+
 # UserDict and yaml have defined metaclasses and Python 3 does not allow multiple
 # inheritance of classes with distinct metaclasses. We therefore have to
 # create a new baseclass that Config can inherit from. This is because the metaclass

@@ -66,7 +62,7 @@ def __init__(self, other=None):
         ----------
         other: `str` or `Config` or `dict`
             Other source of configuration, can be:
             - (`str`) Treated as a path to a config file on disk. Must end with '.yaml'.
             - (`Config`) Copies the other Config's values into this one.
             - (`dict`) Copies the values from the dict into this Config.

@@ -110,7 +106,6 @@ def __initFromFile(self, path):
         path: `str`
             To a persisted config file.
         """
-        config = None
         if path.endswith('yaml'):
             self.__initFromYamlFile(path)
         else:

@@ -195,7 +190,7 @@ def update(self, other):
         ----------
         other: `dict` or `Config`
             Source of configuration:
             - If foo is a dict, then after the update foo == {'a': {'c': 2}}
             - But if foo is a Config, then after the update foo == {'a': {'b': 1, 'c': 2}}
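The update() docstring describes hierarchical merging rather than dict-style replacement. A runnable sketch of the documented behavior, assuming the starting value foo == {'a': {'b': 1}} that the examples imply:

from lsst.daf.butler.core.config import Config

plain = {'a': {'b': 1}}
plain.update({'a': {'c': 2}})
# dict.update replaces the nested dict wholesale:
assert plain == {'a': {'c': 2}}

conf = Config({'a': {'b': 1}})
conf.update({'a': {'c': 2}})
# Config.update merges recursively, keeping 'b' (per the docstring):
# conf == {'a': {'b': 1, 'c': 2}}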
76 changes: 50 additions & 26 deletions python/lsst/daf/butler/core/datasets.py
@@ -26,17 +26,22 @@
 from .storageClass import StorageClass
 from .units import DataUnitTypeSet
 
+
 def _safeMakeMappingProxyType(data):
     if data is None:
         data = {}
     return MappingProxyType(data)
 
+
 class DatasetType:
-    """A named category of Datasets that defines how they are organized, related, and stored.
+    """A named category of Datasets that defines how they are organized,
+    related, and stored.
 
     A concrete, final class whose instances represent `DatasetType`s.
 
-    `DatasetType` instances may be constructed without a `Registry`, but they must be registered
-    via `Registry.registerDatasetType()` before corresponding `Datasets` may be added.
+    `DatasetType` instances may be constructed without a `Registry`,
+    but they must be registered
+    via `Registry.registerDatasetType()` before corresponding `Datasets`
+    may be added.
 
     `DatasetType` instances are immutable.
     """

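The _safeMakeMappingProxyType helper added above exists so that provenance mappings (see predictedConsumers and actualConsumers below) can be exposed as read-only views. A small sketch of the standard-library behavior it relies on:

from types import MappingProxyType

def _safeMakeMappingProxyType(data):
    if data is None:
        data = {}
    return MappingProxyType(data)

consumers = _safeMakeMappingProxyType({'quantum1': 'predicted'})
print(consumers['quantum1'])   # reads pass through to the wrapped dict
try:
    consumers['quantum2'] = 'actual'   # mutation is rejected
except TypeError:
    pass  # updates must go through Registry methods instead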
@@ -46,28 +51,33 @@ class DatasetType:
 
     @property
     def name(self):
-        """A string name for the `Dataset`; must correspond to the same `DatasetType` across all Registries.
+        """A string name for the `Dataset`; must correspond to the same
+        `DatasetType` across all Registries.
         """
         return self._name
 
     @property
     def template(self):
-        """A string with `str.format`-style replacement patterns that can be used to create a path from a `Run`
+        """A string with `str.format`-style replacement patterns that can be
+        used to create a path from a `Run`
         (and optionally its associated Collection) and a `DatasetRef`.
-        May be `None` to indicate a read-only `Dataset` or one whose templates must be provided at a higher level.
+        May be `None` to indicate a read-only `Dataset` or one whose templates
+        must be provided at a higher level.
         """
         return self._template
 
     @property
     def units(self):
-        """A `DataUnitTypeSet` that defines the `DatasetRef`s corresponding to this `DatasetType`.
+        """A `DataUnitTypeSet` that defines the `DatasetRef`s corresponding
+        to this `DatasetType`.
         """
         return self._units
 
     @property
     def storageClass(self):
-        """A `StorageClass` subclass (not instance) that defines how this `DatasetType` is persisted.
+        """A `StorageClass` subclass (not instance) that defines how this
+        `DatasetType` is persisted.
         """
         return self._storageClass

@@ -110,10 +120,11 @@ def units(self):
 class DatasetRef(DatasetLabel):
     """Reference to a `Dataset` in a `Registry`.
 
-    As opposed to a `DatasetLabel`, `DatasetRef` holds actual `DataUnit` instances
-    (instead of just their names and primary-key values).
+    As opposed to a `DatasetLabel`, `DatasetRef` holds actual `DataUnit`
+    instances (instead of just their names and primary-key values).
     They can typically only be constructed by calling `Registry.expand`.
 
-    In contrast to `DatasetLabel`s a `DatasetRef` may point to a `Dataset`s that currently do not yet exist
+    In contrast to `DatasetLabel`s, a `DatasetRef` may point to a `Dataset`
+    that does not yet exist
     (e.g. because it is a predicted input for provenance).
     """

@@ -125,21 +136,24 @@ def getNewId(cls):
         """Generate a new Dataset ID number.
 
         .. todo::
-            This is a temporary workaround that will probably disapear in the future,
-            when a solution is found to the problem of autoincrement compound primary keys in SQLite.
+            This is a temporary workaround that will probably disappear in
+            the future, when a solution is found to the problem of
+            autoincrement compound primary keys in SQLite.
         """
         cls._currentId += 1
         return cls._currentId
 
     def __init__(self, type, units):
-        """Construct a DatasetRef from a DatasetType and a complete tuple of DataUnits.
+        """Construct a DatasetRef from a DatasetType and a complete tuple
+        of DataUnits.
 
         Parameters
         ----------
         type: `DatasetType`
             The `DatasetType` for this `Dataset`.
         units: `dict`
-            Dictionary where the keys are `DataUnit` names and the values are `DataUnit` instances.
+            Dictionary where the keys are `DataUnit` names and the values are
+            `DataUnit` instances.
         """
         units = type.units.conform(units)
         super().__init__(

@@ -154,21 +168,25 @@ def __init__(self, type, units):
 
     @property
     def type(self):
-        """The `DatasetType` associated with the `Dataset` the `DatasetRef` points to.
+        """The `DatasetType` associated with the `Dataset` the `DatasetRef`
+        points to.
         """
         return self._type
 
     @property
     def units(self):
-        """A `tuple` of `DataUnit` instances that label the `DatasetRef` within a Collection.
+        """A `tuple` of `DataUnit` instances that label the `DatasetRef`
+        within a Collection.
         """
         return self._units
 
     @property
     def producer(self):
-        """The `Quantum` instance that produced (or will produce) the `Dataset`.
+        """The `Quantum` instance that produced (or will produce) the
+        `Dataset`.
 
-        Read-only; update via `Registry.addDataset()`, `QuantumGraph.addDataset()`, or `Butler.put()`.
+        Read-only; update via `Registry.addDataset()`,
+        `QuantumGraph.addDataset()`, or `Butler.put()`.
 
         May be `None` if no provenance information is available.
         """
         return self._producer

@@ -181,20 +199,23 @@ def predictedConsumers(self):
 
         May be an empty list if no provenance information is available.
         """
         return _safeMakeMappingProxyType(self._predictedConsumers)
 
     @property
     def actualConsumers(self):
-        """A sequence of `Quantum` instances that list this `Dataset` in their `actualInputs` attributes.
+        """A sequence of `Quantum` instances that list this `Dataset` in their
+        `actualInputs` attributes.
 
         Read-only; update via `Registry.markInputUsed()`.
 
         May be an empty list if no provenance information is available.
         """
         return _safeMakeMappingProxyType(self._actualConsumers)
 
     def makeStorageHint(self, run, template=None):
-        """Construct a storage hint by filling in template with the Collection collection and the values in the units tuple.
+        """Construct a storage hint by filling in the template with the
+        `Run`'s Collection and the values in the units tuple.
 
-        Although a `Dataset` may belong to multiple Collections, only the one corresponding to its `Run` is used.
+        Although a `Dataset` may belong to multiple Collections, only the one
+        corresponding to its `Run` is used.
         """
         if template is None:
             template = self.type.template

@@ -205,9 +226,11 @@ def makeStorageHint(self, run, template=None):
 class DatasetHandle(DatasetRef):
     """Handle to a stored `Dataset` in a `Registry`.
 
-    As opposed to a `DatasetLabel`, and like a `DatasetRef`, `DatasetHandle` holds actual `DataUnit` instances
-    (instead of just their names and primary-key values).
+    As opposed to a `DatasetLabel`, and like a `DatasetRef`, `DatasetHandle`
+    holds actual `DataUnit` instances
+    (instead of just their names and primary-key values).
 
-    In contrast to `DatasetRef`s a `DatasetHandle` only ever points to a `Dataset` that has been stored in a `Datastore`.
+    In contrast to `DatasetRef`s, a `DatasetHandle` only ever points to a
+    `Dataset` that has been stored in a `Datastore`.
     """
 
     __slots__ = ("_datasetId", "_registryId", "_uri", "_components", "_run")

@@ -247,7 +270,8 @@ def uri(self):
 
     @property
     def components(self):
-        """A `dict` holding `DatasetHandle` instances that correspond to this `Dataset`s named components.
+        """A `dict` holding `DatasetHandle` instances that correspond to this
+        `Dataset`'s named components.
 
         Empty if the `Dataset` is not a composite.
         """
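The template property and makeStorageHint() above describe str.format-style path construction. A sketch with a hypothetical template and unit values (neither is taken from this repository):

# Hypothetical DatasetType.template and DataUnit values, for illustration.
template = "{run}/{datasetType}/{visit:06d}/{sensor}.fits"

storageHint = template.format(
    run="ingest/run-0001",   # Collection corresponding to the Run
    datasetType="calexp",    # DatasetType.name
    visit=903334,            # DataUnit primary-key values
    sensor="1_36",
)
print(storageHint)
# ingest/run-0001/calexp/903334/1_36.fits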
37 changes: 22 additions & 15 deletions python/lsst/daf/butler/core/datastore.py
@@ -21,24 +21,24 @@
 # see <https://www.lsstcorp.org/LegalNotices/>.
 #
 
-import yaml
-
 from lsst.daf.persistence import doImport
 
-from abc import ABCMeta, abstractmethod, abstractproperty
+from abc import ABCMeta, abstractmethod
 from .config import Config
 
+
 class DatastoreConfig(Config):
     pass
 
+
 class Datastore(metaclass=ABCMeta):
     """Datastore interface.
     """
     @staticmethod
     def fromConfig(config):
         cls = doImport(config['datastore.cls'])
         return cls(config=config)
 
     def __init__(self, config):
         """Constructor

@@ -56,11 +56,13 @@ def get(self, uri, storageClass, parameters=None):
         Parameters
         ----------
         uri : `str`
-            a Universal Resource Identifier that specifies the location of the stored `Dataset`.
+            a Universal Resource Identifier that specifies the location of the
+            stored `Dataset`.
         storageClass : `StorageClass`
             the `StorageClass` associated with the `DatasetType`.
         parameters : `dict`
-            `StorageClass`-specific parameters that specify a slice of the `Dataset` to be loaded.
+            `StorageClass`-specific parameters that specify a slice of the
+            `Dataset` to be loaded.
 
         Returns
         -------

@@ -82,12 +84,12 @@ def put(self, inMemoryDataset, storageClass, storageHint, typeName=None):
         storageHint : `str`
             Provides a hint that the `Datastore` may use as (part of) the URI.
         typeName : `str`
-            The `DatasetType` name, which may be used by this `Datastore` to override the
-            default serialization format for the `StorageClass`.
+            The `DatasetType` name, which may be used by this `Datastore` to
+            override the default serialization format for the `StorageClass`.
 
         Returns
         -------
-        uri : `str`
+        uri : `str`
             The `URI` where the primary `Dataset` is stored.
         components : `dict`, optional
             A dictionary of URIs for the `Dataset`'s components.

@@ -102,11 +104,13 @@ def remove(self, uri):
         Parameters
         ----------
         uri : `str`
-            a Universal Resource Identifier that specifies the location of the stored `Dataset`.
+            a Universal Resource Identifier that specifies the location of the
+            stored `Dataset`.
 
         .. note::
-            Some Datastores may implement this method as a silent no-op to disable `Dataset` deletion through standard interfaces.
+            Some Datastores may implement this method as a silent no-op to
+            disable `Dataset` deletion through standard interfaces.
 
         Raises
         ------
         e : `FileNotFoundError`

@@ -128,13 +132,16 @@ def transfer(self, inputDatastore, inputUri, storageClass, storageHint, typeName
         storageClass : `StorageClass`
             The `StorageClass` associated with the `DatasetType`.
         storageHint : `str`
-            Provides a hint that this `Datastore` may use as [part of] the `URI`.
+            Provides a hint that this `Datastore` may use as [part of] the
+            `URI`.
         typeName : `str`
-            The `DatasetType` name, which may be used by this `Datastore` to override the default serialization format for the `StorageClass`.
+            The `DatasetType` name, which may be used by this `Datastore`
+            to override the default serialization format for the
+            `StorageClass`.
 
         Returns
         -------
-        uri : `str`
+        uri : `str`
             The `URI` where the primary `Dataset` is stored.
         components : `dict`, optional
             A dictionary of URIs for the `Dataset`'s components.
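Datastore.fromConfig() above is a small factory: doImport loads whatever class 'datastore.cls' names and instantiates it with the config. A sketch of a concrete subclass satisfying the abstract interface; the class itself is hypothetical, not part of this repository:

from lsst.daf.butler.core.datastore import Datastore

class InMemoryDatastore(Datastore):
    """Hypothetical datastore keeping datasets in a dict, for illustration."""

    def __init__(self, config):
        super().__init__(config)
        self.store = {}

    def get(self, uri, storageClass, parameters=None):
        return self.store[uri]

    def put(self, inMemoryDataset, storageClass, storageHint, typeName=None):
        self.store[storageHint] = inMemoryDataset
        return storageHint, {}   # (uri, components)

    def remove(self, uri):
        del self.store[uri]

    def transfer(self, inputDatastore, inputUri, storageClass, storageHint,
                 typeName=None):
        data = inputDatastore.get(inputUri, storageClass)
        return self.put(data, storageClass, storageHint, typeName)

# With 'datastore.cls' naming this class by dotted path, fromConfig(config)
# would import it via doImport and return InMemoryDatastore(config=config).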
