Skip to content

Commit

Permalink
Add ability to specify field lengths for DatabaseDict and use that
Browse files Browse the repository at this point in the history
PosixDatastore now defines the lengths for PosixDatastoreRecords
  • Loading branch information
timj committed Mar 13, 2019
1 parent e4f20a7 commit 64bf7fe
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 33 deletions.
41 changes: 25 additions & 16 deletions python/lsst/daf/butler/core/databaseDict.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,20 @@ class DatabaseDict(MutableMapping):
The name of the field to be used as the dictionary key. Must not be
present in ``value._fields``.
value : `type`
The type used for the dictionary's values, typically a `namedtuple`.
Must have a ``_fields`` class attribute that is a tuple of field names
(i.e. as defined by `namedtuple`); these field names must also appear
in the ``types`` arg, and a `_make` attribute to construct it from a
sequence of values (again, as defined by `namedtuple`).
The type used for the dictionary's values, typically a
`namedtuple`. Must have a ``_fields`` class attribute that is a
tuple of field names (i.e., as defined by
`~collections.namedtuple`); these field names must also appear
in the ``types`` arg, and a ``_make`` attribute to construct it
from a sequence of values (again, as defined by
`~collections.namedtuple`).
lengths : `dict`, optional
Specific lengths of string fields. Defaults will be used if not
specified.
"""

@staticmethod
def fromConfig(config, types, key, value, registry=None):
def fromConfig(config, types, key, value, lengths=None, registry=None):
"""Create a `DatabaseDict` subclass instance from `config`.
If ``config`` contains a class ``cls`` key, this will be assumed to
Expand All @@ -72,19 +77,23 @@ def fromConfig(config, types, key, value, registry=None):
config : `Config`
Configuration used to identify and construct a subclass.
types : `dict`
A dictionary mapping `str` field names to type objects, containing
all fields to be held in the database.
A dictionary mapping `str` field names to Python type objects,
containing all fields to be held in the database.
key : `str`
The name of the field to be used as the dictionary key. Must not
be present in ``value._fields``.
value : `type`
The type used for the dictionary's values, typically a
`namedtuple`. Must have a ``_fields`` class attribute that is a
tuple of field names (i.e. as defined by `namedtuple`); these
field names must also appear in the ``types`` arg, and a `_make`
attribute to construct it from a sequence of values (again, as
defined by `namedtuple`).
registry : `Registry`
tuple of field names (i.e., as defined by
`~collections.namedtuple`); these field names must also appear
in the ``types`` arg, and a ``_make`` attribute to construct it
from a sequence of values (again, as defined by
`~collections.namedtuple`).
lengths : `dict`, optional
Specific lengths of string fields. Defaults will be used if not
specified.
registry : `Registry`, optional
A registry instance from which a `DatabaseDict` subclass can be
obtained. Ignored if ``config["cls"]`` exists; may be None if
it does.
Expand All @@ -96,14 +105,14 @@ def fromConfig(config, types, key, value, registry=None):
"""
if "cls" in config:
cls = doImport(config["cls"])
return cls(config=config, types=types, key=key, value=value)
return cls(config=config, types=types, key=key, value=value, lengths=lengths)
else:
table = config["table"]
if registry is None:
raise ValueError("Either config['cls'] or registry must be provided.")
return registry.makeDatabaseDict(table, types=types, key=key, value=value)
return registry.makeDatabaseDict(table, types=types, key=key, value=value, lengths=lengths)

def __init__(self, config, types, key, value):
def __init__(self, config, types, key, value, lengths=None):
# This constructor is currently defined just to clearly document the
# interface subclasses should conform to.
pass
16 changes: 12 additions & 4 deletions python/lsst/daf/butler/core/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,14 +236,14 @@ def pixelization(self):
return self._pixelization

@abstractmethod
def makeDatabaseDict(self, table, types, key, value):
"""Construct a DatabaseDict backed by a table in the same database as
def makeDatabaseDict(self, table, types, key, value, lengths=None):
"""Construct a `DatabaseDict` backed by a table in the same database as
this Registry.
Parameters
----------
table : `str`
Name of the table that backs the returned DatabaseDict. If this
Name of the table that backs the returned `DatabaseDict`. If this
table already exists, its schema must include at least everything
in ``types``.
types : `dict`
Expand All @@ -257,9 +257,17 @@ def makeDatabaseDict(self, table, types, key, value):
`~collections.namedtuple`. Must have a ``_fields`` class
attribute that is a tuple of field names (i.e., as defined by
`~collections.namedtuple`); these field names must also appear
in the ``types`` arg, and a `_make` attribute to construct it
in the ``types`` arg, and a ``_make`` attribute to construct it
from a sequence of values (again, as defined by
`~collections.namedtuple`).
lengths : `dict`, optional
Specific lengths of string fields. Defaults will be used if not
specified.
Returns
-------
databaseDict : `DatabaseDict`
`DatabaseDict` backed by this registry.
"""
raise NotImplementedError("Must be implemented by subclass")

Expand Down
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/datastores/posixDatastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,11 @@ def __init__(self, config, registry):
# Storage of paths and formatters, keyed by dataset_id
types = {"path": str, "formatter": str, "storage_class": str,
"file_size": int, "checksum": str, "dataset_id": int}
lengths = {"path": 256, "formatter": 128, "storage_class": 64,
"checksum": 128}
self.records = DatabaseDict.fromConfig(self.config["records"], types=types,
value=self.RecordTuple, key="dataset_id",
registry=registry)
lengths=lengths, registry=registry)

def __str__(self):
return self.root
Expand Down
15 changes: 12 additions & 3 deletions python/lsst/daf/butler/registries/sqlRegistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def _isValidDatasetType(self, datasetType):
"""
return isinstance(datasetType, DatasetType)

def makeDatabaseDict(self, table, types, key, value):
def makeDatabaseDict(self, table, types, key, value, lengths=None):
"""Construct a DatabaseDict backed by a table in the same database as
this Registry.
Expand All @@ -179,9 +179,17 @@ def makeDatabaseDict(self, table, types, key, value):
`~collections.namedtuple`. Must have a ``_fields`` class
attribute that is a tuple of field names (i.e. as defined by
`~collections.namedtuple`); these field names must also appear
in the ``types`` arg, and a `_make` attribute to construct it
in the ``types`` arg, and a ``_make`` attribute to construct it
from a sequence of values (again, as defined by
`~collections.namedtuple`).
lengths : `dict`, optional
Specific lengths of string fields. Defaults will be used if not
specified.
Returns
-------
databaseDict : `DatabaseDict`
`DatabaseDict` backed by this registry.
"""
# We need to construct a temporary config for the table value because
# SqlRegistryDatabaseDict.__init__ is required to take a config so it
Expand All @@ -192,7 +200,8 @@ def makeDatabaseDict(self, table, types, key, value):
# really need.
config = Config()
config["table"] = table
return SqlRegistryDatabaseDict(config, types=types, key=key, value=value, registry=self)
return SqlRegistryDatabaseDict(config, types=types, key=key, value=value, lengths=lengths,
registry=self)

def _makeDatasetRefFromRow(self, row, datasetType=None, dataId=None):
"""Construct a DatasetRef from the result of a query on the Dataset
Expand Down
25 changes: 17 additions & 8 deletions python/lsst/daf/butler/registries/sqlRegistryDatabaseDict.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,24 +54,33 @@ class SqlRegistryDatabaseDict(DatabaseDict):
key : `str`
The name of the field to be used as the dictionary key. Must not be
present in ``value._fields``.
value : `type` (`namedtuple`)
The type used for the dictionary's values, typically a `namedtuple`.
Must have a ``_fields`` class attribute that is a tuple of field names
(i.e. as defined by `namedtuple`); these field names must also appear
in the ``types`` arg, and a `_make` attribute to construct it from a
sequence of values (again, as defined by `namedtuple`).
value : `type` (`collections.namedtuple`)
The type used for the dictionary's values, typically a
`~collections.namedtuple`. Must have a ``_fields`` class attribute
that is a tuple of field names (i.e., as defined by
`~collections.namedtuple`); these field names must also appear
in the ``types`` arg, and a ``_make`` attribute to construct it from a
sequence of values (again, as defined by `~collections.namedtuple`).
registry : `SqlRegistry`
A registry object with an open connection and a schema.
lengths : `dict`, optional
Specific lengths of string fields. Defaults will be used if not
specified.
"""

COLUMN_TYPES = {str: String, int: Integer, float: Float,
bool: Boolean, bytes: LargeBinary, datetime: DateTime}

def __init__(self, config, types, key, value, registry):
def __init__(self, config, types, key, value, registry, lengths=None):
self.registry = registry
allColumns = []
if lengths is None:
lengths = {}
for name, type_ in types.items():
column = Column(name, self.COLUMN_TYPES.get(type_, type_), primary_key=(name == key))
column_type = self.COLUMN_TYPES.get(type_, type_)
if issubclass(column_type, String) and name in lengths:
column_type = column_type(length=lengths[name])
column = Column(name, column_type, primary_key=(name == key))
allColumns.append(column)
if key in value._fields:
raise ValueError("DatabaseDict's key field may not be a part of the value tuple")
Expand Down
2 changes: 1 addition & 1 deletion tests/dummyRegistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def getDatasetLocations(self, ref):
def removeDatasetLocation(self, datastoreName, ref):
self._entries[ref.id].remove(datastoreName)

def makeDatabaseDict(self, table, types, key, value):
def makeDatabaseDict(self, table, types, key, value, lengths=None):
return dict()

@contextmanager
Expand Down

0 comments on commit 64bf7fe

Please sign in to comment.