Skip to content

Commit

Permalink
Implement review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Pim Schellart authored and Pim Schellart committed Apr 20, 2018
1 parent fcc8b25 commit 0f0ba99
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 41 deletions.
1 change: 1 addition & 0 deletions python/lsst/daf/butler/core/__init__.py
Expand Up @@ -21,3 +21,4 @@
from .storageClass import *
from .storageInfo import *
from .storedFileInfo import *
from .dataUnit import *
94 changes: 54 additions & 40 deletions python/lsst/daf/butler/core/dataUnit.py
Expand Up @@ -19,6 +19,8 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from itertools import chain

from .utils import TopologicalSet

__all__ = ("DataUnit", "DataUnitRegistry")
Expand All @@ -31,7 +33,7 @@ class DataUnit:
`DataUnit` instances represent concrete units such as e.g. `Camera`,
`Sensor`, `Visit` and `SkyMap`.
Attributes
Parameters
----------
name : `str`
Name of this `DataUnit`.
Expand All @@ -42,24 +44,13 @@ class DataUnit:
optionalDependencies : `frozenset`
Related `DataUnit` instances that may also be provided (and when they
are, they must be kept in sync).
dependencies : `frozenset`
The union of `requiredDependencies` and `optionalDependencies`.
table : `sqlalchemy.core.Table`, optional
When not ``None`` the primary table entry corresponding to this
`DataUnit`.
link : `tuple`
Names of columns that form the `DataUnit` specific part of the primary-key
in this `DataUnit` table and are also the names of the link column in
the Datasets table.
primaryKey : `tuple`
Full primary-key column name tuple. Consists of the ``link`` of this
`DataUnit` and that of all its ``requiredDependencies``.
linkColumns : `dict`
Dictionary keyed on ``link`` names with `sqlalchemy.Column` entries
into this `DataUnit` primary table as values.
primaryKeyColumns : `dict`
Dictionary keyed on ``primaryKey`` names with `sqlalchemy.Column` entries
into this `DataUnit` primary table as values.
table : `sqlalchemy.core.Table`, optional
When not ``None`` the primary table entry corresponding to this
`DataUnit`.
"""
def __init__(self, name, requiredDependencies, optionalDependencies, link=(), table=None):
self._name = name
Expand All @@ -74,45 +65,70 @@ def __repr__(self):

@property
def name(self):
"""Name of this `DataUnit` (`str`, read-only).
Also assumed to be the name of the primary table (if present)."""
return self._name

@property
def requiredDependencies(self):
"""Related `DataUnit` instances on which existence this `DataUnit`
instance depends (`frozenset`, read-only).
"""
return self._requiredDependencies

@property
def optionalDependencies(self):
"""Related `DataUnit` instances that may also be provided (and when they
are, they must be kept in sync) (`frozenset`, read-only).
"""
return self._optionalDependencies

@property
def dependencies(self):
"""The union of `requiredDependencies` and `optionalDependencies`
(`frozenset`, read-only).
"""
return self.requiredDependencies.union(self.optionalDependencies)

@property
def table(self):
if hasattr(self, '_table'):
return self._table
else:
return None
"""When not ``None`` the primary table entry corresponding to this
`DataUnit` (`sqlalchemy.core.Table`, optional).
"""
return getattr(self, '_table', None)

@property
def link(self):
"""Names of columns that form the `DataUnit` specific part of the primary-key
in this `DataUnit` table and are also the names of the link columns in
the Datasets table (`tuple`).
"""
return self._link

@property
def primaryKey(self):
"""Full primary-key column name tuple. Consists of the ``link`` of this
`DataUnit` and that of all its ``requiredDependencies`` (`set`).
"""
if self._primaryKey is None:
self._primaryKey = tuple(self.link)
self._primaryKey = set(self.link)
for dependency in self.requiredDependencies:
self._primaryKey += dependency.primaryKey
self._primaryKey.update(dependency.primaryKey)
return self._primaryKey

@property
def linkColumns(self):
"""Dictionary keyed on ``link`` names with `sqlalchemy.Column` entries
into this `DataUnit` primary table as values (`dict`).
"""
return {name: self.table.columns[name] for name in self.link}

@property
def primaryKeyColumns(self):
"""Dictionary keyed on ``primaryKey`` names with `sqlalchemy.Column` entries
into this `DataUnit` primary table as values (`dict`).
"""
return {name: self.table.columns[name] for name in self.primaryKey}

def validateId(self, dataId):
Expand All @@ -128,10 +144,10 @@ def validateId(self, dataId):
ValueError
If a value for a required dependency is missing.
"""
for columnName in self.primaryKey:
if columnName not in dataId:
raise ValueError("Required key {} missing from {} for DataUnit {}".format(
columnName, dataId, self.name))
missing = self.primaryKey - set(dataId.keys())
if missing:
raise ValueError("Missing required keys: {} from {} for DataUnit {}".format(
missing, dataId, self.name))


class DataUnitRegistry:
Expand All @@ -157,12 +173,11 @@ def fromConfig(cls, config, builder=None):
config : `SchemaConfig`
`Registry` schema configuration describing `DataUnit` relations.
builder : `SchemaBuilder`, optional
When given, create `Table` entries for every `DataUnit` table.
When given, create `sqlalchemy.core.Table` entries for every `DataUnit` table.
"""
dataUnitRegistry = cls()
dataUnitRegistry.builder = builder
dataUnitRegistry._initDataUnitNames(config)
dataUnitRegistry._initDataUnits(config)
dataUnitRegistry._initDataUnits(config, builder)
return dataUnitRegistry

def __len__(self):
Expand All @@ -176,10 +191,10 @@ def __setitem__(self, dataUnitName, dataUnit):
self._dataUnits[dataUnitName] = dataUnit

def __iter__(self):
yield from self._dataUnitNames
return iter(self._dataUnitNames)

def keys(self):
yield from self._dataUnitNames
return iter(self._dataUnitNames)

def values(self):
return (self[dataUnitName] for dataUnitName in self._dataUnitNames)
Expand Down Expand Up @@ -209,13 +224,15 @@ def _initDataUnitNames(self, config):
for dependency in dependencies[category]:
self._dataUnitNames.connect(dependency, dataUnitName)

def _initDataUnits(self, config):
def _initDataUnits(self, config, builder):
"""Initialize `DataUnit` entries.
Parameters
----------
config : `SchemaConfig`
Schema configuration describing `DataUnit` relations.
builder : `SchemaBuilder`, optional
When given, create `sqlalchemy.core.Table` entries for every `DataUnit` table.
"""
# Visit DataUnits in dependency order
for dataUnitName in self._dataUnitNames:
Expand All @@ -230,21 +247,21 @@ def _initDataUnits(self, config):
requiredDependencies = (self[name] for name in dependencies['required'])
if 'optional' in dependencies:
optionalDependencies = (self[name] for name in dependencies['optional'])
if self.builder is not None:
if builder is not None:
if 'link' in dataUnitDescription:
# Link names
link = tuple((linkDescription['name'] for linkDescription in dataUnitDescription['link']))
# Link columns that will become part of the Datasets table
for linkDescription in dataUnitDescription['link']:
self.links[linkDescription['name']] = self.builder.makeColumn(linkDescription)
self.links[linkDescription['name']] = builder.makeColumn(linkDescription)
if 'tables' in dataUnitDescription:
for tableName, tableDescription in dataUnitDescription['tables'].items():
if tableName == dataUnitName:
# Primary table for this DataUnit
table = self.builder.addTable(tableName, tableDescription)
table = builder.addTable(tableName, tableDescription)
else:
# Secondary table
self.builder.addTable(tableName, tableDescription)
builder.addTable(tableName, tableDescription)
dataUnit = DataUnit(name=dataUnitName,
requiredDependencies=requiredDependencies,
optionalDependencies=optionalDependencies,
Expand All @@ -262,10 +279,7 @@ def getPrimaryKeyNames(self, dataUnitNames):
Returns
-------
primaryKeyNames : `list`
primaryKeyNames : `set`
All primary-key column names for the given ``dataUnitNames``.
"""
primaryKeyNames = []
for dataUnitName in dataUnitNames:
primaryKeyNames.extend(self[dataUnitName].primaryKey)
return primaryKeyNames
return set(chain.from_iterable(self[name].primaryKey for name in dataUnitNames))
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/core/registry.py
Expand Up @@ -69,3 +69,5 @@ def fromConfig(config):
def __init__(self, config):
assert isinstance(config, RegistryConfig)
self.config = config

# TODO Add back all interfaces (copied from SqlRegistry) once that is stabilized
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/schema.py
Expand Up @@ -20,9 +20,9 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from .utils import iterable
from .config import Config
from sqlalchemy import Column, String, Integer, Boolean, LargeBinary, DateTime,\
Float, ForeignKey, ForeignKeyConstraint, Table, MetaData
from .config import Config
from .dataUnit import DataUnitRegistry

metadata = None # Needed to make disabled test_hsc not fail on import
Expand Down

0 comments on commit 0f0ba99

Please sign in to comment.