Skip to content

Commit

Permalink
Merge pull request #20 from lsst/tickets/DM-12627
Browse files Browse the repository at this point in the history
DM-12627: Implement Butler DatasetType
  • Loading branch information
Pim Schellart committed Mar 16, 2018
2 parents a10afbb + 2d5f7d3 commit e9143ed
Show file tree
Hide file tree
Showing 4 changed files with 184 additions and 24 deletions.
42 changes: 20 additions & 22 deletions python/lsst/daf/butler/core/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

from types import MappingProxyType
from .utils import slotValuesAreEqual, slotValuesToHash
from .units import DataUnitSet

__all__ = ("DatasetType", "DatasetRef")

Expand All @@ -43,10 +42,20 @@ class DatasetType(object):
may be added.
`DatasetType` instances are immutable.
All arguments correspond directly to instance attributes.
Parameters
----------
name : `str`
A string name for the `Dataset`; must correspond to the same
`DatasetType` across all Registries.
dataUnits : `iterable` of `str`
`DataUnit` names that defines the `DatasetRef`\ s corresponding to
this `DatasetType`. The input iterable is copied into a `frozenset`.
storageClass : `str`
Name of a `StorageClass` that defines how this `DatasetType`
is persisted.
"""

__slots__ = ("_name", "_template", "_units", "_storageClass")
__slots__ = ("_name", "_dataUnits", "_storageClass")
__eq__ = slotValuesAreEqual
__hash__ = slotValuesToHash

Expand All @@ -58,33 +67,22 @@ def name(self):
return self._name

@property
def dataUnits(self):
    # Raw string: the docstring contains a backslash-escaped Sphinx
    # plural (`DatasetRef`\ s); a non-raw string makes "\ " an invalid
    # escape sequence (SyntaxWarning on modern Python).
    r"""A `frozenset` of `DataUnit` names that defines the `DatasetRef`\ s
    corresponding to this `DatasetType`.
    """
    return self._dataUnits

@property
def storageClass(self):
    """Name of a `StorageClass` that defines how this `DatasetType`
    is persisted.

    Note that this is the *name* of the storage class (a `str`), not a
    `StorageClass` instance; resolution happens at a higher level.
    """
    return self._storageClass

def __init__(self, name, dataUnits, storageClass):
    """Construct an immutable DatasetType.

    Parameters
    ----------
    name : `str`
        A string name for the `Dataset`; must correspond to the same
        `DatasetType` across all Registries.
    dataUnits : iterable of `str`
        `DataUnit` names defining the `DatasetRef`\\ s corresponding to
        this `DatasetType`.
    storageClass : `str`
        Name of a `StorageClass` that defines how this `DatasetType`
        is persisted.
    """
    self._name = name
    # Copy into a frozenset so the unit collection is immutable and
    # hashable — required because instances are used as dict/set keys
    # via the slot-based __eq__/__hash__.
    self._dataUnits = frozenset(dataUnits)
    self._storageClass = storageClass


Expand Down
48 changes: 46 additions & 2 deletions python/lsst/daf/butler/registries/sqlRegistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from sqlalchemy import create_engine
from sqlalchemy.sql import select

from ..core.datasets import DatasetType
from ..core.registry import RegistryConfig, Registry
from ..core.schema import Schema

Expand All @@ -39,13 +41,24 @@ class SqlRegistry(Registry):
config : `SqlRegistryConfig` or `str`
Load configuration
"""

def __init__(self, config):
    # Let the base Registry do its generic configuration handling first.
    super().__init__(config)

    # Re-wrap the raw config in the SQL-specific config class.
    self.config = SqlRegistryConfig(config)
    self._schema = Schema(self.config['schema'])
    self._engine = create_engine(self.config['db'])
    # Create all tables described by the schema (no-op for tables that
    # already exist in the target database).
    self._schema.metadata.create_all(self._engine)
    # In-memory cache of DatasetType instances, keyed by name; populated
    # by registerDatasetType.
    self._datasetTypes = {}

def _isValidDatasetType(self, datasetType):
    """Check if given `DatasetType` instance is valid for this `Registry`.

    Currently only verifies the type; per-schema validation is pending.

    .. todo::

        Insert checks for `storageClass` and `dataUnits`.
    """
    return isinstance(datasetType, DatasetType)

def registerDatasetType(self, datasetType):
    """Add a new `DatasetType` to the registry.

    Parameters
    ----------
    datasetType : `DatasetType`
        The `DatasetType` to be added.

    Raises
    ------
    ValueError
        If ``datasetType`` is not a valid `DatasetType` for this registry.
    KeyError
        If a `DatasetType` with this name was already registered.
    """
    # NOTE: the stray pre-implementation `raise NotImplementedError`
    # that preceded this body has been removed; it made everything
    # below unreachable.
    if not self._isValidDatasetType(datasetType):
        raise ValueError("DatasetType is not valid for this registry")
    # NOTE(review): duplicate detection only consults the in-memory
    # cache, not the database — a pre-existing row inserted by another
    # SqlRegistry instance surfaces as a DB integrity error instead.
    if datasetType.name in self._datasetTypes:
        raise KeyError("DatasetType: {} already registered".format(datasetType.name))
    datasetTypeTable = self._schema.metadata.tables['DatasetType']
    datasetTypeUnitsTable = self._schema.metadata.tables['DatasetTypeUnits']
    # Insert the type and its unit links atomically in one transaction.
    with self._engine.begin() as connection:
        connection.execute(datasetTypeTable.insert().values(
            dataset_type_name=datasetType.name,
            storage_class=datasetType.storageClass))
        if datasetType.dataUnits:
            connection.execute(datasetTypeUnitsTable.insert(),
                               [{'dataset_type_name': datasetType.name, 'unit_name': dataUnitName}
                                for dataUnitName in datasetType.dataUnits])
    # Cache only after a successful commit.
    self._datasetTypes[datasetType.name] = datasetType

def getDatasetType(self, name):
    """Get the `DatasetType` registered under the given name.

    Parameters
    ----------
    name : `str`
        Name of the `DatasetType`.

    Returns
    -------
    type : `DatasetType`
        The `DatasetType` associated with the given name.

    Raises
    ------
    KeyError
        If no `DatasetType` with this name is registered.
    """
    # Serve from the in-memory cache when possible.
    if name in self._datasetTypes:
        return self._datasetTypes[name]
    datasetTypeTable = self._schema.metadata.tables['DatasetType']
    datasetTypeUnitsTable = self._schema.metadata.tables['DatasetTypeUnits']
    with self._engine.begin() as connection:
        # Get StorageClass from DatasetType table.
        result = connection.execute(select([datasetTypeTable.c.storage_class]).where(
            datasetTypeTable.c.dataset_type_name == name)).fetchone()
        if result is None:
            # Previously this fell through and raised TypeError on
            # subscripting None; raise a meaningful KeyError instead.
            raise KeyError("DatasetType: {} not registered".format(name))
        storageClass = result['storage_class']
        # Get DataUnits (if any) from DatasetTypeUnits table.
        result = connection.execute(select([datasetTypeUnitsTable.c.unit_name]).where(
            datasetTypeUnitsTable.c.dataset_type_name == name)).fetchall()
        # fetchall() already materialized the rows, so the generator may
        # safely be consumed after the transaction closes.
        dataUnits = (r[0] for r in result) if result else ()
    datasetType = DatasetType(name=name,
                              storageClass=storageClass,
                              dataUnits=dataUnits)
    # Cache the reconstructed type, mirroring registerDatasetType.
    self._datasetTypes[name] = datasetType
    return datasetType

def addDataset(self, ref, uri, components, run, producer=None):
"""Add a `Dataset` to a Collection.
Expand Down
93 changes: 93 additions & 0 deletions tests/test_datasetType.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import unittest

import lsst.utils.tests

from lsst.daf.butler.core.datasets import DatasetType

"""Tests for DatasetType.
"""


class DatasetTypeTestCase(lsst.utils.tests.TestCase):
    """Test for DatasetType.
    """
    def testConstructor(self):
        """Test construction preserves values.

        Note that construction doesn't check for valid storageClass or
        dataUnits parameters.
        These can only be verified for a particular schema.
        """
        datasetTypeName = "test"
        storageClass = "StructuredData"
        dataUnits = frozenset(("camera", "visit"))
        datasetType = DatasetType(datasetTypeName, dataUnits, storageClass)
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dataUnits, dataUnits)

    def testEquality(self):
        """Test `DatasetType.__eq__`.

        Arguments follow the constructor order
        (name, dataUnits, storageClass); the previous version passed
        storageClass in the dataUnits slot and only passed by accident.
        """
        self.assertEqual(DatasetType("a", ("UnitA", ), "StorageA"),
                         DatasetType("a", ("UnitA", ), "StorageA"))
        self.assertNotEqual(DatasetType("a", ("UnitA", ), "StorageA"),
                            DatasetType("b", ("UnitA", ), "StorageA"))
        self.assertNotEqual(DatasetType("a", ("UnitA", ), "StorageA"),
                            DatasetType("a", ("UnitB", ), "StorageA"))
        self.assertNotEqual(DatasetType("a", ("UnitA", ), "StorageA"),
                            DatasetType("a", ("UnitA", ), "StorageB"))

    def testHashability(self):
        """Test `DatasetType.__hash__`.

        This test is performed by checking that `DatasetType` entries can
        be inserted into a `set` and that unique values of its
        (`name`, `dataUnits`, `storageClass`) parameters result in separate
        entries (and equal ones don't).

        This does not check for uniformity of hashing or the actual values
        of the hash function.
        """
        types = []
        unique = 0
        for name in ["a", "b"]:
            for storageClass in ["c", "d"]:
                for dataUnits in [("e", ), ("f", )]:
                    # Constructor order is (name, dataUnits, storageClass).
                    datasetType = DatasetType(name, dataUnits, storageClass)
                    datasetTypeCopy = DatasetType(name, dataUnits, storageClass)
                    types.extend((datasetType, datasetTypeCopy))
                    unique += 1  # datasetType should always equal its copy
        self.assertEqual(len(set(types)), unique)  # all other combinations are unique


class MemoryTester(lsst.utils.tests.MemoryTestCase):
    # Standard LSST leak check; the base class supplies all behavior.
    pass


def setup_module(module):
    """Initialize the LSST test framework when run under pytest."""
    lsst.utils.tests.init()


if __name__ == "__main__":
lsst.utils.tests.init()
unittest.main()
25 changes: 25 additions & 0 deletions tests/test_sqlRegistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

import lsst.utils.tests

from lsst.daf.butler.core.datasets import DatasetType
from lsst.daf.butler.core.registry import Registry
from lsst.daf.butler.registries.sqlRegistry import SqlRegistry

Expand All @@ -42,6 +43,30 @@ def testInitFromConfig(self):
registry = Registry.fromConfig(self.configFile)
self.assertIsInstance(registry, SqlRegistry)

def testDatasetType(self):
    """Round-trip a DatasetType through the SqlRegistry."""
    registry = Registry.fromConfig(self.configFile)
    # Check valid insert
    datasetTypeName = "test"
    storageClass = "StructuredData"
    dataUnits = ("camera", "visit")
    inDatasetType = DatasetType(datasetTypeName, dataUnits, storageClass)
    registry.registerDatasetType(inDatasetType)
    outDatasetType = registry.getDatasetType(datasetTypeName)
    self.assertEqual(outDatasetType, inDatasetType)

    # Re-inserting should fail
    with self.assertRaises(KeyError):
        registry.registerDatasetType(inDatasetType)

    # Registering a second DatasetType under a different name should
    # succeed and round-trip as well.
    # NOTE(review): this comment previously said "Template can be None",
    # but DatasetType no longer has a template attribute.
    datasetTypeName = "testNoneTemplate"
    storageClass = "StructuredData"
    dataUnits = ("camera", "visit")
    inDatasetType = DatasetType(datasetTypeName, dataUnits, storageClass)
    registry.registerDatasetType(inDatasetType)
    outDatasetType = registry.getDatasetType(datasetTypeName)
    self.assertEqual(outDatasetType, inDatasetType)


class MemoryTester(lsst.utils.tests.MemoryTestCase):
    # Standard LSST leak check; the base class supplies all behavior.
    pass
Expand Down

0 comments on commit e9143ed

Please sign in to comment.