Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-12627: Implement Butler DatasetType #20

Merged
merged 4 commits into from
Mar 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
42 changes: 20 additions & 22 deletions python/lsst/daf/butler/core/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

from types import MappingProxyType
from .utils import slotValuesAreEqual, slotValuesToHash
from .units import DataUnitSet

__all__ = ("DatasetType", "DatasetRef")

Expand All @@ -43,10 +42,20 @@ class DatasetType(object):
may be added.
`DatasetType` instances are immutable.

All arguments correspond directly to instance attributes.
Parameters
----------
name : `str`
A string name for the `Dataset`; must correspond to the same
`DatasetType` across all Registries.
dataUnits : `iterable` of `str`
`DataUnit` names that defines the `DatasetRef`\ s corresponding to
this `DatasetType`. The input iterable is copied into a `frozenset`.
storageClass : `str`
Name of a `StorageClass` that defines how this `DatasetType`
is persisted.
"""

__slots__ = ("_name", "_template", "_units", "_storageClass")
__slots__ = ("_name", "_dataUnits", "_storageClass")
__eq__ = slotValuesAreEqual
__hash__ = slotValuesToHash

Expand All @@ -58,33 +67,22 @@ def name(self):
return self._name

@property
def template(self):
"""A string with `str`.format-style replacement patterns that can be
used to create a path from a `Run`
(and optionally its associated Collection) and a `DatasetRef`.

May be `None` to indicate a read-only `Dataset` or one whose templates
must be provided at a higher level.
def dataUnits(self):
"""A `frozenset` of `DataUnit` names that defines the `DatasetRef`\ s
corresponding to this `DatasetType`.
"""
return self._template

@property
def units(self):
"""A `DataUnitSet` that defines the `DatasetRef`\ s corresponding
to this `DatasetType`.
"""
return self._units
return self._dataUnits

@property
def storageClass(self):
"""A `StorageClass` that defines how this `DatasetType` is persisted.
"""Name of a `StorageClass` that defines how this `DatasetType`
is persisted.
"""
return self._storageClass

def __init__(self, name, template, units, storageClass):
def __init__(self, name, dataUnits, storageClass):
self._name = name
self._template = template
self._units = DataUnitSet(units)
self._dataUnits = frozenset(dataUnits)
self._storageClass = storageClass


Expand Down
48 changes: 46 additions & 2 deletions python/lsst/daf/butler/registries/sqlRegistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from sqlalchemy import create_engine
from sqlalchemy.sql import select

from ..core.datasets import DatasetType
from ..core.registry import RegistryConfig, Registry
from ..core.schema import Schema

Expand All @@ -39,13 +41,24 @@ class SqlRegistry(Registry):
config : `SqlRegistryConfig` or `str`
Load configuration
"""

    def __init__(self, config):
        """Create a SqlRegistry backed by the database named in ``config``.

        Parameters
        ----------
        config : `SqlRegistryConfig` or `str`
            Load configuration.
        """
        super().__init__(config)

        self.config = SqlRegistryConfig(config)
        # Build the schema description from config and create any missing
        # tables in the target database.
        self._schema = Schema(self.config['schema'])
        self._engine = create_engine(self.config['db'])
        self._schema.metadata.create_all(self._engine)
        # In-memory cache of DatasetType instances, keyed by name; filled
        # by registerDatasetType (and consulted by getDatasetType).
        self._datasetTypes = {}

    def _isValidDatasetType(self, datasetType):
        """Check if given `DatasetType` instance is valid for this `Registry`.

        Parameters
        ----------
        datasetType : `DatasetType`
            Instance to validate.

        Returns
        -------
        valid : `bool`
            Currently only checks that ``datasetType`` is an instance of
            `DatasetType`; no schema-level validation is performed yet.

        .. todo::

            Insert checks for `storageClass`, `dataUnits` and `template`.
        """
        return isinstance(datasetType, DatasetType)

def registerDatasetType(self, datasetType):
"""
Expand All @@ -56,7 +69,20 @@ def registerDatasetType(self, datasetType):
datasetType : `DatasetType`
The `DatasetType` to be added.
"""
raise NotImplementedError("Must be implemented by subclass")
if not self._isValidDatasetType(datasetType):
raise ValueError("DatasetType is not valid for this registry")
if datasetType.name in self._datasetTypes:
raise KeyError("DatasetType: {} already registered".format(datasetType.name))
datasetTypeTable = self._schema.metadata.tables['DatasetType']
datasetTypeUnitsTable = self._schema.metadata.tables['DatasetTypeUnits']
with self._engine.begin() as connection:
connection.execute(datasetTypeTable.insert().values(dataset_type_name=datasetType.name,
storage_class=datasetType.storageClass))
if datasetType.dataUnits:
connection.execute(datasetTypeUnitsTable.insert(),
[{'dataset_type_name': datasetType.name, 'unit_name': dataUnitName}
for dataUnitName in datasetType.dataUnits])
self._datasetTypes[datasetType.name] = datasetType

def getDatasetType(self, name):
"""Get the `DatasetType`.
Expand All @@ -71,7 +97,25 @@ def getDatasetType(self, name):
type : `DatasetType`
The `DatasetType` associated with the given name.
"""
raise NotImplementedError("Must be implemented by subclass")
datasetType = None
if name in self._datasetTypes:
datasetType = self._datasetTypes[name]
else:
datasetTypeTable = self._schema.metadata.tables['DatasetType']
datasetTypeUnitsTable = self._schema.metadata.tables['DatasetTypeUnits']
with self._engine.begin() as connection:
# Get StorageClass from DatasetType table
result = connection.execute(select([datasetTypeTable.c.storage_class]).where(
datasetTypeTable.c.dataset_type_name == name)).fetchone()
storageClass = result['storage_class']
# Get DataUnits (if any) from DatasetTypeUnits table
result = connection.execute(select([datasetTypeUnitsTable.c.unit_name]).where(
datasetTypeUnitsTable.c.dataset_type_name == name)).fetchall()
dataUnits = (r[0] for r in result) if result else ()
datasetType = DatasetType(name=name,
storageClass=storageClass,
dataUnits=dataUnits)
return datasetType

def addDataset(self, ref, uri, components, run, producer=None):
"""Add a `Dataset` to a Collection.
Expand Down
93 changes: 93 additions & 0 deletions tests/test_datasetType.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import unittest

import lsst.utils.tests

from lsst.daf.butler.core.datasets import DatasetType

"""Tests for DatasetType.
"""


class DatasetTypeTestCase(lsst.utils.tests.TestCase):
    """Test for DatasetType.
    """
    def testConstructor(self):
        """Test construction preserves values.

        Note that construction doesn't check for valid storageClass or
        dataUnits parameters.
        These can only be verified for a particular schema.
        """
        datasetTypeName = "test"
        storageClass = "StructuredData"
        dataUnits = frozenset(("camera", "visit"))
        datasetType = DatasetType(datasetTypeName, dataUnits, storageClass)
        self.assertEqual(datasetType.name, datasetTypeName)
        self.assertEqual(datasetType.storageClass, storageClass)
        self.assertEqual(datasetType.dataUnits, dataUnits)

    def testEquality(self):
        # Constructor argument order is (name, dataUnits, storageClass);
        # the original passed storageClass and dataUnits swapped, turning
        # "StorageA" into a frozenset of its characters.
        self.assertEqual(DatasetType("a", ("UnitA", ), "StorageA"),
                         DatasetType("a", ("UnitA", ), "StorageA"))
        self.assertNotEqual(DatasetType("a", ("UnitA", ), "StorageA"),
                            DatasetType("b", ("UnitA", ), "StorageA"))
        self.assertNotEqual(DatasetType("a", ("UnitA", ), "StorageA"),
                            DatasetType("a", ("UnitA", ), "StorageB"))
        self.assertNotEqual(DatasetType("a", ("UnitA", ), "StorageA"),
                            DatasetType("a", ("UnitB", ), "StorageA"))

    def testHashability(self):
        """Test `DatasetType.__hash__`.

        This test is performed by checking that `DatasetType` entries can
        be inserted into a `set` and that unique values of its
        (`name`, `storageClass`, `dataUnits`) parameters result in separate
        entries (and equal ones don't).

        This does not check for uniformity of hashing or the actual values
        of the hash function.
        """
        types = []
        unique = 0
        for name in ["a", "b"]:
            for storageClass in ["c", "d"]:
                for dataUnits in [("e", ), ("f", )]:
                    # Argument order fixed to (name, dataUnits, storageClass)
                    # to match the DatasetType constructor.
                    datasetType = DatasetType(name, dataUnits, storageClass)
                    datasetTypeCopy = DatasetType(name, dataUnits, storageClass)
                    types.extend((datasetType, datasetTypeCopy))
                    unique += 1  # datasetType should always equal its copy
        self.assertEqual(len(set(types)), unique)  # all other combinations are unique


class MemoryTester(lsst.utils.tests.MemoryTestCase):
    # Standard LSST leak/memory check; the base class provides all tests.
    pass


def setup_module(module):
    # pytest hook: initialize the LSST test framework before any test runs.
    lsst.utils.tests.init()


# Allow running this test module directly (outside pytest).
if __name__ == "__main__":
    lsst.utils.tests.init()
    unittest.main()
25 changes: 25 additions & 0 deletions tests/test_sqlRegistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

import lsst.utils.tests

from lsst.daf.butler.core.datasets import DatasetType
from lsst.daf.butler.core.registry import Registry
from lsst.daf.butler.registries.sqlRegistry import SqlRegistry

Expand All @@ -42,6 +43,30 @@ def testInitFromConfig(self):
registry = Registry.fromConfig(self.configFile)
self.assertIsInstance(registry, SqlRegistry)

    def testDatasetType(self):
        """Round-trip a DatasetType through register/getDatasetType and
        check duplicate registration fails."""
        registry = Registry.fromConfig(self.configFile)
        # Check valid insert
        datasetTypeName = "test"
        storageClass = "StructuredData"
        dataUnits = ("camera", "visit")
        inDatasetType = DatasetType(datasetTypeName, dataUnits, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType, inDatasetType)

        # Re-inserting should fail
        with self.assertRaises(KeyError):
            registry.registerDatasetType(inDatasetType)

        # Registering a second DatasetType under a different name should
        # succeed.  (The original comment said "Template can be None", but
        # DatasetType no longer has a template attribute, so this now just
        # exercises a second independent round-trip.)
        datasetTypeName = "testNoneTemplate"
        storageClass = "StructuredData"
        dataUnits = ("camera", "visit")
        inDatasetType = DatasetType(datasetTypeName, dataUnits, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType, inDatasetType)


class MemoryTester(lsst.utils.tests.MemoryTestCase):
    # Standard LSST leak/memory check; the base class provides all tests.
    pass
Expand Down