lsst · pschella · Mar 16, 2018 · Mar 12, 2018 · Mar 16, 2018 · Mar 16, 2018
diff --git a/config/registry/default_schema.yaml b/config/registry/default_schema.yaml
@@ -517,6 +517,10 @@ tables:
       name: storage_class
       type: string
       nullable: false
+    - 
+      name: template
+      type: string
+      nullable: true
 
   DatasetTypeUnits:
     doc: >

diff --git a/python/lsst/daf/butler/core/datasets.py b/python/lsst/daf/butler/core/datasets.py
@@ -21,7 +21,6 @@
 
 from types import MappingProxyType
 from .utils import slotValuesAreEqual, slotValuesToHash
-from .units import DataUnitSet
 
 __all__ = ("DatasetType", "DatasetRef")
 
@@ -46,7 +45,7 @@ class DatasetType(object):
     All arguments correspond directly to instance attributes.
     """
 
-    __slots__ = ("_name", "_template", "_units", "_storageClass")
+    __slots__ = ("_name", "_dataUnits", "_storageClass", "_template")
     __eq__ = slotValuesAreEqual
     __hash__ = slotValuesToHash
 
@@ -57,6 +56,19 @@ def name(self):
         """
         return self._name
 
+    @property
+    def dataUnits(self):
+        """A `frozenset` of `DataUnit` names that defines the `DatasetRef`\ s
+        corresponding to this `DatasetType`.
+        """
+        return self._dataUnits
+
+    @property
+    def storageClass(self):
+        """A `StorageClass` that defines how this `DatasetType` is persisted.
+        """
+        return self._storageClass
+
     @property
     def template(self):
         """A string with `str`.format-style replacement patterns that can be
@@ -68,24 +80,11 @@ def template(self):
         """
         return self._template
 
-    @property
-    def units(self):
-        """A `DataUnitSet` that defines the `DatasetRef`\ s corresponding
-        to this `DatasetType`.
-        """
-        return self._units
-
-    @property
-    def storageClass(self):
-        """A `StorageClass` that defines how this `DatasetType` is persisted.
-        """
-        return self._storageClass
-
-    def __init__(self, name, template, units, storageClass):
+    def __init__(self, name, dataUnits, storageClass, template=None):
         self._name = name
-        self._template = template
-        self._units = DataUnitSet(units)
+        self._dataUnits = frozenset(dataUnits)
         self._storageClass = storageClass
+        self._template = template
 
 
 class DatasetRef(object):

diff --git a/python/lsst/daf/butler/registries/sqlRegistry.py b/python/lsst/daf/butler/registries/sqlRegistry.py
@@ -20,7 +20,9 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 from sqlalchemy import create_engine
+from sqlalchemy.sql import select
 
+from ..core.datasets import DatasetType
 from ..core.registry import RegistryConfig, Registry
 from ..core.schema import Schema
 
@@ -39,13 +41,24 @@ class SqlRegistry(Registry):
     config : `SqlRegistryConfig` or `str`
         Load configuration
     """
+
     def __init__(self, config):
         super().__init__(config)
 
         self.config = SqlRegistryConfig(config)
         self._schema = Schema(self.config['schema'])
         self._engine = create_engine(self.config['db'])
         self._schema.metadata.create_all(self._engine)
+        self._datasetTypes = {}
+
+    def _isValidDatasetType(self, datasetType):
+        """Check if given `DatasetType` instance is valid for this `Registry`.
+
+        .. todo::
+
+            Insert checks for `storageClass`, `dataUnits` and `template`.
+        """
+        return isinstance(datasetType, DatasetType)
 
     def registerDatasetType(self, datasetType):
         """
@@ -56,7 +69,21 @@ def registerDatasetType(self, datasetType):
         datasetType : `DatasetType`
             The `DatasetType` to be added.
         """
-        raise NotImplementedError("Must be implemented by subclass")
+        if not self._isValidDatasetType(datasetType):
+            raise ValueError("DatasetType is not valid for this registry")
+        if datasetType.name in self._datasetTypes:
+            raise KeyError("DatasetType: {} already registered".format(datasetType.name))
+        datasetTypeTable = self._schema.metadata.tables['DatasetType']
+        datasetTypeUnitsTable = self._schema.metadata.tables['DatasetTypeUnits']
+        with self._engine.begin() as connection:
+            connection.execute(datasetTypeTable.insert().values(dataset_type_name=datasetType.name,
+                                                                storage_class=datasetType.storageClass,
+                                                                template=datasetType.template))
+            if datasetType.dataUnits:
+                connection.execute(datasetTypeUnitsTable.insert(),
+                                   [{'dataset_type_name': datasetType.name, 'unit_name': dataUnitName}
+                                    for dataUnitName in datasetType.dataUnits])
+            self._datasetTypes[datasetType.name] = datasetType
 
     def getDatasetType(self, name):
         """Get the `DatasetType`.
@@ -71,7 +98,28 @@ def getDatasetType(self, name):
         type : `DatasetType`
             The `DatasetType` associated with the given name.
         """
-        raise NotImplementedError("Must be implemented by subclass")
+        datasetType = None
+        if name in self._datasetTypes:
+            datasetType = self._datasetTypes[name]
+        else:
+            datasetTypeTable = self._schema.metadata.tables['DatasetType']
+            datasetTypeUnitsTable = self._schema.metadata.tables['DatasetTypeUnits']
+            with self._engine.begin() as connection:
+                # Get StorageClass and template from DatasetType table
+                result = connection.execute(select([datasetTypeTable.c.storage_class,
+                                                    datasetTypeTable.c.template]).where(
+                    datasetTypeTable.c.dataset_type_name == name)).fetchone()
+                storageClass = result['storage_class']
+                template = result['template']
+                # Get DataUnits (if any) from DatasetTypeUnits table
+                result = connection.execute(select([datasetTypeUnitsTable.c.unit_name]).where(
+                    datasetTypeUnitsTable.c.dataset_type_name == name)).fetchall()
+                dataUnits = (r[0] for r in result) if result else ()
+                datasetType = DatasetType(name=name,
+                                          storageClass=storageClass,
+                                          dataUnits=dataUnits,
+                                          template=template)
+        return datasetType
 
     def addDataset(self, ref, uri, components, run, producer=None):
         """Add a `Dataset` to a Collection.

diff --git a/tests/test_datasetType.py b/tests/test_datasetType.py
@@ -0,0 +1,93 @@
+# This file is part of daf_butler.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import unittest
+
+import lsst.utils.tests
+
+from lsst.daf.butler.core.datasets import DatasetType
+
+"""Tests for SqlRegistry.
+"""
+
+
+class DatasetTypeTestCase(lsst.utils.tests.TestCase):
+    """Test for DatasetType.
+    """
+    def setUp(self):
+        pass
+
+    def testConstructor(self):
+        """Test construction preserves values.
+
+        Note that construction doesn't check for valid storageClass,
+        dataUnits or template parameters.
+        These can only be verified for a particular schema.
+        """
+        datasetTypeName = "test"
+        storageClass = "StructuredData"
+        dataUnits = frozenset(("camera", "visit"))
+        template = "{datasetType}/{camera}/{visit}"
+        datasetType = DatasetType(datasetTypeName, dataUnits, storageClass, template)
+        self.assertEqual(datasetType.name, datasetTypeName)
+        self.assertEqual(datasetType.storageClass, storageClass)
+        self.assertEqual(datasetType.dataUnits, dataUnits)
+        self.assertEqual(datasetType.template, template)
+
+    def testEquality(self):
+        self.assertEqual(DatasetType("a", "StorageA", ("UnitA", )),
+                         DatasetType("a", "StorageA", ("UnitA", )))
+        self.assertEqual(DatasetType("a", "StorageA", ("UnitA", ), "{UnitA}"),
+                         DatasetType("a", "StorageA", ("UnitA", ), "{UnitA}"))
+        self.assertNotEqual(DatasetType("a", "StorageA", ("UnitA", ), "{UnitA}"),
+                            DatasetType("b", "StorageA", ("UnitA", ), "{UnitA}"))
+        self.assertNotEqual(DatasetType("a", "StorageA", ("UnitA", ), "{UnitA}"),
+                            DatasetType("a", "StorageB", ("UnitA", ), "{UnitA}"))
+        self.assertNotEqual(DatasetType("a", "StorageA", ("UnitA", ), "{UnitA}"),
+                            DatasetType("a", "StorageA", ("UnitB", ), "{UnitA}"))
+        self.assertNotEqual(DatasetType("a", "StorageA", ("UnitA", ), "{UnitA}"),
+                            DatasetType("a", "StorageA", ("UnitA", ), "{UnitB}"))
+
+    def testHashability(self):
+        types = []
+        unique = 0
+        for name in ["a", "b"]:
+            for storageClass in ["c", "d"]:
+                for dataUnits in [("e", ), ("f", )]:
+                    for template in ["g", "h"]:
+                        datasetType = DatasetType(name, storageClass, dataUnits, template)
+                        datasetTypeCopy = DatasetType(name, storageClass, dataUnits, template)
+                        types.extend((datasetType, datasetTypeCopy))
+                        unique += 1
+        self.assertEqual(len(set(types)), unique)
+
+
+class MemoryTester(lsst.utils.tests.MemoryTestCase):
+    pass
+
+
+def setup_module(module):
+    lsst.utils.tests.init()
+
+
+if __name__ == "__main__":
+    lsst.utils.tests.init()
+    unittest.main()
diff --git a/tests/test_sqlRegistry.py b/tests/test_sqlRegistry.py
@@ -24,6 +24,7 @@
 
 import lsst.utils.tests
 
+from lsst.daf.butler.core.datasets import DatasetType
 from lsst.daf.butler.core.registry import Registry
 from lsst.daf.butler.registries.sqlRegistry import SqlRegistry
 
@@ -42,6 +43,32 @@ def testInitFromConfig(self):
         registry = Registry.fromConfig(self.configFile)
         self.assertIsInstance(registry, SqlRegistry)
 
+    def testDatasetType(self):
+        registry = Registry.fromConfig(self.configFile)
+        # Check valid insert
+        datasetTypeName = "test"
+        storageClass = "StructuredData"
+        dataUnits = ("camera", "visit")
+        template = "{datasetType}/{camera}/{visit}"
+        inDatasetType = DatasetType(datasetTypeName, dataUnits, storageClass)
+        registry.registerDatasetType(inDatasetType)
+        outDatasetType = registry.getDatasetType(datasetTypeName)
+        self.assertEqual(outDatasetType, inDatasetType)
+
+        # Re-inserting should fail
+        with self.assertRaises(KeyError):
+            registry.registerDatasetType(inDatasetType)
+
+        # Template can be None
+        datasetTypeName = "testNoneTemplate"
+        storageClass = "StructuredData"
+        dataUnits = ("camera", "visit")
+        template = None
+        inDatasetType = DatasetType(datasetTypeName, dataUnits, storageClass, template)
+        registry.registerDatasetType(inDatasetType)
+        outDatasetType = registry.getDatasetType(datasetTypeName)
+        self.assertEqual(outDatasetType, inDatasetType)
+
 
 class MemoryTester(lsst.utils.tests.MemoryTestCase):
     pass