-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DM-12627: Implement Butler DatasetType #20
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,6 @@ | |
|
||
from types import MappingProxyType | ||
from .utils import slotValuesAreEqual, slotValuesToHash | ||
from .units import DataUnitSet | ||
|
||
__all__ = ("DatasetType", "DatasetRef") | ||
|
||
|
@@ -46,7 +45,7 @@ class DatasetType(object): | |
All arguments correspond directly to instance attributes. | ||
""" | ||
|
||
__slots__ = ("_name", "_template", "_units", "_storageClass") | ||
__slots__ = ("_name", "_dataUnits", "_storageClass", "_template") | ||
__eq__ = slotValuesAreEqual | ||
__hash__ = slotValuesToHash | ||
|
||
|
@@ -57,6 +56,19 @@ def name(self): | |
""" | ||
return self._name | ||
|
||
@property | ||
def dataUnits(self): | ||
"""A `frozenset` of `DataUnit` names that defines the `DatasetRef`\ s | ||
corresponding to this `DatasetType`. | ||
""" | ||
return self._dataUnits | ||
|
||
@property | ||
def storageClass(self): | ||
"""A `StorageClass` that defines how this `DatasetType` is persisted. | ||
""" | ||
return self._storageClass | ||
|
||
@property | ||
def template(self): | ||
"""A string with `str`.format-style replacement patterns that can be | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not related to the file name template used by datastore. Correct? (so this template is not a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is related to that template. But it is unclear (to me at the moment) if we still want it here. I'll remove it for now. |
||
|
@@ -68,24 +80,11 @@ def template(self): | |
""" | ||
return self._template | ||
|
||
@property | ||
def units(self): | ||
"""A `DataUnitSet` that defines the `DatasetRef`\ s corresponding | ||
to this `DatasetType`. | ||
""" | ||
return self._units | ||
|
||
@property | ||
def storageClass(self): | ||
"""A `StorageClass` that defines how this `DatasetType` is persisted. | ||
""" | ||
return self._storageClass | ||
|
||
def __init__(self, name, template, units, storageClass): | ||
def __init__(self, name, dataUnits, storageClass, template=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It may be any iterable, but it becomes a |
||
self._name = name | ||
self._template = template | ||
self._units = DataUnitSet(units) | ||
self._dataUnits = frozenset(dataUnits) | ||
self._storageClass = storageClass | ||
self._template = template | ||
|
||
|
||
class DatasetRef(object): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,9 @@ | |
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from sqlalchemy import create_engine | ||
from sqlalchemy.sql import select | ||
|
||
from ..core.datasets import DatasetType | ||
from ..core.registry import RegistryConfig, Registry | ||
from ..core.schema import Schema | ||
|
||
|
@@ -39,13 +41,24 @@ class SqlRegistry(Registry): | |
config : `SqlRegistryConfig` or `str` | ||
Load configuration | ||
""" | ||
|
||
def __init__(self, config): | ||
super().__init__(config) | ||
|
||
self.config = SqlRegistryConfig(config) | ||
self._schema = Schema(self.config['schema']) | ||
self._engine = create_engine(self.config['db']) | ||
self._schema.metadata.create_all(self._engine) | ||
self._datasetTypes = {} | ||
|
||
def _isValidDatasetType(self, datasetType): | ||
"""Check if given `DatasetType` instance is valid for this `Registry`. | ||
|
||
.. todo:: | ||
|
||
Insert checks for `storageClass`, `dataUnits` and `template`. | ||
""" | ||
return isinstance(datasetType, DatasetType) | ||
|
||
def registerDatasetType(self, datasetType): | ||
""" | ||
|
@@ -56,7 +69,21 @@ def registerDatasetType(self, datasetType): | |
datasetType : `DatasetType` | ||
The `DatasetType` to be added. | ||
""" | ||
raise NotImplementedError("Must be implemented by subclass") | ||
if not self._isValidDatasetType(datasetType): | ||
raise ValueError("DatasetType is not valid for this registry") | ||
if datasetType.name in self._datasetTypes: | ||
raise KeyError("DatasetType: {} already registered".format(datasetType.name)) | ||
datasetTypeTable = self._schema.metadata.tables['DatasetType'] | ||
datasetTypeUnitsTable = self._schema.metadata.tables['DatasetTypeUnits'] | ||
with self._engine.begin() as connection: | ||
connection.execute(datasetTypeTable.insert().values(dataset_type_name=datasetType.name, | ||
storage_class=datasetType.storageClass, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is a name of a |
||
template=datasetType.template)) | ||
if datasetType.dataUnits: | ||
connection.execute(datasetTypeUnitsTable.insert(), | ||
[{'dataset_type_name': datasetType.name, 'unit_name': dataUnitName} | ||
for dataUnitName in datasetType.dataUnits]) | ||
self._datasetTypes[datasetType.name] = datasetType | ||
|
||
def getDatasetType(self, name): | ||
"""Get the `DatasetType`. | ||
|
@@ -71,7 +98,28 @@ def getDatasetType(self, name): | |
type : `DatasetType` | ||
The `DatasetType` associated with the given name. | ||
""" | ||
raise NotImplementedError("Must be implemented by subclass") | ||
datasetType = None | ||
if name in self._datasetTypes: | ||
datasetType = self._datasetTypes[name] | ||
else: | ||
datasetTypeTable = self._schema.metadata.tables['DatasetType'] | ||
datasetTypeUnitsTable = self._schema.metadata.tables['DatasetTypeUnits'] | ||
with self._engine.begin() as connection: | ||
# Get StorageClass and template from DatasetType table | ||
result = connection.execute(select([datasetTypeTable.c.storage_class, | ||
datasetTypeTable.c.template]).where( | ||
datasetTypeTable.c.dataset_type_name == name)).fetchone() | ||
storageClass = result['storage_class'] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll change the docstring. Should be a name (for now at least). |
||
template = result['template'] | ||
# Get DataUnits (if any) from DatasetTypeUnits table | ||
result = connection.execute(select([datasetTypeUnitsTable.c.unit_name]).where( | ||
datasetTypeUnitsTable.c.dataset_type_name == name)).fetchall() | ||
dataUnits = (r[0] for r in result) if result else () | ||
datasetType = DatasetType(name=name, | ||
storageClass=storageClass, | ||
dataUnits=dataUnits, | ||
template=template) | ||
return datasetType | ||
|
||
def addDataset(self, ref, uri, components, run, producer=None): | ||
"""Add a `Dataset` to a Collection. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# This file is part of daf_butler. | ||
# | ||
# Developed for the LSST Data Management System. | ||
# This product includes software developed by the LSST Project | ||
# (http://www.lsst.org). | ||
# See the COPYRIGHT file at the top-level directory of this distribution | ||
# for details of code ownership. | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
import unittest | ||
|
||
import lsst.utils.tests | ||
|
||
from lsst.daf.butler.core.datasets import DatasetType | ||
|
||
"""Tests for SqlRegistry. | ||
""" | ||
|
||
|
||
class DatasetTypeTestCase(lsst.utils.tests.TestCase): | ||
"""Test for DatasetType. | ||
""" | ||
def setUp(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need to define it if it doesn't do anything. |
||
pass | ||
|
||
def testConstructor(self): | ||
"""Test construction preserves values. | ||
|
||
Note that construction doesn't check for valid storageClass, | ||
dataUnits or template parameters. | ||
These can only be verified for a particular schema. | ||
""" | ||
datasetTypeName = "test" | ||
storageClass = "StructuredData" | ||
dataUnits = frozenset(("camera", "visit")) | ||
template = "{datasetType}/{camera}/{visit}" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm worried that we have two distinct template systems now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should really be the same, but I'll remove it since this seems to be just a remnant of the old system. |
||
datasetType = DatasetType(datasetTypeName, dataUnits, storageClass, template) | ||
self.assertEqual(datasetType.name, datasetTypeName) | ||
self.assertEqual(datasetType.storageClass, storageClass) | ||
self.assertEqual(datasetType.dataUnits, dataUnits) | ||
self.assertEqual(datasetType.template, template) | ||
|
||
def testEquality(self): | ||
self.assertEqual(DatasetType("a", "StorageA", ("UnitA", )), | ||
DatasetType("a", "StorageA", ("UnitA", ))) | ||
self.assertEqual(DatasetType("a", "StorageA", ("UnitA", ), "{UnitA}"), | ||
DatasetType("a", "StorageA", ("UnitA", ), "{UnitA}")) | ||
self.assertNotEqual(DatasetType("a", "StorageA", ("UnitA", ), "{UnitA}"), | ||
DatasetType("b", "StorageA", ("UnitA", ), "{UnitA}")) | ||
self.assertNotEqual(DatasetType("a", "StorageA", ("UnitA", ), "{UnitA}"), | ||
DatasetType("a", "StorageB", ("UnitA", ), "{UnitA}")) | ||
self.assertNotEqual(DatasetType("a", "StorageA", ("UnitA", ), "{UnitA}"), | ||
DatasetType("a", "StorageA", ("UnitB", ), "{UnitA}")) | ||
self.assertNotEqual(DatasetType("a", "StorageA", ("UnitA", ), "{UnitA}"), | ||
DatasetType("a", "StorageA", ("UnitA", ), "{UnitB}")) | ||
|
||
def testHashability(self): | ||
types = [] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this test method needs a docstring to explain what is actually being tested. There's a lot going on here before the final conversion of the list to a set. |
||
unique = 0 | ||
for name in ["a", "b"]: | ||
for storageClass in ["c", "d"]: | ||
for dataUnits in [("e", ), ("f", )]: | ||
for template in ["g", "h"]: | ||
datasetType = DatasetType(name, storageClass, dataUnits, template) | ||
datasetTypeCopy = DatasetType(name, storageClass, dataUnits, template) | ||
types.extend((datasetType, datasetTypeCopy)) | ||
unique += 1 | ||
self.assertEqual(len(set(types)), unique) | ||
|
||
|
||
class MemoryTester(lsst.utils.tests.MemoryTestCase): | ||
pass | ||
|
||
|
||
def setup_module(module): | ||
lsst.utils.tests.init() | ||
|
||
|
||
if __name__ == "__main__": | ||
lsst.utils.tests.init() | ||
unittest.main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could really do with explicit Attributes and Parameters section.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought it was policy to document these in the property docstrings?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. I thought that in this case there was a difference between init parameters and attributes (on phone so not checking).