Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-13349: Define storage classes in config yaml #11

Merged
merged 17 commits into from
Feb 1, 2018
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
*.pyc
version.py
_build.*
.cache
tests/.tests
tests/test_input_datastore/
Expand All @@ -8,3 +9,4 @@ tests/test_output_datastore/
.sconsign.dblite
config.log
ups/*.cfgc
pytest_session.txt
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# see <https://www.lsstcorp.org/LegalNotices/>.
#

from lsst.daf.persistence import doImport
from lsst.daf.butler.core.utils import doImport

from abc import ABCMeta, abstractmethod
from .config import Config
Expand Down
50 changes: 7 additions & 43 deletions python/lsst/daf/butler/core/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from abc import ABCMeta, abstractmethod

from lsst.daf.persistence import doImport
from .mappingFactory import MappingFactory


class Formatter(object, metaclass=ABCMeta):
Expand Down Expand Up @@ -70,14 +70,10 @@ def write(self, inMemoryDataset, fileDescriptor):
raise NotImplementedError("Type does not support writing")


class FormatterFactory(object):
class FormatterFactory(MappingFactory):
"""Factory for `Formatter` instances.
"""

def __init__(self):
"""Constructor.
"""
self._registry = {}
refType = Formatter

def getFormatter(self, storageClass, datasetType=None):
"""Get a new formatter instance.
Expand All @@ -86,24 +82,18 @@ def getFormatter(self, storageClass, datasetType=None):
----------
storageClass : `StorageClass`
Get `Formatter` associated with this `StorageClass`, unless.
datasetType : `DatasetType` or `str, optional
datasetType : `DatasetType` or `str`, optional
If given, look if an override has been specified for this `DatasetType` and,
if so return that instead.
"""
if datasetType:
try:
typeName = self._registry[self._getName(datasetType)]()
except KeyError:
pass
typeName = self._registry[self._getName(storageClass)]
return self._getInstanceOf(typeName)
return self.getFromRegistry(storageClass, override=datasetType)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can switch this to using composition if you prefer since the get and put methods do not have the same names as the base class implementation

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that way we can also get rid of my other gripe (having refType specified) and move that to an argument (e.g. self.mappingFactory = MappingFactory(Formatter)).


def registerFormatter(self, type_, formatter):
"""Register a Formatter.

Parameters
----------
type : `str` or `StorageClass` or `DatasetType`
type_ : `str` or `StorageClass` or `DatasetType`
Type for which this formatter is to be used.
formatter : `str`
Identifies a `Formatter` subclass to use for reading and writing `Dataset`s of this type.
Expand All @@ -113,30 +103,4 @@ def registerFormatter(self, type_, formatter):
e : `ValueError`
If formatter does not name a valid formatter type.
"""
if not self._isValidFormatterStr(formatter):
raise ValueError("Not a valid Formatter: {0}".format(formatter))
self._registry[self._getName(type_)] = formatter

@staticmethod
def _getName(typeOrName):
"""Extract name of `DatasetType` or `StorageClass` as string.
"""
if isinstance(typeOrName, str):
return typeOrName
elif hasattr(typeOrName, 'name'):
return typeOrName.name
else:
raise ValueError("Cannot extract name from type")

@staticmethod
def _getInstanceOf(typeName):
cls = doImport(typeName)
return cls()

@staticmethod
def _isValidFormatterStr(formatter):
try:
f = FormatterFactory._getInstanceOf(formatter)
return isinstance(f, Formatter)
except ImportError:
return False
self.placeInRegistry(type_, formatter)
144 changes: 144 additions & 0 deletions python/lsst/daf/butler/core/mappingFactory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#
# LSST Data Management System
#
# Copyright 2008-2018 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#
from lsst.daf.butler.core.utils import doImport


class MappingFactory:
    """Register the mapping of some key to a Python type and retrieve
    instances.

    Enables instances of these classes to be retrieved from the factory
    later. The class can be specified as a Python type or as a string
    naming an importable type. If the key is an object rather than a
    string it is converted to a string by accessing its ``name`` attribute.

    Attributes
    ----------
    refType : `type` or `None`
        Type of instances expected to be returned from the factory.
        Subclasses may set this to have `placeInRegistry` validate
        registrations; when `None` (the default) no validation is done.
    """

    refType = None

    def __init__(self):
        # Maps the key name (`str`) to the registered type or type name.
        self._registry = {}

    def getFromRegistry(self, targetClass, override=None):
        """Get a new instance of the object stored in the registry.

        Parameters
        ----------
        targetClass : `str` or object supporting ``.name`` attribute
            Get item from registry associated with this target class,
            unless ``override`` is given and has its own registry entry.
        override : `str` or object supporting ``.name`` attribute, optional
            If given, look if an override has been specified for this and,
            if so return that instead.

        Returns
        -------
        instance : `object`
            Instance of class stored in registry associated with the target.

        Raises
        ------
        KeyError
            If none of the supplied keys is present in the registry.
        ValueError
            If a name cannot be extracted from one of the supplied keys.
        """
        # NOTE(review): a reviewer suggested generalising this to accept
        # several keys (e.g. ``*targetClasses``); the two-key signature is
        # kept here so existing callers are unaffected.
        # The override, when supplied, takes precedence over the target.
        for key in (override, targetClass):
            if key is None:
                continue
            try:
                typeName = self._registry[self._getName(key)]
            except KeyError:
                continue
            return self._getInstanceOf(typeName)

        # Only mention the keys that were actually looked up.
        if override is None:
            raise KeyError("Unable to find item in registry with key {}".format(targetClass))
        raise KeyError("Unable to find item in registry with keys {} or {}".format(targetClass, override))

    def placeInRegistry(self, registryKey, typeName):
        """Register a class name with the associated type.

        The type name provided is validated against the reference
        class, `refType`, if defined.

        Parameters
        ----------
        registryKey : `str` or object supporting ``.name`` attribute
            Item to associate with the provided type.
        typeName : `str` or Python type
            Identifies a class to associate with the provided key.

        Raises
        ------
        ValueError
            If an instance of the class is not of the expected type, if a
            name cannot be extracted from ``registryKey``, or if an item
            has already been registered under the same key.
        """
        if not self._isValidStr(typeName):
            raise ValueError("Not a valid class string: {}".format(typeName))
        keyString = self._getName(registryKey)
        # Registrations are write-once; silently replacing an entry would
        # change what existing users of the key receive.
        if keyString in self._registry:
            raise ValueError("Item with key {} already registered".format(keyString))

        self._registry[keyString] = typeName

    @staticmethod
    def _getName(typeOrName):
        """Extract name of supplied object as string.

        Parameters
        ----------
        typeOrName : `str` or object supporting ``.name`` attribute
            Item from which to extract a name.

        Returns
        -------
        name : `str`
            Extracted name as a string.

        Raises
        ------
        ValueError
            If the item is not a string and has no ``name`` attribute.
        """
        if isinstance(typeOrName, str):
            return typeOrName
        elif hasattr(typeOrName, 'name'):
            return typeOrName.name
        else:
            raise ValueError("Cannot extract name from type")

    @staticmethod
    def _getInstanceOf(typeOrName):
        """Given the type name or a type, instantiate an object of that type.

        If a type name is given, an attempt will be made to import the type.

        Parameters
        ----------
        typeOrName : `str` or Python class
            A string describing the Python class to load or a Python type.

        Returns
        -------
        instance : `object`
            Result of calling the resolved type with no arguments.
        """
        if isinstance(typeOrName, str):
            cls = doImport(typeOrName)
        else:
            cls = typeOrName
        return cls()

    @classmethod
    def _isValidStr(cls, typeName):
        """Validate that the class name provided does create instances of
        objects that are of the expected type, as stored in the class
        `refType` attribute.

        Parameters
        ----------
        typeName : `str` or Python type
            Type, or name of type, to validate.

        Returns
        -------
        isValid : `bool`
            `True` if `refType` is `None` or if an instance created from
            ``typeName`` is an instance of `refType`; `False` if the
            instance has a different type or could not be created.
        """
        if cls.refType is None:
            return True
        try:
            # Validation works by building a throwaway instance.
            c = cls._getInstanceOf(typeName)
        except (ImportError, TypeError, AttributeError):
            return False
        else:
            return isinstance(c, cls.refType)
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from abc import ABCMeta, abstractmethod

from lsst.daf.persistence import doImport
from lsst.daf.butler.core.utils import doImport

from .config import Config

Expand Down
108 changes: 72 additions & 36 deletions python/lsst/daf/butler/core/storageClass.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
# see <https://www.lsstcorp.org/LegalNotices/>.
#

import lsst.afw.table
from lsst.daf.butler.core.utils import doImport

from .mappingFactory import MappingFactory


class StorageClassMeta(type):
Expand All @@ -43,38 +45,72 @@ def assemble(cls, parent, components):
return parent


class TablePersistable(StorageClass):
    """`StorageClass` subclass identified by the name ``TablePersistable``."""
    name = "TablePersistable"


class Image(StorageClass):
    """`StorageClass` subclass identified by the name ``Image``."""
    name = "Image"


class Exposure(StorageClass):
    """`StorageClass` subclass identified by the name ``Exposure``.

    Declares a ``components`` mapping from component name to the
    `StorageClass` subclass used for that component.
    """
    name = "Exposure"
    # Component name -> storage class describing that component.
    components = {
        "image": Image,
        "mask": Image,
        "variance": Image,
        "wcs": TablePersistable,
        "psf": TablePersistable,
        "photoCalib": TablePersistable,
        "visitInfo": TablePersistable,
        "apCorr": TablePersistable,
        "coaddInputs": TablePersistable,
    }

    @classmethod
    def assemble(cls, parent, components):
        """Placeholder for assembling an object from its components.

        Raises
        ------
        NotImplementedError
            Always; this method has not been implemented.
        """
        raise NotImplementedError("TODO")


class Catalog(StorageClass):
    """`StorageClass` subclass identified by the name ``Catalog``.

    No Python type is associated with it (``type`` is `None`).
    """
    name = "Catalog"
    type = None # Catalog is abstract (I think)


class SourceCatalog(StorageClass):
    """`StorageClass` subclass identified by the name ``SourceCatalog``.

    Associated with the Python type `lsst.afw.table.SourceCatalog`.
    """
    name = "SourceCatalog"
    type = lsst.afw.table.SourceCatalog
def makeNewStorageClass(name, pytype, components=None):
    """Create a new Python class as a subclass of `StorageClass`.

    Parameters
    ----------
    name : `str`
        Name to use for this class. It is used both as the Python class
        name and as the ``name`` class attribute.
    pytype : `type` or `str`
        Python type (or name of type) to associate with the `StorageClass`.
        A string is imported with `doImport`, except for ``"dict"`` which
        is mapped directly to the builtin `dict`.
    components : `dict`, optional
        `dict` mapping name of a component to another `StorageClass`.

    Returns
    -------
    newtype : `StorageClass`
        Newly created Python type.
    """
    if isinstance(pytype, str):
        # Special case Python native dict type for testing
        pytype = dict if pytype == "dict" else doImport(pytype)

    # Class attributes of the dynamically created subclass.
    attributes = {
        "name": name,
        "type": pytype,
        "components": components,
    }
    return type(name, (StorageClass,), attributes)


class StorageClassFactory(MappingFactory):
    """Factory for `StorageClass` instances.
    """
    # Registrations are validated against this type by the base class.
    refType = StorageClass

    def getStorageClass(self, storageClassName):
        """Get a StorageClass instance associated with the supplied name.

        Parameters
        ----------
        storageClassName : `str`
            Name of the storage class to retrieve.

        Returns
        -------
        instance : `StorageClass`
            Instance of the correct `StorageClass`.

        Raises
        ------
        KeyError
            If no storage class has been registered with that name.
        """
        return self.getFromRegistry(storageClassName)

    def registerStorageClass(self, storageClassName, pytype, components=None):
        """Create a `StorageClass` subclass with the supplied properties
        and associate it with the storageClassName in the registry.

        Parameters
        ----------
        storageClassName : `str`
            Name of new storage class to be created.
        pytype : `str` or Python class
            Python type to be associated with this storage class.
        components : `dict`, optional
            Map of component name to `StorageClass` for components.

        Raises
        ------
        ValueError
            If a storage class has already been registered with
            storageClassName, or if the newly created class fails
            validation against `refType`.
        """
        newtype = makeNewStorageClass(storageClassName, pytype, components)
        self.placeInRegistry(storageClassName, newtype)