Skip to content

Commit

Permalink
Major reorganization of datastores
Browse files Browse the repository at this point in the history
* Datastore APIs now take DatasetRefs
* Assembly and disassembly is now done by butler.
* Datastore now records path and formatter indexed by DatasetRef.
  - No longer uses a URI to record that information.
* URIs can now be retrieved separately.
* StorageClassFactory is now a Singleton.
* DatasetType now has a StorageClass instance (not name)
* StorageClasses can now be defined in multiple config files.
* afw Exposure assembly much improved.
* File templates now use DatasetRef
* DatasetType now knows how to to represent components in the name.
* Read Formatters now have access to the storage class used to write
  and the storage class requested to read the data.
* Location no longer knows about components.
  • Loading branch information
timj committed Mar 30, 2018
1 parent 06b392f commit aed7723
Show file tree
Hide file tree
Showing 36 changed files with 1,653 additions and 814 deletions.
55 changes: 55 additions & 0 deletions config/registry/storageClasses.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
storageClasses:
StructuredDataDict:
pytype: dict
StructuredDataList:
pytype: list
TablePersistable:
pytype: lsst.afw.table.io.persistable.Persistable
TablePersistableWcs:
pytype: lsst.afw.geom.skyWcs.skyWcs.SkyWcs
TablePersistablePsf:
pytype: lsst.afw.detection.Psf
TablePersistableCoaddInputs:
pytype: lsst.afw.image.coaddInputs.CoaddInputs
TablePersistableVisitInfo:
pytype: lsst.afw.image.visitInfo.VisitInfo
TablePersistableApCorr:
pytype: lsst.afw.image.apCorrMap.ApCorrMap
TablePersistableCalib:
pytype: lsst.afw.image.calib.Calib
ImageF:
pytype: lsst.afw.image.image.ImageF
MaskX:
pytype: lsst.afw.image.mask.MaskX
Catalog:
pytype: lsst.afw.table.BaseCatalog
SourceCatalog:
pytype: lsst.afw.table.SourceCatalog
Exposure: &Exposure
pytype: lsst.afw.image.Exposure
assembler: lsst.daf.butler.assemblers.exposureAssembler.ExposureAssemblerMonolithic
components:
image: ImageF
mask: MaskX
variance: ImageF
wcs: TablePersistableWcs
psf: TablePersistablePsf
calib: TablePersistableCalib
visitInfo: TablePersistableVisitInfo
apCorrMap: TablePersistableApCorr
coaddInputs: TablePersistableCoaddInputs
ExposureF:
<<: *Exposure
pytype: lsst.afw.image.ExposureF
ExposureI:
<<: *Exposure
pytype: lsst.afw.image.ExposureI
ExposureComposite: &ExposureComposite
<<: *Exposure
assembler: lsst.daf.butler.assemblers.exposureAssembler.ExposureAssembler
ExposureCompositeF:
<<: *ExposureComposite
pytype: lsst.afw.image.ExposureF
ExposureI:
<<: *ExposureComposite
pytype: lsst.afw.image.ExposureI
65 changes: 65 additions & 0 deletions python/lsst/daf/butler/assemblers/exposureAssembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

# Need to enable PSFs to be instantiated
import lsst.afw.detection # noqa F401
from lsst.afw.image import makeExposure, makeMaskedImage

from lsst.daf.butler.core.composites import CompositeAssembler

Expand Down Expand Up @@ -159,6 +160,70 @@ def disassemble(self, composite):

return components

def assemble(self, components):
"""Construct an Exposure from components.
Parameters
----------
components : `dict`
All the components from which to construct the Exposure.
Some can be missing.
Returns
-------
exposure : `~lsst.afw.image.Exposure`
Assembled exposure.
Raises
------
ValueError
Some supplied components are not recognized.
"""
components = components.copy()
maskedImageComponents = {}
hasMaskedImage = False
for component in ("image", "variance", "mask"):
value = None
if component in components:
hasMaskedImage = True
value = components.pop(component)
maskedImageComponents[component] = value

wcs = None
if "wcs" in components:
wcs = components.pop("wcs")

pytype = self.storageClass.pytype
if hasMaskedImage:
maskedImage = makeMaskedImage(**maskedImageComponents)
exposure = makeExposure(maskedImage, wcs=wcs)

if not isinstance(exposure, pytype):
raise RuntimeError("Unexpected type created in assembly;"
" was {} expected {}".format(type(exposure), pytype))

else:
exposure = pytype()
if wcs is not None:
exposure.setWcs(wcs)

# Set other components
exposure.setPsf(components.pop("psf", None))
exposure.setCalib(components.pop("calib", None))

info = exposure.getInfo()
if "visitInfo" in components:
info.setVisitInfo(components.pop("visitInfo"))
info.setApCorrMap(components.pop("apCorrMap", None))
info.setCoaddInputs(components.pop("coaddInputs", None))

# If we have some components left over that is a problem
if components:
raise ValueError("The following components were not understood:"
" {}".format(list(components.keys())))

return exposure


class ExposureAssemblerMonolithic(ExposureAssembler):
"""Exposure assembler with disassembly disabled."""
Expand Down
50 changes: 42 additions & 8 deletions python/lsst/daf/butler/butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from .core.config import Config
from .core.datastore import Datastore
from .core.registry import Registry
from .core.storageClass import StorageClassFactory

__all__ = ("ButlerConfig", "Butler")

Expand All @@ -39,12 +40,12 @@ def __init__(self, *args, **kwargs):
self.validate()

def validate(self):
for k in ['run', 'datastore.cls', 'registry.cls']:
for k in ['run', 'datastore.cls', 'registry.cls', 'storageClasses.config']:
if k not in self:
raise ValueError("Missing ButlerConfig parameter: {0}".format(k))


class Butler(object):
class Butler:
"""Main entry point for the data access system.
Attributes
Expand All @@ -64,8 +65,10 @@ class Butler(object):

def __init__(self, config):
self.config = ButlerConfig(config)
self.datastore = Datastore.fromConfig(self.config)
self.registry = Registry.fromConfig(self.config)
self.datastore = Datastore.fromConfig(self.config, self.registry)
self.storageClasses = StorageClassFactory()
self.storageClasses.addFromConfig(self.config)
self.run = self.registry.getRun(collection=self.config['run'])
if self.run is None:
self.run = self.registry.makeRun(self.config['run'])
Expand All @@ -91,7 +94,27 @@ def put(self, obj, datasetType, dataId, producer=None):
"""
datasetType = self.registry.getDatasetType(datasetType)
ref = self.registry.addDataset(datasetType, dataId, run=self.run, producer=producer)
# self.datastore.put(obj, ref)

# Look up storage class to see if this is a composite
storageClass = datasetType.storageClass

# Check to see if this storage class has a disassembler
if storageClass.assemblerClass.disassemble is not None and storageClass.components:
components = storageClass.assembler().disassemble(obj)
for component, info in components.items():
compTypeName = datasetType.componentTypeName(component)
compRef = self.put(info.component, compTypeName, dataId, producer)
self.registry.attachComponent(component, ref, compRef)
else:
# This is an entity without a disassembler.
# If it is a composite we still need to register the components
for component in storageClass.components:
compTypeName = datasetType.componentTypeName(component)
compDatasetType = self.registry.getDatasetType(compTypeName)
compRef = self.registry.addDataset(compDatasetType, dataId, run=self.run, producer=producer)
self.registry.attachComponent(component, ref, compRef)
self.datastore.put(obj, ref)

return ref

def getDirect(self, ref):
Expand All @@ -110,9 +133,20 @@ def getDirect(self, ref):
obj : `object`
The dataset.
"""
# Currently a direct pass-through to `Datastore.get` but this should
# change for composites.
return self.datastore.get(ref)
# if the ref exists in the store we return it directly
if self.datastore.exists(ref):
return self.datastore.get(ref)
elif ref.components:
# Reconstruct the composite
components = {}
for compName, compRef in ref.components.items():
components[compName] = self.datastore.get(compRef)

# Assemble the components
return ref.datasetType.storageClass.assembler().assemble(components)
else:
# single entity in datastore
raise ValueError("Unable to locate ref {} in datastore {}".format(ref.id, self.datastore.name))

def get(self, datasetType, dataId):
"""Retrieve a stored dataset.
Expand All @@ -131,5 +165,5 @@ def get(self, datasetType, dataId):
The dataset.
"""
datasetType = self.registry.getDatasetType(datasetType)
ref = self.registry.find(datasetType, dataId)
ref = self.registry.find(self.run.collection, datasetType, dataId)
return self.getDirect(ref)
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from .config import *
from .datasets import *
from .datastore import *
from .dataUnits import *
from .fileDescriptor import *
from .fileTemplates import *
from .formatter import *
Expand All @@ -20,4 +19,6 @@
from .run import *
from .schema import *
from .storageClass import *
from .storageInfo import *
from .storedFileInfo import *
from .units import *
41 changes: 0 additions & 41 deletions python/lsst/daf/butler/core/dataUnits.py

This file was deleted.

0 comments on commit aed7723

Please sign in to comment.