Merge pull request #262 from lsst/tickets/DM-24347
DM-24347: Allow a component dataset to be None
timj committed Apr 24, 2020
2 parents 6d49846 + 2c6b531 commit 979aa60
Showing 10 changed files with 96 additions and 30 deletions.
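In user-facing terms, the change lets a component read come back as None instead of tripping the datastore's type check. A minimal sketch of the intended round trip, in which the dataset type, dataId, and component name are placeholders rather than anything defined in this commit:

# Hypothetical illustration only; "metric", the dataId, and the "data"
# component are made-up names, not part of this diff.
butler.put(metrics_with_no_data, "metric", dataId)  # the "data" component is None
data = butler.get("metric.data", dataId)            # previously raised TypeError
assert data is None                                 # now a legitimate result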
18 changes: 15 additions & 3 deletions python/lsst/daf/butler/datastores/genericDatastore.py
@@ -126,7 +126,8 @@ def _move_to_trash_in_registry(self, ref):
# the python object. moveDatasetLocationToTrash will deal with that.
self.registry.moveDatasetLocationToTrash(self.name, list(ref.flatten([ref])))

def _post_process_get(self, inMemoryDataset, readStorageClass, assemblerParams=None):
def _post_process_get(self, inMemoryDataset, readStorageClass, assemblerParams=None,
isComponent=False):
"""Given the Python object read from the datastore, manipulate
it based on the supplied parameters and ensure the Python
type is correct.
@@ -138,16 +139,27 @@ def _post_process_get(self, inMemoryDataset, readStorageClass, assemblerParams=None,
readStorageClass: `StorageClass`
The `StorageClass` used to obtain the assembler and to
check the python type.
assemblerParams : `dict`
assemblerParams : `dict`, optional
Parameters to pass to the assembler. Can be `None`.
isComponent : `bool`, optional
If this is a component, allow the inMemoryDataset to be `None`.
"""
# Process any left over parameters
if assemblerParams:
inMemoryDataset = readStorageClass.assembler().handleParameters(inMemoryDataset, assemblerParams)

# Validate the returned data type matches the expected data type
pytype = readStorageClass.pytype
if pytype and not isinstance(inMemoryDataset, pytype):

allowedTypes = []
if pytype:
allowedTypes.append(pytype)

# Special case components to allow them to be None
if isComponent:
allowedTypes.append(type(None))

if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
raise TypeError("Got Python type {} from datastore but expected {}".format(type(inMemoryDataset),
pytype))

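A minimal standalone sketch of the validation above, using only names that appear in the hunk (the helper name itself is invented): isComponent widens the allowed types so a component may legitimately be None.

def _check_return_type(inMemoryDataset, pytype, isComponent=False):
    # Mirrors the new _post_process_get check: collect acceptable types,
    # letting components be None, then validate in one isinstance call.
    allowedTypes = []
    if pytype:
        allowedTypes.append(pytype)
    if isComponent:
        allowedTypes.append(type(None))
    if allowedTypes and not isinstance(inMemoryDataset, tuple(allowedTypes)):
        raise TypeError("Got Python type {} from datastore but expected {}".format(
            type(inMemoryDataset), pytype))
    return inMemoryDataset

_check_return_type(None, dict, isComponent=True)  # accepted after this change
# _check_return_type(None, dict)                  # still raises TypeError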
7 changes: 4 additions & 3 deletions python/lsst/daf/butler/datastores/inMemoryDatastore.py
@@ -258,11 +258,11 @@ def get(self, ref, parameters=None):

inMemoryDataset = self.datasets[realID]

component = ref.datasetType.component()

# Different storage classes implies a component request
if readStorageClass != writeStorageClass:

component = ref.datasetType.component()

if component is None:
raise ValueError("Storage class inconsistency ({} vs {}) but no"
" component requested".format(readStorageClass.name,
@@ -273,7 +273,8 @@

# Since there is no formatter to process parameters, they all must be
# passed to the assembler.
return self._post_process_get(inMemoryDataset, readStorageClass, parameters)
return self._post_process_get(inMemoryDataset, readStorageClass, parameters,
isComponent=component is not None)

def put(self, inMemoryDataset, ref):
"""Write a InMemoryDataset with a given `DatasetRef` to the store.
6 changes: 4 additions & 2 deletions python/lsst/daf/butler/datastores/posixDatastore.py
@@ -157,9 +157,11 @@ def get(self, ref, parameters=None):
try:
result = formatter.read(component=getInfo.component)
except Exception as e:
raise ValueError(f"Failure from formatter '{formatter.name()}' for dataset {ref.id}") from e
raise ValueError(f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
f" ({ref.datasetType.name} from {location.path}): {e}") from e

return self._post_process_get(result, getInfo.readStorageClass, getInfo.assemblerParams)
return self._post_process_get(result, getInfo.readStorageClass, getInfo.assemblerParams,
isComponent=getInfo.component is not None)

@transactional
def put(self, inMemoryDataset, ref):
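The expanded message keeps the standard raise ... from e chaining, so the formatter's original traceback is preserved as __cause__ alongside the new dataset context. A generic sketch of the pattern, with hypothetical names:

def read_with_context(read, dataset_id, type_name, path):
    # Hypothetical helper showing the chaining idiom used above.
    try:
        return read()
    except Exception as e:
        # "from e" keeps the original exception attached to the new one
        raise ValueError(f"Failure from formatter for dataset {dataset_id}"
                         f" ({type_name} from {path}): {e}") from e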
6 changes: 4 additions & 2 deletions python/lsst/daf/butler/datastores/s3Datastore.py
@@ -193,9 +193,11 @@ def get(self, ref, parameters=None):
formatter._fileDescriptor.location = Location(*os.path.split(tmpFile.name))
result = formatter.read(component=getInfo.component)
except Exception as e:
raise ValueError(f"Failure from formatter for dataset {ref.id}: {e}") from e
raise ValueError(f"Failure from formatter '{formatter.name()}' for dataset {ref.id}"
f" ({ref.datasetType.name} from {location.uri}): {e}") from e

return self._post_process_get(result, getInfo.readStorageClass, getInfo.assemblerParams)
return self._post_process_get(result, getInfo.readStorageClass, getInfo.assemblerParams,
isComponent=getInfo.component is not None)

@transactional
def put(self, inMemoryDataset, ref):
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/formatters/fileFormatter.py
@@ -177,7 +177,9 @@ def read(self, component=None):
# component coercing it to its appropriate pytype
data = self._assembleDataset(data, component)

if data is None:
# Special case components by allowing a formatter to return None
# to indicate that the component was understood but is missing
if data is None and component is None:
raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

return data
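The guard now distinguishes a missing dataset from a missing component: None is an error only when no component was requested. A condensed sketch of that rule, with assemble standing in for the formatter's internal assembly step:

def read(assemble, uri, component=None):
    # A formatter may return None for a recognised-but-absent component,
    # but a full dataset read must always produce something.
    data = assemble(component)
    if data is None and component is None:
        raise ValueError(f"Unable to read data with URI {uri}")
    return data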
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/formatters/fitsExposureFormatter.py
@@ -109,7 +109,7 @@ def readComponent(self, component, parameters=None):
'metadata': ('readMetadata', False),
'filter': ('readFilter', False),
'polygon': ('readValidPolygon', False),
'appCorrMap': ('readApCorrMap', False),
'apCorrMap': ('readApCorrMap', False),
'visitInfo': ('readVisitInfo', False),
'transmissionCurve': ('readTransmissionCurve', False),
'detector': ('readDetector', False),
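With the key corrected from 'appCorrMap' to 'apCorrMap', a component request can actually reach readApCorrMap, and with the None handling above the read may legitimately yield nothing. Hypothetical usage, where the dataset type and dataId are illustrative only:

# "calexp" and the dataId below are placeholders, not part of this commit.
apCorrMap = butler.get("calexp.apCorrMap", dataId={"instrument": "dummy", "visit": 42})
if apCorrMap is None:
    print("Exposure carries no aperture correction map")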
8 changes: 8 additions & 0 deletions python/lsst/daf/butler/tests/_examplePythonTypes.py
@@ -184,12 +184,20 @@ def __init__(self, summary=None, output=None, data=None):
def __eq__(self, other):
return self.summary == other.summary and self.output == other.output and self.data == other.data

def __str__(self):
return str(self.exportAsDict())

def __repr__(self):
return f"MetricsExample({self.exportAsDict()})"

def exportAsDict(self):
"""Convert object contents to a single python dict."""
exportDict = {"summary": self.summary,
"output": self.output}
if self.data is not None:
exportDict["data"] = list(self.data)
else:
exportDict["data"] = None
return exportDict

def _asdict(self):
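The new __str__ and __repr__ delegate to exportAsDict, which now reports a None data component explicitly. A quick usage sketch based only on the code shown above:

m = MetricsExample(summary={"AM1": 5.2}, output={"a": [1, 2, 3]}, data=None)
print(m)        # {'summary': {'AM1': 5.2}, 'output': {'a': [1, 2, 3]}, 'data': None}
print(repr(m))  # MetricsExample({'summary': {'AM1': 5.2}, 'output': {'a': [1, 2, 3]}, 'data': None})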
3 changes: 3 additions & 0 deletions tests/test_butler.py
@@ -476,6 +476,9 @@ def testIngest(self):
self.assertEqual(uri1, uri2)

# Test that removing one does not break the second
# This line will issue a warning log message for a ChainedDatastore
# that uses an InMemoryDatastore since in-memory can not ingest
# files.
butler.prune([datasets[0].refs[0]], unstore=True, disassociate=False)
self.assertFalse(butler.datasetExists(datasetTypeName, dataId1))
self.assertTrue(butler.datasetExists(datasetTypeName, dataId2))
36 changes: 27 additions & 9 deletions tests/test_datastore.py
@@ -40,11 +40,15 @@
TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics():
def makeExampleMetrics(use_none=False):
if use_none:
array = None
else:
array = [563, 234, 456.7]
return MetricsExample({"AM1": 5.2, "AM2": 30.6},
{"a": [1, 2, 3],
"b": {"blue": 5, "red": "green"}},
[563, 234, 456.7]
array,
)


@@ -159,17 +163,31 @@ def testBasicPutGet(self):

# Get a component -- we need to construct new refs for them
# with derived storage classes but with parent ID
comp = "output"
compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(comp), dimensions,
sc.components[comp], dataId, id=ref.id)
output = datastore.get(compRef)
self.assertEqual(output, metricsOut.output)
for comp in ("data", "output"):
compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(comp), dimensions,
sc.components[comp], dataId, id=ref.id)
output = datastore.get(compRef)
self.assertEqual(output, getattr(metricsOut, comp))

uri = datastore.getUri(compRef)
self.assertEqual(uri[:len(self.uriScheme)], self.uriScheme)
uri = datastore.getUri(compRef)
self.assertEqual(uri[:len(self.uriScheme)], self.uriScheme)

storageClass = sc

# Check that we can put a metric with None in a component and
# get it back as None
metricsNone = makeExampleMetrics(use_none=True)
dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
datastore.put(metricsNone, refNone)

comp = "data"
for comp in ("data", "output"):
compRef = self.makeDatasetRef(refNone.datasetType.componentTypeName(comp), dimensions,
sc.components[comp], dataIdNone, id=refNone.id)
output = datastore.get(compRef)
self.assertEqual(output, getattr(metricsNone, comp))

# Check that a put fails if the dataset type is not supported
if self.hasUnsupportedPut:
sc = StorageClass("UnsupportedSC", pytype=type(metrics))
36 changes: 27 additions & 9 deletions tests/test_datastoreFits.py
@@ -177,15 +177,27 @@ def testExposurePutGet(self):

# Get some components
# Could not test the following components as they were not known:
# bbox, xy0, filter, polygon, appCorrMap, detector,
# extras, and exposureInfo
# transmissionCurve returned None and which is fixed in DM-24347
for compName in ("wcs", "image", "mask", "coaddInputs", "psf",
"variance", "photoCalib", "metadata", "visitInfo"):
compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(compName), dimensions,
storageClass.components[compName], dataId, id=ref.id)
component = datastore.get(compRef)
self.assertIsInstance(component, compRef.datasetType.storageClass.pytype)
# bbox, xy0, filter, polygon, detector, extras, and exposureInfo
for compName, isNone in (("wcs", False),
("image", False),
("mask", False),
("coaddInputs", False),
("psf", False),
("variance", False),
("photoCalib", False),
("metadata", False),
("visitInfo", False),
("apCorrMap", True),
("transmissionCurve", True),
("metadata", False)):
with self.subTest(component=compName):
compRef = self.makeDatasetRef(ref.datasetType.componentTypeName(compName), dimensions,
storageClass.components[compName], dataId, id=ref.id)
component = datastore.get(compRef)
if isNone:
self.assertIsNone(component)
else:
self.assertIsInstance(component, compRef.datasetType.storageClass.pytype)

# Get the WCS component to check it
wcsRef = self.makeDatasetRef(ref.datasetType.componentTypeName("wcs"), dimensions,
Expand All @@ -197,6 +209,12 @@ def testExposurePutGet(self):
lsst.geom.Extent2I(9, 9))
self.assertWcsAlmostEqualOverBBox(wcs, exposure.getWcs(), bbox)

# Check basic metadata
metadataRef = self.makeDatasetRef(ref.datasetType.componentTypeName("metadata"), dimensions,
storageClass.components["metadata"], dataId, id=ref.id)
metadata = datastore.get(metadataRef)
self.assertEqual(metadata["WCS_ID"], 3)

def testExposureCompositePutGet(self):
example = os.path.join(self.testDir, "data", "basic", "small.fits")
exposure = lsst.afw.image.ExposureF(example)