Skip to content

Commit

Permalink
Use explicit reason for dataset type subsetting failing
Browse files Browse the repository at this point in the history
Report the missing dataset type rather than a more
opaque default KeyError.

Also allow the incompatibility to be accepted if the storage
classes are compatible.

This can happen if a dataset type definition that is declared in a
connections class, and is not yet in the registry, does not match the
definition added elsewhere. The example that triggered this
was TaskMetadata, which was used in a connection and added
by the pipeline infrastructure.
  • Loading branch information
timj committed Feb 4, 2022
1 parent 2ab1b5b commit a21551a
Showing 1 changed file with 38 additions and 3 deletions.
41 changes: 38 additions & 3 deletions python/lsst/pipe/base/graphBuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,44 @@ def fromSubset(
A new dictionary instance.
"""
combined = ChainMap(first, *rest)
return cls(
{datasetType: combined[datasetType] for datasetType in datasetTypes}, universe=first.universe
)
_dict = {}
for datasetType in datasetTypes:
if datasetType in combined:
_dict[datasetType] = combined[datasetType]
else:
# The dataset type is not found. It may not be listed
# or it may be that it is there with the same name
# but different definition.
for d in combined:
if d.name == datasetType.name:
# This implies some inconsistency in definitions
# for connections. If there is support for storage
# class conversion we can let it slide.
# At this point we do not know
# where the inconsistency is but trust that down
# stream code will be more explicit about input
# vs output incompatibilities.
if d.is_compatible_with(datasetType) or datasetType.is_compatible_with(d):
_LOG.debug(
"Dataset type mismatch (%s != %s) but continuing since they are compatible",
datasetType,
d,
)
_dict[datasetType] = combined[d]
break
else:
raise KeyError(
"Dataset type definition mismatch and they are not compatible: "
f"{datasetType} != {d}"
)

if datasetType not in _dict:
raise KeyError(
f"DatasetType {datasetType} not present in list of known types: "
+ ", ".join(d.name for d in combined)
)

return cls(_dict, universe=first.universe)

@property
def dimensions(self) -> DimensionGraph:
Expand Down

0 comments on commit a21551a

Please sign in to comment.