Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-40121 Optimizations in QuantumGraph loading #870

Merged
merged 1 commit into from
Jul 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/changes/DM-40121.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added various optimizations to QuantumGraph loading.
26 changes: 15 additions & 11 deletions python/lsst/daf/butler/core/dimensions/_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

__all__ = ("DimensionRecord", "SerializedDimensionRecord")

from collections.abc import Hashable
from typing import TYPE_CHECKING, Any, ClassVar, Optional, Tuple

import lsst.sphgeom
Expand Down Expand Up @@ -170,23 +171,22 @@ def direct(

This method should only be called when the inputs are trusted.
"""
_recItems = record.items()
# This method requires tuples as values of the mapping, but JSON
# readers will read things in as lists. Be kind and transparently
# transform to tuples
_recItems = {k: v if type(v) != list else tuple(v) for k, v in record.items()} # type: ignore
TallJimbo marked this conversation as resolved.
Show resolved Hide resolved

# Type ignore because the ternary statement seems to confuse mypy
# based on conflicting inferred types of v.
key = (
definition,
TallJimbo marked this conversation as resolved.
Show resolved Hide resolved
frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems), # type: ignore
frozenset(_recItems.items()),
TallJimbo marked this conversation as resolved.
Show resolved Hide resolved
)
cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
if cache is not None and (result := cache.get(key)) is not None:
return result

# This method requires tuples as values of the mapping, but JSON
# readers will read things in as lists. Be kind and transparently
# transform to tuples
serialized_record = {k: v if type(v) != list else tuple(v) for k, v in record.items()} # type: ignore

node = cls.model_construct(definition=definition, record=serialized_record) # type: ignore
node = cls.model_construct(definition=definition, record=_recItems) # type: ignore

if cache is not None:
cache[key] = node
Expand Down Expand Up @@ -350,6 +350,7 @@ def from_simple(
simple: SerializedDimensionRecord,
universe: DimensionUniverse | None = None,
registry: Registry | None = None,
cacheKey: Hashable | None = None,
) -> DimensionRecord:
"""Construct a new object from the simplified form.

Expand All @@ -366,6 +367,10 @@ def from_simple(
registry : `lsst.daf.butler.Registry`, optional
Registry from which a universe can be extracted. Can be `None`
if universe is provided explicitly.
cacheKey : `Hashable` or `None`
If this is not None, it will be used as a key for any cached
reconstruction instead of calculating a value from the serialized
format.

Returns
-------
Expand All @@ -379,12 +384,11 @@ def from_simple(
if universe is None:
# this is for mypy
raise ValueError("Unable to determine a usable universe")
_recItems = simple.record.items()
# Type ignore because the ternary statement seems to confuse mypy
# based on conflicting inferred types of v.
key = (
key = cacheKey or (
simple.definition,
frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems), # type: ignore
frozenset(simple.record.items()), # type: ignore
)
cache = PersistenceContextVars.dimensionRecords.get()
if cache is not None and (result := cache.get(key)) is not None:
Expand Down
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/core/persistenceContext.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


import uuid
from collections.abc import Callable
from collections.abc import Callable, Hashable
from contextvars import Context, ContextVar, Token, copy_context
from typing import TYPE_CHECKING, ParamSpec, TypeVar, cast

Expand Down Expand Up @@ -117,7 +117,7 @@ class PersistenceContextVars:
r"""A cache of `DatasetRef`\ s.
"""

dimensionRecords: ContextVar[dict[tuple[str, frozenset], DimensionRecord] | None] = ContextVar(
dimensionRecords: ContextVar[dict[Hashable, DimensionRecord] | None] = ContextVar(
"dimensionRecords", default=None
)
r"""A cache of `DimensionRecord`\ s.
Expand Down
20 changes: 10 additions & 10 deletions python/lsst/daf/butler/core/quantum.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,19 @@
) -> DatasetRef:
"""Reconstruct a DatasetRef stored in a Serialized Quantum."""
# Reconstruct the dimension records
# if the dimension record has been loaded previously use that,
# otherwise load it from the dict of Serialized DimensionRecords
if dimensionRecords is None and ids:
raise ValueError("Cannot construct from a SerializedQuantum with no dimension records. ")

Check warning on line 59 in python/lsst/daf/butler/core/quantum.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/core/quantum.py#L59

Added line #L59 was not covered by tests
records = {}
for dId in ids:
# if the dimension record has been loaded previously use that,
# otherwise load it from the dict of Serialized DimensionRecords
if dimensionRecords is None:
raise ValueError("Cannot construct from a SerializedQuantum with no dimension records. ")
tmpSerialized = dimensionRecords[dId]
reconstructedDim = DimensionRecord.from_simple(tmpSerialized, universe=universe)
records[sys.intern(reconstructedDim.definition.name)] = reconstructedDim
# turn the serialized form into an object and attach the dimension records
# Ignore typing because it is missing that the above if statement
# ensures that if there is a loop that dimensionRecords is not None.
tmpSerialized = dimensionRecords[dId] # type: ignore
records[tmpSerialized.definition] = tmpSerialized
if simple.dataId is not None:
simple.dataId.records = records or None
rebuiltDatasetRef = DatasetRef.from_simple(simple, universe, datasetType=type_)
if records:
object.__setattr__(rebuiltDatasetRef, "dataId", rebuiltDatasetRef.dataId.expanded(records))
return rebuiltDatasetRef


Expand Down
Loading