DM-39582: Add caching for some butler primitives during deserialization #858

Merged (11 commits) on Jul 4, 2023
2 changes: 2 additions & 0 deletions doc/changes/DM-39582.misc.md
@@ -0,0 +1,2 @@
Added the ability for some butler primitives to be cached and re-used on deserialization through a special interface.
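
For readers skimming the diff, the recurring pattern below is a context-local cache consulted during `from_simple`/`direct` deserialization. The following is a minimal sketch of the idea only; the names are illustrative, not the actual `PersistenceContextVars` API:

```python
# Minimal sketch of the caching pattern (illustrative names, not the real API).
from contextvars import ContextVar
from typing import Any

# None means "caching disabled"; a dict is installed for the duration of a
# deserialization batch so repeated payloads resolve to one shared object.
_cache: ContextVar[dict[Any, Any] | None] = ContextVar("_cache", default=None)


def from_simple_cached(key: Any, build):
    """Return a cached object for ``key``, building and caching it on a miss."""
    cache = _cache.get()
    if cache is not None and (hit := cache.get(key)) is not None:
        return hit
    obj = build()
    if cache is not None:
        cache[key] = obj
    return obj
```

Each primitive touched below (`DatasetRef`, `DatasetType`, `DataCoordinate`, `DimensionRecord`, datastore record data) applies this same get/build/store sequence with a type-specific key.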
1 change: 1 addition & 0 deletions doc/changes/DM-39582.removal.md
@@ -0,0 +1 @@
Deprecate the `reconstituteDimensions` argument of `Quantum.from_simple`.
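
A hedged sketch of how such a deprecation is typically surfaced; the exact warning category and message in the real `Quantum.from_simple` may differ:

```python
# Illustrative deprecation shim: accept the argument, warn, and ignore it.
import warnings


def from_simple(simple, universe=None, reconstituteDimensions=None):
    if reconstituteDimensions is not None:
        warnings.warn(
            "The reconstituteDimensions argument is deprecated and no longer used.",
            FutureWarning,
            stacklevel=2,
        )
    ...  # normal deserialization path
```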
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/_butler.py
@@ -1651,9 +1651,9 @@ def exists(
if full_check:
if self.datastore.exists(ref):
existence |= DatasetExistence._ARTIFACT
- elif existence != DatasetExistence.UNRECOGNIZED:
+ elif existence.value != DatasetExistence.UNRECOGNIZED.value:
# Do not add this flag if we have no other idea about a dataset.
- existence |= DatasetExistence._ASSUMED
+ existence |= DatasetExistence(DatasetExistence._ASSUMED)

return existence
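
The switch to comparing `.value` presumably avoids relying on enum equality semantics for composed flags; a standalone sketch with a stand-in `Flag` class (the member set here is assumed, not copied from the butler source):

```python
# Stand-in Flag enum; comparing the underlying ints is unambiguous even for
# composed values such as RECORDED | _ARTIFACT.
import enum


class DatasetExistence(enum.Flag):
    UNRECOGNIZED = 0
    RECORDED = enum.auto()
    _ARTIFACT = enum.auto()
    _ASSUMED = enum.auto()


existence = DatasetExistence.RECORDED | DatasetExistence._ARTIFACT
assert existence.value != DatasetExistence.UNRECOGNIZED.value
```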

1 change: 1 addition & 0 deletions python/lsst/daf/butler/core/__init__.py
@@ -29,6 +29,7 @@
from .logging import ButlerLogRecords
from .mappingFactory import *
from .named import *
from .persistenceContext import *
from .progress import Progress
from .quantum import *
from .storageClass import *
15 changes: 7 additions & 8 deletions python/lsst/daf/butler/core/config.py
@@ -39,7 +39,7 @@

import yaml
from lsst.resources import ResourcePath, ResourcePathExpression
- from lsst.utils import doImport
+ from lsst.utils import doImportType
from yaml.representer import Representer

yaml.add_representer(defaultdict, Representer.represent_dict)
@@ -1203,18 +1203,17 @@ def __init__(

if pytype is not None:
try:
- cls = doImport(pytype)
+ cls = doImportType(pytype)
except ImportError as e:
raise RuntimeError(f"Failed to import cls '{pytype}' for config {type(self)}") from e
- defaultsFile = cls.defaultConfigFile
+ # The class referenced from the config file is not required
+ # to specify a default config file.
+ defaultsFile = getattr(cls, "defaultConfigFile", None)
if defaultsFile is not None:
self._updateWithConfigsFromPath(fullSearchPath, defaultsFile)

- # Get the container key in case we need it
- try:
- containerKey = cls.containerKey
- except AttributeError:
- pass
+ # Get the container key in case we need it and it is specified.
+ containerKey = getattr(cls, "containerKey", None)

# Now update this object with the external values so that the external
# values always override the defaults
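
Both replacements in this hunk are the same simplification: optional class attributes are read with `getattr` and a default instead of `try`/`except AttributeError`. A stand-in illustration (the class here is hypothetical):

```python
# Hypothetical configurable class: defines one optional attribute, lacks the other.
class ExampleConfigurable:
    defaultConfigFile = "example.yaml"


defaults = getattr(ExampleConfigurable, "defaultConfigFile", None)  # "example.yaml"
container = getattr(ExampleConfigurable, "containerKey", None)      # None, no exception handling needed
```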
52 changes: 38 additions & 14 deletions python/lsst/daf/butler/core/datasets/ref.py
@@ -30,6 +30,7 @@
]

import enum
import sys
import uuid
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, ClassVar
@@ -41,6 +42,7 @@
from ..dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedKeyDict
from ..persistenceContext import PersistenceContextVars
from .type import DatasetType, SerializedDatasetType

if TYPE_CHECKING:
@@ -142,6 +144,10 @@
return uuid.uuid5(self.NS_UUID, data)


# This is constant, so don't recreate a set for each instance
_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"}


class SerializedDatasetRef(BaseModel):
"""Simplified model of a `DatasetRef` suitable for serialization."""

@@ -202,9 +208,9 @@
datasetType if datasetType is None else SerializedDatasetType.direct(**datasetType),
)
setter(node, "dataId", dataId if dataId is None else SerializedDataCoordinate.direct(**dataId))
setter(node, "run", run)
setter(node, "run", sys.intern(run))
setter(node, "component", component)
setter(node, "__fields_set__", {"id", "datasetType", "dataId", "run", "component"})
setter(node, "__fields_set__", _serializedDatasetRefFieldsSet)
return node


@@ -254,7 +260,7 @@

_serializedType = SerializedDatasetRef
__slots__ = (
"id",
"_id",
"datasetType",
"dataId",
"run",
@@ -277,12 +283,22 @@
self.dataId = dataId
self.run = run
if id is not None:
- self.id = id
+ self._id = id.int
else:
- self.id = DatasetIdFactory().makeDatasetId(
- self.run, self.datasetType, self.dataId, id_generation_mode
+ self._id = (
+ DatasetIdFactory()
+ .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode)
+ .int
)

@property
def id(self) -> DatasetId:
"""Primary key of the dataset (`DatasetId`).

Cannot be changed after a `DatasetRef` is constructed.
"""
return uuid.UUID(int=self._id)

def __eq__(self, other: Any) -> bool:
try:
return (self.datasetType, self.dataId, self.id) == (other.datasetType, other.dataId, other.id)
@@ -396,9 +412,18 @@
ref : `DatasetRef`
Newly-constructed object.
"""
cache = PersistenceContextVars.datasetRefs.get()
localName = sys.intern(
datasetType.name
if datasetType is not None
else (x.name if (x := simple.datasetType) is not None else "")
)
key = (simple.id.int, localName)
if cache is not None and (cachedRef := cache.get(key, None)) is not None:
return cachedRef

# Minimalist component will just specify component and id and
# require registry to reconstruct
- if set(simple.dict(exclude_unset=True, exclude_defaults=True)).issubset({"id", "component"}):
+ if not (simple.datasetType is not None or simple.dataId is not None or simple.run is not None):
Reviewer comment (Contributor): Could you rewrite this as `simple.datasetType is None and simple.dataId is None and simple.run is None`? I think it makes it easier to read.

Author reply (Contributor): That is not logically the same thing; we only want to run this when they are all False. But `and` is greedy, so False will always gobble up anything.

if registry is None:
raise ValueError("Registry is required to construct component DatasetRef from integer id")
if simple.id is None:
@@ -408,6 +433,8 @@
raise RuntimeError(f"No matching dataset found in registry for id {simple.id}")
if simple.component:
ref = ref.makeComponentRef(simple.component)
if cache is not None:
cache[key] = ref

return ref

if universe is None and registry is None:
@@ -443,7 +470,10 @@
f"Encountered with {simple!r}{dstr}."
)

- return cls(datasetType, dataId, id=simple.id, run=simple.run)
+ newRef = cls(datasetType, dataId, id=simple.id, run=simple.run)
if cache is not None:
cache[key] = newRef

return newRef

to_json = to_json_pydantic
from_json: ClassVar = classmethod(from_json_pydantic)
@@ -682,9 +712,3 @@

Cannot be changed after a `DatasetRef` is constructed.
"""

- id: DatasetId
- """Primary key of the dataset (`DatasetId`).
-
- Cannot be changed after a `DatasetRef` is constructed.
- """
19 changes: 18 additions & 1 deletion python/lsst/daf/butler/core/datasets/type.py
@@ -34,6 +34,7 @@
from ..configSupport import LookupKey
from ..dimensions import DimensionGraph, SerializedDimensionGraph
from ..json import from_json_pydantic, to_json_pydantic
from ..persistenceContext import PersistenceContextVars
from ..storageClass import StorageClass, StorageClassFactory

if TYPE_CHECKING:
@@ -74,6 +75,10 @@

This method should only be called when the inputs are trusted.
"""
cache = PersistenceContextVars.serializedDatasetTypeMapping.get()
key = (name, storageClass or "")
if cache is not None and (type_ := cache.get(key, None)) is not None:
return type_

node = SerializedDatasetType.__new__(cls)
setter = object.__setattr__
setter(node, "name", name)
@@ -90,6 +95,8 @@
"__fields_set__",
{"name", "storageClass", "dimensions", "parentStorageClass", "isCalibration"},
)
if cache is not None:
cache[key] = node

return node


@@ -685,6 +692,13 @@
datasetType : `DatasetType`
Newly-constructed object.
"""
# Check whether there is a cache and, if so, whether it already
# holds this dataset type.
cache = PersistenceContextVars.loadedTypes.get()
key = (simple.name, simple.storageClass or "")
if cache is not None and (type_ := cache.get(key, None)) is not None:
return type_


if simple.storageClass is None:
# Treat this as minimalist representation
if registry is None:
@@ -708,14 +722,17 @@
# mypy hint
raise ValueError(f"Dimensions must be specified in {simple}")

- return cls(
+ newType = cls(
name=simple.name,
dimensions=DimensionGraph.from_simple(simple.dimensions, universe=universe),
storageClass=simple.storageClass,
isCalibration=simple.isCalibration,
parentStorageClass=simple.parentStorageClass,
universe=universe,
)
if cache is not None:
cache[key] = newType

return newType

to_json = to_json_pydantic
from_json: ClassVar = classmethod(from_json_pydantic)
15 changes: 14 additions & 1 deletion python/lsst/daf/butler/core/datastoreRecordData.py
@@ -36,6 +36,7 @@

from .datasets import DatasetId
from .dimensions import DimensionUniverse
from .persistenceContext import PersistenceContextVars
from .storedFileInfo import StoredDatastoreItemInfo

if TYPE_CHECKING:
@@ -204,16 +205,28 @@
item_info : `StoredDatastoreItemInfo`
De-serialized instance of `StoredDatastoreItemInfo`.
"""
cache = PersistenceContextVars.dataStoreRecords.get()
key = frozenset(simple.dataset_ids)
if cache is not None and (cachedRecord := cache.get(key)) is not None:
return cachedRecord

records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = {}
# make sure that all dataset IDs appear in the dict even if they don't
# have records.
for dataset_id in simple.dataset_ids:
records[dataset_id] = {}
for class_name, table_data in simple.records.items():
klass = doImportType(class_name)
if not issubclass(klass, StoredDatastoreItemInfo):
raise RuntimeError(
"The class specified in the SerializedDatastoreRecordData "
f"({get_full_type_name(klass)}) is not a StoredDatastoreItemInfo."
)
for table_name, table_records in table_data.items():
for record in table_records:
info = klass.from_record(record)
dataset_type_records = records.setdefault(info.dataset_id, {})
dataset_type_records.setdefault(table_name, []).append(info)
- return cls(records=records)
+ newRecord = cls(records=records)
if cache is not None:
cache[key] = newRecord

return newRecord
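
The cache key here is a `frozenset` of the dataset IDs, which is hashable and order-insensitive, so the same group of IDs hits the same entry however it was serialized. For example:

```python
import uuid

ids = [uuid.uuid4(), uuid.uuid4(), uuid.uuid4()]
assert frozenset(ids) == frozenset(reversed(ids))  # order does not matter
records_cache = {frozenset(ids): "cached DatastoreRecordData"}  # usable as a dict key
```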
13 changes: 13 additions & 0 deletions python/lsst/daf/butler/core/dimensions/_coordinate.py
@@ -39,6 +39,7 @@

from ..json import from_json_pydantic, to_json_pydantic
from ..named import NamedKeyDict, NamedKeyMapping, NamedValueAbstractSet, NameLookupMapping
from ..persistenceContext import PersistenceContextVars
from ..timespan import Timespan
from ._elements import Dimension, DimensionElement
from ._graph import DimensionGraph
@@ -76,6 +77,10 @@

This method should only be called when the inputs are trusted.
"""
key = (frozenset(dataId.items()), records is not None)
cache = PersistenceContextVars.serializedDataCoordinateMapping.get()
if cache is not None and (result := cache.get(key)) is not None:
return result

node = SerializedDataCoordinate.__new__(cls)
setter = object.__setattr__
setter(node, "dataId", dataId)
@@ -87,6 +92,8 @@
else {k: SerializedDimensionRecord.direct(**v) for k, v in records.items()},
)
setter(node, "__fields_set__", {"dataId", "records"})
if cache is not None:
cache[key] = node

return node


@@ -730,6 +737,10 @@
dataId : `DataCoordinate`
Newly-constructed object.
"""
key = (frozenset(simple.dataId.items()), simple.records is not None)
cache = PersistenceContextVars.dataCoordinates.get()
if cache is not None and (result := cache.get(key)) is not None:
return result

if universe is None and registry is None:
raise ValueError("One of universe or registry is required to convert a dict to a DataCoordinate")
if universe is None and registry is not None:
@@ -743,6 +754,8 @@
dataId = dataId.expanded(
{k: DimensionRecord.from_simple(v, universe=universe) for k, v in simple.records.items()}
)
if cache is not None:
cache[key] = dataId

return dataId

to_json = to_json_pydantic
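
Note that both cache keys in this file pair the dataId items with a flag recording whether dimension records were present, so expanded and minimal coordinates never collide. Illustratively (the dataId values are made up):

```python
# Hypothetical dataId: the bool keeps expanded/minimal entries distinct.
dataId = {"instrument": "HSC", "visit": 903334}
key_minimal = (frozenset(dataId.items()), False)
key_expanded = (frozenset(dataId.items()), True)
assert key_minimal != key_expanded
```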
29 changes: 27 additions & 2 deletions python/lsst/daf/butler/core/dimensions/_records.py
@@ -30,6 +30,7 @@
from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr, create_model

from ..json import from_json_pydantic, to_json_pydantic
from ..persistenceContext import PersistenceContextVars
from ..timespan import Timespan, TimespanDatabaseRepresentation
from ._elements import Dimension, DimensionElement

@@ -166,7 +167,16 @@

This method should only be called when the inputs are trusted.
"""
- node = cls.construct(definition=definition, record=record)
_recItems = record.items()
# Type ignore because the ternary statement seems to confuse mypy
# based on conflicting inferred types of v.
key = (
definition,
frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems), # type: ignore
)
cache = PersistenceContextVars.serializedDimensionRecordMapping.get()
if cache is not None and (result := cache.get(key)) is not None:
return result

node = SerializedDimensionRecord.__new__(cls)
setter = object.__setattr__
setter(node, "definition", definition)
@@ -177,6 +187,8 @@
node, "record", {k: v if type(v) != list else tuple(v) for k, v in record.items()} # type: ignore
)
setter(node, "__fields_set__", {"definition", "record"})
if cache is not None:
cache[key] = node

return node


@@ -367,6 +379,16 @@
if universe is None:
# this is for mypy
raise ValueError("Unable to determine a usable universe")
_recItems = simple.record.items()
# Type ignore because the ternary statement seems to confuse mypy
# based on conflicting inferred types of v.
key = (
simple.definition,
frozenset((k, v if not isinstance(v, list) else tuple(v)) for k, v in _recItems), # type: ignore
)
cache = PersistenceContextVars.dimensionRecords.get()
if cache is not None and (result := cache.get(key)) is not None:
return result


definition = DimensionElement.from_simple(simple.definition, universe=universe)

@@ -389,7 +411,10 @@
if (hsh := "hash") in rec:
rec[hsh] = bytes.fromhex(rec[hsh].decode())

- return _reconstructDimensionRecord(definition, rec)
+ dimRec = _reconstructDimensionRecord(definition, rec)
if cache is not None:
cache[key] = dimRec

return dimRec

to_json = to_json_pydantic
from_json: ClassVar = classmethod(from_json_pydantic)
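
The list-to-tuple conversion in both key computations above exists because record values may be lists, which are unhashable; tuples restore hashability so the frozenset of items can serve as a dict key. A small demonstration with made-up record values:

```python
record = {"instrument": "HSC", "detector_ids": [0, 1, 2]}
key = frozenset((k, tuple(v) if isinstance(v, list) else v) for k, v in record.items())
assert isinstance(hash(key), int)  # hashing the raw list would raise TypeError
```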