Skip to content

Commit

Permalink
Optimize the serialization time for QuantumGraph
Browse files Browse the repository at this point in the history
Make use of the butler serialization caching mechanisms to make
sure objects are effectively cached instead of reconstructing
objects needlessly. Also lower the compression ratio of LZMA.
This results in slightly larger graph sizes, but is offset by
a large runtime gain.
  • Loading branch information
natelust authored and TallJimbo committed Aug 21, 2023
1 parent dbc57bf commit f2093ce
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions python/lsst/pipe/base/graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
DatasetType,
DimensionRecordsAccumulator,
DimensionUniverse,
PersistenceContextVars,
Quantum,
)
from lsst.resources import ResourcePath, ResourcePathExpression
Expand Down Expand Up @@ -1005,6 +1006,11 @@ def save(self, file: BinaryIO) -> None:
file.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes

def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[bytearray, dict]:
    """Build the serialized on-disk representation of this graph.

    Parameters
    ----------
    returnHeader : `bool`, optional
        If `True`, also return the header mapping alongside the byte
        buffer; defaults to `False`.

    Returns
    -------
    result : `bytearray` or `tuple` [`bytearray`, `dict`]
        The serialized graph, optionally paired with its header dict.

    Notes
    -----
    The real work happens in ``_buildSaveObjectImpl``; it is executed
    inside a ``PersistenceContextVars`` context so that the butler's
    serialization caching is active for the duration of the save
    (presumably avoiding redundant object reconstruction — see the
    commit message).
    """
    # Run the implementation under a fresh persistence context.
    return PersistenceContextVars().run(self._buildSaveObjectImpl, returnHeader)

def _buildSaveObjectImpl(self, returnHeader: bool = False) -> bytearray | tuple[bytearray, dict]:
# make some containers
jsonData: deque[bytes] = deque()
# node map is a list because json does not accept mapping keys that
Expand Down Expand Up @@ -1076,7 +1082,7 @@ def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[byte

# dump to json string, and encode that string to bytes and then
# compress those bytes
dump = lzma.compress(json.dumps(taskDescription).encode())
dump = lzma.compress(json.dumps(taskDescription).encode(), preset=2)
# record the sizing and relation information
taskDefMap[taskDef.label] = {
"bytes": (count, count + len(dump)),
Expand All @@ -1095,7 +1101,7 @@ def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[byte
# a large impact on on-disk size, so it is worth doing
simpleNode = node.to_simple(accumulator=dimAccumulator)

dump = lzma.compress(simpleNode.json().encode())
dump = lzma.compress(simpleNode.json().encode(), preset=2)
jsonData.append(dump)
nodeMap.append(
(
Expand Down

0 comments on commit f2093ce

Please sign in to comment.