Skip to content

Commit

Permalink
Optimize the serialization time for QuantumGraph
Browse files Browse the repository at this point in the history
Make use of the butler serialization caching mechanisms to make
sure objects are effectively cached instead of reconstructing
objects needlessly. Also lower the compression ratio of LZMA.
This results in slightly larger graph sizes, but is offset by
a large runtime gain.
  • Loading branch information
natelust authored and TallJimbo committed Aug 21, 2023
1 parent dbc57bf commit f2093ce
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions python/lsst/pipe/base/graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
DatasetType,
DimensionRecordsAccumulator,
DimensionUniverse,
PersistenceContextVars,
Quantum,
)
from lsst.resources import ResourcePath, ResourcePathExpression
Expand Down Expand Up @@ -1005,6 +1006,11 @@ def save(self, file: BinaryIO) -> None:
file.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes

def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[bytearray, dict]:
    """Build the serialized on-disk representation of this graph.

    Parameters
    ----------
    returnHeader : `bool`, optional
        If `True`, also return the header mapping alongside the byte
        buffer; defaults to `False`.

    Returns
    -------
    result : `bytearray` or `tuple` [`bytearray`, `dict`]
        The serialized graph, optionally paired with its header dict.

    Notes
    -----
    The real work happens in ``_buildSaveObjectImpl``; it is executed
    inside a ``PersistenceContextVars`` context so that the butler's
    serialization caching is active for the duration of the save
    (presumably avoiding redundant object reconstruction — see the
    commit message).
    """
    # Run the implementation under a fresh persistence context.
    return PersistenceContextVars().run(self._buildSaveObjectImpl, returnHeader)

def _buildSaveObjectImpl(self, returnHeader: bool = False) -> bytearray | tuple[bytearray, dict]:
# make some containers
jsonData: deque[bytes] = deque()
# node map is a list because json does not accept mapping keys that
Expand Down Expand Up @@ -1076,7 +1082,7 @@ def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[byte

# dump to json string, and encode that string to bytes and then
# compress those bytes
dump = lzma.compress(json.dumps(taskDescription).encode())
dump = lzma.compress(json.dumps(taskDescription).encode(), preset=2)
# record the sizing and relation information
taskDefMap[taskDef.label] = {
"bytes": (count, count + len(dump)),
Expand All @@ -1095,7 +1101,7 @@ def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[byte
# a large impact on on-disk size, so it is worth doing
simpleNode = node.to_simple(accumulator=dimAccumulator)

dump = lzma.compress(simpleNode.json().encode())
dump = lzma.compress(simpleNode.json().encode(), preset=2)
jsonData.append(dump)
nodeMap.append(
(
Expand Down

0 comments on commit f2093ce

Please sign in to comment.