-
Notifications
You must be signed in to change notification settings - Fork 5
refactor(executors): improve function execution chain #84
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
eywalker
merged 8 commits into
dev
from
eywalker/plt-920-clean-up-the-logic-around-function-node-function-pod-packet
Mar 14, 2026
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
3abb1bb
refactor(executors): improve function execution chain
eywalker 865b901
test(executors): add thorough coverage for new executor features
eywalker aa4c1cf
refactor(function-node): use CachedFunctionPod for result caching
eywalker cb4550d
fix(cached-function-pod): cache by packet hash only, not tag
eywalker 94999e5
test(function-node): add comprehensive pipeline + result cache tests
eywalker 5edb463
refactor(caching): extract shared ResultCache from CachedPacketFuncti…
eywalker 7776cff
test(result-cache): add direct unit tests for ResultCache
eywalker f7fe3e0
fix: address Copilot review feedback
eywalker File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,154 @@ | ||
| """CachedFunctionPod — pod-level caching wrapper that intercepts process_packet().""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import logging | ||
| from typing import TYPE_CHECKING, Any | ||
|
|
||
| from orcapod.core.function_pod import WrappedFunctionPod | ||
| from orcapod.core.result_cache import ResultCache | ||
| from orcapod.protocols.core_protocols import ( | ||
| FunctionPodProtocol, | ||
| PacketProtocol, | ||
| StreamProtocol, | ||
| TagProtocol, | ||
| ) | ||
| from orcapod.protocols.database_protocols import ArrowDatabaseProtocol | ||
|
|
||
| if TYPE_CHECKING: | ||
| import pyarrow as pa | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
class CachedFunctionPod(WrappedFunctionPod):
    """Pod-level caching wrapper that intercepts ``process_packet()``.

    Caches at the ``process_packet(tag, packet)`` level using only the
    **input packet content hash** as the cache key — the output of a
    packet function depends solely on the packet, not the tag.

    Tag-level provenance tracking (tag + system tags + packet hash) is
    handled separately by ``FunctionNode.add_pipeline_record``.

    Uses a shared ``ResultCache`` for lookup/store/conflict-resolution
    logic (same mechanism as ``CachedPacketFunction``).
    """

    # Re-export the flag constant so callers can reference it from this
    # class without importing ResultCache directly.
    RESULT_COMPUTED_FLAG = ResultCache.RESULT_COMPUTED_FLAG

    def __init__(
        self,
        function_pod: FunctionPodProtocol,
        result_database: ArrowDatabaseProtocol,
        record_path_prefix: tuple[str, ...] = (),
        auto_flush: bool = True,
        **kwargs,
    ) -> None:
        """Wrap *function_pod* with a result cache backed by *result_database*.

        Args:
            function_pod: The inner pod whose ``process_packet`` results
                are cached.
            result_database: Arrow-backed store that persists cached
                results.
            record_path_prefix: Path components prepended to this pod's
                URI to form the cache record path.
            auto_flush: Forwarded to ``ResultCache``; flush records to
                the database as they are stored.
            **kwargs: Passed through to ``WrappedFunctionPod.__init__``.
        """
        super().__init__(function_pod, **kwargs)
        self._record_path_prefix = record_path_prefix
        # NOTE(review): assumes ``self.uri`` is a tuple[str, ...] so the
        # concatenation yields a flat record path — confirm against
        # WrappedFunctionPod.
        self._cache = ResultCache(
            result_database=result_database,
            record_path=record_path_prefix + self.uri,
            auto_flush=auto_flush,
        )

    @property
    def _result_database(self) -> ArrowDatabaseProtocol:
        """The underlying result database (for FunctionNode access)."""
        return self._cache.result_database

    @property
    def record_path(self) -> tuple[str, ...]:
        """Return the path to the cached records in the result store."""
        return self._cache.record_path

    def _store_and_flag(
        self, packet: PacketProtocol, output: PacketProtocol
    ) -> PacketProtocol:
        """Persist a freshly computed *output* and mark it as computed.

        Shared by the sync and async paths so their caching behavior
        cannot silently diverge. Stores the result keyed by *packet*'s
        content hash alongside the packet function's variation and
        execution metadata, then attaches ``RESULT_COMPUTED_FLAG: True``
        as a meta column on the returned packet.
        """
        pf = self._function_pod.packet_function
        self._cache.store(
            packet,
            output,
            variation_data=pf.get_function_variation_data(),
            execution_data=pf.get_execution_data(),
        )
        return output.with_meta_columns(**{self.RESULT_COMPUTED_FLAG: True})

    def process_packet(
        self, tag: TagProtocol, packet: PacketProtocol
    ) -> tuple[TagProtocol, PacketProtocol | None]:
        """Process a packet with pod-level caching.

        The cache key is the input packet content hash only — the function
        output depends solely on the packet, not the tag. Freshly computed
        outputs carry a ``RESULT_COMPUTED_FLAG: True`` meta value.
        (Presumably ``ResultCache.lookup`` attaches the ``False`` flag to
        cache hits — TODO confirm; this class only sets ``True``.)

        Args:
            tag: The tag associated with the packet.
            packet: The input packet to process.

        Returns:
            A ``(tag, output_packet)`` tuple; output_packet is ``None``
            if the inner function filters the packet out.
        """
        cached = self._cache.lookup(packet)
        if cached is not None:
            logger.info("Pod-level cache hit")
            return tag, cached

        tag, output = self._function_pod.process_packet(tag, packet)
        if output is not None:
            output = self._store_and_flag(packet, output)
        return tag, output

    async def async_process_packet(
        self, tag: TagProtocol, packet: PacketProtocol
    ) -> tuple[TagProtocol, PacketProtocol | None]:
        """Async counterpart of ``process_packet``.

        DB lookup and store are synchronous (DB protocol is sync), but the
        actual computation uses the inner pod's ``async_process_packet``
        for true async execution.
        """
        cached = self._cache.lookup(packet)
        if cached is not None:
            logger.info("Pod-level cache hit")
            return tag, cached

        tag, output = await self._function_pod.async_process_packet(tag, packet)
        if output is not None:
            output = self._store_and_flag(packet, output)
        return tag, output

    def get_all_cached_outputs(
        self, include_system_columns: bool = False
    ) -> "pa.Table | None":
        """Return all cached records from the result store for this pod."""
        return self._cache.get_all_records(
            include_system_columns=include_system_columns
        )

    def process(
        self, *streams: StreamProtocol, label: str | None = None
    ) -> StreamProtocol:
        """Invoke the inner pod but with pod-level caching on process_packet.

        The stream returned uses *this* pod's ``process_packet`` (which
        includes caching) rather than the inner pod's.

        Args:
            streams: Input streams forwarded to the inner pod for
                validation and preparation.
            label: Optional label attached to the returned stream.
        """
        # Local import mirrors the original: avoids a circular import
        # with orcapod.core.function_pod at module load time.
        from orcapod.core.function_pod import FunctionPodStream

        # Validate and prepare the input stream (original order kept:
        # prepare first, then validate).
        input_stream = self._function_pod.handle_input_streams(*streams)
        self._function_pod.validate_inputs(*streams)

        return FunctionPodStream(
            function_pod=self,
            input_stream=input_stream,
            label=label,
        )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.