Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
{
"name": "Python 3",
"image": "mcr.microsoft.com/devcontainers/python:3.13-bullseye",
"image": "mcr.microsoft.com/devcontainers/python:3.14-bookworm",
"features": {
"ghcr.io/va-h/devcontainers-features/uv:1": {},
"ghcr.io/devcontainers/features/azure-cli:1.2.8": {}
"ghcr.io/devcontainers/features/docker-in-docker:3": {},
"ghcr.io/devcontainers/features/azure-cli:1.2.9": {},
"ghcr.io/devcontainers/features/copilot-cli:1": {}
},
"postCreateCommand": "bash ./devsetup.sh",
"workspaceFolder": "/workspaces/agent-framework/python/",
"customizations": {
"vscode": {
"extensions": [
"GitHub.copilot",
"GitHub.vscode-github-actions",
"ms-python.python",
"ms-windows-ai-studio.windows-ai-studio",
"littlefoxteam.vscode-python-test-adapter"
Expand Down
2 changes: 1 addition & 1 deletion dotnet/src/Microsoft.Agents.AI.DurableTask/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
## v1.0.0-preview.260219.1

- [BREAKING] Changed ChatHistory and AIContext Providers to have pipeline semantics ([#3806](https://github.com/microsoft/agent-framework/pull/3806))
- Marked all `RunAsync<T>` overloads as `new`, added missing ones, and added support for primitives and arrays ([#3803](https://github.com/microsoft/agent-framework/pull/3803))
- Marked all `RunAsync<T>` overloads as `new`, added missing ones, and added support for primitives and arrays #3803
- Improve session cast error message quality and consistency ([#3973](https://github.com/microsoft/agent-framework/pull/3973))

## v1.0.0-preview.260212.1
Expand Down
8 changes: 8 additions & 0 deletions python/packages/core/agent_framework/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
CharacterEstimatorTokenizer,
CompactionProvider,
CompactionStrategy,
ContextWindowCompactionStrategy,
SelectiveToolCallCompactionStrategy,
SlidingWindowStrategy,
SummarizationStrategy,
Expand Down Expand Up @@ -79,6 +80,10 @@
tool_calls_present,
)
from ._feature_stage import ExperimentalFeature, ReleaseCandidateFeature
from ._harness._agent import (
DEFAULT_HARNESS_INSTRUCTIONS,
create_harness_agent,
)
from ._harness._background_agents import (
DEFAULT_BACKGROUND_AGENTS_SOURCE_ID,
BackgroundAgentsProvider,
Expand Down Expand Up @@ -304,6 +309,7 @@
"APP_INFO",
"COMPACTION_STATE_KEY",
"DEFAULT_BACKGROUND_AGENTS_SOURCE_ID",
"DEFAULT_HARNESS_INSTRUCTIONS",
"DEFAULT_MAX_ITERATIONS",
"DEFAULT_MEMORY_SOURCE_ID",
"DEFAULT_MODE_SOURCE_ID",
Expand Down Expand Up @@ -362,6 +368,7 @@
"CompactionStrategy",
"Content",
"ContextProvider",
"ContextWindowCompactionStrategy",
"ContinuationToken",
"ConversationSplit",
"ConversationSplitter",
Expand Down Expand Up @@ -509,6 +516,7 @@
"apply_compaction",
"chat_middleware",
"create_edge_runner",
"create_harness_agent",
"detect_media_type_from_base64",
"evaluate_agent",
"evaluate_workflow",
Expand Down
116 changes: 116 additions & 0 deletions python/packages/core/agent_framework/_compaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -1277,6 +1277,121 @@ async def after_run(
# whether excluded messages are loaded on the next turn.


class ContextWindowCompactionStrategy:
    """Token-budget compaction derived from a model's context window size.

    Computes an input budget from the model's context window and output token
    limits (``max_context_window_tokens - max_output_tokens``), then applies a
    two-phase compaction pipeline:

    1. **Tool result eviction** — collapses older tool-call groups into summaries
       when included tokens exceed ``tool_eviction_threshold`` of the input budget.
    2. **Truncation** — removes oldest non-system groups when included tokens
       exceed ``truncation_threshold`` of the input budget.

    The class uses two independent :class:`TokenBudgetComposedStrategy`
    instances — one per phase — so each fires only when its own threshold
    is exceeded.

    Attributes:
        max_context_window_tokens: The context window size passed to the constructor.
        max_output_tokens: The output token limit passed to the constructor.
        input_budget_tokens: ``max_context_window_tokens - max_output_tokens``.
        tool_eviction_threshold: Fraction of the input budget used for phase 1.
        truncation_threshold: Fraction of the input budget used for phase 2.

    Examples:
        .. code-block:: python

            from agent_framework import ContextWindowCompactionStrategy, CompactionProvider

            strategy = ContextWindowCompactionStrategy(
                max_context_window_tokens=128_000,
                max_output_tokens=16_384,
            )
            provider = CompactionProvider(before_strategy=strategy)
    """

    DEFAULT_TOOL_EVICTION_THRESHOLD: float = 0.5
    """Default fraction of input budget at which tool result eviction triggers."""

    DEFAULT_TRUNCATION_THRESHOLD: float = 0.8
    """Default fraction of input budget at which truncation triggers."""

    def __init__(
        self,
        *,
        max_context_window_tokens: int,
        max_output_tokens: int,
        tokenizer: TokenizerProtocol | None = None,
        tool_eviction_threshold: float = DEFAULT_TOOL_EVICTION_THRESHOLD,
        truncation_threshold: float = DEFAULT_TRUNCATION_THRESHOLD,
        keep_last_tool_call_groups: int = 4,
    ) -> None:
        """Create a context-window compaction strategy.

        Keyword Args:
            max_context_window_tokens: The model's maximum context window size
                in tokens (e.g. 128,000). Must be positive.
            max_output_tokens: The model's maximum output tokens per response
                (e.g. 16,384). Must be >= 0 and strictly less than
                ``max_context_window_tokens``.
            tokenizer: Token counter for measuring message sizes. Defaults to
                :class:`CharacterEstimatorTokenizer` when ``None``.
            tool_eviction_threshold: Fraction of input budget (0.0, 1.0] at
                which tool result eviction triggers. Defaults to 0.5.
            truncation_threshold: Fraction of input budget (0.0, 1.0] at which
                truncation triggers. Must be ≥ ``tool_eviction_threshold``.
                Defaults to 0.8.
            keep_last_tool_call_groups: Number of most recent tool-call groups
                to retain verbatim during tool eviction. Older groups are
                collapsed into summaries. Defaults to 4.

        Raises:
            ValueError: If the token limits are non-positive or inconsistent,
                or if the thresholds are out of range or inconsistent.
        """
        # Validate everything up front so a bad configuration fails before any
        # sub-strategy is constructed.
        if max_context_window_tokens <= 0:
            raise ValueError("max_context_window_tokens must be positive.")
        if max_output_tokens < 0 or max_output_tokens >= max_context_window_tokens:
            raise ValueError("max_output_tokens must be >= 0 and < max_context_window_tokens.")
        if not (0.0 < tool_eviction_threshold <= 1.0):
            raise ValueError("tool_eviction_threshold must be in (0.0, 1.0].")
        if not (0.0 < truncation_threshold <= 1.0):
            raise ValueError("truncation_threshold must be in (0.0, 1.0].")
        if truncation_threshold < tool_eviction_threshold:
            raise ValueError("truncation_threshold must be >= tool_eviction_threshold.")

        # Use an identity check rather than truthiness: a caller-supplied
        # tokenizer that happens to be falsy (e.g. defines __len__ or __bool__)
        # must not be silently swapped for the default.
        if tokenizer is None:
            tokenizer = CharacterEstimatorTokenizer()
        resolved_tokenizer = tokenizer

        # Tokens available for input once the output reservation is removed.
        input_budget = max_context_window_tokens - max_output_tokens
        # int() truncates toward zero, so each budget is rounded down.
        tool_eviction_tokens = int(input_budget * tool_eviction_threshold)
        truncation_tokens = int(input_budget * truncation_threshold)

        self.max_context_window_tokens = max_context_window_tokens
        self.max_output_tokens = max_output_tokens
        self.input_budget_tokens = input_budget
        self.tool_eviction_threshold = tool_eviction_threshold
        self.truncation_threshold = truncation_threshold

        # Phase 1: gated on the (lower) tool-eviction budget.
        self._tool_eviction = TokenBudgetComposedStrategy(
            token_budget=tool_eviction_tokens,
            tokenizer=resolved_tokenizer,
            strategies=[
                ToolResultCompactionStrategy(keep_last_tool_call_groups=keep_last_tool_call_groups),
            ],
        )
        # Phase 2: gated on the (higher) truncation budget; the truncation
        # strategy is configured to compact down to the tool-eviction budget.
        self._truncation = TokenBudgetComposedStrategy(
            token_budget=truncation_tokens,
            tokenizer=resolved_tokenizer,
            strategies=[
                TruncationStrategy(
                    max_n=truncation_tokens,
                    compact_to=tool_eviction_tokens,
                    tokenizer=resolved_tokenizer,
                ),
            ],
        )

    async def __call__(self, messages: list[Message]) -> bool:
        """Apply the two-phase compaction pipeline.

        Both phases are always awaited, in order: tool eviction first, then
        truncation on the same ``messages`` list.

        Args:
            messages: The messages to compact; passed to both phases.

        Returns:
            True if compaction changed message inclusion; otherwise False.
        """
        changed = await self._tool_eviction(messages)
        # Await truncation before combining so it is never short-circuited
        # away when the first phase already reported a change.
        return (await self._truncation(messages)) or changed


__all__ = [
"COMPACTION_STATE_KEY",
"EXCLUDED_KEY",
Expand All @@ -1293,6 +1408,7 @@ async def after_run(
"CharacterEstimatorTokenizer",
"CompactionProvider",
"CompactionStrategy",
"ContextWindowCompactionStrategy",
"GroupKind",
"SelectiveToolCallCompactionStrategy",
"SlidingWindowStrategy",
Expand Down
Loading
Loading