From 20c382e816e85ab6de7382db7464e20712cf6c84 Mon Sep 17 00:00:00 2001
From: Wen-Tien Chang
Date: Tue, 18 Nov 2025 12:43:41 +0800
Subject: [PATCH] Add prompt_cache_retention to ModelSettings

---
 src/agents/model_settings.py                | 6 ++++++
 src/agents/models/openai_chatcompletions.py | 1 +
 src/agents/models/openai_responses.py       | 1 +
 tests/model_settings/test_serialization.py  | 1 +
 4 files changed, 9 insertions(+)

diff --git a/src/agents/model_settings.py b/src/agents/model_settings.py
index 6a3dbd04c..0707f1dd4 100644
--- a/src/agents/model_settings.py
+++ b/src/agents/model_settings.py
@@ -116,6 +116,12 @@ class ModelSettings:
     For Responses API: automatically enabled when not specified.
     For Chat Completions API: disabled when not specified."""

+    prompt_cache_retention: Literal["in_memory", "24h"] | None = None
+    """The retention policy for the prompt cache. Set to `24h` to enable extended
+    prompt caching, which keeps cached prefixes active for longer, up to a maximum
+    of 24 hours.
+    [Learn more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention)."""
+
     include_usage: bool | None = None
     """Whether to include usage chunk.
     Only available for Chat Completions API."""
diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py
index d6cf662d2..594848d3e 100644
--- a/src/agents/models/openai_chatcompletions.py
+++ b/src/agents/models/openai_chatcompletions.py
@@ -308,6 +308,7 @@ async def _fetch_response(
             reasoning_effort=self._non_null_or_omit(reasoning_effort),
             verbosity=self._non_null_or_omit(model_settings.verbosity),
             top_logprobs=self._non_null_or_omit(model_settings.top_logprobs),
+            prompt_cache_retention=self._non_null_or_omit(model_settings.prompt_cache_retention),
             extra_headers=self._merge_headers(model_settings),
             extra_query=model_settings.extra_query,
             extra_body=model_settings.extra_body,
diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py
index d52c13b36..4588937cb 100644
--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@@ -326,6 +326,7 @@ async def _fetch_response(
             extra_body=model_settings.extra_body,
             text=response_format,
             store=self._non_null_or_omit(model_settings.store),
+            prompt_cache_retention=self._non_null_or_omit(model_settings.prompt_cache_retention),
             reasoning=self._non_null_or_omit(model_settings.reasoning),
             metadata=self._non_null_or_omit(model_settings.metadata),
             **extra_args,
diff --git a/tests/model_settings/test_serialization.py b/tests/model_settings/test_serialization.py
index f099a1a31..97314d270 100644
--- a/tests/model_settings/test_serialization.py
+++ b/tests/model_settings/test_serialization.py
@@ -56,6 +56,7 @@ def test_all_fields_serialization() -> None:
         reasoning=Reasoning(),
         metadata={"foo": "bar"},
         store=False,
+        prompt_cache_retention="24h",
         include_usage=False,
         response_include=["reasoning.encrypted_content"],
         top_logprobs=1,
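
Example usage (a minimal sketch, not part of the patch): the snippet below shows
how the new setting would be passed through once this change lands. It assumes
the public `agents` package exports (`Agent`, `ModelSettings`, `Runner`); the
agent name and instructions are illustrative.

    import asyncio

    from agents import Agent, ModelSettings, Runner

    # prompt_cache_retention="24h" opts this agent's requests into extended
    # prompt caching. Leaving it as None (the default) causes the parameter
    # to be omitted from the API call via _non_null_or_omit, so existing
    # behavior is unchanged.
    agent = Agent(
        name="Assistant",  # illustrative
        instructions="You are a helpful assistant.",
        model_settings=ModelSettings(prompt_cache_retention="24h"),
    )

    async def main() -> None:
        result = await Runner.run(agent, "Hello!")
        print(result.final_output)

    asyncio.run(main())

Because both the Chat Completions and Responses code paths wrap the value in
_non_null_or_omit, the same ModelSettings works against either API.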