From 4d1c9a75e45d6a5380531f06a671b8570e1a649d Mon Sep 17 00:00:00 2001
From: Kazuhiro Sera
Date: Tue, 18 Nov 2025 15:58:36 +0900
Subject: [PATCH] feat: Add prompt_cache_retention option to ModelSettings

---
 .changeset/thirty-clubs-smell.md              |  6 ++++
 packages/agents-core/src/model.ts             |  6 ++++
 .../src/openaiChatCompletionsModel.ts         |  1 +
 .../agents-openai/src/openaiResponsesModel.ts |  1 +
 .../test/openaiChatCompletionsModel.test.ts   | 31 +++++++++++++++++++
 .../test/openaiResponsesModel.test.ts         | 27 ++++++++++++++++
 6 files changed, 72 insertions(+)
 create mode 100644 .changeset/thirty-clubs-smell.md

diff --git a/.changeset/thirty-clubs-smell.md b/.changeset/thirty-clubs-smell.md
new file mode 100644
index 00000000..971a71f1
--- /dev/null
+++ b/.changeset/thirty-clubs-smell.md
@@ -0,0 +1,6 @@
+---
+'@openai/agents-openai': patch
+'@openai/agents-core': patch
+---
+
+feat: Add prompt_cache_retention option to ModelSettings
diff --git a/packages/agents-core/src/model.ts b/packages/agents-core/src/model.ts
index cf23893b..bcfe65dd 100644
--- a/packages/agents-core/src/model.ts
+++ b/packages/agents-core/src/model.ts
@@ -122,6 +122,12 @@ export type ModelSettings = {
    */
   store?: boolean;
 
+  /**
+   * Enables prompt caching and controls how long cached content should be retained by the model provider.
+   * See https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention for the available options.
+   */
+  promptCacheRetention?: 'in-memory' | '24h' | null;
+
   /**
    * The reasoning settings to use when calling the model.
    */
diff --git a/packages/agents-openai/src/openaiChatCompletionsModel.ts b/packages/agents-openai/src/openaiChatCompletionsModel.ts
index 66a0ddc3..f445a4d0 100644
--- a/packages/agents-openai/src/openaiChatCompletionsModel.ts
+++ b/packages/agents-openai/src/openaiChatCompletionsModel.ts
@@ -337,6 +337,7 @@ export class OpenAIChatCompletionsModel implements Model {
       parallel_tool_calls: parallelToolCalls,
       stream,
       store: request.modelSettings.store,
+      prompt_cache_retention: request.modelSettings.promptCacheRetention,
       ...providerData,
     };
 
diff --git a/packages/agents-openai/src/openaiResponsesModel.ts b/packages/agents-openai/src/openaiResponsesModel.ts
index f63acb12..373e750c 100644
--- a/packages/agents-openai/src/openaiResponsesModel.ts
+++ b/packages/agents-openai/src/openaiResponsesModel.ts
@@ -1622,6 +1622,7 @@ export class OpenAIResponsesModel implements Model {
       stream,
       text: responseFormat,
       store: request.modelSettings.store,
+      prompt_cache_retention: request.modelSettings.promptCacheRetention,
       ...restOfProviderData,
     };
 
diff --git a/packages/agents-openai/test/openaiChatCompletionsModel.test.ts b/packages/agents-openai/test/openaiChatCompletionsModel.test.ts
index f50c566a..c53219e3 100644
--- a/packages/agents-openai/test/openaiChatCompletionsModel.test.ts
+++ b/packages/agents-openai/test/openaiChatCompletionsModel.test.ts
@@ -137,6 +137,37 @@ describe('OpenAIChatCompletionsModel', () => {
     ]);
   });
 
+  it('sends prompt cache retention when provided', async () => {
+    const client = new FakeClient();
+    const response = {
+      id: 'r',
+      choices: [{ message: { content: 'cached' } }],
+      usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
+    } as any;
+    client.chat.completions.create.mockResolvedValue(response);
+
+    const model = new OpenAIChatCompletionsModel(client as any, 'gpt');
+    const req: any = {
+      input: 'u',
+      modelSettings: {
+        promptCacheRetention: '24h',
+      },
+      tools: [],
+      outputType: 'text',
+      handoffs: [],
+      tracing: false,
+    };
+
+    await withTrace('t', () => model.getResponse(req));
+
+    expect(client.chat.completions.create).toHaveBeenCalledWith(
+      expect.objectContaining({
+        prompt_cache_retention: '24h',
+      }),
+      { headers: HEADERS, signal: undefined },
+    );
+  });
+
   it('handles refusal message', async () => {
     const client = new FakeClient();
     const response = {
diff --git a/packages/agents-openai/test/openaiResponsesModel.test.ts b/packages/agents-openai/test/openaiResponsesModel.test.ts
index a993d2cb..13ed1524 100644
--- a/packages/agents-openai/test/openaiResponsesModel.test.ts
+++ b/packages/agents-openai/test/openaiResponsesModel.test.ts
@@ -74,6 +74,33 @@ describe('OpenAIResponsesModel', () => {
     });
   });
 
+  it('sends prompt cache retention setting to the Responses API', async () => {
+    await withTrace('test', async () => {
+      const fakeResponse = { id: 'res-cache', usage: {}, output: [] };
+      const createMock = vi.fn().mockResolvedValue(fakeResponse);
+      const fakeClient = {
+        responses: { create: createMock },
+      } as unknown as OpenAI;
+      const model = new OpenAIResponsesModel(fakeClient, 'gpt-cache');
+
+      const request = {
+        systemInstructions: undefined,
+        input: 'hello',
+        modelSettings: { promptCacheRetention: 'in-memory' },
+        tools: [],
+        outputType: 'text',
+        handoffs: [],
+        tracing: false,
+        signal: undefined,
+      };
+
+      await model.getResponse(request as any);
+
+      const [args] = createMock.mock.calls[0];
+      expect(args.prompt_cache_retention).toBe('in-memory');
+    });
+  });
+
   it('still sends an empty tools array when no prompt is provided', async () => {
     await withTrace('test', async () => {
       const fakeResponse = { id: 'res-no-prompt', usage: {}, output: [] };
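
For context, a minimal consumer-side sketch of the new option, assuming the `Agent` and `run` exports of `@openai/agents`; the agent config and model name below are illustrative and not part of this patch:

```ts
import { Agent, run } from '@openai/agents';

// Sketch only: `promptCacheRetention` is the setting added by this patch;
// the surrounding Agent/run usage and the model name are assumptions.
const agent = new Agent({
  name: 'Cached assistant',
  instructions: 'Answer briefly.',
  model: 'gpt-4.1',
  modelSettings: {
    // '24h' asks the provider to retain cached prompt prefixes for 24 hours;
    // 'in-memory' and null are the other values accepted by the new type.
    promptCacheRetention: '24h',
  },
});

const result = await run(agent, 'Hello!');
console.log(result.finalOutput);
```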