From 1347abb7c7155a4e63a629f4fec4bbb846b0a48c Mon Sep 17 00:00:00 2001 From: Dileep Yavanamandha Date: Sun, 7 Jun 2026 15:23:40 -0700 Subject: [PATCH] Enable persistent CoT for hidden models --- extensions/copilot/package.json | 9 ++++ extensions/copilot/package.nls.json | 1 + .../common/configurationService.ts | 2 + .../platform/endpoint/node/responsesApi.ts | 9 ++-- .../endpoint/node/test/responsesApi.spec.ts | 54 +++++++++++++++++++ .../platform/networking/common/networking.ts | 2 +- 6 files changed, 73 insertions(+), 4 deletions(-) diff --git a/extensions/copilot/package.json b/extensions/copilot/package.json index c4a2c922b314e8..c1c7a59fd07c49 100644 --- a/extensions/copilot/package.json +++ b/extensions/copilot/package.json @@ -4045,6 +4045,15 @@ "onExp" ] }, + "github.copilot.chat.responsesApi.persistentCoT.enabled": { + "type": "boolean", + "default": false, + "markdownDescription": "%github.copilot.config.responsesApi.persistentCoT.enabled%", + "tags": [ + "experimental", + "onExp" + ] + }, "github.copilot.chat.updated53CodexPrompt.enabled": { "type": "boolean", "default": true, diff --git a/extensions/copilot/package.nls.json b/extensions/copilot/package.nls.json index 620a86a2f07a51..08f101fad3efbe 100644 --- a/extensions/copilot/package.nls.json +++ b/extensions/copilot/package.nls.json @@ -345,6 +345,7 @@ "github.copilot.config.responsesApiReasoningSummary": "Sets the reasoning summary style used for the Responses API. Requires `#github.copilot.chat.useResponsesApi#`.", "github.copilot.config.responsesApiContextManagement.enabled": "Enables context management for the Responses API. Requires `#github.copilot.chat.useResponsesApi#`.", "github.copilot.config.responsesApi.promptCacheKey.enabled": "Enables prompt cache key being set for the Responses API.", + "github.copilot.config.responsesApi.persistentCoT.enabled": "Enables persistent chain of thought for supported Responses API models.", "github.copilot.config.updated53CodexPrompt.enabled": "Enables the updated prompt for gpt-5.3-codex model.", "github.copilot.config.claude47OpusPrompt.enabled": "Enables the updated system prompt tuned for the Claude Opus 4.7 model.", "github.copilot.config.gpt55GetChangedFilesTool.enabled": "Enables the Get Changed Files tool for gpt-5.5 models.", diff --git a/extensions/copilot/src/platform/configuration/common/configurationService.ts b/extensions/copilot/src/platform/configuration/common/configurationService.ts index ed50d017d908ba..9da3de2dd23989 100644 --- a/extensions/copilot/src/platform/configuration/common/configurationService.ts +++ b/extensions/copilot/src/platform/configuration/common/configurationService.ts @@ -968,6 +968,8 @@ export namespace ConfigKey { export const ResponsesApiContextManagementEnabled = defineSetting('chat.responsesApiContextManagement.enabled', ConfigType.ExperimentBased, false); /** Enable client-side prompt_cache_key (conversationId:modelFamily) sent to Responses API */ export const ResponsesApiPromptCacheKeyEnabled = defineSetting('chat.responsesApi.promptCacheKey.enabled', ConfigType.ExperimentBased, false); + /** Enable persistent chain of thought for supported Responses API model families */ + export const ResponsesApiPersistentCoTEnabled = defineSetting('chat.responsesApi.persistentCoT.enabled', ConfigType.ExperimentBased, false); /** Enable updated prompt for 5.3Codex model */ export const Updated53CodexPromptEnabled = defineSetting('chat.updated53CodexPrompt.enabled', ConfigType.ExperimentBased, true); /** Enable updated prompt for Claude Opus 4.7 model */ diff --git a/extensions/copilot/src/platform/endpoint/node/responsesApi.ts b/extensions/copilot/src/platform/endpoint/node/responsesApi.ts index d7ea339da1c417..5660d746d55bb2 100644 --- a/extensions/copilot/src/platform/endpoint/node/responsesApi.ts +++ b/extensions/copilot/src/platform/endpoint/node/responsesApi.ts @@ -27,7 +27,7 @@ import { IChatWebSocketManager } from '../../networking/node/chatWebSocketManage import { IExperimentationService } from '../../telemetry/common/nullExperimentationService'; import { ITelemetryService } from '../../telemetry/common/telemetry'; import { TelemetryData } from '../../telemetry/common/telemetryData'; -import { getVerbosityForModelSync } from '../common/chatModelCapabilities'; +import { getVerbosityForModelSync, isHiddenModelM } from '../common/chatModelCapabilities'; import { rawPartAsCompactionData } from '../common/compactionDataContainer'; import { rawPartAsPhaseData } from '../common/phaseDataContainer'; import { getIndexOfStatefulMarker, getStatefulMarkerAndIndex } from '../common/statefulMarkerContainer'; @@ -163,10 +163,13 @@ export function createResponsesRequestBody(accessor: ServicesAccessor, options: ? (effortFromSetting || options.modelCapabilities?.reasoningEffort || 'medium') : undefined; const summary = summaryConfig === 'off' || shouldDisableReasoningSummary ? undefined : summaryConfig; - if (effort || summary) { + const persistentCoTEnabled = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiPersistentCoTEnabled, expService) + && isHiddenModelM(endpoint); + if (effort || summary || persistentCoTEnabled) { body.reasoning = { ...(effort ? { effort } : {}), - ...(summary ? { summary } : {}) + ...(summary ? { summary } : {}), + ...(persistentCoTEnabled ? { context: 'all_turns' } : {}) }; } diff --git a/extensions/copilot/src/platform/endpoint/node/test/responsesApi.spec.ts b/extensions/copilot/src/platform/endpoint/node/test/responsesApi.spec.ts index 3ff4682167dd98..c8bd81d3882095 100644 --- a/extensions/copilot/src/platform/endpoint/node/test/responsesApi.spec.ts +++ b/extensions/copilot/src/platform/endpoint/node/test/responsesApi.spec.ts @@ -9,6 +9,7 @@ import { describe, expect, it } from 'vitest'; import { TokenizerType } from '../../../../util/common/tokenizer'; import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation'; import { ChatLocation } from '../../../chat/common/commonTypes'; +import { ConfigKey, IConfigurationService } from '../../../configuration/common/configurationService'; import { ILogService } from '../../../log/common/logService'; import { isOpenAIContextManagementResponse } from '../../../networking/common/fetch'; import { IChatEndpoint, ICreateEndpointBodyOptions } from '../../../networking/common/networking'; @@ -368,6 +369,59 @@ describe('responseApiInputToRawMessagesForLogging', () => { }); describe('createResponsesRequestBody', () => { + it('enables persistent CoT on initial requests for hidden model M when the experiment is enabled', () => { + const services = createPlatformServices(); + const accessor = services.createTestingAccessor(); + const instantiationService = accessor.get(IInstantiationService); + accessor.get(IConfigurationService).setConfig(ConfigKey.ResponsesApiPersistentCoTEnabled, true); + const endpoint = { ...testEndpoint, family: 'ember-alpha', supportsReasoningEffort: ['low', 'medium', 'high'] }; + + const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions([], false), endpoint.model, endpoint)); + + expect(body.reasoning).toEqual({ effort: 'medium', summary: 'detailed', context: 'all_turns' }); + + accessor.dispose(); + services.dispose(); + }); + + it('does not enable persistent CoT when the experiment is disabled or the family is unsupported', () => { + const services = createPlatformServices(); + const accessor = services.createTestingAccessor(); + const instantiationService = accessor.get(IInstantiationService); + const emberEndpoint = { ...testEndpoint, family: 'ember-alpha' }; + const unsupportedEndpoint = { ...testEndpoint, model: 'ember-alpha', family: 'other-family' }; + + const disabledBody = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions([], false), emberEndpoint.model, emberEndpoint)); + accessor.get(IConfigurationService).setConfig(ConfigKey.ResponsesApiPersistentCoTEnabled, true); + const unsupportedBody = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions([], false), unsupportedEndpoint.model, unsupportedEndpoint)); + + expect(disabledBody.reasoning?.context).toBeUndefined(); + expect(unsupportedBody.reasoning?.context).toBeUndefined(); + + accessor.dispose(); + services.dispose(); + }); + + it('keeps persistent CoT enabled when continuing from a previous response', () => { + const services = createPlatformServices(); + const accessor = services.createTestingAccessor(); + const instantiationService = accessor.get(IInstantiationService); + accessor.get(IConfigurationService).setConfig(ConfigKey.ResponsesApiPersistentCoTEnabled, true); + const endpoint = { ...testEndpoint, family: 'ember-alpha' }; + const messages: Raw.ChatMessage[] = [ + createStatefulMarkerMessage(endpoint.model, 'resp-prev'), + { role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'continue' }] }, + ]; + + const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions(messages, false), endpoint.model, endpoint)); + + expect(body.previous_response_id).toBe('resp-prev'); + expect(body.reasoning?.context).toBe('all_turns'); + + accessor.dispose(); + services.dispose(); + }); + it('extracts compaction threshold from request body context management', () => { expect(getResponsesApiCompactionThresholdFromBody({ context_management: [{ diff --git a/extensions/copilot/src/platform/networking/common/networking.ts b/extensions/copilot/src/platform/networking/common/networking.ts index fe01f95ee555ec..6af13b5dab9a89 100644 --- a/extensions/copilot/src/platform/networking/common/networking.ts +++ b/extensions/copilot/src/platform/networking/common/networking.ts @@ -75,7 +75,7 @@ export interface IEndpointBody { prediction?: Prediction; messages?: any[]; n?: number; - reasoning?: { effort?: string; summary?: string }; + reasoning?: { effort?: string; summary?: string; context?: 'current_turn' | 'all_turns' }; tool_choice?: OptionalChatRequestParams['tool_choice'] | { type: 'function'; name: string } | string; top_logprobs?: number; intent?: boolean;