Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions ts/config.sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ openAI:
# modelProvider: copilot
# copilot:
# defaultModel: claude-sonnet-4.5 # requires an authenticated `copilot` CLI
# # Optional: connect to an already-running Copilot CLI server over TCP
# # instead of spawning a CLI child per host. Eliminates the one-time
# # (multi-second) CLI startup latency. Format: "host:port" / "port".
# # Mutually exclusive with cliPath. Start the server with the Copilot CLI's
# # server/headless mode first, then point cliUrl at it.
# # cliUrl: 127.0.0.1:8123
# embedding:
# provider: local
# model: Xenova/all-MiniLM-L6-v2
Expand Down
228 changes: 217 additions & 11 deletions ts/packages/aiclient/src/copilotModels.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,76 @@ import {
CompleteUsageStatsCallback,
} from "./models.js";
import { CompletionUsageStats } from "./openai.js";
import { CopilotApiSettings } from "./copilotSettings.js";
import {
CopilotApiSettings,
copilotApiSettingsFromConfig,
} from "./copilotSettings.js";
import { TokenCounter } from "./tokenCounter.js";

const debug = registerDebug("typeagent:aiclient:copilot");
// Per-phase latency breakdown (client start / session create / send / total)
// and real model time vs. SDK/session overhead. Enable with
// DEBUG=typeagent:aiclient:copilot:timing.
const debugTiming = registerDebug("typeagent:aiclient:copilot:timing");
// The final, transformed prompt the CLI actually sends to the model (shows
// any XML wrapping / augmentation bloat added on top of our prompt). Enable
// with DEBUG=typeagent:aiclient:copilot:prompt.
const debugPrompt = registerDebug("typeagent:aiclient:copilot:prompt");

type CopilotSdkLogLevel =
| "none"
| "error"
| "warning"
| "info"
| "debug"
| "all";

// Optional passthrough to the SDK's own stderr timing logs. Set
// TYPEAGENT_COPILOT_SDK_LOG_LEVEL=debug (or all) to surface CLI-side timing.
function sdkLogLevel(): CopilotSdkLogLevel | undefined {
const v = process.env.TYPEAGENT_COPILOT_SDK_LOG_LEVEL?.trim().toLowerCase();
const valid = ["none", "error", "warning", "info", "debug", "all"];
return v && valid.includes(v) ? (v as CopilotSdkLogLevel) : undefined;
}

// Subscribe to the SDK's usage / final-prompt events so we can attribute the
// wall-clock time of a request to model latency vs. our own session overhead.
// Listeners are only attached when a diagnostic namespace is enabled, so this
// is a no-op on the hot path in production.
function attachDiagnostics(session: CopilotSession): {
getApiDurationMs: () => number | undefined;
dispose: () => void;
} {
let apiDurationMs: number | undefined;
if (!debugTiming.enabled && !debugPrompt.enabled) {
return { getApiDurationMs: () => undefined, dispose: () => {} };
}
const offUsage = session.on("assistant.usage", (event: any) => {
const d = event?.data;
apiDurationMs = d?.duration;
debugTiming(
`assistant.usage model=${d?.model} api=${d?.duration}ms ` +
`in=${d?.inputTokens} out=${d?.outputTokens} ` +
`cacheRead=${d?.cacheReadTokens} providerCallId=${d?.providerCallId}`,
);
});
const offUser = session.on("user.message", (event: any) => {
if (!debugPrompt.enabled) return;
const tc = event?.data?.transformedContent;
if (typeof tc === "string") {
debugPrompt(
`final transformedContent (${tc.length} chars):\n${tc}`,
);
}
});
return {
getApiDurationMs: () => apiDurationMs,
dispose: () => {
offUsage();
offUser();
},
};
}

// Replaces the Copilot CLI's default "terminal coding assistant" system
// prompt. In "append" mode the runtime still ships its full base persona
Expand All @@ -42,8 +108,20 @@ const TRANSLATION_SYSTEM_PROMPT =
let cachedClient: CopilotClient | undefined;
let startPromise: Promise<CopilotClient> | undefined;
let cachedCliPath: string | undefined;
let cachedCliUrl: string | undefined;
let exitHandlerInstalled = false;

export interface CopilotClientOptions {
/** Path to the copilot CLI binary. Ignored when `cliUrl` is set. */
cliPath?: string | undefined;
/**
* URL of an already-running Copilot CLI server ("host:port"). When set,
* the SDK connects over TCP instead of spawning a CLI child, avoiding
* process startup latency. Mutually exclusive with `cliPath`.
*/
cliUrl?: string | undefined;
}

function findCopilotPath(): string {
try {
const isWindows = process.platform === "win32";
Expand All @@ -58,26 +136,43 @@ function findCopilotPath(): string {
}
}

async function getClient(cliPathOverride?: string): Promise<CopilotClient> {
async function getClient(
options?: CopilotClientOptions,
): Promise<CopilotClient> {
if (cachedClient) return cachedClient;
if (startPromise) return startPromise;

const cliPath = cliPathOverride ?? cachedCliPath ?? findCopilotPath();
const cliUrl = options?.cliUrl ?? cachedCliUrl;
cachedCliUrl = cliUrl;
// When connecting to an external CLI server, cliPath is ignored (the two
// are mutually exclusive in the SDK).
const cliPath = cliUrl
? undefined
: (options?.cliPath ?? cachedCliPath ?? findCopilotPath());
cachedCliPath = cliPath;

startPromise = (async () => {
debug(`Starting CopilotClient at ${cliPath}`);
const client = new CopilotClient({ cliPath });
const target = cliUrl ? `server ${cliUrl}` : `CLI ${cliPath}`;
debug(`Starting CopilotClient (${target})`);
const tStart = Date.now();
const level = sdkLogLevel();
const client = new CopilotClient({
...(cliUrl ? { cliUrl } : { cliPath: cliPath! }),
...(level ? { logLevel: level } : {}),
});
try {
await client.start();
} catch (err) {
startPromise = undefined;
throw new Error(
`Failed to start GitHub Copilot CLI client at '${cliPath}'. ` +
`Ensure 'copilot' is installed and authenticated (try 'copilot auth login').\n` +
`Failed to start GitHub Copilot CLI client (${target}). ` +
(cliUrl
? `Ensure a Copilot CLI server is running and reachable at '${cliUrl}'.\n`
: `Ensure 'copilot' is installed and authenticated (try 'copilot auth login').\n`) +
`Underlying error: ${err instanceof Error ? err.message : String(err)}`,
);
}
debugTiming(`client.start ${Date.now() - tStart}ms`);
cachedClient = client;
if (!exitHandlerInstalled) {
exitHandlerInstalled = true;
Expand All @@ -97,9 +192,70 @@ async function getClient(cliPathOverride?: string): Promise<CopilotClient> {
* spawn two CLI children.
*/
export async function getCopilotClient(
cliPathOverride?: string,
options?: CopilotClientOptions | string,
): Promise<CopilotClient> {
return getClient(cliPathOverride);
return getClient(
typeof options === "string" ? { cliPath: options } : options,
);
}

/**
* Eagerly start (or connect to) the Copilot CLI so the first user-visible
* request doesn't pay the one-time spawn/startup cost (measured at several
* seconds cold). Safe to call multiple times — it reuses the singleton and
* swallows errors (a failed pre-warm just means the first real request will
* retry and surface the error normally).
*/
export async function warmupCopilotClient(
options?: CopilotClientOptions,
sessionConfig?: SessionConfig,
): Promise<void> {
let client: CopilotClient;
try {
client = await getClient(options);
debug("Copilot client pre-warmed");
} catch (err) {
debug(
`Copilot client pre-warm failed (will retry on first request): ${
err instanceof Error ? err.message : String(err)
}`,
);
return;
}
// Warm the CLI-side session subsystem with a throwaway session so the
// first real request doesn't pay the cold createSession cost (~1s+ on the
// first session of a process). Best-effort: session creation is auth-free
// (only sendAndWait needs auth), so this succeeds even when unauthenticated
// or pointed at an external server.
if (sessionConfig) {
try {
const t = Date.now();
const session = await client.createSession(sessionConfig);
await session.disconnect().catch(() => {});
debugTiming(`warm session ${Date.now() - t}ms`);
debug("Copilot session subsystem pre-warmed");
} catch (err) {
debug(
`Copilot session pre-warm skipped: ${
err instanceof Error ? err.message : String(err)
}`,
);
}
}
}

/**
* Pre-warm the Copilot client and session subsystem using the values from the
* active runtime config. Called at host startup (from runtimeConfig) when the
* provider is Copilot so the first user request avoids both the CLI spawn and
* the cold session-creation cost.
*/
export async function warmupCopilotFromConfig(): Promise<void> {
const settings = copilotApiSettingsFromConfig();
await warmupCopilotClient(
{ cliPath: settings.cliPath, cliUrl: settings.cliUrl },
buildSessionConfig(settings, {}, false),
);
}

function withAbortSignal<T>(
Expand Down Expand Up @@ -240,15 +396,28 @@ export function createCopilotChatModel(
? [{ role: "user", content: prompt }]
: prompt;
const promptText = renderPrompt(messages);
if (debugPrompt.enabled) {
debugPrompt(
`prompt to Copilot: ${promptText.length} chars ` +
`(~${estimateTokens(promptText)} tokens), model=${settings.modelName}`,
);
}

const tTotal = Date.now();
let client: CopilotClient;
const tClient = Date.now();
try {
client = await getClient(settings.cliPath);
client = await getClient({
cliPath: settings.cliPath,
cliUrl: settings.cliUrl,
});
} catch (err) {
return error(err instanceof Error ? err.message : String(err));
}
debugTiming(`getClient ${Date.now() - tClient}ms`);

let session: CopilotSession;
const tCreate = Date.now();
try {
session = await client.createSession(
buildSessionConfig(settings, completionSettings!, false),
Expand All @@ -258,13 +427,23 @@ export function createCopilotChatModel(
`Failed to create Copilot session: ${err instanceof Error ? err.message : String(err)}`,
);
}
debugTiming(`createSession ${Date.now() - tCreate}ms`);

const diag = attachDiagnostics(session);
try {
const tSend = Date.now();
const response: AssistantMessageEvent | undefined =
await withAbortSignal(
session.sendAndWait({ prompt: promptText }),
signal,
);
const sendMs = Date.now() - tSend;
const apiMs = diag.getApiDurationMs();
debugTiming(
`sendAndWait ${sendMs}ms ` +
`(model≈${apiMs ?? "?"}ms, ` +
`sdk/session overhead≈${apiMs !== undefined ? sendMs - apiMs : "?"}ms)`,
);
const text = response?.data?.content ?? "";
if (model.completionCallback) {
model.completionCallback(
Expand All @@ -286,7 +465,9 @@ export function createCopilotChatModel(
} catch (err) {
return error(err instanceof Error ? err.message : String(err));
} finally {
diag.dispose();
session.disconnect().catch(() => {});
debugTiming(`complete total ${Date.now() - tTotal}ms`);
}
}

Expand All @@ -302,15 +483,28 @@ export function createCopilotChatModel(
? [{ role: "user", content: prompt }]
: prompt;
const promptText = renderPrompt(messages);
if (debugPrompt.enabled) {
debugPrompt(
`stream prompt to Copilot: ${promptText.length} chars ` +
`(~${estimateTokens(promptText)} tokens), model=${settings.modelName}`,
);
}

const tTotal = Date.now();
let client: CopilotClient;
const tClient = Date.now();
try {
client = await getClient(settings.cliPath);
client = await getClient({
cliPath: settings.cliPath,
cliUrl: settings.cliUrl,
});
} catch (err) {
return error(err instanceof Error ? err.message : String(err));
}
debugTiming(`getClient ${Date.now() - tClient}ms`);

let session: CopilotSession;
const tCreate = Date.now();
try {
session = await client.createSession(
buildSessionConfig(settings, completionSettings!, true),
Expand All @@ -320,7 +514,9 @@ export function createCopilotChatModel(
`Failed to create Copilot session: ${err instanceof Error ? err.message : String(err)}`,
);
}
debugTiming(`createSession ${Date.now() - tCreate}ms`);

const diag = attachDiagnostics(session);
// Buffer streaming deltas; the consumer pulls from a queue.
const queue: Array<{ value?: string; done?: boolean; err?: Error }> =
[];
Expand Down Expand Up @@ -361,11 +557,19 @@ export function createCopilotChatModel(
},
);

const tSend = Date.now();
const completion = withAbortSignal(
session.sendAndWait({ prompt: promptText }),
signal,
)
.then((response) => {
const sendMs = Date.now() - tSend;
const apiMs = diag.getApiDurationMs();
debugTiming(
`sendAndWait(stream) ${sendMs}ms ` +
`(model≈${apiMs ?? "?"}ms, ` +
`sdk/session overhead≈${apiMs !== undefined ? sendMs - apiMs : "?"}ms)`,
);
if (model.completionCallback) {
model.completionCallback(
{ prompt: promptText, model: settings.modelName },
Expand Down Expand Up @@ -427,8 +631,10 @@ export function createCopilotChatModel(
cleaned = true;
unsubscribeDelta();
unsubscribeMessage();
diag.dispose();
await completion.catch(() => {});
await session.disconnect().catch(() => {});
debugTiming(`completeStream total ${Date.now() - tTotal}ms`);
}

return success(iterator);
Expand Down
2 changes: 2 additions & 0 deletions ts/packages/aiclient/src/copilotSettings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export type CopilotApiSettings = CommonApiSettings & {
modelType: ModelType.Chat;
modelName: string;
cliPath?: string;
cliUrl?: string;
reasoningEffort?: CopilotReasoningEffort;
disableInfiniteSessions: boolean;
};
Expand All @@ -29,6 +30,7 @@ export function copilotApiSettingsFromConfig(
endpoint: "copilot-cli",
modelName: resolvedModel,
...(copilot?.cliPath !== undefined ? { cliPath: copilot.cliPath } : {}),
...(copilot?.cliUrl !== undefined ? { cliUrl: copilot.cliUrl } : {}),
...(copilot?.reasoningEffort !== undefined
? { reasoningEffort: copilot.reasoningEffort }
: {}),
Expand Down
7 changes: 6 additions & 1 deletion ts/packages/aiclient/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@ export * as bing from "./bing.js";
export * from "./restClient.js";
export * from "./auth.js";
export * from "./tokenCounter.js";
export { getCopilotClient } from "./copilotModels.js";
export {
getCopilotClient,
warmupCopilotClient,
warmupCopilotFromConfig,
type CopilotClientOptions,
} from "./copilotModels.js";
export {
copilotApiSettingsFromConfig,
type CopilotApiSettings,
Expand Down
Loading
Loading