6 changes: 6 additions & 0 deletions .changeset/neat-ligers-notice.md
@@ -0,0 +1,6 @@
---
"@openai/agents-core": patch
"@openai/agents-openai": patch
---

feat: track token usage while streaming responses for OpenAI models
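With this change, cumulative token usage is observable while a streamed run is still in flight rather than only after it finishes. A minimal consumer-side sketch, assuming the `Agent`/`Runner` API and event shapes exercised by the tests in this PR (the agent name, prompt, and trimmed-down config are placeholders):

```ts
import { Agent, Runner } from '@openai/agents-core';

// Placeholder agent; real config (model, tools, instructions) trimmed.
const agent = new Agent({ name: 'Assistant' });
const runner = new Runner();

const result = await runner.run(agent, 'Summarize the report', {
  stream: true,
});

for await (const event of result.toStream()) {
  // Each completed model response now folds its token counts into the run
  // state immediately, instead of leaving usage stale until the stream ends.
  if (
    event.type === 'raw_model_stream_event' &&
    event.data.type === 'response_done'
  ) {
    console.log('tokens so far:', result.state.usage.totalTokens);
  }
}
await result.completed;
```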
1 change: 0 additions & 1 deletion packages/agents-core/src/index.ts
@@ -136,7 +136,6 @@ export {
FunctionTool,
FunctionToolResult,
Tool,
ToolOptions,
tool,
ToolExecuteArgument,
ToolEnabledFunction,
1 change: 1 addition & 0 deletions packages/agents-core/src/run.ts
@@ -1183,6 +1183,7 @@ export class Runner extends RunHooks<any, AgentOutputType<unknown>> {
output: parsed.response.output,
responseId: parsed.response.id,
};
result.state._context.usage.add(finalResponse.usage);
}
if (result.cancelled) {
// When the user's code exits a loop to consume the stream, we need to break
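The one added line is the core of the feature: as soon as a streamed `response_done` event is parsed into a `ModelResponse`, its usage is folded into the run context instead of being deferred. A rough sketch of the accumulation semantics the tests below rely on; this is illustrative only, not the library source, with field names mirrored from the test expectations:

```ts
// Illustrative sketch of a cumulative usage accumulator (not the real class).
interface RequestUsage {
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
}

class Usage {
  requests = 0;
  inputTokens = 0;
  outputTokens = 0;
  totalTokens = 0;
  // Per-request breakdown; the tests assert two entries after two model calls.
  requestUsageEntries: RequestUsage[] = [];

  // Fold one response's usage into the running totals.
  add(other: RequestUsage): void {
    this.requests += 1;
    this.inputTokens += other.inputTokens;
    this.outputTokens += other.outputTokens;
    this.totalTokens += other.totalTokens;
    this.requestUsageEntries.push({ ...other });
  }
}
```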
3 changes: 1 addition & 2 deletions packages/agents-core/src/runContext.ts
@@ -18,8 +18,7 @@ export class RunContext<TContext = UnknownContext> {
context: TContext;

/**
* The usage of the agent run so far. For streamed responses, the usage will be stale until the
* last chunk of the stream is processed.
* The usage of the agent run so far. For streamed responses, the usage is updated in real time as each model response completes.
*/
usage: Usage;

175 changes: 175 additions & 0 deletions packages/agents-core/test/run.stream.test.ts
@@ -210,6 +210,181 @@ describe('Runner.run (streaming)', () => {
expect(runnerEndEvents[0].output).toBe('Final output');
});

it('updates cumulative usage during streaming responses', async () => {
const testTool = tool({
name: 'calculator',
description: 'Does math',
parameters: z.object({ value: z.number() }),
execute: async ({ value }) => `result: ${value * 2}`,
});

const firstResponse: ModelResponse = {
output: [
{
type: 'function_call',
id: 'fc_1',
callId: 'call_1',
name: 'calculator',
status: 'completed',
arguments: JSON.stringify({ value: 5 }),
} as protocol.FunctionCallItem,
],
usage: new Usage({ inputTokens: 10, outputTokens: 5, totalTokens: 15 }),
};

const secondResponse: ModelResponse = {
output: [fakeModelMessage('The answer is 10')],
usage: new Usage({ inputTokens: 20, outputTokens: 10, totalTokens: 30 }),
};

class MultiTurnStreamingModel implements Model {
#callCount = 0;

async getResponse(_req: ModelRequest): Promise<ModelResponse> {
const current = this.#callCount++;
return current === 0 ? firstResponse : secondResponse;
}

async *getStreamedResponse(
req: ModelRequest,
): AsyncIterable<StreamEvent> {
const response = await this.getResponse(req);
yield {
type: 'response_done',
response: {
id: `r_${this.#callCount}`,
usage: {
requests: 1,
inputTokens: response.usage.inputTokens,
outputTokens: response.usage.outputTokens,
totalTokens: response.usage.totalTokens,
},
output: response.output,
},
} as any;
}
}

const agent = new Agent({
name: 'UsageTracker',
model: new MultiTurnStreamingModel(),
tools: [testTool],
});

const runner = new Runner();
const result = await runner.run(agent, 'calculate', { stream: true });

const totals: number[] = [];
for await (const event of result.toStream()) {
if (
event.type === 'raw_model_stream_event' &&
event.data.type === 'response_done'
) {
totals.push(result.state.usage.totalTokens);
}
}
await result.completed;

expect(totals).toEqual([15, 45]);
expect(result.state.usage.inputTokens).toBe(30);
expect(result.state.usage.outputTokens).toBe(15);
expect(result.state.usage.requestUsageEntries?.length).toBe(2);
expect(result.finalOutput).toBe('The answer is 10');
});
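The `[15, 45]` totals are the point of the test: the run context already shows 15 total tokens when the tool-call response completes, then 15 + 30 = 45 after the final message, and `requestUsageEntries` records one entry per model request.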

it('allows aborting a stream based on cumulative usage', async () => {
const testTool = tool({
name: 'expensive',
description: 'Uses lots of tokens',
parameters: z.object({}),
execute: async () => 'expensive result',
});

const responses: ModelResponse[] = [
{
output: [
{
type: 'function_call',
id: 'fc_1',
callId: 'call_1',
name: 'expensive',
status: 'completed',
arguments: '{}',
} as protocol.FunctionCallItem,
],
usage: new Usage({
inputTokens: 5000,
outputTokens: 2000,
totalTokens: 7000,
}),
},
{
output: [fakeModelMessage('continuing...')],
usage: new Usage({
inputTokens: 6000,
outputTokens: 3000,
totalTokens: 9000,
}),
},
];

class ExpensiveStreamingModel implements Model {
#callCount = 0;

async getResponse(_req: ModelRequest): Promise<ModelResponse> {
return responses[this.#callCount++] ?? responses[responses.length - 1];
}

async *getStreamedResponse(
req: ModelRequest,
): AsyncIterable<StreamEvent> {
const response = await this.getResponse(req);
yield {
type: 'response_done',
response: {
id: `r_${this.#callCount}`,
usage: {
requests: 1,
inputTokens: response.usage.inputTokens,
outputTokens: response.usage.outputTokens,
totalTokens: response.usage.totalTokens,
},
output: response.output,
},
} as any;
}
}

const agent = new Agent({
name: 'ExpensiveAgent',
model: new ExpensiveStreamingModel(),
tools: [testTool],
});

const runner = new Runner();
const result = await runner.run(agent, 'do expensive work', {
stream: true,
});

const MAX_TOKENS = 10_000;
let aborted = false;

for await (const event of result.toStream()) {
if (
event.type === 'raw_model_stream_event' &&
event.data.type === 'response_done' &&
result.state.usage.totalTokens > MAX_TOKENS
) {
aborted = true;
break;
}
}

expect(aborted).toBe(true);
expect(result.state.usage.totalTokens).toBe(16_000);
expect(result.finalOutput).toBeUndefined();
});
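Two details are worth noting here: the 10,000-token cap is only crossed after the second response (7,000 + 9,000 = 16,000), and breaking out of the `for await` loop exercises the `result.cancelled` handling in `run.ts` shown above, which is why `finalOutput` stays undefined.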

it('streams tool_called before the tool finishes executing', async () => {
let releaseTool: (() => void) | undefined;
const toolExecuted = vi.fn();
1 change: 1 addition & 0 deletions packages/agents-openai/src/openaiChatCompletionsModel.ts
@@ -344,6 +344,7 @@ export class OpenAIChatCompletionsModel implements Model {
response_format: responseFormat,
parallel_tool_calls: parallelToolCalls,
stream,
stream_options: stream ? { include_usage: true } : undefined,
store: request.modelSettings.store,
prompt_cache_retention: request.modelSettings.promptCacheRetention,
...providerData,
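For context, `stream_options: { include_usage: true }` is the Chat Completions flag that tells the API to report token counts for a streamed request at all; without it, streamed chat completions carry no usage data. A sketch of the raw behavior with the `openai` Node SDK (model and prompt are placeholders):

```ts
import OpenAI from 'openai';

const client = new OpenAI();

const stream = await client.chat.completions.create({
  model: 'gpt-4o-mini',
  messages: [{ role: 'user', content: 'Hello!' }],
  stream: true,
  stream_options: { include_usage: true },
});

for await (const chunk of stream) {
  // Intermediate chunks report usage: null; the final chunk carries totals.
  if (chunk.usage) {
    console.log('prompt tokens:', chunk.usage.prompt_tokens);
    console.log('completion tokens:', chunk.usage.completion_tokens);
    console.log('total tokens:', chunk.usage.total_tokens);
  }
}
```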