
Streaming extract <think> tags split up over multiple chunks #3007

@phemmer

Description

When pydantic-ai receives a streaming response from an OpenAI-compatible model, it does not extract the <think></think> tags into a separate ThinkingPart. The extraction works fine when the response is not streamed.

Example Code

import datetime

import pytest
from openai.types.chat import ChatCompletion, ChatCompletionMessage
from openai.types.chat.chat_completion import Choice
from pytest_httpx import HTTPXMock, IteratorStream

from pydantic_ai import Agent
from pydantic_ai.messages import TextPart, ThinkingPart
from pydantic_ai.models.openai import OpenAIChatModel


@pytest.mark.asyncio
async def test_thinking_nostream():
    agent = Agent(
        model=OpenAIChatModel('', provider='litellm')
    )

    async def fake_completions_create(*args, **kwargs) -> ChatCompletion:
        return ChatCompletion(
            id="abcd",
            choices=[Choice(
                finish_reason="stop",
                index=0,
                message=ChatCompletionMessage(
                    content="<think>\nthinking content</think>\nNormal content.",
                    role="assistant",
                )
            )],
            created=int(datetime.datetime.now().timestamp()),
            model="abcd",
            object="chat.completion",
        )

    agent.model._completions_create = fake_completions_create
    result = await agent.run("asdf")

    msgs = result.new_messages()
    parts = msgs[-1].parts
    assert isinstance(parts[0], ThinkingPart)
    assert parts[0].content.strip() == "thinking content"
    assert isinstance(parts[1], TextPart)
    assert parts[1].content.strip() == "Normal content."

@pytest.mark.asyncio
async def test_thinking_stream(httpx_mock: HTTPXMock):
    agent = Agent(
        model=OpenAIChatModel('', provider='litellm')
    )

    chunks = [
        '''data: {"id":"asdf","created":1234567890,"model":"asdf","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant","content":"<"}}]}''',
        '''data: {"id":"asdf","created":1234567890,"model":"asdf","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant","content":"think>\\nthinking content"}}]}''',
        '''data: {"id":"asdf","created":1234567890,"model":"asdf","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant","content":"</think>\\nNormal content."}}]}''',
        '''data: {"id":"asdf","created":1234567890,"model":"asdf","object":"chat.completion.chunk","choices":[{"index":0,"finish_reason":"stop","delta":{}}]}''',
        '''data: {"id":"asdf","created":1234567890,"model":"asdf","object":"chat.completion.chunk","choices":[{"index":0,"delta":{}}],"usage":{"completion_tokens":123,"prompt_tokens":1234,"total_tokens":123,"completion_tokens_details":{"reasoning_tokens":0},"prompt_tokens_details":{"cached_tokens":123,"text_tokens":123}}}''',
        '''data: [DONE]'''
    ]
    httpx_mock.add_response(
        url="https://api.openai.com/v1/chat/completions",
        method="POST",
        stream=IteratorStream([(c + "\n\n").encode() for c in chunks]),
        headers={'Content-Type': 'text/event-stream'},
    )


    async def event_stream_handler(ctx, stream):
        pass

    result = await agent.run("asdf", event_stream_handler=event_stream_handler)

    msgs = result.new_messages()
    parts = msgs[-1].parts
    assert isinstance(parts[0], ThinkingPart)
    assert parts[0].content.strip() == "thinking content"
    assert isinstance(parts[1], TextPart)
    assert parts[1].content.strip() == "Normal content."

test_thinking_nostream passes
test_thinking_stream fails with:

>       assert isinstance(parts[0], ThinkingPart)
E       AssertionError: assert False
E        +  where False = isinstance(TextPart(content='<think>\nthinking content</think>\nNormal content.'), ThinkingPart)

(Yes, the two tests don't mock the response the same way, but that's immaterial to the <think> extraction, which happens afterwards. I wrote test_thinking_nostream first, and when it didn't reproduce the issue I had to move to streaming, where mocking isn't nearly as simple.)
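For what it's worth, the mocked chunks above show why the streaming path is harder: the opening tag itself is split across deltas ("<" in one chunk, "think>" in the next), so per-chunk matching never sees a complete tag. Below is a minimal, hypothetical sketch (not pydantic-ai's actual code; the names are made up) of the buffering a streaming splitter needs to handle that case:

THINK_OPEN, THINK_CLOSE = "<think>", "</think>"

def split_streamed_think(deltas):
    """Yield ("thinking", text) / ("text", text) events from streamed text deltas,
    holding back any buffer suffix that could be the start of a split tag."""
    buf = ""
    in_think = False
    for delta in deltas:
        buf += delta
        while buf:
            tag = THINK_CLOSE if in_think else THINK_OPEN
            idx = buf.find(tag)
            if idx != -1:
                # Complete tag in the buffer: emit what precedes it and flip modes.
                if idx:
                    yield ("thinking" if in_think else "text", buf[:idx])
                buf = buf[idx + len(tag):]
                in_think = not in_think
                continue
            # No complete tag: hold back a suffix that could be a partial tag
            # (e.g. "<" or "</th") and emit the rest now.
            hold = 0
            for k in range(min(len(tag) - 1, len(buf)), 0, -1):
                if tag.startswith(buf[-k:]):
                    hold = k
                    break
            emit, buf = buf[:len(buf) - hold], buf[len(buf) - hold:]
            if emit:
                yield ("thinking" if in_think else "text", emit)
            break
    if buf:
        # Stream ended; flush whatever is left in the current mode.
        yield ("thinking" if in_think else "text", buf)

Fed the deltas from the mocked chunks above ("<", "think>\nthinking content", "</think>\nNormal content."), this yields ("thinking", "\nthinking content") followed by ("text", "\nNormal content."), which is the split the streaming code path currently misses.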

Python, Pydantic AI & LLM client version

Python 3.13.3
pydantic-ai 1.0.10

Labels

bug (Something isn't working)