Skip to content

Commit a9d95b4

Browse files
authored
Enable previous_response_id chaining for internal calls on the first turn (#2117)
1 parent (509ddda) · commit a9d95b4

File tree

3 files changed

+234
-37
lines changed

3 files changed

+234
-37
lines changed

docs/running_agents.md

Lines changed: 22 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -143,25 +143,16 @@ from openai import AsyncOpenAI
143143
client = AsyncOpenAI()
144144

145145
async def main():
146+
agent = Agent(name="Assistant", instructions="Reply very concisely.")
147+
146148
# Create a server-managed conversation
147149
conversation = await client.conversations.create()
148-
conv_id = conversation.id
150+
conv_id = conversation.id
149151

150-
agent = Agent(name="Assistant", instructions="Reply very concisely.")
151-
152-
# First turn
153-
result1 = await Runner.run(agent, "What city is the Golden Gate Bridge in?", conversation_id=conv_id)
154-
print(result1.final_output)
155-
# San Francisco
156-
157-
# Second turn reuses the same conversation_id
158-
result2 = await Runner.run(
159-
agent,
160-
"What state is it in?",
161-
conversation_id=conv_id,
162-
)
163-
print(result2.final_output)
164-
# California
152+
while True:
153+
user_input = input("You: ")
154+
result = await Runner.run(agent, user_input, conversation_id=conv_id)
155+
print(f"Assistant: {result.final_output}")
165156
```
166157

167158
#### 2. Using `previous_response_id`
@@ -174,22 +165,23 @@ from agents import Agent, Runner
174165
async def main():
175166
agent = Agent(name="Assistant", instructions="Reply very concisely.")
176167

177-
# First turn
178-
result1 = await Runner.run(agent, "What city is the Golden Gate Bridge in?")
179-
print(result1.final_output)
180-
# San Francisco
181-
182-
# Second turn, chained to the previous response
183-
result2 = await Runner.run(
184-
agent,
185-
"What state is it in?",
186-
previous_response_id=result1.last_response_id,
187-
)
188-
print(result2.final_output)
189-
# California
168+
previous_response_id = None
169+
170+
while True:
171+
user_input = input("You: ")
172+
173+
# Setting auto_previous_response_id=True enables response chaining automatically
174+
# for the first turn, even when there's no actual previous response ID yet.
175+
result = await Runner.run(
176+
agent,
177+
user_input,
178+
previous_response_id=previous_response_id,
179+
auto_previous_response_id=True,
180+
)
181+
previous_response_id = result.last_response_id
182+
print(f"Assistant: {result.final_output}")
190183
```
191184

192-
193185
## Long running agents & human-in-the-loop
194186

195187
You can use the Agents SDK [Temporal](https://temporal.io/) integration to run durable, long-running workflows, including human-in-the-loop tasks. View a demo of Temporal and the Agents SDK working in action to complete long-running tasks [in this video](https://www.youtube.com/watch?v=fFBZqzT4DD8), and [view docs here](https://github.com/temporalio/sdk-python/tree/main/temporalio/contrib/openai_agents).

src/agents/run.py

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -129,21 +129,26 @@ class CallModelData(Generic[TContext]):
129129
@dataclass
130130
class _ServerConversationTracker:
131131
"""Tracks server-side conversation state for either conversation_id or
132-
previous_response_id modes."""
132+
previous_response_id modes.
133+
134+
Note: When auto_previous_response_id=True is used, response chaining is enabled
135+
automatically for the first turn, even when there's no actual previous response ID yet.
136+
"""
133137

134138
conversation_id: str | None = None
135139
previous_response_id: str | None = None
140+
auto_previous_response_id: bool = False
136141
sent_items: set[int] = field(default_factory=set)
137142
server_items: set[int] = field(default_factory=set)
138143

139144
def track_server_items(self, model_response: ModelResponse) -> None:
140145
for output_item in model_response.output:
141146
self.server_items.add(id(output_item))
142147

143-
# Update previous_response_id only when using previous_response_id
148+
# Update previous_response_id when using previous_response_id mode or auto mode
144149
if (
145150
self.conversation_id is None
146-
and self.previous_response_id is not None
151+
and (self.previous_response_id is not None or self.auto_previous_response_id)
147152
and model_response.response_id is not None
148153
):
149154
self.previous_response_id = model_response.response_id
@@ -284,6 +289,9 @@ class RunOptions(TypedDict, Generic[TContext]):
284289
previous_response_id: NotRequired[str | None]
285290
"""The ID of the previous response, if any."""
286291

292+
auto_previous_response_id: NotRequired[bool]
293+
"""Enable automatic response chaining for the first turn."""
294+
287295
conversation_id: NotRequired[str | None]
288296
"""The ID of the stored conversation, if any."""
289297

@@ -303,6 +311,7 @@ async def run(
303311
hooks: RunHooks[TContext] | None = None,
304312
run_config: RunConfig | None = None,
305313
previous_response_id: str | None = None,
314+
auto_previous_response_id: bool = False,
306315
conversation_id: str | None = None,
307316
session: Session | None = None,
308317
) -> RunResult:
@@ -363,6 +372,7 @@ async def run(
363372
hooks=hooks,
364373
run_config=run_config,
365374
previous_response_id=previous_response_id,
375+
auto_previous_response_id=auto_previous_response_id,
366376
conversation_id=conversation_id,
367377
session=session,
368378
)
@@ -378,6 +388,7 @@ def run_sync(
378388
hooks: RunHooks[TContext] | None = None,
379389
run_config: RunConfig | None = None,
380390
previous_response_id: str | None = None,
391+
auto_previous_response_id: bool = False,
381392
conversation_id: str | None = None,
382393
session: Session | None = None,
383394
) -> RunResult:
@@ -438,6 +449,7 @@ def run_sync(
438449
previous_response_id=previous_response_id,
439450
conversation_id=conversation_id,
440451
session=session,
452+
auto_previous_response_id=auto_previous_response_id,
441453
)
442454

443455
@classmethod
@@ -450,6 +462,7 @@ def run_streamed(
450462
hooks: RunHooks[TContext] | None = None,
451463
run_config: RunConfig | None = None,
452464
previous_response_id: str | None = None,
465+
auto_previous_response_id: bool = False,
453466
conversation_id: str | None = None,
454467
session: Session | None = None,
455468
) -> RunResultStreaming:
@@ -505,6 +518,7 @@ def run_streamed(
505518
hooks=hooks,
506519
run_config=run_config,
507520
previous_response_id=previous_response_id,
521+
auto_previous_response_id=auto_previous_response_id,
508522
conversation_id=conversation_id,
509523
session=session,
510524
)
@@ -527,14 +541,23 @@ async def run(
527541
hooks = cast(RunHooks[TContext], self._validate_run_hooks(kwargs.get("hooks")))
528542
run_config = kwargs.get("run_config")
529543
previous_response_id = kwargs.get("previous_response_id")
544+
auto_previous_response_id = kwargs.get("auto_previous_response_id", False)
530545
conversation_id = kwargs.get("conversation_id")
531546
session = kwargs.get("session")
547+
532548
if run_config is None:
533549
run_config = RunConfig()
534550

535-
if conversation_id is not None or previous_response_id is not None:
551+
# Check whether to enable OpenAI server-managed conversation
552+
if (
553+
conversation_id is not None
554+
or previous_response_id is not None
555+
or auto_previous_response_id
556+
):
536557
server_conversation_tracker = _ServerConversationTracker(
537-
conversation_id=conversation_id, previous_response_id=previous_response_id
558+
conversation_id=conversation_id,
559+
previous_response_id=previous_response_id,
560+
auto_previous_response_id=auto_previous_response_id,
538561
)
539562
else:
540563
server_conversation_tracker = None
@@ -773,6 +796,7 @@ def run_sync(
773796
hooks = kwargs.get("hooks")
774797
run_config = kwargs.get("run_config")
775798
previous_response_id = kwargs.get("previous_response_id")
799+
auto_previous_response_id = kwargs.get("auto_previous_response_id", False)
776800
conversation_id = kwargs.get("conversation_id")
777801
session = kwargs.get("session")
778802

@@ -819,6 +843,7 @@ def run_sync(
819843
hooks=hooks,
820844
run_config=run_config,
821845
previous_response_id=previous_response_id,
846+
auto_previous_response_id=auto_previous_response_id,
822847
conversation_id=conversation_id,
823848
)
824849
)
@@ -852,6 +877,7 @@ def run_streamed(
852877
hooks = cast(RunHooks[TContext], self._validate_run_hooks(kwargs.get("hooks")))
853878
run_config = kwargs.get("run_config")
854879
previous_response_id = kwargs.get("previous_response_id")
880+
auto_previous_response_id = kwargs.get("auto_previous_response_id", False)
855881
conversation_id = kwargs.get("conversation_id")
856882
session = kwargs.get("session")
857883

@@ -907,6 +933,7 @@ def run_streamed(
907933
context_wrapper=context_wrapper,
908934
run_config=run_config,
909935
previous_response_id=previous_response_id,
936+
auto_previous_response_id=auto_previous_response_id,
910937
conversation_id=conversation_id,
911938
session=session,
912939
)
@@ -1035,6 +1062,7 @@ async def _start_streaming(
10351062
context_wrapper: RunContextWrapper[TContext],
10361063
run_config: RunConfig,
10371064
previous_response_id: str | None,
1065+
auto_previous_response_id: bool,
10381066
conversation_id: str | None,
10391067
session: Session | None,
10401068
):
@@ -1047,9 +1075,16 @@ async def _start_streaming(
10471075
should_run_agent_start_hooks = True
10481076
tool_use_tracker = AgentToolUseTracker()
10491077

1050-
if conversation_id is not None or previous_response_id is not None:
1078+
# Check whether to enable OpenAI server-managed conversation
1079+
if (
1080+
conversation_id is not None
1081+
or previous_response_id is not None
1082+
or auto_previous_response_id
1083+
):
10511084
server_conversation_tracker = _ServerConversationTracker(
1052-
conversation_id=conversation_id, previous_response_id=previous_response_id
1085+
conversation_id=conversation_id,
1086+
previous_response_id=previous_response_id,
1087+
auto_previous_response_id=auto_previous_response_id,
10531088
)
10541089
else:
10551090
server_conversation_tracker = None
@@ -1376,6 +1411,7 @@ async def _run_single_turn_streamed(
13761411
previous_response_id = (
13771412
server_conversation_tracker.previous_response_id
13781413
if server_conversation_tracker
1414+
and server_conversation_tracker.previous_response_id is not None
13791415
else None
13801416
)
13811417
conversation_id = (
@@ -1814,6 +1850,7 @@ async def _get_new_response(
18141850
previous_response_id = (
18151851
server_conversation_tracker.previous_response_id
18161852
if server_conversation_tracker
1853+
and server_conversation_tracker.previous_response_id is not None
18171854
else None
18181855
)
18191856
conversation_id = (

0 commit comments

Comments (0)