From dba82b4aad159c3997e0bff4f8538e357eda2581 Mon Sep 17 00:00:00 2001
From: Daniel Miller
Date: Mon, 3 Nov 2025 18:11:20 -0500
Subject: [PATCH] Add better tracing for sync_provider

---
 pyproject.toml                              |   6 +-
 requirements-dev.lock                       |  40 +++---
 requirements.lock                           |  39 +++---
 .../adk/providers/_modules/sync_provider.py | 126 +++++++++++++++---
 4 files changed, 148 insertions(+), 63 deletions(-)
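
Notes for reviewers (placed above the first "diff --git" line, so git am and
git apply ignore them):

* _serialize_item() prefers model_dump() and only falls back to scanning
  public attributes when the item is not a Pydantic model. A rough sketch of
  that fallback path -- FakeItem is a made-up stand-in, not an SDK type:

      class FakeItem:
          type = "message"
          status = "completed"

          def render(self):  # callables are skipped by the attribute scan
              return "hi"

      print(_serialize_item(FakeItem()))
      # -> {'status': 'completed', 'type': 'message'}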
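
* stream_response() now pairs an explicit trace.start_span() with
  trace.end_span() in a finally block instead of wrapping the generator body
  in async-with. The span has to stay open across every yield, and the
  finally guarantees end_span() runs even when the consumer abandons the
  stream early (GeneratorExit propagates through the yield and still triggers
  it). A minimal sketch of the pattern, assuming only the tracer calls this
  diff already uses and that span.output may be assigned before end_span():

      async def traced_stream(trace, events):
          span = await trace.start_span(name="run_agent_streamed", input={})
          try:
              async for event in events:
                  yield event
              # Reached only when the consumer drains the stream, so an
              # abandoned run ends the span without structured output.
              span.output = {"new_items": [], "final_output": None}
          finally:
              await trace.end_span(span)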
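
* After this change, get_response() and stream_response() record the same
  span.output shape: the serialized output items plus the extracted assistant
  text. An illustrative payload (values invented; the content-part layout
  only needs to satisfy the 'text' in content_part membership check):

      span.output = {
          "new_items": [
              {
                  "type": "message",
                  "role": "assistant",
                  "content": [{"type": "output_text", "text": "Hello!"}],
              }
          ],
          "final_output": "Hello!",
      }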
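
* On the replaced event check: at this layer the wrapped model yields raw
  Responses-API stream events with dotted types such as response.created and
  response.output_item.done; as far as I can tell, 'raw_response_event' is
  the wrapper event type emitted one layer up by Runner.run_streamed(), so
  the old condition never matched here. The new code keys off completed
  output items instead (handle_item below is a hypothetical callback, not
  part of this patch):

      async for event in stream_response:
          if getattr(event, "type", None) == "response.output_item.done":
              handle_item(event.item)  # a completed message, tool call, etc.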

diff --git a/pyproject.toml b/pyproject.toml
index ab86a5fd..8f9ca01d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ dependencies = [
     "pyyaml>=6.0.2,<7",
     "jsonschema>=4.23.0,<5",
     "jsonref>=1.1.0,<2",
-    "temporalio>=1.10.0,<2",
+    "temporalio>=1.18.2,<2",
     "aiohttp>=3.10.10,<4",
     "redis>=5.2.0,<6",
     "litellm>=1.66.0,<2",
@@ -32,7 +32,7 @@ dependencies = [
     "jinja2>=3.1.3,<4",
     "mcp[cli]>=1.4.1",
     "scale-gp>=0.1.0a59",
-    "openai-agents==0.2.7", # 0.2.3 bug - https://github.com/openai/openai-agents-python/issues/1276
+    "openai-agents==0.4.2",
     "tzlocal>=5.3.1",
     "tzdata>=2025.2",
     "pytest>=8.4.0",
@@ -40,7 +40,7 @@ dependencies = [
     "pytest-asyncio>=1.0.0",
     "scale-gp-beta==0.1.0a20",
     "ipykernel>=6.29.5",
-    "openai==1.99.9", # anything higher than 1.99.9 breaks litellm - https://github.com/BerriAI/litellm/issues/13711
+    "openai>=2.2,<3", # Required by openai-agents 0.4.2; litellm now supports openai 2.x (issue #13711 resolved: https://github.com/BerriAI/litellm/issues/13711)
     "cloudpickle>=3.1.1",
     "datadog>=0.52.1",
     "ddtrace>=3.13.0"
diff --git a/requirements-dev.lock b/requirements-dev.lock
index a282ef7f..31ac0a65 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -113,9 +113,9 @@ httpx==0.27.2
     # via mcp
     # via openai
     # via respx
-httpx-aiohttp==0.1.9
     # via scale-gp
     # via scale-gp-beta
+httpx-aiohttp==0.1.9
     # via agentex-sdk
 httpx-sse==0.4.1
     # via mcp
@@ -191,11 +191,11 @@ nox==2023.4.22
 oauthlib==3.3.1
     # via kubernetes
     # via requests-oauthlib
-openai==1.99.9
+openai==2.7.1
     # via agentex-sdk
     # via litellm
     # via openai-agents
-openai-agents==0.2.7
+openai-agents==0.4.2
     # via agentex-sdk
 opentelemetry-api==1.37.0
     # via ddtrace
@@ -219,20 +219,6 @@ prompt-toolkit==3.0.51
 propcache==0.3.1
     # via aiohttp
     # via yarl
-pydantic==2.11.9
-    # via agentex-sdk
-    # via agentex-sdk
-    # via fastapi
-    # via litellm
-    # via mcp
-    # via openai
-    # via openai-agents
-    # via pydantic-settings
-    # via python-on-whales
-    # via scale-gp
-    # via scale-gp-beta
-pydantic-core==2.33.2
-    # via pydantic
 protobuf==5.29.5
     # via ddtrace
     # via temporalio
@@ -247,6 +233,19 @@ pyasn1==0.6.1
     # via rsa
 pyasn1-modules==0.4.2
     # via google-auth
+pydantic==2.11.9
+    # via agentex-sdk
+    # via fastapi
+    # via litellm
+    # via mcp
+    # via openai
+    # via openai-agents
+    # via pydantic-settings
+    # via python-on-whales
+    # via scale-gp
+    # via scale-gp-beta
+pydantic-core==2.33.2
+    # via pydantic
 pydantic-settings==2.10.1
     # via mcp
 pygments==2.18.0
@@ -340,7 +339,7 @@ stack-data==0.6.3
 starlette==0.46.2
     # via fastapi
     # via mcp
-temporalio==1.15.0
+temporalio==1.18.2
     # via agentex-sdk
 tiktoken==0.11.0
     # via litellm
@@ -383,9 +382,6 @@ typing-extensions==4.12.2
     # via pydantic
     # via pydantic-core
     # via pyright
-    # via typing-inspection
-typing-inspection==0.4.1
-    # via pydantic
     # via python-on-whales
     # via referencing
     # via scale-gp
@@ -393,6 +389,8 @@
     # via temporalio
     # via typer
     # via typing-inspection
+typing-inspection==0.4.1
+    # via pydantic
     # via pydantic-settings
 tzdata==2025.2
     # via agentex-sdk
diff --git a/requirements.lock b/requirements.lock
index 81d91576..b5f77614 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -99,12 +99,12 @@ httpcore==1.0.9
 httpx==0.27.2
     # via agentex-sdk
     # via httpx-aiohttp
-httpx-aiohttp==0.1.9
     # via litellm
     # via mcp
     # via openai
     # via scale-gp
     # via scale-gp-beta
+httpx-aiohttp==0.1.9
     # via agentex-sdk
 httpx-sse==0.4.1
     # via mcp
@@ -174,11 +174,11 @@ nexus-rpc==1.1.0
 oauthlib==3.3.1
     # via kubernetes
     # via requests-oauthlib
-openai==1.99.9
+openai==2.7.1
     # via agentex-sdk
     # via litellm
     # via openai-agents
-openai-agents==0.2.7
+openai-agents==0.4.2
     # via agentex-sdk
 opentelemetry-api==1.37.0
     # via ddtrace
@@ -200,19 +200,6 @@ prompt-toolkit==3.0.51
 propcache==0.3.1
     # via aiohttp
     # via yarl
-pydantic==2.11.9
-    # via agentex-sdk
-    # via fastapi
-    # via litellm
-    # via mcp
-    # via openai
-    # via openai-agents
-    # via pydantic-settings
-    # via python-on-whales
-    # via scale-gp
-    # via scale-gp-beta
-pydantic-core==2.33.2
-    # via pydantic
 protobuf==5.29.5
     # via ddtrace
     # via temporalio
@@ -227,6 +214,19 @@ pyasn1==0.6.1
     # via rsa
 pyasn1-modules==0.4.2
     # via google-auth
+pydantic==2.11.9
+    # via agentex-sdk
+    # via fastapi
+    # via litellm
+    # via mcp
+    # via openai
+    # via openai-agents
+    # via pydantic-settings
+    # via python-on-whales
+    # via scale-gp
+    # via scale-gp-beta
+pydantic-core==2.33.2
+    # via pydantic
 pydantic-settings==2.10.1
     # via mcp
 pygments==2.19.2
@@ -311,7 +311,7 @@ stack-data==0.6.3
 starlette==0.46.2
     # via fastapi
     # via mcp
-temporalio==1.15.0
+temporalio==1.18.2
     # via agentex-sdk
 tiktoken==0.11.0
     # via litellm
@@ -351,9 +351,6 @@ typing-extensions==4.12.2
     # via opentelemetry-api
     # via pydantic
     # via pydantic-core
-    # via typing-inspection
-typing-inspection==0.4.1
-    # via pydantic
     # via python-on-whales
     # via referencing
     # via scale-gp
@@ -361,6 +358,8 @@
     # via temporalio
     # via typer
     # via typing-inspection
+typing-inspection==0.4.1
+    # via pydantic
     # via pydantic-settings
 tzdata==2025.2
     # via agentex-sdk
diff --git a/src/agentex/lib/adk/providers/_modules/sync_provider.py b/src/agentex/lib/adk/providers/_modules/sync_provider.py
index c22c8cdd..6fac54b5 100644
--- a/src/agentex/lib/adk/providers/_modules/sync_provider.py
+++ b/src/agentex/lib/adk/providers/_modules/sync_provider.py
@@ -28,6 +28,7 @@
 from agents.models.openai_provider import OpenAIProvider
 
 from agentex import AsyncAgentex
+from agentex.lib.utils.logging import make_logger
 from agentex.lib.core.tracing.tracer import AsyncTracer
 from agentex.types.task_message_delta import TextDelta
 from agentex.types.task_message_update import (
@@ -40,6 +41,44 @@
 from agentex.types.tool_request_content import ToolRequestContent
 from agentex.types.tool_response_content import ToolResponseContent
 
+logger = make_logger(__name__)
+
+
+def _serialize_item(item: Any) -> dict[str, Any]:
+    """
+    Universal serializer for any item type from the OpenAI Agents SDK.
+
+    Uses model_dump() for Pydantic models, otherwise extracts attributes manually.
+    Filters out internal Pydantic fields that can't be serialized.
+    """
+    if hasattr(item, 'model_dump'):
+        # Pydantic model - use model_dump for proper serialization
+        try:
+            return item.model_dump(mode='json', exclude_unset=True)
+        except Exception:
+            # Fallback to dict conversion
+            return dict(item) if hasattr(item, '__iter__') else {}
+    else:
+        # Not a Pydantic model - extract attributes manually
+        item_dict = {}
+        for attr_name in dir(item):
+            if not attr_name.startswith('_') and attr_name not in ('model_fields', 'model_config', 'model_computed_fields'):
+                try:
+                    attr_value = getattr(item, attr_name, None)
+                    # Skip methods and None values
+                    if attr_value is not None and not callable(attr_value):
+                        # Convert to JSON-serializable format
+                        if hasattr(attr_value, 'model_dump'):
+                            item_dict[attr_name] = attr_value.model_dump()
+                        elif isinstance(attr_value, (str, int, float, bool, list, dict)):
+                            item_dict[attr_name] = attr_value
+                        else:
+                            item_dict[attr_name] = str(attr_value)
+                except Exception:
+                    # Skip attributes that can't be accessed
+                    pass
+        return item_dict
+
 
 class SyncStreamingModel(Model):
     """Simple model wrapper that adds logging to stream_response and supports tracing."""
@@ -109,10 +148,38 @@ async def get_response(
 
         response = await self.original_model.get_response(**kwargs)
 
-        # Set span output
-        if span:
+        # Set span output with structured data
+        if span and response:
+            new_items = []
+            final_output = None
+
+            # Extract final output text from response
+            response_final_output = getattr(response, 'final_output', None)
+            if response_final_output:
+                final_output = response_final_output
+
+            # Extract items from the response output
+            response_output = getattr(response, 'output', None)
+            if response_output:
+                output_items = response_output if isinstance(response_output, list) else [response_output]
+
+                for item in output_items:
+                    item_dict = _serialize_item(item)
+                    if item_dict:
+                        new_items.append(item_dict)
+
+                        # Extract final_output from message type if available
+                        if item_dict.get('type') == 'message' and not final_output:
+                            content = item_dict.get('content', [])
+                            if content and isinstance(content, list):
+                                for content_part in content:
+                                    if isinstance(content_part, dict) and 'text' in content_part:
+                                        final_output = content_part['text']
+                                        break
+
             span.output = {
-                "response": str(response) if response else None,
+                "new_items": new_items,
+                "final_output": final_output,
             }
 
         return response
@@ -160,7 +227,9 @@ async def stream_response(
         # Wrap the streaming in a tracing span if tracer is available
         if self.tracer and self.trace_id:
             trace = self.tracer.trace(self.trace_id)
-            async with trace.span(
+
+            # Manually start the span instead of using a context manager
+            span = await trace.start_span(
                 parent_id=self.parent_span_id,
                 name="run_agent_streamed",
                 input={
@@ -172,7 +241,9 @@
                     "handoffs": [str(h) for h in handoffs] if handoffs else [],
                     "previous_response_id": previous_response_id,
                 },
-            ) as span:
+            )
+
+            try:
                 # Get the stream from the original model
                 stream_kwargs = {
                     "system_instructions": system_instructions,
@@ -193,23 +264,40 @@
                 # Get the stream response from the original model and yield each event
                 stream_response = self.original_model.stream_response(**stream_kwargs)
 
-                # Pass through each event from the original stream
-                event_count = 0
-                final_output = None
+                # Pass through each event from the original stream and track items
+                new_items = []
+                final_response_text = ""
+
                 async for event in stream_response:
-                    event_count += 1
-                    # Track the final output if available
-                    if hasattr(event, 'type') and event.type == 'raw_response_event':
-                        if hasattr(event.data, 'output'):
-                            final_output = event.data.output
+                    event_type = getattr(event, 'type', 'no-type')
+
+                    # Handle response.output_item.done events, which contain completed items
+                    if event_type == 'response.output_item.done':
+                        item = getattr(event, 'item', None)
+                        if item is not None:
+                            item_dict = _serialize_item(item)
+                            if item_dict:
+                                new_items.append(item_dict)
+
+                                # Update final_response_text from message type if available
+                                if item_dict.get('type') == 'message':
+                                    content = item_dict.get('content', [])
+                                    if content and isinstance(content, list):
+                                        for content_part in content:
+                                            if isinstance(content_part, dict) and 'text' in content_part:
+                                                final_response_text = content_part['text']
+                                                break
+
                     yield event
 
-                # Set span output
-                if span:
-                    span.output = {
-                        "event_count": event_count,
-                        "final_output": str(final_output) if final_output else None,
-                    }
+                # Set span output with structured data, including tool calls and the final response
+                span.output = {
+                    "new_items": new_items,
+                    "final_output": final_response_text if final_response_text else None,
+                }
+            finally:
+                # End the span after all events have been yielded
+                await trace.end_span(span)
         else:
             # No tracing, just stream normally
             # Get the stream from the original model