From 82f53652405ec4a12d34105f4d087c2fe2e77a8b Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Fri, 17 Oct 2025 15:18:35 -0300 Subject: [PATCH 1/7] feat(closes OPEN-7543): update trace openai to support responses api --- .../openai/openai_responses_api_tracing.ipynb | 197 +++++++ .../lib/integrations/async_openai_tracer.py | 292 ++++++++-- .../lib/integrations/openai_tracer.py | 531 +++++++++++++++--- 3 files changed, 913 insertions(+), 107 deletions(-) create mode 100644 examples/tracing/openai/openai_responses_api_tracing.ipynb diff --git a/examples/tracing/openai/openai_responses_api_tracing.ipynb b/examples/tracing/openai/openai_responses_api_tracing.ipynb new file mode 100644 index 00000000..8bd7ba14 --- /dev/null +++ b/examples/tracing/openai/openai_responses_api_tracing.ipynb @@ -0,0 +1,197 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2722b419", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openlayer-ai/openlayer-python/blob/main/examples/tracing/openai/openai_responses_api_tracing.ipynb)\n", + "\n", + "\n", + "# OpenAI Responses API monitoring with Openlayer\n", + "\n", + "This notebook shows how to monitor both OpenAI's Chat Completions API and the new Responses API with Openlayer. The same `trace_openai()` function supports both APIs seamlessly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "020c8f6a", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install openlayer openai" + ] + }, + { + "cell_type": "markdown", + "id": "75c2a473", + "metadata": {}, + "source": [ + "## 1. Set the environment variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3f4fa13", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import openai\n", + "\n", + "# OpenAI API key\n", + "os.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_API_KEY_HERE\"\n", + "\n", + "# Openlayer configuration\n", + "os.environ[\"OPENLAYER_API_KEY\"] = \"YOUR_OPENLAYER_API_KEY_HERE\"\n", + "os.environ[\"OPENLAYER_INFERENCE_PIPELINE_ID\"] = \"YOUR_OPENLAYER_INFERENCE_PIPELINE_ID_HERE\"" + ] + }, + { + "cell_type": "markdown", + "id": "9758533f", + "metadata": {}, + "source": [ + "## 2. Create traced OpenAI client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c35d9860-dc41-4f7c-8d69-cc2ac7e5e485", + "metadata": {}, + "outputs": [], + "source": [ + "from openlayer.lib import trace_openai\n", + "\n", + "# Single function traces both Chat Completions AND Responses APIs\n", + "client = trace_openai(openai.OpenAI())" + ] + }, + { + "cell_type": "markdown", + "id": "72a6b954", + "metadata": {}, + "source": [ + "## 3. Use Chat Completions API (existing functionality)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e00c1c79", + "metadata": {}, + "outputs": [], + "source": [ + "# Chat Completions API - works exactly as before\n", + "response = client.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " messages=[{\"role\": \"user\", \"content\": \"What is 2 + 2?\"}],\n", + " max_tokens=50\n", + ")\n", + "\n", + "print(\"Chat Completions:\", response.choices[0].message.content)" + ] + }, + { + "cell_type": "markdown", + "id": "76a350b4", + "metadata": {}, + "source": [ + "## 4. Use Responses API (new unified interface)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "responses-api-example", + "metadata": {}, + "outputs": [], + "source": [ + "# Responses API - new unified interface with enhanced metadata\n", + "if hasattr(client, 'responses'):\n", + " response = client.responses.create(\n", + " model=\"gpt-4o-mini\",\n", + " input=\"What is 3 + 3?\",\n", + " max_output_tokens=50\n", + " )\n", + " \n", + " # Extract response text\n", + " if response.output and len(response.output) > 0:\n", + " result = response.output[0].content[0].text\n", + " print(\"Responses API:\", result)\n", + " print(f\"Response ID: {response.id}\")\nelse:\n", + " print(\"Responses API not available in this OpenAI version\")" + ] + }, + { + "cell_type": "markdown", + "id": "streaming-example", + "metadata": {}, + "source": [ + "## 5. Streaming example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "streaming-code", + "metadata": {}, + "outputs": [], + "source": [ + "# Streaming works with both APIs\n", + "stream = client.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " messages=[{\"role\": \"user\", \"content\": \"Count from 1 to 3\"}],\n", + " stream=True\n", + ")\n", + "\n", + "print(\"Streaming response: \", end=\"\")\n", + "for chunk in stream:\n", + " if chunk.choices[0].delta.content:\n", + " print(chunk.choices[0].delta.content, end=\"\")\nprint(\"\\n✓ All requests automatically traced to Openlayer!\")" + ] + }, + { + "cell_type": "markdown", + "id": "conclusion", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "That's it! The same `trace_openai()` function now supports:\n", + "\n", + "- ✅ **Chat Completions API** - Full backward compatibility\n", + "- ✅ **Responses API** - New unified interface with enhanced metadata\n", + "- ✅ **Streaming** - Both APIs support streaming\n", + "- ✅ **Function calling** - Tool calls work with both APIs\n", + "- ✅ **Async support** - Use `trace_async_openai()` for async clients\n", + "\n", + "All traces are automatically sent to Openlayer with proper API type differentiation!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/src/openlayer/lib/integrations/async_openai_tracer.py b/src/openlayer/lib/integrations/async_openai_tracer.py index f670fa16..799d64e9 100644 --- a/src/openlayer/lib/integrations/async_openai_tracer.py +++ b/src/openlayer/lib/integrations/async_openai_tracer.py @@ -8,6 +8,7 @@ try: import openai + HAVE_OPENAI = True except ImportError: HAVE_OPENAI = False @@ -20,6 +21,12 @@ create_trace_args, add_to_trace, parse_non_streaming_output_data, + # Import Responses API helper functions + extract_responses_chunk_data, + extract_responses_inputs, + parse_responses_output_data, + extract_responses_usage, + get_responses_model_parameters, ) logger = logging.getLogger(__name__) @@ -28,20 +35,24 @@ def trace_async_openai( client: Union["openai.AsyncOpenAI", "openai.AsyncAzureOpenAI"], ) -> Union["openai.AsyncOpenAI", "openai.AsyncAzureOpenAI"]: - """Patch the AsyncOpenAI or AsyncAzureOpenAI client to trace chat completions. - - The following information is collected for each chat completion: - - start_time: The time when the completion was requested. - - end_time: The time when the completion was received. - - latency: The time it took to generate the completion. - - tokens: The total number of tokens used to generate the completion. - - prompt_tokens: The number of tokens in the prompt. - - completion_tokens: The number of tokens in the completion. - - model: The model used to generate the completion. + """Patch the AsyncOpenAI or AsyncAzureOpenAI client to trace chat completions and responses. + + This function patches both the Chat Completions API (client.chat.completions.create) + and the Responses API (client.responses.create) to provide comprehensive tracing + for both APIs while maintaining backward compatibility. + + The following information is collected for each completion/response: + - start_time: The time when the completion/response was requested. + - end_time: The time when the completion/response was received. + - latency: The time it took to generate the completion/response. + - tokens: The total number of tokens used to generate the completion/response. + - prompt_tokens: The number of tokens in the prompt/input. + - completion_tokens: The number of tokens in the completion/output. + - model: The model used to generate the completion/response. - model_parameters: The parameters used to configure the model. - raw_output: The raw output of the model. - - inputs: The inputs used to generate the completion. - - metadata: Additional metadata about the completion. For example, the time it + - inputs: The inputs used to generate the completion/response. + - metadata: Additional metadata about the completion/response. For example, the time it took to generate the first token, when streaming. Parameters @@ -55,15 +66,15 @@ def trace_async_openai( The patched AsyncOpenAI client. """ if not HAVE_OPENAI: - raise ImportError( - "OpenAI library is not installed. Please install it with: pip install openai" - ) - + raise ImportError("OpenAI library is not installed. Please install it with: pip install openai") + is_azure_openai = isinstance(client, openai.AsyncAzureOpenAI) - create_func = client.chat.completions.create - @wraps(create_func) - async def traced_create_func(*args, **kwargs): + # Patch Chat Completions API + chat_create_func = client.chat.completions.create + + @wraps(chat_create_func) + async def traced_chat_create_func(*args, **kwargs): inference_id = kwargs.pop("inference_id", None) stream = kwargs.get("stream", False) @@ -71,19 +82,51 @@ async def traced_create_func(*args, **kwargs): return handle_async_streaming_create( *args, **kwargs, - create_func=create_func, + create_func=chat_create_func, inference_id=inference_id, is_azure_openai=is_azure_openai, + api_type="chat_completions", ) return await handle_async_non_streaming_create( *args, **kwargs, - create_func=create_func, + create_func=chat_create_func, inference_id=inference_id, is_azure_openai=is_azure_openai, + api_type="chat_completions", ) - client.chat.completions.create = traced_create_func + client.chat.completions.create = traced_chat_create_func + + # Patch Responses API (if available) + if hasattr(client, "responses"): + responses_create_func = client.responses.create + + @wraps(responses_create_func) + async def traced_responses_create_func(*args, **kwargs): + inference_id = kwargs.pop("inference_id", None) + stream = kwargs.get("stream", False) + + if stream: + return handle_async_responses_streaming_create( + *args, + **kwargs, + create_func=responses_create_func, + inference_id=inference_id, + is_azure_openai=is_azure_openai, + ) + return await handle_async_responses_non_streaming_create( + *args, + **kwargs, + create_func=responses_create_func, + inference_id=inference_id, + is_azure_openai=is_azure_openai, + ) + + client.responses.create = traced_responses_create_func + else: + logger.debug("Responses API not available in this AsyncOpenAI client version") + return client @@ -92,6 +135,7 @@ async def handle_async_streaming_create( *args, is_azure_openai: bool = False, inference_id: Optional[str] = None, + api_type: str = "chat_completions", **kwargs, ) -> AsyncIterator[Any]: """Handles the create method when streaming is enabled. @@ -142,16 +186,12 @@ async def handle_async_streaming_create( if delta.function_call.name: collected_function_call["name"] += delta.function_call.name if delta.function_call.arguments: - collected_function_call[ - "arguments" - ] += delta.function_call.arguments + collected_function_call["arguments"] += delta.function_call.arguments elif delta.tool_calls: if delta.tool_calls[0].function.name: collected_function_call["name"] += delta.tool_calls[0].function.name if delta.tool_calls[0].function.arguments: - collected_function_call["arguments"] += delta.tool_calls[ - 0 - ].function.arguments + collected_function_call["arguments"] += delta.tool_calls[0].function.arguments yield chunk @@ -163,15 +203,11 @@ async def handle_async_streaming_create( finally: # Try to add step to the trace try: - collected_output_data = [ - message for message in collected_output_data if message is not None - ] + collected_output_data = [message for message in collected_output_data if message is not None] if collected_output_data: output_data = "".join(collected_output_data) else: - collected_function_call["arguments"] = json.loads( - collected_function_call["arguments"] - ) + collected_function_call["arguments"] = json.loads(collected_function_call["arguments"]) output_data = collected_function_call trace_args = create_trace_args( @@ -186,13 +222,7 @@ async def handle_async_streaming_create( model_parameters=get_model_parameters(kwargs), raw_output=raw_outputs, id=inference_id, - metadata={ - "timeToFirstToken": ( - (first_token_time - start_time) * 1000 - if first_token_time - else None - ) - }, + metadata={"timeToFirstToken": ((first_token_time - start_time) * 1000 if first_token_time else None)}, ) add_to_trace( **trace_args, @@ -212,8 +242,9 @@ async def handle_async_non_streaming_create( *args, is_azure_openai: bool = False, inference_id: Optional[str] = None, + api_type: str = "chat_completions", **kwargs, -) -> "openai.types.chat.chat_completion.ChatCompletion": +) -> Union["openai.types.chat.chat_completion.ChatCompletion", Any]: """Handles the create method when streaming is disabled. Parameters @@ -257,8 +288,181 @@ async def handle_async_non_streaming_create( ) # pylint: disable=broad-except except Exception as e: - logger.error( - "Failed to trace the create chat completion request with Openlayer. %s", e + logger.error("Failed to trace the create chat completion request with Openlayer. %s", e) + + return response + + +# -------------------------------- Async Responses API Handlers -------------------------------- # + + +async def handle_async_responses_streaming_create( + create_func: callable, + *args, + is_azure_openai: bool = False, + inference_id: Optional[str] = None, + **kwargs, +) -> AsyncIterator[Any]: + """Handles the Responses API create method when streaming is enabled (async version). + + Parameters + ---------- + create_func : callable + The Responses API create method to handle. + is_azure_openai : bool, optional + Whether the client is an Azure OpenAI client, by default False + inference_id : Optional[str], optional + A user-generated inference id, by default None + + Returns + ------- + AsyncIterator[Any] + An async generator that yields the chunks of the response stream. + """ + chunks = await create_func(*args, **kwargs) + + # Create and return a new async generator that processes chunks + collected_output_data = [] + collected_function_call = { + "name": "", + "arguments": "", + } + raw_outputs = [] + start_time = time.time() + end_time = None + first_token_time = None + num_of_completion_tokens = None + latency = None + + try: + i = 0 + async for chunk in chunks: + raw_outputs.append(chunk.model_dump() if hasattr(chunk, "model_dump") else str(chunk)) + if i == 0: + first_token_time = time.time() + if i > 0: + num_of_completion_tokens = i + 1 + i += 1 + + # Handle different types of ResponseStreamEvent + chunk_data = extract_responses_chunk_data(chunk) + + if chunk_data.get("content"): + collected_output_data.append(chunk_data["content"]) + elif chunk_data.get("function_call"): + func_call = chunk_data["function_call"] + if func_call.get("name"): + collected_function_call["name"] += func_call["name"] + if func_call.get("arguments"): + collected_function_call["arguments"] += func_call["arguments"] + + yield chunk + + end_time = time.time() + latency = (end_time - start_time) * 1000 + # pylint: disable=broad-except + except Exception as e: + logger.error("Failed yield chunk. %s", e) + finally: + # Try to add step to the trace + try: + collected_output_data = [message for message in collected_output_data if message is not None] + if collected_output_data: + output_data = "".join(collected_output_data) + else: + if collected_function_call["arguments"]: + try: + collected_function_call["arguments"] = json.loads(collected_function_call["arguments"]) + except json.JSONDecodeError: + # Keep as string if not valid JSON + pass + output_data = collected_function_call + + trace_args = create_trace_args( + end_time=end_time, + inputs=extract_responses_inputs(kwargs), + output=output_data, + latency=latency, + tokens=num_of_completion_tokens, + prompt_tokens=0, + completion_tokens=num_of_completion_tokens, + model=kwargs.get("model", "unknown"), + model_parameters=get_responses_model_parameters(kwargs), + raw_output=raw_outputs, + id=inference_id, + metadata={ + "timeToFirstToken": ((first_token_time - start_time) * 1000 if first_token_time else None), + "api_type": "responses", + }, + ) + add_to_trace( + **trace_args, + is_azure_openai=is_azure_openai, + api_type="responses", + ) + + # pylint: disable=broad-except + except Exception as e: + logger.error( + "Failed to trace the Responses API request with Openlayer. %s", + e, + ) + + +async def handle_async_responses_non_streaming_create( + create_func: callable, + *args, + is_azure_openai: bool = False, + inference_id: Optional[str] = None, + **kwargs, +) -> Any: + """Handles the Responses API create method when streaming is disabled (async version). + + Parameters + ---------- + create_func : callable + The Responses API create method to handle. + is_azure_openai : bool, optional + Whether the client is an Azure OpenAI client, by default False + inference_id : Optional[str], optional + A user-generated inference id, by default None + + Returns + ------- + Any + The response object. + """ + start_time = time.time() + response = await create_func(*args, **kwargs) + end_time = time.time() + + # Try to add step to the trace + try: + output_data = parse_responses_output_data(response) + usage_data = extract_responses_usage(response) + + trace_args = create_trace_args( + end_time=end_time, + inputs=extract_responses_inputs(kwargs), + output=output_data, + latency=(end_time - start_time) * 1000, + tokens=usage_data.get("total_tokens", 0), + prompt_tokens=usage_data.get("prompt_tokens", 0), + completion_tokens=usage_data.get("completion_tokens", 0), + model=getattr(response, "model", kwargs.get("model", "unknown")), + model_parameters=get_responses_model_parameters(kwargs), + raw_output=response.model_dump() if hasattr(response, "model_dump") else str(response), + id=inference_id, + metadata={"api_type": "responses"}, ) + add_to_trace( + is_azure_openai=is_azure_openai, + api_type="responses", + **trace_args, + ) + # pylint: disable=broad-except + except Exception as e: + logger.error("Failed to trace the Responses API request with Openlayer. %s", e) + return response diff --git a/src/openlayer/lib/integrations/openai_tracer.py b/src/openlayer/lib/integrations/openai_tracer.py index 0c787aa2..967c70f2 100644 --- a/src/openlayer/lib/integrations/openai_tracer.py +++ b/src/openlayer/lib/integrations/openai_tracer.py @@ -8,6 +8,7 @@ try: import openai + HAVE_OPENAI = True except ImportError: HAVE_OPENAI = False @@ -23,20 +24,24 @@ def trace_openai( client: Union["openai.OpenAI", "openai.AzureOpenAI"], ) -> Union["openai.OpenAI", "openai.AzureOpenAI"]: - """Patch the OpenAI or AzureOpenAI client to trace chat completions. - - The following information is collected for each chat completion: - - start_time: The time when the completion was requested. - - end_time: The time when the completion was received. - - latency: The time it took to generate the completion. - - tokens: The total number of tokens used to generate the completion. - - prompt_tokens: The number of tokens in the prompt. - - completion_tokens: The number of tokens in the completion. - - model: The model used to generate the completion. + """Patch the OpenAI or AzureOpenAI client to trace chat completions and responses. + + This function patches both the Chat Completions API (client.chat.completions.create) + and the Responses API (client.responses.create) to provide comprehensive tracing + for both APIs while maintaining backward compatibility. + + The following information is collected for each completion/response: + - start_time: The time when the completion/response was requested. + - end_time: The time when the completion/response was received. + - latency: The time it took to generate the completion/response. + - tokens: The total number of tokens used to generate the completion/response. + - prompt_tokens: The number of tokens in the prompt/input. + - completion_tokens: The number of tokens in the completion/output. + - model: The model used to generate the completion/response. - model_parameters: The parameters used to configure the model. - raw_output: The raw output of the model. - - inputs: The inputs used to generate the completion. - - metadata: Additional metadata about the completion. For example, the time it + - inputs: The inputs used to generate the completion/response. + - metadata: Additional metadata about the completion/response. For example, the time it took to generate the first token, when streaming. Parameters @@ -50,15 +55,15 @@ def trace_openai( The patched OpenAI client. """ if not HAVE_OPENAI: - raise ImportError( - "OpenAI library is not installed. Please install it with: pip install openai" - ) - + raise ImportError("OpenAI library is not installed. Please install it with: pip install openai") + is_azure_openai = isinstance(client, openai.AzureOpenAI) - create_func = client.chat.completions.create - @wraps(create_func) - def traced_create_func(*args, **kwargs): + # Patch Chat Completions API + chat_create_func = client.chat.completions.create + + @wraps(chat_create_func) + def traced_chat_create_func(*args, **kwargs): inference_id = kwargs.pop("inference_id", None) stream = kwargs.get("stream", False) @@ -66,19 +71,51 @@ def traced_create_func(*args, **kwargs): return handle_streaming_create( *args, **kwargs, - create_func=create_func, + create_func=chat_create_func, inference_id=inference_id, is_azure_openai=is_azure_openai, + api_type="chat_completions", ) return handle_non_streaming_create( *args, **kwargs, - create_func=create_func, + create_func=chat_create_func, inference_id=inference_id, is_azure_openai=is_azure_openai, + api_type="chat_completions", ) - client.chat.completions.create = traced_create_func + client.chat.completions.create = traced_chat_create_func + + # Patch Responses API (if available) + if hasattr(client, "responses"): + responses_create_func = client.responses.create + + @wraps(responses_create_func) + def traced_responses_create_func(*args, **kwargs): + inference_id = kwargs.pop("inference_id", None) + stream = kwargs.get("stream", False) + + if stream: + return handle_responses_streaming_create( + *args, + **kwargs, + create_func=responses_create_func, + inference_id=inference_id, + is_azure_openai=is_azure_openai, + ) + return handle_responses_non_streaming_create( + *args, + **kwargs, + create_func=responses_create_func, + inference_id=inference_id, + is_azure_openai=is_azure_openai, + ) + + client.responses.create = traced_responses_create_func + else: + logger.debug("Responses API not available in this OpenAI client version") + return client @@ -87,6 +124,7 @@ def handle_streaming_create( *args, is_azure_openai: bool = False, inference_id: Optional[str] = None, + api_type: str = "chat_completions", **kwargs, ) -> Iterator[Any]: """Handles the create method when streaming is enabled. @@ -149,16 +187,12 @@ def stream_chunks( if delta.function_call.name: collected_function_call["name"] += delta.function_call.name if delta.function_call.arguments: - collected_function_call[ - "arguments" - ] += delta.function_call.arguments + collected_function_call["arguments"] += delta.function_call.arguments elif delta.tool_calls: if delta.tool_calls[0].function.name: collected_function_call["name"] += delta.tool_calls[0].function.name if delta.tool_calls[0].function.arguments: - collected_function_call["arguments"] += delta.tool_calls[ - 0 - ].function.arguments + collected_function_call["arguments"] += delta.tool_calls[0].function.arguments yield chunk end_time = time.time() @@ -169,15 +203,11 @@ def stream_chunks( finally: # Try to add step to the trace try: - collected_output_data = [ - message for message in collected_output_data if message is not None - ] + collected_output_data = [message for message in collected_output_data if message is not None] if collected_output_data: output_data = "".join(collected_output_data) else: - collected_function_call["arguments"] = json.loads( - collected_function_call["arguments"] - ) + collected_function_call["arguments"] = json.loads(collected_function_call["arguments"]) output_data = collected_function_call trace_args = create_trace_args( @@ -192,13 +222,7 @@ def stream_chunks( model_parameters=get_model_parameters(kwargs), raw_output=raw_outputs, id=inference_id, - metadata={ - "timeToFirstToken": ( - (first_token_time - start_time) * 1000 - if first_token_time - else None - ) - }, + metadata={"timeToFirstToken": ((first_token_time - start_time) * 1000 if first_token_time else None)}, ) add_to_trace( **trace_args, @@ -263,16 +287,23 @@ def create_trace_args( return trace_args -def add_to_trace(is_azure_openai: bool = False, **kwargs) -> None: - """Add a chat completion step to the trace.""" - if is_azure_openai: - tracer.add_chat_completion_step_to_trace( - **kwargs, name="Azure OpenAI Chat Completion", provider="Azure" - ) +def add_to_trace(is_azure_openai: bool = False, api_type: str = "chat_completions", **kwargs) -> None: + """Add a chat completion or responses step to the trace.""" + # Remove api_type from kwargs to avoid passing it to the tracer + kwargs.pop("api_type", None) + + if api_type == "responses": + # Handle Responses API tracing + if is_azure_openai: + tracer.add_chat_completion_step_to_trace(**kwargs, name="Azure OpenAI Response", provider="Azure") + else: + tracer.add_chat_completion_step_to_trace(**kwargs, name="OpenAI Response", provider="OpenAI") else: - tracer.add_chat_completion_step_to_trace( - **kwargs, name="OpenAI Chat Completion", provider="OpenAI" - ) + # Handle Chat Completions API tracing (default behavior) + if is_azure_openai: + tracer.add_chat_completion_step_to_trace(**kwargs, name="Azure OpenAI Chat Completion", provider="Azure") + else: + tracer.add_chat_completion_step_to_trace(**kwargs, name="OpenAI Chat Completion", provider="OpenAI") def handle_non_streaming_create( @@ -280,8 +311,9 @@ def handle_non_streaming_create( *args, is_azure_openai: bool = False, inference_id: Optional[str] = None, + api_type: str = "chat_completions", **kwargs, -) -> "openai.types.chat.chat_completion.ChatCompletion": +) -> Union["openai.types.chat.chat_completion.ChatCompletion", Any]: """Handles the create method when streaming is disabled. Parameters @@ -325,13 +357,392 @@ def handle_non_streaming_create( ) # pylint: disable=broad-except except Exception as e: - logger.error( - "Failed to trace the create chat completion request with Openlayer. %s", e + logger.error("Failed to trace the create chat completion request with Openlayer. %s", e) + + return response + + +# -------------------------------- Responses API Handlers -------------------------------- # + + +def handle_responses_streaming_create( + create_func: callable, + *args, + is_azure_openai: bool = False, + inference_id: Optional[str] = None, + **kwargs, +) -> Iterator[Any]: + """Handles the Responses API create method when streaming is enabled. + + Parameters + ---------- + create_func : callable + The Responses API create method to handle. + is_azure_openai : bool, optional + Whether the client is an Azure OpenAI client, by default False + inference_id : Optional[str], optional + A user-generated inference id, by default None + + Returns + ------- + Iterator[Any] + A generator that yields the chunks of the response stream. + """ + chunks = create_func(*args, **kwargs) + return stream_responses_chunks( + chunks=chunks, + kwargs=kwargs, + inference_id=inference_id, + is_azure_openai=is_azure_openai, + ) + + +def stream_responses_chunks( + chunks: Iterator[Any], + kwargs: Dict[str, any], + is_azure_openai: bool = False, + inference_id: Optional[str] = None, +): + """Streams the chunks of the Responses API and traces the response.""" + collected_output_data = [] + collected_function_call = { + "name": "", + "arguments": "", + } + raw_outputs = [] + start_time = time.time() + end_time = None + first_token_time = None + num_of_completion_tokens = None + latency = None + + try: + i = 0 + for i, chunk in enumerate(chunks): + raw_outputs.append(chunk.model_dump() if hasattr(chunk, "model_dump") else str(chunk)) + if i == 0: + first_token_time = time.time() + if i > 0: + num_of_completion_tokens = i + 1 + + # Handle different types of ResponseStreamEvent + chunk_data = extract_responses_chunk_data(chunk) + + if chunk_data.get("content"): + collected_output_data.append(chunk_data["content"]) + elif chunk_data.get("function_call"): + func_call = chunk_data["function_call"] + if func_call.get("name"): + collected_function_call["name"] += func_call["name"] + if func_call.get("arguments"): + collected_function_call["arguments"] += func_call["arguments"] + + yield chunk + + end_time = time.time() + latency = (end_time - start_time) * 1000 + # pylint: disable=broad-except + except Exception as e: + logger.error("Failed yield chunk. %s", e) + finally: + # Try to add step to the trace + try: + collected_output_data = [message for message in collected_output_data if message is not None] + if collected_output_data: + output_data = "".join(collected_output_data) + else: + if collected_function_call["arguments"]: + try: + collected_function_call["arguments"] = json.loads(collected_function_call["arguments"]) + except json.JSONDecodeError: + # Keep as string if not valid JSON + pass + output_data = collected_function_call + + trace_args = create_trace_args( + end_time=end_time, + inputs=extract_responses_inputs(kwargs), + output=output_data, + latency=latency, + tokens=num_of_completion_tokens, + prompt_tokens=0, + completion_tokens=num_of_completion_tokens, + model=kwargs.get("model", "unknown"), + model_parameters=get_responses_model_parameters(kwargs), + raw_output=raw_outputs, + id=inference_id, + metadata={ + "timeToFirstToken": ((first_token_time - start_time) * 1000 if first_token_time else None), + "api_type": "responses", + }, + ) + add_to_trace( + **trace_args, + is_azure_openai=is_azure_openai, + api_type="responses", + ) + + # pylint: disable=broad-except + except Exception as e: + logger.error( + "Failed to trace the Responses API request with Openlayer. %s", + e, + ) + + +def handle_responses_non_streaming_create( + create_func: callable, + *args, + is_azure_openai: bool = False, + inference_id: Optional[str] = None, + **kwargs, +) -> Any: + """Handles the Responses API create method when streaming is disabled. + + Parameters + ---------- + create_func : callable + The Responses API create method to handle. + is_azure_openai : bool, optional + Whether the client is an Azure OpenAI client, by default False + inference_id : Optional[str], optional + A user-generated inference id, by default None + + Returns + ------- + Any + The response object. + """ + start_time = time.time() + response = create_func(*args, **kwargs) + end_time = time.time() + + # Try to add step to the trace + try: + output_data = parse_responses_output_data(response) + usage_data = extract_responses_usage(response) + + trace_args = create_trace_args( + end_time=end_time, + inputs=extract_responses_inputs(kwargs), + output=output_data, + latency=(end_time - start_time) * 1000, + tokens=usage_data.get("total_tokens", 0), + prompt_tokens=usage_data.get("prompt_tokens", 0), + completion_tokens=usage_data.get("completion_tokens", 0), + model=getattr(response, "model", kwargs.get("model", "unknown")), + model_parameters=get_responses_model_parameters(kwargs), + raw_output=response.model_dump() if hasattr(response, "model_dump") else str(response), + id=inference_id, + metadata={"api_type": "responses"}, ) + add_to_trace( + is_azure_openai=is_azure_openai, + api_type="responses", + **trace_args, + ) + # pylint: disable=broad-except + except Exception as e: + logger.error("Failed to trace the Responses API request with Openlayer. %s", e) + return response +# -------------------------------- Responses API Helper Functions -------------------------------- # + + +def extract_responses_chunk_data(chunk: Any) -> Dict[str, Any]: + """Extract content and function call data from a ResponseStreamEvent chunk. + + Args: + chunk: A ResponseStreamEvent object + + Returns: + Dictionary with content and/or function_call data + """ + result = {} + + try: + # Handle different types of response stream events + chunk_type = getattr(chunk, "type", None) + + if chunk_type == "response.text.delta": + # Text content delta + if hasattr(chunk, "delta") and hasattr(chunk.delta, "text"): + result["content"] = chunk.delta.text + elif chunk_type == "response.function_call.arguments.delta": + # Function call arguments delta + if hasattr(chunk, "delta"): + result["function_call"] = {"arguments": chunk.delta} + elif chunk_type == "response.function_call.name": + # Function call name + if hasattr(chunk, "name"): + result["function_call"] = {"name": chunk.name} + elif hasattr(chunk, "choices") and chunk.choices: + # Fallback to chat-style format if available + choice = chunk.choices[0] + if hasattr(choice, "delta"): + delta = choice.delta + if hasattr(delta, "content") and delta.content: + result["content"] = delta.content + elif hasattr(delta, "function_call"): + func_call = {} + if hasattr(delta.function_call, "name") and delta.function_call.name: + func_call["name"] = delta.function_call.name + if hasattr(delta.function_call, "arguments") and delta.function_call.arguments: + func_call["arguments"] = delta.function_call.arguments + if func_call: + result["function_call"] = func_call + + except Exception as e: + logger.debug("Could not extract chunk data from ResponseStreamEvent: %s", e) + + return result + + +def extract_responses_inputs(kwargs: Dict[str, Any]) -> Dict[str, Any]: + """Extract inputs from Responses API parameters. + + Args: + kwargs: The parameters passed to the Responses API + + Returns: + Dictionary with prompt/input data + """ + inputs = {} + + # Handle different input formats for Responses API + if "input" in kwargs: + inputs["prompt"] = kwargs["input"] + elif "conversation" in kwargs: + inputs["prompt"] = kwargs["conversation"] + elif "instructions" in kwargs: + inputs["prompt"] = kwargs["instructions"] + elif "prompt" in kwargs: + inputs["prompt"] = kwargs["prompt"] + else: + # Fallback: try to construct from available parameters + prompt_parts = [] + if "instructions" in kwargs: + prompt_parts.append(f"Instructions: {kwargs['instructions']}") + if "input" in kwargs: + prompt_parts.append(f"Input: {kwargs['input']}") + inputs["prompt"] = " | ".join(prompt_parts) if prompt_parts else "No input provided" + + return inputs + + +def parse_responses_output_data(response: Any) -> Union[str, Dict[str, Any], None]: + """Parses the output data from a Responses API response. + + Args: + response: The Response object from the Responses API + + Returns: + The parsed output data + """ + try: + # Handle Response object structure - check for output first (Responses API structure) + if hasattr(response, "output") and response.output: + if isinstance(response.output, list) and response.output: + # Handle list of output messages + first_output = response.output[0] + if hasattr(first_output, "content") and first_output.content: + # Extract text from content list + if isinstance(first_output.content, list) and first_output.content: + text_content = first_output.content[0] + if hasattr(text_content, "text"): + return text_content.text.strip() + elif hasattr(first_output.content, "text"): + return first_output.content.text.strip() + else: + return str(first_output.content).strip() + elif hasattr(first_output, "text"): + return first_output.text.strip() + elif hasattr(response.output, "text"): + return response.output.text.strip() + elif hasattr(response.output, "content"): + return str(response.output.content).strip() + + # Handle Chat Completions style structure (fallback) + if hasattr(response, "choices") and response.choices: + choice = response.choices[0] + if hasattr(choice, "message"): + message = choice.message + if hasattr(message, "content") and message.content: + return message.content.strip() + elif hasattr(message, "function_call"): + return { + "name": message.function_call.name, + "arguments": json.loads(message.function_call.arguments) + if message.function_call.arguments + else {}, + } + elif hasattr(message, "tool_calls") and message.tool_calls: + tool_call = message.tool_calls[0] + return { + "name": tool_call.function.name, + "arguments": json.loads(tool_call.function.arguments) if tool_call.function.arguments else {}, + } + + # Handle direct text response + if hasattr(response, "text") and response.text: + return response.text.strip() + + except Exception as e: + logger.debug("Could not parse Responses API output data: %s", e) + + return None + + +def extract_responses_usage(response: Any) -> Dict[str, int]: + """Extract token usage from a Responses API response. + + Args: + response: The Response object from the Responses API + + Returns: + Dictionary with token usage information + """ + usage = {"total_tokens": 0, "prompt_tokens": 0, "completion_tokens": 0} + + try: + if hasattr(response, "usage"): + usage_obj = response.usage + # Handle ResponseUsage object with different attribute names + usage["total_tokens"] = getattr(usage_obj, "total_tokens", 0) + # ResponseUsage uses 'input_tokens' instead of 'prompt_tokens' + usage["prompt_tokens"] = getattr(usage_obj, "input_tokens", getattr(usage_obj, "prompt_tokens", 0)) + # ResponseUsage uses 'output_tokens' instead of 'completion_tokens' + usage["completion_tokens"] = getattr(usage_obj, "output_tokens", getattr(usage_obj, "completion_tokens", 0)) + elif hasattr(response, "token_usage"): + # Alternative usage attribute name + usage_obj = response.token_usage + usage["total_tokens"] = getattr(usage_obj, "total_tokens", 0) + usage["prompt_tokens"] = getattr(usage_obj, "input_tokens", getattr(usage_obj, "prompt_tokens", 0)) + usage["completion_tokens"] = getattr(usage_obj, "output_tokens", getattr(usage_obj, "completion_tokens", 0)) + except Exception as e: + logger.debug("Could not extract token usage from Responses API response: %s", e) + + return usage + + +def get_responses_model_parameters(kwargs: Dict[str, Any]) -> Dict[str, Any]: + """Gets the model parameters from Responses API kwargs.""" + return { + "max_output_tokens": kwargs.get("max_output_tokens"), + "temperature": kwargs.get("temperature", 1), + "top_p": kwargs.get("top_p", 1), + "reasoning": kwargs.get("reasoning"), + "parallel_tool_calls": kwargs.get("parallel_tool_calls"), + "max_tool_calls": kwargs.get("max_tool_calls"), + "background": kwargs.get("background"), + "truncation": kwargs.get("truncation"), + "include": kwargs.get("include"), + } + + def parse_non_streaming_output_data( response: "openai.types.chat.chat_completion.ChatCompletion", ) -> Union[str, Dict[str, Any], None]: @@ -369,18 +780,14 @@ def parse_non_streaming_output_data( # --------------------------- OpenAI Assistants API -------------------------- # -def trace_openai_assistant_thread_run( - client: "openai.OpenAI", run: "openai.types.beta.threads.run.Run" -) -> None: +def trace_openai_assistant_thread_run(client: "openai.OpenAI", run: "openai.types.beta.threads.run.Run") -> None: """Trace a run from an OpenAI assistant. Once the run is completed, the thread data is published to Openlayer, along with the latency, and number of tokens used.""" if not HAVE_OPENAI: - raise ImportError( - "OpenAI library is not installed. Please install it with: pip install openai" - ) - + raise ImportError("OpenAI library is not installed. Please install it with: pip install openai") + _type_check_run(run) # Do nothing if the run is not completed @@ -393,9 +800,7 @@ def trace_openai_assistant_thread_run( metadata = _extract_run_metadata(run) # Convert thread to prompt - messages = client.beta.threads.messages.list( - thread_id=run.thread_id, order="asc" - ) + messages = client.beta.threads.messages.list(thread_id=run.thread_id, order="asc") prompt = _thread_messages_to_prompt(messages) # Add step to the trace From f0209761b2b22e63ceeccdd2432338e35a3fff2e Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Fri, 17 Oct 2025 15:57:43 -0300 Subject: [PATCH 2/7] refactor: improve OpenAI responses API tracing in notebook example --- .../openai/openai_responses_api_tracing.ipynb | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/examples/tracing/openai/openai_responses_api_tracing.ipynb b/examples/tracing/openai/openai_responses_api_tracing.ipynb index 8bd7ba14..2cd8a9a4 100644 --- a/examples/tracing/openai/openai_responses_api_tracing.ipynb +++ b/examples/tracing/openai/openai_responses_api_tracing.ipynb @@ -39,6 +39,7 @@ "outputs": [], "source": [ "import os\n", + "\n", "import openai\n", "\n", "# OpenAI API key\n", @@ -92,7 +93,7 @@ " max_tokens=50\n", ")\n", "\n", - "print(\"Chat Completions:\", response.choices[0].message.content)" + "response.choices[0].message.content" ] }, { @@ -118,12 +119,10 @@ " max_output_tokens=50\n", " )\n", " \n", - " # Extract response text\n", - " if response.output and len(response.output) > 0:\n", - " result = response.output[0].content[0].text\n", - " print(\"Responses API:\", result)\n", - " print(f\"Response ID: {response.id}\")\nelse:\n", - " print(\"Responses API not available in this OpenAI version\")" + " # Response is automatically traced\n", + " response\n", + "else:\n", + " \"Responses API not available in this OpenAI version\"" ] }, { @@ -148,10 +147,12 @@ " stream=True\n", ")\n", "\n", - "print(\"Streaming response: \", end=\"\")\n", + "# Consume the stream - automatically traced\n", "for chunk in stream:\n", " if chunk.choices[0].delta.content:\n", - " print(chunk.choices[0].delta.content, end=\"\")\nprint(\"\\n✓ All requests automatically traced to Openlayer!\")" + " pass # Process chunks as needed\n", + "\n", + "# ✓ All requests automatically traced to Openlayer!" ] }, { @@ -194,4 +195,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} From 96986a412e9c0a30dfe6be014c450050b235593e Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Fri, 17 Oct 2025 16:02:17 -0300 Subject: [PATCH 3/7] fix: update OpenAI responses API tracing example to accumulate response content --- examples/tracing/openai/openai_responses_api_tracing.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/tracing/openai/openai_responses_api_tracing.ipynb b/examples/tracing/openai/openai_responses_api_tracing.ipynb index 2cd8a9a4..126af983 100644 --- a/examples/tracing/openai/openai_responses_api_tracing.ipynb +++ b/examples/tracing/openai/openai_responses_api_tracing.ipynb @@ -146,13 +146,13 @@ " messages=[{\"role\": \"user\", \"content\": \"Count from 1 to 3\"}],\n", " stream=True\n", ")\n", + "response = ''\n", "\n", "# Consume the stream - automatically traced\n", "for chunk in stream:\n", " if chunk.choices[0].delta.content:\n", - " pass # Process chunks as needed\n", - "\n", - "# ✓ All requests automatically traced to Openlayer!" + " response += chunk.choices[0].delta.content\n", + "response" ] }, { From d3bb18e75d5c955b99c8492f109133f200ae83a6 Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Fri, 17 Oct 2025 16:03:36 -0300 Subject: [PATCH 4/7] refactor: streamline OpenAI responses API tracing example in notebook --- .../openai/openai_responses_api_tracing.ipynb | 59 ++----------------- 1 file changed, 4 insertions(+), 55 deletions(-) diff --git a/examples/tracing/openai/openai_responses_api_tracing.ipynb b/examples/tracing/openai/openai_responses_api_tracing.ipynb index 126af983..2761076b 100644 --- a/examples/tracing/openai/openai_responses_api_tracing.ipynb +++ b/examples/tracing/openai/openai_responses_api_tracing.ipynb @@ -112,66 +112,15 @@ "outputs": [], "source": [ "# Responses API - new unified interface with enhanced metadata\n", - "if hasattr(client, 'responses'):\n", - " response = client.responses.create(\n", - " model=\"gpt-4o-mini\",\n", - " input=\"What is 3 + 3?\",\n", - " max_output_tokens=50\n", - " )\n", - " \n", - " # Response is automatically traced\n", - " response\n", - "else:\n", - " \"Responses API not available in this OpenAI version\"" - ] - }, - { - "cell_type": "markdown", - "id": "streaming-example", - "metadata": {}, - "source": [ - "## 5. Streaming example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "streaming-code", - "metadata": {}, - "outputs": [], - "source": [ - "# Streaming works with both APIs\n", - "stream = client.chat.completions.create(\n", + "response = client.responses.create(\n", " model=\"gpt-4o-mini\",\n", - " messages=[{\"role\": \"user\", \"content\": \"Count from 1 to 3\"}],\n", - " stream=True\n", + " input=\"What is 3 + 3?\",\n", + " max_output_tokens=50\n", ")\n", - "response = ''\n", "\n", - "# Consume the stream - automatically traced\n", - "for chunk in stream:\n", - " if chunk.choices[0].delta.content:\n", - " response += chunk.choices[0].delta.content\n", + "# Response is automatically traced\n", "response" ] - }, - { - "cell_type": "markdown", - "id": "conclusion", - "metadata": {}, - "source": [ - "## Summary\n", - "\n", - "That's it! The same `trace_openai()` function now supports:\n", - "\n", - "- ✅ **Chat Completions API** - Full backward compatibility\n", - "- ✅ **Responses API** - New unified interface with enhanced metadata\n", - "- ✅ **Streaming** - Both APIs support streaming\n", - "- ✅ **Function calling** - Tool calls work with both APIs\n", - "- ✅ **Async support** - Use `trace_async_openai()` for async clients\n", - "\n", - "All traces are automatically sent to Openlayer with proper API type differentiation!" - ] } ], "metadata": { From 4857ace7ad1dbcdbcaac471ff4a9eb7acf144b8a Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 4 Nov 2025 21:15:31 -0300 Subject: [PATCH 5/7] refactor: enhance input extraction for OpenAI responses API to support messages array format --- .../lib/integrations/openai_tracer.py | 51 +++++++++++-------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/src/openlayer/lib/integrations/openai_tracer.py b/src/openlayer/lib/integrations/openai_tracer.py index 967c70f2..e0ad0774 100644 --- a/src/openlayer/lib/integrations/openai_tracer.py +++ b/src/openlayer/lib/integrations/openai_tracer.py @@ -472,8 +472,7 @@ def stream_responses_chunks( raw_output=raw_outputs, id=inference_id, metadata={ - "timeToFirstToken": ((first_token_time - start_time) * 1000 if first_token_time else None), - "api_type": "responses", + "timeToFirstToken": ((first_token_time - start_time) * 1000 if first_token_time else None) }, ) add_to_trace( @@ -534,7 +533,6 @@ def handle_responses_non_streaming_create( model_parameters=get_responses_model_parameters(kwargs), raw_output=response.model_dump() if hasattr(response, "model_dump") else str(response), id=inference_id, - metadata={"api_type": "responses"}, ) add_to_trace( @@ -604,33 +602,46 @@ def extract_responses_chunk_data(chunk: Any) -> Dict[str, Any]: def extract_responses_inputs(kwargs: Dict[str, Any]) -> Dict[str, Any]: """Extract inputs from Responses API parameters. + Formats the input as a messages array similar to Chat Completions API format: + {"prompt": [{"role": "user", "content": "..."}]} + Args: kwargs: The parameters passed to the Responses API Returns: - Dictionary with prompt/input data + Dictionary with prompt as a messages array """ - inputs = {} + messages = [] # Handle different input formats for Responses API - if "input" in kwargs: - inputs["prompt"] = kwargs["input"] - elif "conversation" in kwargs: - inputs["prompt"] = kwargs["conversation"] - elif "instructions" in kwargs: - inputs["prompt"] = kwargs["instructions"] - elif "prompt" in kwargs: - inputs["prompt"] = kwargs["prompt"] + if "conversation" in kwargs: + # Conversation is already in messages format + conversation = kwargs["conversation"] + if isinstance(conversation, list): + messages = conversation + else: + # Single message, wrap it + messages = [{"role": "user", "content": str(conversation)}] else: - # Fallback: try to construct from available parameters - prompt_parts = [] + # Build messages array from available parameters if "instructions" in kwargs: - prompt_parts.append(f"Instructions: {kwargs['instructions']}") + messages.append({"role": "system", "content": kwargs["instructions"]}) + if "input" in kwargs: - prompt_parts.append(f"Input: {kwargs['input']}") - inputs["prompt"] = " | ".join(prompt_parts) if prompt_parts else "No input provided" + messages.append({"role": "user", "content": kwargs["input"]}) + elif "prompt" in kwargs: + messages.append({"role": "user", "content": kwargs["prompt"]}) + + # If no user message was added, create a fallback + if not any(msg.get("role") == "user" for msg in messages): + if messages: + # Only system message, add empty user message + messages.append({"role": "user", "content": ""}) + else: + # No messages at all, add placeholder + messages.append({"role": "user", "content": "No input provided"}) - return inputs + return {"prompt": messages} def parse_responses_output_data(response: Any) -> Union[str, Dict[str, Any], None]: @@ -815,7 +826,7 @@ def trace_openai_assistant_thread_run(client: "openai.OpenAI", run: "openai.type # pylint: disable=broad-except except Exception as e: - print(f"Failed to monitor run. {e}") # noqa: T201 + logger.error("Failed to monitor run. %s", e) def _type_check_run(run: "openai.types.beta.threads.run.Run") -> None: From b79dd9bdce00fac63dd6c2b5add2ef07d3f76887 Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 4 Nov 2025 21:16:43 -0300 Subject: [PATCH 6/7] chore: remove OpenAI responses API tracing notebook and merge its content with existing OpenAI tracing example --- .../openai/openai_responses_api_tracing.ipynb | 147 ------------------ examples/tracing/openai/openai_tracing.ipynb | 59 ++++++- 2 files changed, 55 insertions(+), 151 deletions(-) delete mode 100644 examples/tracing/openai/openai_responses_api_tracing.ipynb diff --git a/examples/tracing/openai/openai_responses_api_tracing.ipynb b/examples/tracing/openai/openai_responses_api_tracing.ipynb deleted file mode 100644 index 2761076b..00000000 --- a/examples/tracing/openai/openai_responses_api_tracing.ipynb +++ /dev/null @@ -1,147 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "2722b419", - "metadata": {}, - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openlayer-ai/openlayer-python/blob/main/examples/tracing/openai/openai_responses_api_tracing.ipynb)\n", - "\n", - "\n", - "# OpenAI Responses API monitoring with Openlayer\n", - "\n", - "This notebook shows how to monitor both OpenAI's Chat Completions API and the new Responses API with Openlayer. The same `trace_openai()` function supports both APIs seamlessly." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "020c8f6a", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install openlayer openai" - ] - }, - { - "cell_type": "markdown", - "id": "75c2a473", - "metadata": {}, - "source": [ - "## 1. Set the environment variables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f3f4fa13", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import openai\n", - "\n", - "# OpenAI API key\n", - "os.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_API_KEY_HERE\"\n", - "\n", - "# Openlayer configuration\n", - "os.environ[\"OPENLAYER_API_KEY\"] = \"YOUR_OPENLAYER_API_KEY_HERE\"\n", - "os.environ[\"OPENLAYER_INFERENCE_PIPELINE_ID\"] = \"YOUR_OPENLAYER_INFERENCE_PIPELINE_ID_HERE\"" - ] - }, - { - "cell_type": "markdown", - "id": "9758533f", - "metadata": {}, - "source": [ - "## 2. Create traced OpenAI client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c35d9860-dc41-4f7c-8d69-cc2ac7e5e485", - "metadata": {}, - "outputs": [], - "source": [ - "from openlayer.lib import trace_openai\n", - "\n", - "# Single function traces both Chat Completions AND Responses APIs\n", - "client = trace_openai(openai.OpenAI())" - ] - }, - { - "cell_type": "markdown", - "id": "72a6b954", - "metadata": {}, - "source": [ - "## 3. Use Chat Completions API (existing functionality)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e00c1c79", - "metadata": {}, - "outputs": [], - "source": [ - "# Chat Completions API - works exactly as before\n", - "response = client.chat.completions.create(\n", - " model=\"gpt-4o-mini\",\n", - " messages=[{\"role\": \"user\", \"content\": \"What is 2 + 2?\"}],\n", - " max_tokens=50\n", - ")\n", - "\n", - "response.choices[0].message.content" - ] - }, - { - "cell_type": "markdown", - "id": "76a350b4", - "metadata": {}, - "source": [ - "## 4. Use Responses API (new unified interface)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "responses-api-example", - "metadata": {}, - "outputs": [], - "source": [ - "# Responses API - new unified interface with enhanced metadata\n", - "response = client.responses.create(\n", - " model=\"gpt-4o-mini\",\n", - " input=\"What is 3 + 3?\",\n", - " max_output_tokens=50\n", - ")\n", - "\n", - "# Response is automatically traced\n", - "response" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.18" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/tracing/openai/openai_tracing.ipynb b/examples/tracing/openai/openai_tracing.ipynb index a79bae1f..8b2d6e51 100644 --- a/examples/tracing/openai/openai_tracing.ipynb +++ b/examples/tracing/openai/openai_tracing.ipynb @@ -39,9 +39,10 @@ "outputs": [], "source": [ "import os\n", - "\n", "import openai\n", "\n", + "from pydantic import BaseModel\n", + "\n", "# OpenAI env variables\n", "os.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_API_KEY_HERE\"\n", "\n", @@ -86,6 +87,14 @@ "That's it! Now you can continue using the traced OpenAI client normally. The data is automatically published to Openlayer and you can start creating tests around it!" ] }, + { + "cell_type": "markdown", + "id": "fb5ebdad", + "metadata": {}, + "source": [ + "### 3.1 Chat Completions API" + ] + }, { "cell_type": "code", "execution_count": null, @@ -104,12 +113,54 @@ "id": "abaf6987-c257-4f0d-96e7-3739b24c7206", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "class Person(BaseModel):\n", + " name: str\n", + " age: int\n", + " occupation: str\n", + "\n", + "# Parse method automatically returns structured Pydantic object\n", + "completion = openai_client.chat.completions.parse(\n", + " model=\"gpt-4o\",\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"Extract: John Doe is 30 years old and works as a software engineer\"}\n", + " ],\n", + " response_format=Person,\n", + ")\n", + "\n", + "completion.choices[0].message.parsed" + ] + }, + { + "cell_type": "markdown", + "id": "4e6fb396", + "metadata": {}, + "source": [ + "### 3.2 Responses API" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21369c42", + "metadata": {}, + "outputs": [], + "source": [ + "# Responses API - new unified interface with enhanced metadata\n", + "response = openai_client.responses.create(\n", + " model=\"gpt-4o-mini\",\n", + " input=\"What is 3 + 3?\",\n", + " max_output_tokens=50\n", + ")\n", + "\n", + "# Response is automatically traced\n", + "response" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -123,7 +174,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.12.7" } }, "nbformat": 4, From 22c83a55b97c37dab67fc589cae254b4b9417629 Mon Sep 17 00:00:00 2001 From: Vinicius Mello Date: Tue, 4 Nov 2025 21:17:53 -0300 Subject: [PATCH 7/7] fix: correct import order in OpenAI tracing notebook --- examples/tracing/openai/openai_tracing.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tracing/openai/openai_tracing.ipynb b/examples/tracing/openai/openai_tracing.ipynb index 8b2d6e51..fc5cad02 100644 --- a/examples/tracing/openai/openai_tracing.ipynb +++ b/examples/tracing/openai/openai_tracing.ipynb @@ -39,8 +39,8 @@ "outputs": [], "source": [ "import os\n", - "import openai\n", "\n", + "import openai\n", "from pydantic import BaseModel\n", "\n", "# OpenAI env variables\n",