# OpenAI Streaming - Traced With LastMile

Four use cases:

1. Text - Non-streaming
2. [this one] Text - Streaming
3. Tool Calls - Non-streaming
4. Tool Calls - Streaming

In [1]:
# !pip install scipy --quiet
# !pip install tenacity --quiet
# !pip install tiktoken --quiet
# !pip install termcolor --quiet
# !pip install openai --quiet
# !pip install "tracing-auto-instrumentation[openai]"

# Create ~/.env file with this line: OPENAI_API_KEY=<your key here>
# You can get your key from https://platform.openai.com/api-keys 
import openai
import dotenv
import os
# Load variables from the .env file (see the note above the imports) into the
# process environment before the key is looked up.
dotenv.load_dotenv()
# Holds None when OPENAI_API_KEY is not present in the environment.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [2]:
import openai

from lastmile_eval.rag.debugger.api.tracing import LastMileTracer

from tracing_auto_instrumentation.openai import wrap_openai
from lastmile_eval.rag.debugger.tracing.sdk import get_lastmile_tracer

# Tracer handed to wrap_openai below.
tracer: LastMileTracer = get_lastmile_tracer(tracer_name="OpenAI Text Calling w. Streaming")

# Construct the synchronous OpenAI client and pass it, together with the tracer,
# to wrap_openai; the returned object is what the following cells call.
client = wrap_openai(openai.OpenAI(api_key=OPENAI_API_KEY), tracer)

[DEBUG] 2024-06-20 01:14:34,012 _config.py:80: load_ssl_context verify=True cert=None trust_env=True http2=False
[DEBUG] 2024-06-20 01:14:34,013 _config.py:146: load_verify_locations cafile='/opt/homebrew/Caskroom/miniconda/base/envs/eval/lib/python3.12/site-packages/certifi/cacert.pem'


In [3]:
def run_my_existing_openai_app(user_message: str, stream: bool = True):
    """Send one user message through the module-level ``client`` and print the reply.

    Args:
        user_message: Text placed in the single ``user`` message of the request.
        stream: Forwarded as the ``stream`` request parameter. When truthy, the
            response is iterated and every chunk is printed; otherwise the
            response object itself is printed once.

    Returns:
        None. All output goes to stdout.
    """
    user_turn = {"content": user_message, "role": "user"}

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        top_p=1,
        max_tokens=10,
        temperature=1,
        stream=stream,
        messages=[user_turn],
    )

    print("Chat Completion Response: ")
    if not stream:
        print(f"{response=}")
        return

    # Loop variable must stay named `chunk`: the f-string's `=` specifier prints the name.
    for chunk in response:
        print(f"{chunk=}")

In [4]:
# Run your code as usual.
# NOTE(review): this notebook is labelled as the "Text - Streaming" use case, yet
# the call below passes stream=False — confirm whether that is intended.
stream = False
run_my_existing_openai_app(user_message="Tell me a joke about apples", stream=stream)

[DEBUG] 2024-06-20 01:14:34,027 _base_client.py:446: Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': 'Tell me a joke about apples', 'role': 'user'}], 'model': 'gpt-3.5-turbo', 'max_tokens': 10, 'stream': False, 'temperature': 1, 'top_p': 1}}
[DEBUG] 2024-06-20 01:14:34,034 _base_client.py:949: Sending HTTP Request: POST https://api.openai.com/v1/chat/completions
[DEBUG] 2024-06-20 01:14:34,035 _trace.py:45: connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=5.0 socket_options=None
[DEBUG] 2024-06-20 01:14:34,051 _trace.py:45: connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1224c40b0>
[DEBUG] 2024-06-20 01:14:34,051 _trace.py:45: start_tls.started ssl_context=<ssl.SSLContext object at 0x16c4f2850> server_hostname='api.openai.com' timeout=5.0
[DEBUG] 2024-06-20 01:14:34,067 _trace.py:45: start_tls.complete return_value=<httpcore._backends.sync.SyncStream obje

Chat Completion Response: 
response=ChatCompletion(id='chatcmpl-9c4PyJzf8o0mVkIclRTJ1jS9cPTzR', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content='Why did the apple go to the doctor? Because', role='assistant', function_call=None, tool_calls=None))], created=1718860474, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=10, prompt_tokens=13, total_tokens=23))


## Time to test this with async calls

In [5]:
# Build the asynchronous OpenAI client and pass it, with the same tracer, to wrap_openai.
# NOTE(review): this rebinds the global `client`, which the earlier cell bound to
# the synchronous client; re-running run_my_existing_openai_app after this cell
# will therefore pick up the async client. Consider a distinct name — confirm
# nothing later depends on the rebinding first.
client = wrap_openai(openai.AsyncOpenAI(api_key=OPENAI_API_KEY), tracer)

async def run_my_existing_openai_app_async(user_message: str, stream: bool = True):
    """Await one chat completion via the module-level ``client`` and print the reply.

    Args:
        user_message: Text placed in the single ``user`` message of the request.
        stream: Forwarded as the ``stream`` request parameter. When truthy, the
            awaited response is consumed with ``async for`` and every chunk is
            printed; otherwise the response object itself is printed once.

    Returns:
        The object obtained by awaiting ``client.chat.completions.create``. In
        the streaming case it has already been iterated to the end here.
    """
    user_turn = {"content": user_message, "role": "user"}

    response = await client.chat.completions.create(
        model="gpt-3.5-turbo",
        top_p=1,
        max_tokens=3000,
        temperature=1,
        stream=stream,
        messages=[user_turn],
    )

    print("Chat Completion Response: ")
    if not stream:
        print(f"{response=}")
        return response

    # Loop variable must stay named `chunk`: the f-string's `=` specifier prints the name.
    async for chunk in response:
        print(f"{chunk=}")
    return response

[DEBUG] 2024-06-20 01:14:35,039 _config.py:80: load_ssl_context verify=True cert=None trust_env=True http2=False
[DEBUG] 2024-06-20 01:14:35,041 _config.py:146: load_verify_locations cafile='/opt/homebrew/Caskroom/miniconda/base/envs/eval/lib/python3.12/site-packages/certifi/cacert.pem'


In [None]:
stream = False
response = await run_my_existing_openai_app_async("Tell me a joke about apples", stream=stream)

[DEBUG] 2024-06-20 01:14:35,059 _base_client.py:446: Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': 'Tell me a joke about apples', 'role': 'user'}], 'model': 'gpt-3.5-turbo', 'max_tokens': 3000, 'stream': False, 'temperature': 1, 'top_p': 1}}
[DEBUG] 2024-06-20 01:14:35,063 _trace.py:45: close.started
[DEBUG] 2024-06-20 01:14:35,064 _trace.py:45: close.complete
[DEBUG] 2024-06-20 01:14:35,067 _trace.py:85: connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=5.0 socket_options=None
[DEBUG] 2024-06-20 01:14:35,081 _trace.py:85: connect_tcp.complete return_value=<httpcore._backends.anyio.AnyIOStream object at 0x117fbe120>
[DEBUG] 2024-06-20 01:14:35,082 _trace.py:85: start_tls.started ssl_context=<ssl.SSLContext object at 0x16c3612d0> server_hostname='api.openai.com' timeout=5.0
[DEBUG] 2024-06-20 01:14:35,096 _trace.py:85: start_tls.complete return_value=<httpcore._backends.anyio.AnyIOStream 

Chat Completion Response: 
response=ChatCompletion(id='chatcmpl-9c4PzsHa3sAEE9TdIvrgHy44GTq6d', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Why did the apple go to school?\n\nBecause it wanted to be a "smarty-pants!"', role='assistant', function_call=None, tool_calls=None))], created=1718860475, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=19, prompt_tokens=13, total_tokens=32))


[ERROR] 2024-06-20 01:14:35,729 __init__.py:156: Failed to detach context
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniconda/base/envs/eval/lib/python3.12/site-packages/opentelemetry/trace/__init__.py", line 570, in use_span
    yield span
  File "/opt/homebrew/Caskroom/miniconda/base/envs/eval/lib/python3.12/site-packages/opentelemetry/sdk/trace/__init__.py", line 1091, in start_as_current_span
    yield span
  File "/opt/homebrew/Caskroom/miniconda/base/envs/eval/lib/python3.12/site-packages/lastmile_eval/rag/debugger/tracing/lastmile_tracer.py", line 229, in start_as_current_span
    yield span
  File "/Users/rossdancraig/Projects/tracing_auto_instrumentation/src/tracing_auto_instrumentation/openai/async_openai_wrapper.py", line 258, in create
    yield raw_response
GeneratorExit

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniconda/base/envs/eval/lib/python3.12/site-