# OpenAI Streaming - Traced With LastMile

This notebook is one of four use cases (the one marked `[this one]` below):

1. Text - Non-streaming
2. [this one] Text - Streaming
3. Tool Calls - Non-streaming
4. Tool Calls - Streaming

In [7]:
# !pip install scipy --quiet
# !pip install tenacity --quiet
# !pip install tiktoken --quiet
# !pip install termcolor --quiet
# !pip install openai --quiet
# !pip install "tracing-auto-instrumentation[openai]"

# Credentials: put the line OPENAI_API_KEY=<your key here> in a .env file
# (for example ~/.env).
# Keys are issued at https://platform.openai.com/api-keys
import os

import dotenv
import openai

# Load the .env file into the process environment, then read the key from it.
# OPENAI_API_KEY ends up as None when the variable is not set.
dotenv.load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [8]:
import openai

from lastmile_eval.rag.debugger.api.tracing import LastMileTracer
from lastmile_eval.rag.debugger.tracing.sdk import get_lastmile_tracer
from tracing_auto_instrumentation.openai import wrap_openai

# Tracer used by this notebook. The recorded log output below shows this name
# being used to look up an evaluation project on lastmileai.dev.
tracer: LastMileTracer = get_lastmile_tracer(tracer_name="OpenAI Text Calling w. Streaming")

# Build the synchronous OpenAI client and hand it to wrap_openai together with
# the tracer -- presumably so calls made through `client` are traced.
# Async alternative for the raw client: openai.AsyncOpenAI(api_key=OPENAI_API_KEY)
client = wrap_openai(openai.OpenAI(api_key=OPENAI_API_KEY), tracer)

2024-05-30 18:37:45,454 - Overriding of current TracerProvider is not allowed


2024-05-30 18:37:45,456 - Starting new HTTPS connection (1): lastmileai.dev:443
2024-05-30 18:37:45,529 - https://lastmileai.dev:443 "GET /api/evaluation_projects/list?name=OpenAI+Text+Calling+w.+Streaming HTTP/1.1" 200 367
2024-05-30 18:37:45,531 - load_ssl_context verify=True cert=None trust_env=True http2=False
2024-05-30 18:37:45,531 - load_verify_locations cafile='/opt/homebrew/Caskroom/miniconda/base/envs/eval/lib/python3.12/site-packages/certifi/cacert.pem'


In [9]:
def run_my_existing_openai_app(user_message: str, stream: bool = True):
    """Send one user message to the chat completions API and print the reply.

    The request goes through the notebook-level ``client``.

    Args:
        user_message: Text sent as the single ``user`` message.
        stream: When true, request a streamed response and print every chunk
            as it is received; otherwise print the complete response object.

    Returns:
        The response object when ``stream`` is false, or the list of chunks
        received (in arrival order) when ``stream`` is true. Returning the
        result lets the caller inspect it instead of only seeing the printout.
    """
    completion_params = {
        "model": "gpt-3.5-turbo",
        "top_p": 1,
        "max_tokens": 3000,
        "temperature": 1,
        "stream": stream,
        "messages": [
            {
                "content": user_message,
                "role": "user",
            }
        ],
    }

    response = client.chat.completions.create(**completion_params)
    print("Chat Completion Response: ")
    if not stream:
        print(f"{response=}")
        return response

    # A streamed response can only be iterated once, so keep the chunks while
    # printing them; the exhausted stream itself would be useless to a caller.
    chunks = []
    for chunk in response:
        print(f"{chunk=}")
        chunks.append(chunk)
    return chunks

In [10]:
# Run your code as usual. `stream` is False here, so the function prints the
# whole response object in one piece; set it to True to have the response
# printed chunk by chunk instead.
stream = False
run_my_existing_openai_app("Tell me a joke about apples", stream=stream)

2024-05-30 18:37:45,547 - Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': 'Tell me a joke about apples', 'role': 'user'}], 'model': 'gpt-3.5-turbo', 'max_tokens': 3000, 'stream': False, 'temperature': 1, 'top_p': 1}}
2024-05-30 18:37:45,547 - Sending HTTP Request: POST https://api.openai.com/v1/chat/completions
2024-05-30 18:37:45,548 - connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=5.0 socket_options=None
2024-05-30 18:37:45,555 - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x32f7c9820>
2024-05-30 18:37:45,556 - start_tls.started ssl_context=<ssl.SSLContext object at 0x32f7a00d0> server_hostname='api.openai.com' timeout=5.0
2024-05-30 18:37:45,570 - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x32f1717f0>
2024-05-30 18:37:45,571 - send_request_headers.started request=<Request [b'POST']>
2024-05-30 18:37:45,572 - send

Chat Completion Response: 
response=ChatCompletion(id='chatcmpl-9UigzXwUGWGmfK14LcpVwieaCKM3T', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Why did the apple go to the doctor? Because it wasn't peeling well!", role='assistant', function_call=None, tool_calls=None))], created=1717108665, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=13, total_tokens=30))


ChatCompletion(id='chatcmpl-9UigzXwUGWGmfK14LcpVwieaCKM3T', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Why did the apple go to the doctor? Because it wasn't peeling well!", role='assistant', function_call=None, tool_calls=None))], created=1717108665, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=13, total_tokens=30))

In [11]:
# Async variant, kept disabled. It expects `client` to be built from
# openai.AsyncOpenAI rather than openai.OpenAI.
# NOTE(review): confirm that wrap_openai supports the async client before enabling.
# async def run_my_existing_openai_app_async(user_message: str, stream: bool = True):
#     completion_params = {
#         "model": "gpt-3.5-turbo",
#         "top_p": 1,
#         "max_tokens": 3000,
#         "temperature": 1,
#         "stream": stream,
#         "messages": [
#             {
#                 "content": user_message,
#                 "role": "user",
#             }
#         ],
#     }

#     # Same endpoint as the sync version; the async client's call is awaited.
#     response = await client.chat.completions.create(**completion_params)
#     print("Chat Completion Response: ")
#     if stream:
#         # An async stream has to be consumed with `async for`.
#         async for chunk in response:
#             print(f"{chunk=}")
#     else:
#         print(f"{response=}")
#     return response

In [12]:
# stream = False
# response = await run_my_existing_openai_app_async("Tell me a joke about apples", stream=stream)
# print("Response: ", response)