In [1]:
import os
from typing import List

import httpx
from openai import OpenAI, Stream
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
from openai.types.chat.chat_completion import ChatCompletion

In [2]:
# Server root: start from the OpenAI-compatible endpoint (environment override
# or the local default) and strip its "/api/openai/v1" path component.
base_url = os.getenv(
    "OPENAI_BASE_URL",
    "http://localhost:8000/api/openai/v1",
).replace("/api/openai/v1", "")

# Stop right away unless the resolved root is the local default.
assert base_url == "http://localhost:8000", f"Unexpected base_url: {base_url}"

# Placeholder bearer token used for the raw HTTP requests.
token = "sk-no-key-required"

In [3]:
# Raw HTTP client rooted at the server; every request carries the bearer token.
client = httpx.Client(base_url=base_url, headers={"Authorization": f"Bearer {token}"})

# Ask the server which engines it exposes.
response = client.get("/v1/engines")

# Check the status BEFORE decoding the body: an error response is not
# guaranteed to be JSON, and a decode failure would hide the real HTTP status.
assert response.status_code == 200, (
    f"GET /v1/engines returned {response.status_code}: {response.text}"
)

# Decode once and reuse the result for both the printout and the check.
engines = response.json()
print(engines)

first_engine_id = engines["data"][0]["id"]
assert first_engine_id == "copilot-codex", f"Unexpected engine id: {first_engine_id}"

{'data': [{'id': 'copilot-codex', 'name': 'Copilot Codex', 'description': "OpenAI's Codex model, formerly known as GitHub Copilot"}]}


In [4]:
# (Re)create the raw HTTP client: same server root, same bearer token header.
client = httpx.Client(
    base_url=base_url,
    headers={"Authorization": f"Bearer {token}"},
)

In [5]:
# Chat-completions request body: one system message followed by one user message.
chat_payload = {
    "model": "gpt-3.5-turbo",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello, how are you?"},
    ],
}
auth_headers = {"Authorization": f"Bearer {token}"}

# POST straight to the OpenAI-compatible route through the raw HTTP client.
response = client.post(
    "/api/openai/v1/chat/completions",
    headers=auth_headers,
    json=chat_payload,
)

# Only the HTTP status is verified here; the response body is not inspected.
assert response.status_code == 200

In [6]:
# OpenAI SDK client configured from the environment, with local fallbacks for
# the API key and base URL. Note: this rebinds `client`, which previously held
# the httpx client; from here on it is the SDK client.
client = OpenAI(
    base_url=os.getenv("OPENAI_BASE_URL", "http://localhost:8000/api/openai/v1"),
    api_key=os.getenv("OPENAI_API_KEY", "openai_api_key"),
    organization=os.getenv("OPENAI_ORG_ID"),
    project=os.getenv("OPENAI_PROJECT_ID"),
)

In [7]:
# Generation settings and prompt reused by the completion requests.
max_tokens: int = 128  # forwarded as the `max_tokens` request argument
temperature: float = 0.0  # forwarded as the `temperature` request argument
user_prompt: str = "What is 6 times 7 and how does it relate to the meaning of life?"

In [8]:
# Conversation to send: a system instruction plus the shared user prompt.
chat_messages = [
    {"role": "system", "content": "You're an AI assistant. Your top priority is responding to user questions with truthful answers."},
    {"role": "user", "content": user_prompt},
]

# Non-streaming request: the call returns a single ChatCompletion object.
chat_completion: ChatCompletion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=chat_messages,
    max_tokens=max_tokens,
    temperature=temperature,
)

# Show the text of the first choice.
first_choice = chat_completion.choices[0]
full_reply_content = first_choice.message.content
print(full_reply_content)

To answer your question, 6 times 7 is 35. The meaning of life is a philosophical question that has been debated and explored by philosophers and thinkers for centuries. Some people believe that the universe has a purpose or meaning, while others believe that there is no such thing as a meaningful purpose or meaning in life. However, the number 35 is often used as a symbolic representation of the idea that life has a purpose or meaning, and that there is a divine or spiritual force that guides and shapes our existence. The number 35 is also significant in the context of the


In [9]:
# Streaming request: same conversation and settings, but the reply is delivered
# as a sequence of ChatCompletionChunk events.
chat_completion_chunk_stream: Stream[ChatCompletionChunk] = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You're an AI assistant. Your top priority is responding to user questions with truthful answers."},
        {"role": "user", "content": user_prompt},
    ],
    max_tokens=max_tokens,
    temperature=temperature,
    stream=True,
    stream_options={"include_usage": True},  # request token usage in the streamed response
)

collected_chunks: List[ChatCompletionChunk] = []  # every raw event, in arrival order
collected_messages = []  # text fragments only

# Consume the stream event by event.
for chunk in chat_completion_chunk_stream:
    print(f"choices: {chunk.choices}\nusage: {chunk.usage}")
    print("****************")

    collected_chunks.append(chunk)

    # An event with an empty `choices` list has no text fragment to extract.
    if not chunk.choices:
        continue

    # Keep the fragment unless the delta carries no content.
    delta_text = chunk.choices[0].delta.content
    if delta_text is not None:
        collected_messages.append(delta_text)

# Stitch the fragments back into the complete reply.
full_reply_content = "".join(collected_messages)
print(full_reply_content)

choices: [Choice(delta=ChoiceDelta(content='To', function_call=None, role=None, tool_calls=None), finish_reason=None, index=0, logprobs=None)]
usage: None
****************
choices: [Choice(delta=ChoiceDelta(content=' answer', function_call=None, role=None, tool_calls=None), finish_reason=None, index=0, logprobs=None)]
usage: None
****************
choices: [Choice(delta=ChoiceDelta(content=' your', function_call=None, role=None, tool_calls=None), finish_reason=None, index=0, logprobs=None)]
usage: None
****************
choices: [Choice(delta=ChoiceDelta(content=' question', function_call=None, role=None, tool_calls=None), finish_reason=None, index=0, logprobs=None)]
usage: None
****************
choices: [Choice(delta=ChoiceDelta(content=',', function_call=None, role=None, tool_calls=None), finish_reason=None, index=0, logprobs=None)]
usage: None
****************
choices: [Choice(delta=ChoiceDelta(content=' ', function_call=None, role=None, tool_calls=None), finish_reason=None, index=0, 