### Generate

In [1]:
from gai.llm.client import ChatClient
from openai import OpenAI

## Using Gai client ───────────────────────────────────────────────────────────────────
# Native Gai ChatClient: one non-streaming completion request.

print("\n\nNormal:\n")

gai_config = {
    "client_type": "gai",
    "url": "http://gai-llm-svr:12031/gen/v1/chat/completions",
}
user_prompt = {"role": "user", "content": "tell me a one sentence story"}

client = ChatClient(gai_config)
# stream=False is passed explicitly; the result is then read as a single response object.
response = client.chat(model="ttt", messages=[user_prompt], stream=False)

# Print the full dumped payload, then only the assistant text of the first choice.
print(f"Raw:\n{response.model_dump()}\nExtracted:\n{response.choices[0].message.content}")

## Using OpenAI-compatible client ───────────────────────────────────────────────────────────────────
# Same request as the Gai client section, sent through the OpenAI-style
# `chat.completions.create` entry point of the Gai wrapper.

print("\n\nPatched:\n")

# Rebinds `OpenAI` to the Gai wrapper class for the rest of the cell.
from gai.llm.openai import OpenAI

gai_config = {
    "client_type": "gai",
    "url": "http://gai-llm-svr:12031/gen/v1/chat/completions",
}
user_prompt = {"role": "user", "content": "tell me a one sentence story"}

client = OpenAI(client_config=gai_config)
response = client.chat.completions.create(model="ttt", messages=[user_prompt])

# Print the full dumped payload, then whatever the response's extract() helper returns.
print(f"Raw:\n{response.model_dump()}\nExtracted:\n{response.extract()}")

## Using OpenAI-compatible async client ───────────────────────────────────────────────────────────────────

print("\n\nAsync:\n")
from gai.llm.openai import AsyncOpenAI
client = AsyncOpenAI(client_config={
    "client_type":"gai",
    "url": "http://gai-llm-svr:12031/gen/v1/chat/completions"
})
response = await client.chat.completions.create(
    model="ttt", 
    messages=[{"role":"user","content":"tell me a one sentence story"}]
)

print(f"Raw:\n{response.model_dump()}\nExtracted:\n{response.extract()}")




Normal:

Raw:
{'id': 'chatcmpl-4fd5a279-6764-4b3c-a8bb-a7957efa8c4a', 'choices': [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'After years of searching, Sarah finally found her long-lost friend in a small café in Paris, sitting across from a stranger with whom she instantly felt a deep connection.', 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': None}}], 'created': 1749533405, 'model': 'exllamav2-mistral7b', 'object': 'chat.completion', 'service_tier': None, 'system_fingerprint': None, 'usage': {'completion_tokens': 34, 'prompt_tokens': 15, 'total_tokens': 49, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'extract': <function attach_extractor.<locals>.<lambda> at 0x70ba2a9a04c0>}
Extracted:
After years of searching, Sarah finally found her long-lost friend in a small café in Paris, sitting across from a stranger with whom she instantly felt a deep connection.


Patc

---
### Stream

In [None]:
import json
from gai.llm.client import ChatClient
from openai import OpenAI

## Using Gai client ───────────────────────────────────────────────────────────────────
# Native Gai ChatClient in streaming mode: collect the text delta of every chunk.

print("\n\nNormal:\n")

chat_client = ChatClient({
    "client_type":"gai",
    "url": "http://gai-llm-svr:12031/gen/v1/chat/completions"
})
# Request streaming explicitly instead of relying on the client's default, so this
# section states its intent the same way the OpenAI-compatible sections do.
response = chat_client.chat(
    model="ttt",
    messages=[{"role":"user","content":"tell me a one sentence story"}],
    stream=True
)
chunks = []
for chunk in response:
    # NOTE(review): clears the `extract` attribute on the chunk; it is not read in
    # this loop, so the reason is unclear — confirm whether it is still needed.
    chunk.extract=None
    # Delta content of the first choice; per the captured output this can be '' or None.
    chunks.append(chunk.choices[0].delta.content)
print(chunks)

## Using OpenAI-compatible client ───────────────────────────────────────────────────────────────────
# OpenAI-style streaming call: print each text fragment as soon as it arrives.

print("\n\nPatched:\n")

from gai.llm.openai import OpenAI
chat_client = OpenAI(client_config={
    "client_type":"gai",
    "url": "http://gai-llm-svr:12031/gen/v1/chat/completions"
})

response = chat_client.chat.completions.create(
    model="ttt", 
    messages=[{"role":"user","content":"tell me a one sentence story"}],
    stream=True
)
for chunk in response:
    # Keep the extracted value under its own name rather than rebinding the loop variable.
    text = chunk.extract()
    # Only string results are printed; the guard suggests extract() can also return
    # non-string values for some chunks — unconfirmed from this notebook.
    if isinstance(text, str):
        print(text, end="", flush=True)

## Using OpenAI-compatible async client ───────────────────────────────────────────────────────────────────

print("\n\nAsync:\n")

from gai.llm.openai import AsyncOpenAI
chat_client = AsyncOpenAI(client_config={
    "client_type":"gai",
    "url": "http://gai-llm-svr:12031/gen/v1/chat/completions"
})

response = await chat_client.chat.completions.create(
    model="ttt", 
    messages=[{"role":"user","content":"tell me a one sentence story"}],
    stream=True
)
chunks = []
async for chunk in response:
    chunk = chunk.extract()
    if (isinstance(chunk, str)):
        print(chunk, end="", flush=True)



Normal:

['', 'A', ' solitary', ' traveler', ' stumbled', ' upon', ' an', ' ancient', ',', ' hidden', ' village', ' nestled', ' deep', ' within', ' a', ' mystical', ' forest', ',', ' where', ' time', ' seemed', ' to', ' stand', ' still', ' and', ' the', ' secret', ' to', ' eternal', ' youth', ' was', ' whispered', ' among', ' the', ' trees', '.', None]


Patched:

Once upon a time, in a land far away, there was a magical forest where animals could talk and the trees whispered secrets to those who listened.

Async:

Once upon a time, in a world far away, there was a little girl who found a magical flower that granted her one wish, and she wished for the most extraordinary adventure of her life.

---
### Tool Call

In [None]:
import json
from gai.llm.client import ChatClient    
from openai import OpenAI

## Using Gai client ───────────────────────────────────────────────────────────────────

print("\n\nNormal:\n")
chat_client = ChatClient({
    "client_type": "gai",
    "url": "http://gai-llm-svr:12031/gen/v1/chat/completions"
})
response = chat_client.chat(
    model="ttt",
    messages=[{"role": "user", "content": "What is the current time in Singapore?"}],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "google",
                "description": "The 'google' function is a powerful tool that allows the AI to gather external information from the internet using Google search. It can be invoked when the AI needs to answer a question or provide information that requires up-to-date, comprehensive, and diverse sources which are not inherently known by the AI. For instance, it can be used to find current date, current news, weather updates, latest sports scores, trending topics, specific facts, or even the current date and time. The usage of this tool should be considered when the user's query implies or explicitly requests recent or wide-ranging data, or when the AI's inherent knowledge base may not have the required or most current information. The 'search_query' parameter should be a concise and accurate representation of the information needed.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "search_query": {
                            "type": "string",
                            "description": "The search query to search google with. For example, to find the current date or time, use 'current date' or 'current time' respectively."
                        }
                    },
                    "required": ["search_query"]
                }
            }
        }
    ],
    tool_choice="required",
    stream=False    
)
response.extract=None
print(json.dumps(response.dict(), indent=4))

## Using OpenAI-compatible client ───────────────────────────────────────────────────────────────────

print("\n\nPatched:\n")

from gai.llm.openai import OpenAI
chat_client = OpenAI(client_config={
    "client_type": "gai",
    "url": "http://gai-llm-svr:12031/gen/v1/chat/completions"
})
response = chat_client.chat.completions.create(
    model="ttt",
    messages=[{"role": "user", "content": "What is the current time in Singapore?"}],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "google",
                "description": "The 'google' function is a powerful tool that allows the AI to gather external information from the internet using Google search. It can be invoked when the AI needs to answer a question or provide information that requires up-to-date, comprehensive, and diverse sources which are not inherently known by the AI. For instance, it can be used to find current date, current news, weather updates, latest sports scores, trending topics, specific facts, or even the current date and time. The usage of this tool should be considered when the user's query implies or explicitly requests recent or wide-ranging data, or when the AI's inherent knowledge base may not have the required or most current information. The 'search_query' parameter should be a concise and accurate representation of the information needed.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "search_query": {
                            "type": "string",
                            "description": "The search query to search google with. For example, to find the current date or time, use 'current date' or 'current time' respectively."
                        }
                    },
                    "required": ["search_query"]
                }
            }
        }
    ],
    tool_choice="required"
)
print(json.dumps(response.extract(), indent=4))

## Using OpenAI-compatible async client ───────────────────────────────────────────────────────────────────

print("\n\nAsync:\n")

from gai.llm.openai import AsyncOpenAI
chat_client = AsyncOpenAI(client_config={
    "client_type": "gai",
    "url": "http://gai-llm-svr:12031/gen/v1/chat/completions"
})
response = await chat_client.chat.completions.create(
    model="ttt",
    messages=[{"role": "user", "content": "What is the current time in Singapore?"}],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "google",
                "description": "The 'google' function is a powerful tool that allows the AI to gather external information from the internet using Google search. It can be invoked when the AI needs to answer a question or provide information that requires up-to-date, comprehensive, and diverse sources which are not inherently known by the AI. For instance, it can be used to find current date, current news, weather updates, latest sports scores, trending topics, specific facts, or even the current date and time. The usage of this tool should be considered when the user's query implies or explicitly requests recent or wide-ranging data, or when the AI's inherent knowledge base may not have the required or most current information. The 'search_query' parameter should be a concise and accurate representation of the information needed.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "search_query": {
                            "type": "string",
                            "description": "The search query to search google with. For example, to find the current date or time, use 'current date' or 'current time' respectively."
                        }
                    },
                    "required": ["search_query"]
                }
            }
        }
    ],
    tool_choice="required"
)
print(json.dumps(response.extract(), indent=4))




Normal:



/tmp/ipykernel_27676/1437050604.py:37: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  print(json.dumps(response.dict(), indent=4))


{
    "id": "chatcmpl-2cb84915-b5df-435c-8cbc-2b666fb1bf68",
    "choices": [
        {
            "finish_reason": "tool_calls",
            "index": 0,
            "logprobs": null,
            "message": {
                "content": null,
                "refusal": null,
                "role": "assistant",
                "annotations": null,
                "audio": null,
                "function_call": null,
                "tool_calls": [
                    {
                        "id": "call_74b22762-65dd-43db-aaab-91d09e8f6643",
                        "function": {
                            "arguments": "{\"search_query\": \"current time in Singapore\"}",
                            "name": "google"
                        },
                        "type": "function"
                    }
                ]
            }
        }
    ],
    "created": 1749533616,
    "model": "exllamav2-mistral7b",
    "object": "chat.completion",
    "service_tier": null,
    "system

---

### Structured Output

In [None]:
import json
from pydantic import BaseModel
# Target schema for the structured-output demo: it is passed as a JSON schema to the
# Gai client and as `response_format` to the OpenAI-compatible clients further down.
# Documented with comments only, so the class definition itself stays unchanged.
class Book(BaseModel):
    title: str  # title of the book
    summary: str  # free-text summary of the book
    author: str  # author name
    published_year: int  # publication year as an integer
# Source passage sent as the user message in every structured-output request below;
# the model is expected to fill the Book fields from this text.
data = """Foundation is a science fiction novel by American writer
        Isaac Asimov. It is the first published in his Foundation Trilogy (later
        expanded into the Foundation series). Foundation is a cycle of five
        interrelated short stories, first published as a single book by Gnome Press
        in 1951. Collectively they tell the early story of the Foundation,
        an institute founded by psychohistorian Hari Seldon to preserve the best
        of galactic civilization after the collapse of the Galactic Empire."""       

## Using Gai client ───────────────────────────────────────────────────────────────────
# Native Gai ChatClient: structured output requested by passing the Book JSON schema.

print("\n\nNormal:\n")

from gai.llm.client import ChatClient    

kwargs = {
    "model": "ttt",
    "messages":[{"role":"user","content":data}],
    "json_schema": Book.model_json_schema(),
    "stream": False,
}
chat_client = ChatClient({
    "client_type": "gai",
    "url": "http://gai-llm-svr:12031/gen/v1/chat/completions"
})
response = chat_client.chat(**kwargs)
# Replace the attached `extract` callable with None so the dump holds no function object.
response.extract=None
# Named `response_payload` rather than `dict`: rebinding the builtin `dict` would
# break any later `dict(...)` call in the same kernel session.
response_payload = response.model_dump()
# Drop the helper entry from the printed payload; tolerate it being absent.
response_payload.pop("extract", None)
print(json.dumps(response_payload, indent=4))

## Using OpenAI-compatible client ───────────────────────────────────────────────────────────────────
# OpenAI-style structured output: the Book model is passed as `response_format`
# to `beta.chat.completions.parse`.

print("\n\nPatched:\n")

from gai.llm.openai import OpenAI
client = OpenAI(client_config={
    "client_type": "gai",
    "url": "http://gai-llm-svr:12031/gen/v1/chat/completions"
})
response = client.beta.chat.completions.parse(
    model="ttt", 
    response_format=Book,
    messages=[{"role":"user","content":data}]
    )
# Named `response_payload` rather than `dict`: rebinding the builtin `dict` would
# break any later `dict(...)` call in the same kernel session.
# The `extract` helper is excluded so the payload can be JSON-serialised.
response_payload = response.model_dump(exclude={"extract"})
print(json.dumps(response_payload, indent=4))

print("\n\nExtracted:\n")
print(response.extract())

## Using OpenAI-compatible async client ───────────────────────────────────────────────────────────────────

print("\n\nAsync:\n")

from gai.llm.openai import AsyncOpenAI
client = AsyncOpenAI(client_config={
    "client_type": "gai",
    "url": "http://gai-llm-svr:12031/gen/v1/chat/completions"
})
response = await client.beta.chat.completions.parse(
    model="ttt", 
    response_format=Book,
    messages=[{"role":"user","content":data}]
    )
dict = response.model_dump(exclude={"extract"})
print(json.dumps(dict, indent=4))

print("\n\nExtracted:\n")
print(response.extract())




Normal:

{
    "id": "chatcmpl-45deb2cb-cd11-4884-89d9-8515384605b6",
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
                "content": "{\n  \"title\": \"Foundation\",\n  \"summary\": \"Foundation is a science fiction novel by American writer Isaac Asimov. It is the first published in his Foundation Trilogy (later expanded into the Foundation series). Foundation is a cycle of five interrelated short stories, first published as a single book by Gnome Press in 1951. Collectively they tell the early story of the Foundation, an institute founded by psychohistorian Hari Seldon to preserve the best of galactic civilization after the collapse of the Galactic Empire.\",\n  \"author\": \"Isaac Asimov\",\n  \"published_year\": 1951\n}",
                "refusal": null,
                "role": "assistant",
                "annotations": null,
                "audio": null,
                "f