### Setup

Make sure the model is downloaded before running the cells below:

```bash
ollama pull llama3.2:3b
```

## Initialize

In [3]:
import os
from dotenv import load_dotenv
load_dotenv()

from openai import OpenAI

# Model tag to use; this is the one fetched by `ollama pull` in the Setup step.
model = "llama3.2:3b"

# Settings passed as `client_config` to the gai OpenAI-compatible clients.
# The fixed backend fields go in first, then the two model-derived fields, so
# key order stays: type, client_type, engine, model, name.
client_config = {"type": "ttt", "client_type": "ollama", "engine": "ollama"}
client_config.update(model=model, name=model)


### Generate

In [4]:
from ollama import chat

model = "llama3.2:3b"

# Using ollama client ────────────────────────────────────────────────────────────────────────

print("\n\nNormal:\n")

response = chat(
    model=model,
    messages=[{"role":"user","content":"tell me a one sentence story"}]
)

print(f"Raw:\n{response.model_dump()}\nExtracted:\n{response.message.content}")

## Using OpenAI-Compatible client ───────────────────────────────────────────────────────────────────

print("\n\nPatched:\n")

from gai.llm.openai import OpenAI
client = OpenAI(client_config=client_config)
response = client.chat.completions.create(
    model=model, 
    messages=[{"role":"user","content":"tell me a one sentence story"}]
)

print(f"Raw:\n{response.model_dump()}\nExtracted:\n{response.extract()}")

## Using OpenAI-Compatible async client ───────────────────────────────────────────────────────────────────

print("\n\nAsync:\n")

from gai.llm.openai import AsyncOpenAI
client = AsyncOpenAI(client_config=client_config)
response = await client.chat.completions.create(
    model=model, 
    messages=[{"role":"user","content":"tell me a one sentence story"}]
)

print(f"Raw:\n{response.model_dump()}\nExtracted:\n{response.extract()}")



Normal:

Raw:
{'model': 'llama3.2:3b', 'created_at': '2025-07-30T12:42:00.697442757Z', 'done': True, 'done_reason': 'stop', 'total_duration': 25966287282, 'load_duration': 7911834327, 'prompt_eval_count': 31, 'prompt_eval_duration': 2814999273, 'eval_count': 40, 'eval_duration': 15229258281, 'message': {'role': 'assistant', 'content': "As she lay in bed, Emily finally worked up the courage to answer the mysterious letter that had been sitting on her nightstand for months, its words written in a language she couldn't understand.", 'images': None, 'tool_calls': None}}
Extracted:
As she lay in bed, Emily finally worked up the courage to answer the mysterious letter that had been sitting on her nightstand for months, its words written in a language she couldn't understand.


Patched:

Raw:
{'id': 'chatcmpl-e6785241-3bdc-4777-9004-2f3f8bfde260', 'choices': [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': "As she rummaged through her grandmother's attic, Emil

---
### Stream

In [1]:
import json
from ollama import chat

model="llama3.2:3b"

## Using ollama client ────────────────────────────────────────────────────────────────────────

print("\n\nNormal:\n")

response = chat(
    model=model, 
    messages=[{"role":"user","content":"tell me a one sentence story"}],
    stream=True
)
chunks = []
for chunk in response:
    chunks.append(chunk)
dict = [ chunk.dict() for chunk in chunks ]
print(json.dumps(dict, indent=4))

## Using OpenAI-Compatible client ───────────────────────────────────────────────────────────────────

print("\n\nPatched:\n")

from gai.llm.openai import OpenAI
client = OpenAI(client_config={
    "client_type":"ollama",
    "model": model
})

response = client.chat.completions.create(
    model=model, 
    messages=[{"role":"user","content":"tell me a one sentence story"}],
    stream=True
)
chunks = []
for chunk in response:
    chunk = chunk.extract()
    if (isinstance(chunk, str)):
        print(chunk, end="", flush=True)

# Using OpenAI-Compatible async client ───────────────────────────────────────────────────────────────────

print("\n\nAsync:\n")

from gai.llm.openai import AsyncOpenAI
client = AsyncOpenAI(client_config={
    "client_type":"ollama",
    "model": model
})

response = await client.chat.completions.create(
    model=model, 
    messages=[{"role":"user","content":"tell me a one sentence story"}],
    stream=True
)
chunks = []
async for chunk in response:
    chunk = chunk.extract()
    if (isinstance(chunk, str)):
        print(chunk, end="", flush=True)



Normal:



/tmp/ipykernel_57712/893074345.py:18: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  dict = [ chunk.dict() for chunk in chunks ]


[
    {
        "model": "llama3.2:3b",
        "created_at": "2025-06-10T08:22:56.082429691Z",
        "done": false,
        "done_reason": null,
        "total_duration": null,
        "load_duration": null,
        "prompt_eval_count": null,
        "prompt_eval_duration": null,
        "eval_count": null,
        "eval_duration": null,
        "message": {
            "role": "assistant",
            "content": "As",
            "images": null,
            "tool_calls": null
        }
    },
    {
        "model": "llama3.2:3b",
        "created_at": "2025-06-10T08:22:56.574112855Z",
        "done": false,
        "done_reason": null,
        "total_duration": null,
        "load_duration": null,
        "prompt_eval_count": null,
        "prompt_eval_duration": null,
        "eval_count": null,
        "eval_duration": null,
        "message": {
            "role": "assistant",
            "content": " she",
            "images": null,
            "tool_calls": null
        }
   

---
### Tool Call

In [1]:
# Imported up front so the `json.dumps` calls in the optional (commented-out)
# sections below work as soon as they are uncommented.
import json

from ollama import chat

model = "llama3.2:3b"

# Single user turn shared by every variant in this cell.
messages = [{"role": "user", "content": "What is the current time in Singapore?"}]

# One function tool named "google", defined once instead of being repeated
# for each client variant.
tools = [
    {
        "type": "function",
        "function": {
            "name": "google",
            "description": "The 'google' function is a powerful tool that allows the AI to gather external information from the internet using Google search. It can be invoked when the AI needs to answer a question or provide information that requires up-to-date, comprehensive, and diverse sources which are not inherently known by the AI. For instance, it can be used to find current date, current news, weather updates, latest sports scores, trending topics, specific facts, or even the current date and time. The usage of this tool should be considered when the user's query implies or explicitly requests recent or wide-ranging data, or when the AI's inherent knowledge base may not have the required or most current information. The 'search_query' parameter should be a concise and accurate representation of the information needed.",
            "parameters": {
                "type": "object",
                "properties": {
                    "search_query": {
                        "type": "string",
                        "description": "The search query to search google with. For example, to find the current date or time, use 'current date' or 'current time' respectively."
                    }
                },
                "required": ["search_query"]
            }
        }
    }
]

# ── Native ollama client (disabled; uncomment to run) ────────────────────────

# print("\n\nNormal:\n")

# response = chat(
#     model=model,
#     messages=messages,
#     tools=tools,
# )
# print(json.dumps(response.model_dump(), indent=4))

# ── OpenAI-compatible client (sync) ──────────────────────────────────────────

print("\n\nPatched:\n")

from gai.llm.openai import OpenAI
client = OpenAI(client_config={
    "client_type": "ollama",
    "model": model,
})
response = client.chat.completions.create(
    model=model,
    messages=messages,
    tools=tools,
    tool_choice="required"
)
print(response.extract())

# ── OpenAI-compatible client (async; disabled, uncomment to run) ─────────────

# print("\n\nAsync:\n")

# from gai.llm.openai import AsyncOpenAI
# client = AsyncOpenAI(client_config={
#     "client_type": "ollama",
#     "model": model,
# })
# response = await client.chat.completions.create(
#     model=model,
#     messages=messages,
#     tools=tools,
#     tool_choice="required"
# )
# print(json.dumps(response.extract(), indent=4))




Patched:



NameError: name 'json' is not defined

---

### Structured Output

In [11]:
from pydantic import BaseModel
# Target schema for the structured-output demo: the fields the model is asked
# to fill in from the passage in `data`.
# Documented with `#` comments on purpose: a class docstring or
# Field(description=...) would be emitted into Book.model_json_schema() and so
# change the schema that is sent to the model.
class Book(BaseModel):
    title: str
    summary: str
    author: str
    published_year: int
# Source passage sent verbatim as the user message in each call of this cell.
# The line breaks and leading spaces on the continuation lines are part of the
# string, so re-indenting this literal changes the prompt.
data = """Foundation is a science fiction novel by American writer
        Isaac Asimov. It is the first published in his Foundation Trilogy (later
        expanded into the Foundation series). Foundation is a cycle of five
        interrelated short stories, first published as a single book by Gnome Press
        in 1951. Collectively they tell the early story of the Foundation,
        an institute founded by psychohistorian Hari Seldon to preserve the best
        of galactic civilization after the collapse of the Galactic Empire."""       

from ollama import chat

model = "llama3.2:3b"

## Using ollama client ────────────────────────────────────────────────────────────────────────

print("\n\nNormal:\n")

response = chat(
    model=model,
    messages=[{"role":"user","content":data}],
    options={
        "temperature":0,   
    },
    format=Book.model_json_schema()    
    )
dict = response.model_dump()
import json
print(json.dumps(dict, indent=4))

## Using OpenAI-Compatible client ───────────────────────────────────────────────────────────────────

print("\n\nPatched:\n")

from gai.llm.openai import OpenAI
client = OpenAI(client_config={
    "client_type":"ollama",
    "model": model
})
response = client.beta.chat.completions.parse(
    model=model, 
    response_format=Book,
    messages=[{"role":"user","content":data}]
    )
dict = response.model_dump(exclude={"extract"})
print(json.dumps(dict, indent=4))

print("\n\nExtracted:\n")
print(response.extract())

## Using OpenAI-Compatible async client ───────────────────────────────────────────────────────────────────

print("\n\nAsync:\n")

from gai.llm.openai import AsyncOpenAI
client = AsyncOpenAI(client_config={
    "client_type":"ollama",
    "model": model
})
response = await client.beta.chat.completions.parse(
    model=model, 
    response_format=Book,
    messages=[{"role":"user","content":data}]
    )
dict = response.model_dump(exclude={"extract"})
print(json.dumps(dict, indent=4))

print("\n\nExtracted:\n")
print(response.extract())




Normal:

{
    "model": "llama3.2:3b",
    "created_at": "2025-06-10T06:42:20.912940557Z",
    "done": true,
    "done_reason": "stop",
    "total_duration": 34328004535,
    "load_duration": 22242986,
    "prompt_eval_count": 130,
    "prompt_eval_duration": 410288216,
    "eval_count": 85,
    "eval_duration": 33892671902,
    "message": {
        "role": "assistant",
        "content": "{\"title\": \"Foundation\", \"summary\": \"A science fiction novel by Isaac Asimov, published in 1951 as a single book by Gnome Press, which tells the early story of the Foundation, an institute founded by psychohistorian Hari Seldon to preserve galactic civilization after the collapse of the Galactic Empire.\", \"author\": \"Isaac Asimov\", \"published_year\": 1951}",
        "images": null,
        "tool_calls": null
    }
}


Patched:

{
    "id": "chatcmpl-659db893-3ca5-4552-b531-05d51851eb36",
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "