In [None]:
import uuid

import openai
from kamiwaza_sdk import KamiwazaClient as kz
from pydantic import BaseModel

In [None]:
# Base URL of the Kamiwaza API this notebook talks to (a local instance here).
KAMIWAZA_API_URL = "http://localhost:7777/api/"

client = kz(KAMIWAZA_API_URL)

# Left as the cell's last expression so the active deployments are shown as output.
client.serving.list_active_deployments()

In [None]:
# Name handed to `get_client`; presumably it must match one of the active
# deployments listed above — TODO confirm against the SDK.
DEPLOYED_MODEL_NAME = 'Qwen3-8B-GGUF'

# Client object used below through its `chat.completions.create(...)` API.
openai_client = client.openai.get_client(DEPLOYED_MODEL_NAME)

## Define the runtime tool & its JSON schema
We expose a dummy weather function so the demo is entirely self-contained.


In [None]:
from typing import Dict, Any
import json, time
from pydantic import BaseModel, Field

# Argument model for the `get_current_weather` tool. Its JSON schema is what
# the tool definition below advertises to the chat model, so the field
# descriptions double as instructions to the model.
# NOTE: documented with `#` comments on purpose — Pydantic copies a class
# docstring into the generated schema's "description", which would change
# the payload sent to the model.
class WeatherParams(BaseModel):
    # `...` as the default marks each field as required.
    city: str = Field(..., description="City name")
    state: str = Field(..., description="US state abbreviation, e.g. CA")
    # Plain `str`: the allowed values are only stated in the description,
    # nothing here enforces them.
    unit: str = Field(..., description="Either 'celsius' or 'fahrenheit'")

def get_current_weather(city: str, state: str, unit: str) -> Dict[str, Any]:
    """Pretend weather service so we don't need a real API call.

    Args:
        city: City name; echoed back unchanged.
        state: US state abbreviation; echoed back unchanged.
        unit: ``"celsius"`` or ``"fahrenheit"``. Matching ignores case and
            surrounding whitespace, because models frequently emit
            ``"Fahrenheit"`` or ``"Celsius"``.

    Returns:
        A dict with ``temperature`` (int), the normalized lowercase ``unit``,
        ``city``, ``state`` and a fixed ``description``.

    Raises:
        ValueError: If ``unit`` is neither Celsius nor Fahrenheit. Previously
            any unrecognized value was converted to Celsius but reported under
            the caller's label, which produced wrong readings.
    """
    normalized_unit = unit.strip().lower()
    if normalized_unit not in ("celsius", "fahrenheit"):
        raise ValueError(f"unit must be 'celsius' or 'fahrenheit', got {unit!r}")

    fake_temp_f = 100  # you could randomise this or call a real API
    if normalized_unit == "fahrenheit":
        temp = fake_temp_f
    else:
        temp = round((fake_temp_f - 32) * 5 / 9)

    time.sleep(0.3)  # small pause so the streaming looks lively
    return {
        "temperature": temp,
        "unit": normalized_unit,
        "city": city,
        "state": state,
        "description": "clear skies with a light breeze",
    }

# Tool definition in the OpenAI "function" tool format. The parameter schema is
# generated from the Pydantic model (Pydantic → JSON Schema) rather than written
# by hand. Pydantic v2 deprecates `.schema()` in favour of `.model_json_schema()`,
# so prefer the new name when it exists and fall back for Pydantic v1.
weather_tool_schema = {
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given US city",
        "parameters": (
            WeatherParams.model_json_schema()
            if hasattr(WeatherParams, "model_json_schema")
            else WeatherParams.schema()
        ),
    },
}

# List passed as `tools=` to the chat completion call.
tools = [weather_tool_schema]

# Maps the tool name the model emits to the Python callable that implements it.
local_tool_registry = {"get_current_weather": get_current_weather}
print("✅ Tool registered — ready for the chat call")


## Kick off a streaming chat with tool support
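The model decides on its own whether to call the weather tool (`tool_choice="auto"`). Any reasoning tokens it streams are printed in cyan, followed by the tool call and the final answer.
The system prompt asks for a complete, conversational final answer once the tool output is available.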


In [None]:
# Conversation seed: one system instruction plus the user's question, in the
# chat-message format (`role` / `content`) expected by the completions API.
messages = [
    {"role": speaker, "content": text}
    for speaker, text in (
        (
            "system",
            "You are a helpful assistant. When you receive tool outputs, "
            "always provide a complete and helpful final answer to the user's question. "
            "Be conversational and provide practical advice.",
        ),
        (
            "user",
            "should i wear short sleeves or long sleeves in nyc today?",
        ),
    )
]

print("\n📡 **Streaming** — watch the tokens, tool call, and final answer arrive\n")


def _stream_and_print(stream):
    """Print a streamed chat completion as it arrives and collect the result.

    Reasoning tokens (when the delta carries a ``reasoning_content`` attribute)
    are printed in cyan; regular content is printed as-is.

    Args:
        stream: Iterable of streaming chunks, each exposing ``choices`` whose
            first element has ``delta`` and ``finish_reason``.

    Returns:
        Tuple ``(content, tool_calls, finish_reason)``: the concatenated
        content text, the fully assembled tool calls as message-ready dicts
        ordered by their stream index, and the last non-empty finish reason
        seen (``None`` if the stream never reported one).
    """
    content_parts = []
    calls_by_index = {}
    finish_reason = None

    for chunk in stream:
        if not chunk.choices:
            continue

        choice = chunk.choices[0]
        if choice.finish_reason:
            finish_reason = choice.finish_reason
        delta = choice.delta

        # Handle reasoning content (if your model supports it)
        reasoning = getattr(delta, "reasoning_content", None)
        if reasoning:
            print("\033[36m" + reasoning + "\033[0m", end="", flush=True)

        # Handle regular content
        if delta.content:
            content_parts.append(delta.content)
            print(delta.content, end="", flush=True)

        # Tool calls arrive as fragments. Each fragment carries the index of
        # the call it belongs to, so fragments are merged per index; this keeps
        # parallel tool calls from being concatenated into one arguments string.
        for tc_delta in delta.tool_calls or []:
            index = tc_delta.index if tc_delta.index is not None else 0
            call = calls_by_index.setdefault(
                index,
                {
                    "id": None,
                    "type": "function",
                    "function": {"name": "", "arguments": ""},
                },
            )
            if tc_delta.id:
                call["id"] = tc_delta.id
            fn_delta = tc_delta.function
            if fn_delta is None:
                continue
            if fn_delta.name:
                call["function"]["name"] = fn_delta.name
            if fn_delta.arguments:
                call["function"]["arguments"] += fn_delta.arguments

    assembled_calls = [calls_by_index[i] for i in sorted(calls_by_index)]
    for call in assembled_calls:
        # Fall back to a generated id when the stream did not supply one, so
        # the tool response can still reference its call.
        if not call["id"]:
            call["id"] = f"call_{uuid.uuid4().hex[:8]}"

    return "".join(content_parts), assembled_calls, finish_reason


# First API call with tools
response_stream = openai_client.chat.completions.create(
    model="model",
    messages=messages,
    tools=tools,
    tool_choice="auto",  # Let the model decide when to use tools
    stream=True,
)

accumulated_content, tool_calls, finish_reason = _stream_and_print(response_stream)

# Execute tool calls if any
if finish_reason == "tool_calls" and tool_calls:
    # Add the assistant's tool call message (must precede the tool responses)
    messages.append({
        "role": "assistant",
        "content": accumulated_content if accumulated_content else None,
        "tool_calls": tool_calls,
    })

    for tool_call in tool_calls:
        tool_name = tool_call["function"]["name"]
        print(f"\n\n🔧 Model requested tool: {tool_name}")

        # Parse arguments and execute. Failures are reported back to the model
        # as the tool's result instead of aborting the cell mid-conversation.
        try:
            # An empty arguments string means "no arguments".
            args = json.loads(tool_call["function"]["arguments"] or "{}")
            print(f"   with args: {args}")
            if tool_name not in local_tool_registry:
                raise ValueError(f"unknown tool {tool_name!r}")
            tool_result = local_tool_registry[tool_name](**args)
        except (TypeError, ValueError) as exc:  # JSONDecodeError is a ValueError
            tool_result = {"error": f"{type(exc).__name__}: {exc}"}
        print(f"🔧 Tool response: {tool_result}")

        # Add the tool response
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call["id"],
            "name": tool_name,
            "content": json.dumps(tool_result),
        })

    # Make a follow-up call to get the final answer
    print("\n\n💭 Getting final answer...\n")

    follow_stream = openai_client.chat.completions.create(
        model="model",
        messages=messages,
        stream=True,
        # No tools this time - we want a final answer
    )

    # Stream the final response
    _stream_and_print(follow_stream)

    print("\n\n🎉 **Done!**")

# If no tool was called, we're done
elif finish_reason == "stop":
    print("\n\n🎉 **Done!** (No tool calls needed)")

# Anything else (e.g. truncated output, or no chunks at all) is surfaced
# rather than silently ignored.
else:
    print(f"\n\n⚠️ Stream ended with finish_reason={finish_reason!r}; no tool was executed")