In [5]:
import json
import os
from dotenv import load_dotenv
from openai import OpenAI
import requests

load_dotenv()

# Function to get weather data
def get_weather(location: str, unit: str = "fahrenheit") -> dict:
    """Look up the current weather for ``location`` via wttr.in's JSON (``j1``) output.

    Args:
        location: Place name to look up, e.g. ``"San Francisco"``.
        unit: ``"celsius"`` or ``"fahrenheit"``; matched case-insensitively.

    Returns:
        On success, a dict with the keys ``location``, ``temperature``,
        ``unit``, ``condition``, ``humidity``, ``wind_mph`` and ``feels_like``.
        On failure (bad input, network/HTTP error, unexpected payload), a dict
        with ``location`` and ``error`` so the caller can pass the problem on
        instead of handling an exception.
    """
    from urllib.parse import quote

    if not isinstance(location, str) or not location.strip():
        return {"location": location, "error": "location must be a non-empty string"}

    # Accept "Celsius", " FAHRENHEIT " etc., but reject anything else: falling
    # through would return a Fahrenheit reading labelled with a bogus unit.
    normalized_unit = unit.strip().lower() if isinstance(unit, str) else unit
    if normalized_unit not in ("celsius", "fahrenheit"):
        return {
            "location": location,
            "error": f"unsupported unit {unit!r}; expected 'celsius' or 'fahrenheit'",
        }
    use_celsius = normalized_unit == "celsius"

    try:
        # Escape the location so characters such as "/", "?" or "#" cannot
        # change the path or query of the request URL.
        url = f"https://wttr.in/{quote(location.strip(), safe=',')}?format=j1"
        resp = requests.get(url, timeout=100)
        resp.raise_for_status()
        current = resp.json()["current_condition"][0]

        return {
            "location": location,
            "temperature": int(current["temp_C"] if use_celsius else current["temp_F"]),
            "unit": normalized_unit,
            "condition": current["weatherDesc"][0]["value"],
            "humidity": int(current["humidity"]),
            "wind_mph": int(current["windspeedMiles"]),
            "feels_like": int(current["FeelsLikeC"] if use_celsius else current["FeelsLikeF"]),
        }
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        # Network/HTTP failures, or a payload that does not have the expected shape.
        return {"location": location, "error": str(e)}

# Tool definition for get_weather, passed to the model via the `tools` argument.
# It is assembled from one schema fragment per argument.
_LOCATION_PARAM = {
    "type": "string",
    "description": "The city name, e.g. San Francisco",
}
_UNIT_PARAM = {
    "type": "string",
    "enum": ["celsius", "fahrenheit"],
    "description": "The temperature unit to use",
}

WEATHER_TOOL = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a given location",
        "parameters": {
            "type": "object",
            "properties": {"location": _LOCATION_PARAM, "unit": _UNIT_PARAM},
            # Only the location is mandatory; the unit may be omitted.
            "required": ["location"],
        },
    },
}

# User message that main() sends as the first (and only) user turn of the
# conversation; it asks about the weather in San Francisco.
PROMPT = "What's the weather like in San Francisco?"

def _execute_tool_call(name: str, raw_arguments: str) -> dict:
    """Run one model-requested tool call and return a JSON-serializable result.

    Problems with the request (unknown tool, malformed or mismatched
    arguments) are returned as ``{"error": ...}`` rather than raised, so they
    can be sent back to the model as the tool result.
    """
    if name != WEATHER_TOOL["function"]["name"]:
        return {"error": f"Unknown tool: {name}"}

    try:
        # An empty argument string is treated as "no arguments".
        args = json.loads(raw_arguments) if raw_arguments else {}
    except json.JSONDecodeError as exc:
        return {"error": f"Invalid tool arguments: {exc}"}
    if not isinstance(args, dict):
        return {"error": "Tool arguments must be a JSON object"}

    try:
        return get_weather(**args)
    except TypeError as exc:
        # Missing or unexpected keyword arguments supplied by the model.
        return {"error": f"Invalid tool arguments: {exc}"}


# Function tool calling via Chat Completions API
def main() -> None:
    """Ask the model about the weather, run any requested tool calls, print the answer.

    The flow is: send PROMPT with WEATHER_TOOL attached; if the reply contains
    tool calls, execute each one and append its result to the conversation;
    then make a second request to obtain the final text.

    Raises:
        ValueError: If the LLAMA_STACK_URL environment variable is unset or blank.
    """
    # Treat a blank value like an unset one, and drop a trailing slash so the
    # base URL below does not end up as ".../​/v1".
    llama_stack_url = os.getenv("LLAMA_STACK_URL", "").strip().rstrip("/")
    if not llama_stack_url:
        raise ValueError("LLAMA_STACK_URL is not set in the environment variables")

    model = "vllm/openai/gpt-oss-120b"

    print(f"Using model {model} via Llama Stack at {llama_stack_url}")

    client = OpenAI(
        base_url=f"{llama_stack_url}/v1",
        api_key="none",
    )

    messages = [{"role": "user", "content": PROMPT}]

    # First call - model may request tool calls
    print(f"\nUser: {PROMPT}")
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=[WEATHER_TOOL],
        tool_choice="auto",
    )

    assistant_message = response.choices[0].message
    tool_calls = assistant_message.tool_calls or []

    print(f"\nResponse ID: {response.id}")
    print(f"Tool calls: {len(tool_calls)}")

    # Check for tool calls in the response
    if tool_calls:
        # The assistant turn that requested the tools must precede the tool results.
        messages.append(assistant_message)

        for tool_call in tool_calls:
            print("  - Type: function_call")
            print(f"    Function: {tool_call.function.name}")
            print(f"    Arguments: {tool_call.function.arguments}")
            print(f"    Call ID: {tool_call.id}")

            # Execute the function
            result = _execute_tool_call(
                tool_call.function.name,
                tool_call.function.arguments,
            )
            print(f"    Result: {result}")

            # Add tool result to messages
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result),
            })

        # Second call - get final response with tool results
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            tools=[WEATHER_TOOL],
        )
        final_response = response.choices[0].message.content
    else:
        final_response = assistant_message.content

    print("\n" + "="*60)
    print("Response from GPT-OSS-120b via Chat Completions API (01/15/2026)")
    print("="*60)
    print(final_response)


# Run the demo only when this module is the program entry point (its __name__
# is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

Using model vllm/openai/gpt-oss-120b via Llama Stack at http://localhost:8321

User: What's the weather like in San Francisco?

Response ID: chatcmpl-8f43f3ae8cf4e641
Tool calls: 1
  - Type: function_call
    Function: get_weather
    Arguments: {"location": "San Francisco", "unit": "fahrenheit"}
    Call ID: chatcmpl-tool-8b032d11005dd982
    Result: {'location': 'San Francisco', 'temperature': 52, 'unit': 'fahrenheit', 'condition': 'Mist', 'humidity': 97, 'wind_mph': 8, 'feels_like': 49}

Response from GPT-OSS-120b via Chat Completions API (01/15/2026)
In San Francisco right now it’s **52 °F** with a misty, overcast feel. The humidity is high at about 97 %, the wind is blowing around 8 mph, and it feels a little cooler—around **49 °F**.
