In [None]:
import json
import os
from dotenv import load_dotenv
from openai import OpenAI
import requests

load_dotenv()

# Function to get weather data
def get_weather(location: str, unit: str = "fahrenheit") -> dict:
    """Fetch the current weather for ``location`` from wttr.in.

    Args:
        location: Place name to look up, e.g. ``"San Francisco"``.
        unit: ``"celsius"`` or ``"fahrenheit"`` (case-insensitive).

    Returns:
        On success, a dict with the keys ``location``, ``temperature``,
        ``unit``, ``condition``, ``humidity``, ``wind_mph`` and
        ``feels_like``. On any failure, a dict with ``location`` and
        ``error``. The function never raises, so its result can always be
        serialised and handed back to the model as tool output.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    # An empty location would request the bare site root instead of a named
    # place, so reject it before touching the network.
    if not isinstance(location, str) or not location.strip():
        return {"location": location, "error": "location must be a non-empty string"}

    # Reject unknown units instead of silently returning Fahrenheit values
    # labelled with whatever string the caller passed.
    unit_key = unit.strip().lower() if isinstance(unit, str) else ""
    if unit_key not in ("celsius", "fahrenheit"):
        return {
            "location": location,
            "error": f"unsupported unit {unit!r}; expected 'celsius' or 'fahrenheit'",
        }

    use_celsius = unit_key == "celsius"

    try:
        # Percent-encode the location so characters such as "/", "?" or "#"
        # cannot change the path or query of the request.
        url = f"https://wttr.in/{quote(location.strip(), safe='')}?format=j1"
        resp = requests.get(url, timeout=100)
        resp.raise_for_status()
        data = resp.json()

        current = data["current_condition"][0]

        return {
            "location": location,
            "temperature": int(current["temp_C"] if use_celsius else current["temp_F"]),
            "unit": unit_key,
            "condition": current["weatherDesc"][0]["value"],
            "humidity": int(current["humidity"]),
            "wind_mph": int(current["windspeedMiles"]),
            "feels_like": int(current["FeelsLikeC"] if use_celsius else current["FeelsLikeF"]),
        }
    except Exception as e:
        # Deliberately broad: this is the tool boundary, and every failure
        # (network, HTTP status, unexpected payload shape) is reported to the
        # caller as data rather than raised.
        return {"location": location, "error": str(e)}

# JSON Schema for the arguments the model may supply when calling get_weather.
# Only "location" is mandatory; "unit" is restricted to the two listed values.
_WEATHER_TOOL_PARAMETERS = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city name, e.g. San Francisco",
        },
        "unit": {
            "type": "string",
            "enum": ["celsius", "fahrenheit"],
            "description": "The temperature unit to use",
        },
    },
    "required": ["location"],
}

# Function-tool definition passed in the `tools` list of each request.
WEATHER_TOOL = {
    "type": "function",
    "name": "get_weather",
    "description": "Get the current weather for a given location",
    "parameters": _WEATHER_TOOL_PARAMETERS,
}

# User question sent as the initial input.
PROMPT = "What's the weather like in San Francisco?"

# Agentic response from response API
def _run_tool_call(item) -> dict:
    """Execute one ``function_call`` output item and build the reply for it.

    Prints the call details and the result, then returns a
    ``function_call_output`` input item carrying the JSON-encoded result.
    Unknown function names and malformed arguments are reported to the model
    as an ``error`` result instead of raising.
    """
    print(f"    Function: {item.name}")
    print(f"    Arguments: {item.arguments}")
    print(f"    Call ID: {item.call_id}")

    if item.name != WEATHER_TOOL["name"]:
        result = {"error": f"Unknown function: {item.name}"}
    else:
        try:
            # ValueError covers invalid JSON; TypeError covers a non-object
            # payload or missing/unexpected keyword arguments.
            args = json.loads(item.arguments or "{}")
            result = get_weather(**args)
        except (TypeError, ValueError) as exc:
            result = {"error": f"Invalid arguments for {item.name}: {exc}"}
    print(f"    Result: {result}")

    return {
        "type": "function_call_output",
        "call_id": item.call_id,
        "output": json.dumps(result),
    }


def main():
    """Ask the model about the weather and run any tool calls it requests.

    Reads the server address from the ``LLAMA_STACK_URL`` environment
    variable, sends ``PROMPT`` with ``WEATHER_TOOL`` attached, executes the
    function calls found in each response, returns their results in a single
    follow-up request per round, and finally prints the model's text output.

    Raises:
        ValueError: If ``LLAMA_STACK_URL`` is unset or empty.
    """
    llama_stack_url = os.getenv("LLAMA_STACK_URL")
    # Treat an empty value like a missing one; it cannot form a usable URL.
    if not llama_stack_url:
        raise ValueError("LLAMA_STACK_URL is not set in the environment variables")
    # Avoid a double slash when the configured URL ends with "/".
    llama_stack_url = llama_stack_url.rstrip("/")

    model = "vllm/openai/gpt-oss-20b"

    print(f"Using model {model} via Llama Stack at {llama_stack_url}")

    client = OpenAI(
        base_url=f"{llama_stack_url}/v1",
        api_key="none"
    )

    # First call - model may request function calls
    print(f"\nUser: {PROMPT}")
    response = client.responses.create(
        model=model,
        tools=[WEATHER_TOOL],
        input=PROMPT
    )

    # Upper bound on follow-up requests so a model that keeps requesting
    # tools cannot loop forever.
    max_tool_rounds = 5
    for _ in range(max_tool_rounds):
        print(f"\nResponse ID: {response.id}")
        print(f"Output items: {len(response.output)}")

        # Gather the results of every function call in this response first,
        # so parallel calls are all answered in the same follow-up request.
        tool_outputs = []
        for item in response.output:
            print(f"  - Type: {item.type}")
            if item.type == "function_call":
                tool_outputs.append(_run_tool_call(item))

        if not tool_outputs:
            break

        # Send the function results back
        response = client.responses.create(
            model=model,
            tools=[WEATHER_TOOL],
            previous_response_id=response.id,
            input=tool_outputs
        )

    print("\n" + "="*60)
    print("Response from GPT-OSS-20b with vLLM Current Main (01/14/2026)")
    print("="*60)
    print(response.output_text)


if __name__ == "__main__":
    main()

Using model vllm/openai/gpt-oss-20b via Llama Stack at http://localhost:8321

User: What's the weather like in San Francisco?

Response ID: resp_f64a2890-426c-44a8-9c4c-8c11af2353c6
Output items: 1
  - Type: function_call
    Function: get_weather
    Arguments: {"location":"San Francisco"}
    Call ID: chatcmpl-tool-b91fb03ecf523867
    Result: {'location': 'San Francisco', 'temperature': 53, 'unit': 'fahrenheit', 'condition': 'Sunny', 'humidity': 77, 'wind_mph': 7, 'feels_like': 51}

Response from GPT-OSS-20b with vLLM Current Main (01/14/2026)
**San Francisco** (Fahrenheit)

- **Temperature:** 53 °F  
- **Feels like:** 51 °F  
- **Condition:** Sunny  
- **Humidity:** 77 %  
- **Wind:** 7 mph  

Enjoy the lovely weather!
