In [None]:
import json
import os
from datetime import date
from dotenv import load_dotenv
from openai import OpenAI
import httpx

# Pull variables from a local .env file (if present) into the environment
# before any of the os.getenv() lookups below.
load_dotenv()

# Base URL of the Llama Stack server. Trailing slashes are stripped so that
# appending "/v1" below never produces a "//v1" path when the environment
# value is written as e.g. "http://host:8321/".
LLAMA_STACK_URL = os.getenv("LLAMA_STACK_URL", "http://127.0.0.1:8321").rstrip("/")
MODEL_ID = "vllm/openai/gpt-oss-120b"
# MODEL_LABEL, VLLM_VERSION and RUN_DATE are only used in the printed report
# banner; they are not sent to the server.
MODEL_LABEL = "GPT-OSS-120b"
VLLM_VERSION = "0.11.2+rhai5"
RUN_DATE = os.getenv("RUN_DATE", date.today().isoformat())

# Set LLAMA_STACK_INSECURE=1 if you need to disable TLS verification
# (only the exact value "1" disables it; anything else keeps verification on).
VERIFY_SSL = os.getenv("LLAMA_STACK_INSECURE", "0") != "1"

# OpenAI-compatible client pointed at the Llama Stack endpoint. The API key is
# a placeholder string; the custom httpx client carries the TLS setting.
client = OpenAI(
    base_url=f"{LLAMA_STACK_URL}/v1",
    api_key="none",
    http_client=httpx.Client(verify=VERIFY_SSL)
)

# Questions whose answers depend on recent information, used to exercise the
# web_search tool.
PROMPTS = [
    "What were the top 3 headlines about NVIDIA in the last 7 days? Include sources.",
    "What is the latest stable release of vLLM and its release date? Cite sources.",
    "What is the most recent U.S. CPI (inflation) release and its month-over-month value? Cite sources."
]


def normalize_result(result):
    """Coerce a single search result into a dictionary.

    The first matching rule wins:

    * a ``dict`` is returned as-is (same object, not a copy);
    * an object exposing ``model_dump`` returns whatever that call produces;
    * an object with an instance ``__dict__`` returns that attribute mapping;
    * anything else is wrapped as ``{"value": str(result)}``.
    """
    if isinstance(result, dict):
        normalized = result
    elif hasattr(result, "model_dump"):
        normalized = result.model_dump()
    elif hasattr(result, "__dict__"):
        # vars() hands back the very same mapping as ``result.__dict__``.
        normalized = vars(result)
    else:
        normalized = {"value": str(result)}
    return normalized


def print_search_results(item):
    """Print the search results attached to a response output item.

    Nothing is printed when ``item`` has no ``results`` attribute or when that
    attribute is ``None``. Otherwise a ``Results:`` header is printed, followed
    by one numbered entry per result (numbering starts at 1). Each entry shows
    the title and, when truthy, the URL and a snippet; the snippet falls back
    to the ``content`` field when ``snippet`` is missing or empty.
    """
    hits = getattr(item, "results", None)
    if hits is not None:
        print("    Results:")
        for position, raw_hit in enumerate(hits, start=1):
            fields = normalize_result(raw_hit)
            heading = fields.get("title")
            link = fields.get("url")
            excerpt = fields.get("snippet") or fields.get("content")
            print(f"      {position}. {heading}")
            if link:
                print(f"         URL: {link}")
            if excerpt:
                print(f"         Snippet: {excerpt}")


def run_prompt(prompt):
    """Send ``prompt`` to the model with web search enabled and print a report.

    The report contains the response ID, the response status (plus any
    incomplete details), one line per output item with its type, the search
    query and results when the item exposes them, whether any search item was
    seen, and finally the answer text under a banner.

    Args:
        prompt: The user question, passed as the ``input`` of the request.

    Returns:
        The response object returned by ``client.responses.create``.
    """
    print(f"\nUser: {prompt}")
    response = client.responses.create(
        model=MODEL_ID,
        tools=[{"type": "web_search"}],
        input=prompt
    )

    print(f"Response ID: {response.id}")
    # Surface the status explicitly: a response that did not complete may
    # contain only tool calls and no final message, which would otherwise show
    # up as an unexplained blank answer.
    status = getattr(response, "status", None)
    print(f"Response status: {status}")
    incomplete_details = getattr(response, "incomplete_details", None)
    if incomplete_details:
        print(f"Incomplete details: {incomplete_details}")

    search_triggered = False

    for item in response.output:
        # Fall back to "unknown" both when the attribute is absent and when it
        # is present but None, so the substring test below cannot raise.
        item_type = getattr(item, "type", None) or "unknown"
        print(f"  - Type: {item_type}")
        if "search" in item_type:
            search_triggered = True
        # The query may sit directly on the item or nested under its
        # ``action`` (either of which can be missing or None).
        query = getattr(item, "query", None) or getattr(
            getattr(item, "action", None), "query", None
        )
        if query:
            print(f"    Query: {query}")
        print_search_results(item)

    print(f"Search triggered: {search_triggered}")
    print("\n" + "=" * 60)
    print(f"Response from {MODEL_LABEL} with vLLM {VLLM_VERSION} ({RUN_DATE})")
    print("=" * 60)
    # Make an empty answer visible instead of printing a blank line.
    print(response.output_text or "(empty)")
    return response


# Run every prompt in order. The client is closed in a ``finally`` clause so
# its HTTP connections are released even when a request raises part-way through.
try:
    for prompt in PROMPTS:
        run_prompt(prompt)
finally:
    client.close()


# Run Output (2026-01-19)

## Prompt 1

**Prompt:** What were the top 3 headlines about NVIDIA in the last 7 days? Include sources.

**Search triggered:** True

**Response status:** completed

**Response text (abridged; the full text is in the JSON section below):**

Top 3 NVIDIA headlines (Jan 12 - Jan 19 2026)

| Date (2026) | Headline | Why it’s a top story | Source |
|-------------|----------|----------------------|--------|
| Jan 12 | NVIDIA BioNeMo Platform Adopted by Life-Sciences Leaders to Accelerate AI-Driven Drug Discovery | BioNeMo platform partnership expands NVIDIA into life sciences. | https://investor.nvidia.com/news/press-release-details/2026/NVIDIA-BioNeMo-Platform-Adopted-by-Life-Sciences-Leaders-to-Accelerate-AI-Driven-Drug-Discovery/default.aspx |
| Jan 13 | U.S. clears NVIDIA H200 AI-chip exports to China with conditions | Export approval for H200 chips has geopolitical impact. | https://www.reuters.com/world/asia-pacific/us-eases-regulations-nvidia-h200-chip-exports-china-2026-01-13/ |
| Jan 14 | China’s customs agents told NVIDIA H200 chips are not permitted to enter | China customs pushback after U.S. approval. | https://www.reuters.com/world/china/chinas-customs-agents-told-nvidias-h200-chips-are-not-permitted-sources-say-2026-01-14/ |

---

## Prompt 2

**Prompt:** What is the latest stable release of vLLM and its release date? Cite sources.

**Search triggered:** True

**Response status:** incomplete

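**Response text:** (empty — the response ended with status `incomplete` after 10 completed `web_search_call` items and no assistant message; see the Prompt 2 JSON below)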

---

## Prompt 3

**Prompt:** What is the most recent U.S. CPI (inflation) release and its month-over-month value? Cite sources.

**Search triggered:** True

**Response status:** completed

**Response text (abridged; the full text is in the JSON section below):**

Most recent U.S. CPI release
- Release: Consumer Price Index (CPI-U) for December 2025
- Publication date: January 13 2026
- Month-over-month change: +0.3% (seasonally adjusted)

Sources
1. https://www.bls.gov/news.release/pdf/cpi.pdf
2. https://www.bls.gov/news.release/cpi.nr0.htm

---

## Full Response JSON (with metadata)

### Prompt 1

```json
{
  "id": "resp_d1f906e4-ac5e-4ede-8865-7edfc828eb0c",
  "created_at": 1768832287.0,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": null,
  "model": "vllm/openai/gpt-oss-120b",
  "object": "response",
  "output": [
    {
      "id": "fc_fd4a3698-c265-47d6-b597-0c9a88703340",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_09a5fa9e-080f-4950-9f0a-0a3f19087867",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_385fc541-f8b8-4a07-8c3a-d9c725e75a8b",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_7294642f-d73d-4a91-834f-5abcd03b0526",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "msg_34099dc7-d279-434f-9a08-c980b323f754",
      "content": [
        {
          "annotations": [],
          "text": "**Top\u202f3 NVIDIA headlines (Jan\u202f12\u202f\u2013\u202fJan\u202f19\u202f2026)**  \n\n| Date (2026) | Headline | Why it\u2019s a top story | Source |\n|-------------|----------|----------------------|--------|\n| **Jan\u202f12** | **\u201cNVIDIA BioNeMo Platform Adopted by Life\u2011Sciences Leaders to Accelerate AI\u2011Driven Drug Discovery\u201d** | The BioNeMo platform \u2013 NVIDIA\u2019s open\u2011development suite for AI\u2011driven biology \u2013 landed a major partnership with Eli\u202fLilly, marking the first large\u2011scale collaboration that blends AI with pharmaceutical R&D. The announcement was made at the J.P.\u202fMorgan Healthcare Conference and signals a new revenue stream beyond chips. | \u3010https://investor.nvidia.com/news/press-release-details/2026/NVIDIA-BioNeMo-Platform-Adopted-by-Life-Sciences-Leaders-to-Accelerate-AI-Driven-Drug-Discovery/default.aspx\u3011 |\n| **Jan\u202f13** | **\u201cU.S. clears NVIDIA H200 AI\u2011chip exports to China with conditions\u201d** | The U.S. government granted a limited export licence for NVIDIA\u2019s H200, the company\u2019s most powerful AI accelerator. The move is controversial because the chips could bolster Chinese AI capabilities, yet it reflects pressure from industry to keep Chinese customers supplied. | \u3010https://www.reuters.com/world/asia-pacific/us-eases-regulations-nvidia-h200-chip-exports-china-2026-01-13/\u3011 |\n| **Jan\u202f14** | **\u201cChina\u2019s customs agents told NVIDIA H200 chips are not permitted to enter\u201d** | Just a day after the export licence, Chinese customs officials warned that H200 chips are \u201cnot permitted\u201d for import, fueling a trade standoff. The clash has immediate supply\u2011chain implications for NVIDIA\u2019s biggest growth market and has been driving market volatility. | \u3010https://www.reuters.com/world/china/chinas-customs-agents-told-nvidias-h200-chips-are-not-permitted-sources-say-2026-01-14/\u3011 |\n\nThese three stories dominated coverage of NVIDIA in the last week \u2013 they involve a strategic expansion into biotech, a high\u2011stakes U.S. export decision on its flagship AI chip, and the resulting geopolitical push\u2011back from China.",
          "type": "output_text",
          "logprobs": null
        }
      ],
      "role": "assistant",
      "status": "completed",
      "type": "message"
    }
  ],
  "parallel_tool_calls": true,
  "temperature": null,
  "tool_choice": null,
  "tools": [
    {
      "type": "web_search",
      "filters": null,
      "search_context_size": "medium",
      "user_location": null
    }
  ],
  "top_p": null,
  "background": null,
  "conversation": null,
  "max_output_tokens": null,
  "max_tool_calls": null,
  "previous_response_id": null,
  "prompt": null,
  "prompt_cache_key": null,
  "reasoning": null,
  "safety_identifier": null,
  "service_tier": null,
  "status": "completed",
  "text": {
    "format": {
      "type": "text"
    },
    "verbosity": null
  },
  "top_logprobs": null,
  "truncation": null,
  "usage": {
    "input_tokens": 13179,
    "input_tokens_details": null,
    "output_tokens": 1238,
    "output_tokens_details": null,
    "total_tokens": 14417
  },
  "user": null
}
```

### Prompt 2

```json
{
  "id": "resp_e6bc93b9-d8c2-48d1-bb1a-d9bbe22a07c1",
  "created_at": 1768832331.0,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": null,
  "model": "vllm/openai/gpt-oss-120b",
  "object": "response",
  "output": [
    {
      "id": "fc_77da2891-5cea-4df7-a4ef-895dcdb3cffe",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_157b6131-5782-4e12-a222-bac0782579d6",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_83bd5e03-8c52-4c9c-93e1-429b21045b8e",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_a2f05c99-62ec-48ab-94b1-b252b57b9174",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_17038aea-da47-4eb7-9477-afd5c0dc0c1e",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_e7b5c7be-22f6-44f2-92ae-ca7a7bf0d7d6",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_748cc203-7a7b-4367-a092-1667f6a02498",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_5716b3fa-a00f-46a7-9dca-9020cdb31850",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_796a9b56-3f06-454c-bbae-ba0f5eecb252",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_b4cde534-4a18-4f3b-8943-1b7e0ac70ae2",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    }
  ],
  "parallel_tool_calls": true,
  "temperature": null,
  "tool_choice": null,
  "tools": [
    {
      "type": "web_search",
      "filters": null,
      "search_context_size": "medium",
      "user_location": null
    }
  ],
  "top_p": null,
  "background": null,
  "conversation": null,
  "max_output_tokens": null,
  "max_tool_calls": null,
  "previous_response_id": null,
  "prompt": null,
  "prompt_cache_key": null,
  "reasoning": null,
  "safety_identifier": null,
  "service_tier": null,
  "status": "incomplete",
  "text": {
    "format": {
      "type": "text"
    },
    "verbosity": null
  },
  "top_logprobs": null,
  "truncation": null,
  "usage": {
    "input_tokens": 76137,
    "input_tokens_details": null,
    "output_tokens": 907,
    "output_tokens_details": null,
    "total_tokens": 77044
  },
  "user": null
}
```

### Prompt 3

```json
{
  "id": "resp_4c0b12b7-b334-4765-9636-603e235b6d7f",
  "created_at": 1768832382.0,
  "error": null,
  "incomplete_details": null,
  "instructions": null,
  "metadata": null,
  "model": "vllm/openai/gpt-oss-120b",
  "object": "response",
  "output": [
    {
      "id": "fc_ab859f95-5d99-4843-87ba-c8931c6cfe63",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_d0709c5e-1003-4a76-a06e-6bc4b8f249d2",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_032a8bad-4925-4172-8d20-f93e54ad1e85",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_97c7966a-95a3-4a5d-b55a-1c0b09be4ece",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "fc_31952711-013c-4add-8f11-fbd794bbd5b6",
      "action": null,
      "status": "completed",
      "type": "web_search_call"
    },
    {
      "id": "msg_0a80e7d3-226f-45ad-8996-3ba0c3d35e99",
      "content": [
        {
          "annotations": [],
          "text": "**Most recent U.S. CPI release**  \n- **Release:** *Consumer Price Index \u2013 December\u202f2025* (published\u202fJanuary\u202f13\u202f2026)  \n- **Month\u2011over\u2011month change:** **+0.3\u202f%** (seasonally\u2011adjusted increase for the all\u2011items index)\n\n**Why this is the latest release**  \nThe Bureau of Labor Statistics\u2019 schedule shows that the December\u202f2025 CPI data were released on\u202fTuesday,\u202fJanuary\u202f13,\u202f2026. The next scheduled CPI release (for January\u202f2026) is set for\u202fFebruary\u202f11,\u202f2026, which has not yet occurred, making the December\u202f2025 release the most recent one available.\n\n**Sources**\n\n1. **BLS news release PDF** \u2013 \u201cThe Consumer Price Index for All Urban Consumers (CPI\u2011U) increased 0.3\u202fpercent on a seasonally adjusted basis in December\u2026\u201d\u3010https://www.bls.gov/news.release/pdf/cpi.pdf\u3011  \n2. **BLS news\u2011release summary page** \u2013 Same statement that the CPI \u201cincreased 0.3\u202fpercent on a seasonally adjusted basis in December\u201d\u3010https://www.bls.gov/news.release/cpi.nr0.htm\u3011  \n\nThese official BLS documents confirm that the latest CPI figure is for December\u202f2025 and that the month\u2011over\u2011month change was +0.3\u202f%.",
          "type": "output_text",
          "logprobs": null
        }
      ],
      "role": "assistant",
      "status": "completed",
      "type": "message"
    }
  ],
  "parallel_tool_calls": true,
  "temperature": null,
  "tool_choice": null,
  "tools": [
    {
      "type": "web_search",
      "filters": null,
      "search_context_size": "medium",
      "user_location": null
    }
  ],
  "top_p": null,
  "background": null,
  "conversation": null,
  "max_output_tokens": null,
  "max_tool_calls": null,
  "previous_response_id": null,
  "prompt": null,
  "prompt_cache_key": null,
  "reasoning": null,
  "safety_identifier": null,
  "service_tier": null,
  "status": "completed",
  "text": {
    "format": {
      "type": "text"
    },
    "verbosity": null
  },
  "top_logprobs": null,
  "truncation": null,
  "usage": {
    "input_tokens": 21200,
    "input_tokens_details": null,
    "output_tokens": 1249,
    "output_tokens_details": null,
    "total_tokens": 22449
  },
  "user": null
}
```
