In [3]:
!uv add pydantic_ai
import json
import re
import requests
from urllib.parse import quote_plus, unquote, urlparse

from pydantic_ai import Agent


[2mResolved [1m237 packages[0m [2min 2ms[0m[0m
[2mAudited [1m232 packages[0m [2min 46ms[0m[0m


In [4]:
# Endpoint of the MediaWiki api.php script on English Wikipedia (search endpoint).
WIKIPEDIA_SEARCH_API = "https://en.wikipedia.org/w/api.php"
# index.php endpoint; get_page() appends "?title=...&action=raw" to it.
WIKIPEDIA_RAW_API = "https://en.wikipedia.org/w/index.php"
# Value sent as the User-Agent header on the HTTP requests made in this notebook.
USER_AGENT = "tool-call-loop-wikipedia/1.0 (learning project)"


def _title_from_url(url: str) -> str:
    parsed = urlparse(url)
    if "/wiki/" in parsed.path:
        return unquote(parsed.path.split("/wiki/", 1)[1]).replace("_", " ")
    return ""


def search(query: str, limit: int | None = None):
    """Search Wikipedia and return a near-original API payload.

    Args:
        query: Free-text search string, sent to the API as ``srsearch``.
        limit: Maximum number of results to request, sent as ``srlimit``.
            When ``None`` the parameter is omitted from the request.

    Returns:
        A dict holding the ``batchcomplete``, ``continue`` and ``query``
        entries of the JSON response. Entries missing from the response
        fall back to ``""``, ``None`` and ``{}`` respectively.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
    """
    # Let requests build and encode the query string instead of concatenating
    # it by hand, and reuse the module-level endpoint constant rather than
    # repeating the URL literal.
    params = {
        "action": "query",
        "format": "json",
        "list": "search",
        "srsearch": query,
    }
    if limit is not None:
        params["srlimit"] = limit

    r = requests.get(
        WIKIPEDIA_SEARCH_API,
        params=params,
        timeout=15,
        headers={"User-Agent": USER_AGENT},
    )
    r.raise_for_status()
    data = r.json()

    # Pass through only the three top-level keys the notebook relies on.
    return {
        "batchcomplete": data.get("batchcomplete", ""),
        "continue": data.get("continue"),
        "query": data.get("query", {}),
    }


def get_page(page_title: str):
    """Fetch raw Wikipedia page content by title or URL."""
    # NOTE(review): this function is registered as an agent tool below;
    # presumably the framework surfaces the docstring to the model, so the
    # wording above is left untouched — confirm before rewording.

    # Accept either a plain title or an http(s) URL. When a URL yields no
    # title, the caller's string is used unchanged.
    looks_like_url = page_title.startswith(("http://", "https://"))
    title = (_title_from_url(page_title) or page_title) if looks_like_url else page_title

    # action=raw asks index.php for the page's raw content.
    url = f"{WIKIPEDIA_RAW_API}?title={quote_plus(title)}&action=raw"
    response = requests.get(url, headers={"User-Agent": USER_AGENT}, timeout=15)
    response.raise_for_status()  # HTTP error statuses surface as exceptions

    return dict(
        title=title,
        url=url,
        content=response.text,
        source="wikipedia",
    )


def count_page_characters(page_title: str):
    """Return exact character count for a Wikipedia page."""
    # NOTE(review): registered as an agent tool below; presumably the
    # docstring is surfaced to the model, so its wording is left untouched.

    fetched = get_page(page_title)

    # Carry the identifying fields over, then add the length of the content
    # string that get_page() returned.
    summary = {key: fetched[key] for key in ("title", "url")}
    summary["character_count"] = len(fetched["content"])
    summary["source"] = "wikipedia"
    return summary


def count_titles_with_term(query: str, term: str, limit: int | None = None):
    """Count returned search result titles containing a whole-word term, case-insensitive.

    Args:
        query: Search string forwarded to ``search``.
        term: Word or phrase to look for in each returned title. Surrounding
            whitespace is ignored; a blank term matches nothing.
        limit: Optional result limit forwarded to ``search``.

    Returns:
        A dict with the original ``query`` and ``term``, the number of
        matching titles (``title_match_count``), the matching titles
        themselves (``matched_titles``), the number of search results that
        were inspected (``search_returned_count``) and a ``source`` tag.
    """
    payload = search(query=query, limit=limit)
    search_items = payload.get("query", {}).get("search", [])

    needle = term.strip()
    if needle:
        # Lookarounds rather than \b: \b only fires between a word and a
        # non-word character, so a term that starts or ends with punctuation
        # (e.g. "C++") could never match at the edge of a title. For ordinary
        # word terms the two forms are equivalent.
        pattern = re.compile(
            rf"(?<!\w){re.escape(needle)}(?!\w)", flags=re.IGNORECASE
        )
        matched_titles = [
            title
            for title in (item.get("title", "") for item in search_items)
            if pattern.search(title)
        ]
    else:
        # A blank term would otherwise compile to a pattern that matches
        # almost every title; report zero matches instead.
        matched_titles = []

    return {
        "query": query,
        "term": term,
        "title_match_count": len(matched_titles),
        "matched_titles": matched_titles,
        "search_returned_count": len(search_items),
        "source": "wikipedia",
    }


In [5]:
def print_tool_calls(messages):
    """Print one line per tool call and tool return found in ``messages``.

    Each message is expected to expose a ``parts`` iterable. Parts whose
    ``part_kind`` is ``"tool-call"`` are printed with their name and args,
    parts whose ``part_kind`` is ``"tool-return"`` with their name only.
    If no tool call was seen at all, ``TOOL CALL: none`` is printed.
    """
    saw_call = False
    # Walk every part of every message in order.
    for part in (p for message in messages for p in message.parts):
        kind = getattr(part, "part_kind", None)
        if kind == "tool-call":
            print(f"TOOL CALL: {part.tool_name}({part.args})")
            saw_call = True
        elif kind == "tool-return":
            # Returns are reported but do not count as a call.
            print(f"TOOL RETURN: {part.tool_name}")

    if not saw_call:
        print("TOOL CALL: none")


def print_assistant_text(messages):
    """Print the content of every part whose ``part_kind`` is ``"text"``.

    Each message is expected to expose a ``parts`` iterable; parts without a
    ``part_kind`` attribute, or with any other kind, are skipped.
    """
    for part in (p for message in messages for p in message.parts):
        if getattr(part, "part_kind", None) != "text":
            continue
        print("ASSISTANT:", part.content)


In [6]:
# Instructions handed to the agent. The rules steer each question type to a
# specific tool or payload field: the length of query.search for "how many
# results were returned", query.searchinfo.totalhits for "how many matches
# exist", and the two dedicated counting tools for title and character counts.
instructions = """
You're a Wikipedia research assistant.

Use tools for factual retrieval.
Rules:
- For result-count questions, use query.search length from search output.
- For total matches, use query.searchinfo.totalhits.
- For title-only counting questions, call count_titles_with_term.
- For character-count questions, call count_page_characters.
"""

# Agent wired to the four Wikipedia helper functions defined above as tools.
wiki_agent = Agent(
    name="wikipedia-tools",
    model="openai:gpt-4o-mini",
    instructions=instructions,
    tools=[search, get_page, count_page_characters, count_titles_with_term],
)


In [7]:
# Questions sent to the agent, in order. The run loop feeds the accumulated
# message history into every call, so a follow-up such as "those returned
# results" only makes sense when the earlier questions are enabled too.
# Uncomment entries to try them.
queries = [
    # "How many results are returned by searching for capybara?",
    # "How many total matches exist for capybara?",
    # "How many of those returned results contain the word capybara in the title?",
    # "How many characters are in the Wikipedia page for Capybara?",
    # "What is this page about? https://en.wikipedia.org/wiki/Capybara",
    "What are the main threats to capybara populations?"
]


In [8]:
# Conversation history shared across all queries in this run.
messages = []

for i, query in enumerate(queries, start=1):
    print(f"\n=== Query {i} ===")
    print("USER:", query)

    # Top-level await: this cell relies on the notebook running it inside an
    # event loop. The history collected so far is passed along so the agent
    # can resolve references to earlier turns.
    result = await wiki_agent.run(query, message_history=messages)
    # Only the messages produced by this run, not the history passed in.
    new_messages = result.new_messages()

    print_tool_calls(new_messages)
    print_assistant_text(new_messages)

    # Append this turn so the next query sees it as context.
    messages.extend(new_messages)



=== Query 1 ===
USER: What are the main threats to capybara populations?
TOOL CALL: search({"query":"capybara threats","limit":5})
TOOL RETURN: search
TOOL CALL: search({"query":"capybara","limit":5})
TOOL RETURN: search
TOOL CALL: get_page({"page_title":"Capybara"})
TOOL RETURN: get_page
ASSISTANT: The main threats to capybara populations include:

1. **Hunting**: Capybaras are hunted for their meat and hides, which is a significant threat in some regions of South America where hunting is prevalent.

2. **Habitat Loss**: Their natural habitats are often destroyed due to agricultural expansion, urban development, and land conversion for livestock. This leads to a reduction in their living spaces and food availability.

3. **Human-Wildlife Conflict**: In some areas, capybaras are perceived as competition for livestock grazing, leading to them being killed by farmers.

4. **Predation**: Natural predators such as big cats (jaguars and cougars), caimans, and anacondas pose a constant thre

In [9]:
messages[-1]


ModelResponse(parts=[TextPart(content='The main threats to capybara populations include:\n\n1. **Hunting**: Capybaras are hunted for their meat and hides, which is a significant threat in some regions of South America where hunting is prevalent.\n\n2. **Habitat Loss**: Their natural habitats are often destroyed due to agricultural expansion, urban development, and land conversion for livestock. This leads to a reduction in their living spaces and food availability.\n\n3. **Human-Wildlife Conflict**: In some areas, capybaras are perceived as competition for livestock grazing, leading to them being killed by farmers.\n\n4. **Predation**: Natural predators such as big cats (jaguars and cougars), caimans, and anacondas pose a constant threat to capybara populations, particularly to younger individuals.\n\n5. **Urbanization**: Capybaras have adapted to urban environments to some degree, but urban encroachment can still pose risks, including vehicle strikes and conflicts with humans.\n\nDesp