In [221]:
from openai import OpenAI

# Shared client used for every model call in this notebook. No api_key is
# passed here, so the SDK is presumably configured from the environment
# (e.g. OPENAI_API_KEY) - confirm before running on a fresh kernel.
openai_client = OpenAI()


In [222]:
import json
import re
from urllib.parse import parse_qs, quote_plus, unquote, urlparse

import requests

# MediaWiki Action API endpoint (api.php) on English Wikipedia; search requests go here.
WIKIPEDIA_SEARCH_API = "https://en.wikipedia.org/w/api.php"
# index.php endpoint; get_page requests it with action=raw to fetch a page's source text.
WIKIPEDIA_RAW_API = "https://en.wikipedia.org/w/index.php"
# Sent as the User-Agent header on every Wikipedia request made by the helpers below.
USER_AGENT = "tool-call-loop-wikipedia/1.0 (learning project)"



In [223]:
def _title_from_url(url: str) -> str:
    parsed = urlparse(url)
    if "/wiki/" in parsed.path:
        return unquote(parsed.path.split("/wiki/", 1)[1]).replace("_", " ")
    return ""


def search(query: str, limit: int | None = None):
    # Keep output close to Wikipedia API response shape for easier model consumption.
    url = (
        "https://en.wikipedia.org/w/api.php"
        f"?action=query&format=json&list=search&srsearch={quote_plus(query)}"
    )
    if limit is not None:
        url += f"&srlimit={limit}"

    r = requests.get(url, timeout=15, headers={"User-Agent": USER_AGENT})
    r.raise_for_status()
    data = r.json()

    return {
        "batchcomplete": data.get("batchcomplete", ""),
        "continue": data.get("continue"),
        "query": data.get("query", {}),
    }
def get_page(page_title: str):
    """Download the raw source of a Wikipedia page.

    Args:
        page_title: Either a page title or an ``http(s)://`` URL. For a URL the
            title is taken from ``_title_from_url``; if that yields nothing the
            original string is used as the title unchanged.

    Returns:
        A dict with ``"title"`` (the title actually requested), ``"url"`` (the
        request URL built here), ``"content"`` (the response body text) and
        ``"source"`` (always ``"wikipedia"``).

    Raises:
        requests.HTTPError: via ``raise_for_status()`` on a non-success status.
    """
    looks_like_url = page_title.startswith(("http://", "https://"))
    # An empty extraction result falls back to the caller's input as-is.
    title = (_title_from_url(page_title) or page_title) if looks_like_url else page_title

    url = f"{WIKIPEDIA_RAW_API}?title={quote_plus(title)}&action=raw"
    response = requests.get(url, timeout=15, headers={"User-Agent": USER_AGENT})
    response.raise_for_status()

    return dict(
        title=title,
        url=url,
        content=response.text,
        source="wikipedia",
    )


def count_page_characters(page_title: str):
    """Report the length of a page's fetched content.

    Delegates to ``get_page`` (so ``page_title`` may be a title or a URL) and
    measures ``len()`` of the returned ``"content"`` string, i.e. the raw text
    fetched with ``action=raw`` rather than any rendered form.

    Returns:
        A dict with ``"title"`` and ``"url"`` copied from ``get_page``,
        ``"character_count"`` and ``"source"`` (always ``"wikipedia"``).
    """
    fetched = get_page(page_title)

    summary = {key: fetched[key] for key in ("title", "url")}
    summary["character_count"] = len(fetched["content"])
    summary["source"] = "wikipedia"
    return summary

def count_titles_with_term(query: str, term: str, limit: int | None = None):
    payload = search(query=query, limit=limit)
    search_items = payload.get("query", {}).get("search", [])

    pattern = re.compile(rf"\b{re.escape(term)}\b", flags=re.IGNORECASE)
    matched_titles = []
    for item in search_items:
        title = item.get("title", "")
        if pattern.search(title):
            matched_titles.append(title)

    return {
        "query": query,
        "term": term,
        "title_match_count": len(matched_titles),
        "matched_titles": matched_titles,
        "search_returned_count": len(search_items),
        "source": "wikipedia",
    }


def make_call(tool_call):
    """Execute one model-requested tool call and wrap the result for the API.

    ``tool_call`` must expose ``name``, ``arguments`` (a JSON string) and
    ``call_id``. The arguments are decoded and passed as keyword arguments to
    the matching local function.

    Failures while decoding the arguments or running the tool (malformed JSON,
    unexpected keyword arguments, HTTP errors, ...) are not raised. They are
    reported back to the model as ``{"error": ..., "tool": ...}`` so that one
    bad call cannot abort the whole conversation loop. An unknown tool name
    yields the string ``not found tool "<name>"``.

    Returns:
        A ``function_call_output`` item: a dict with ``"type"``, ``"call_id"``
        and ``"output"`` (the JSON-encoded result).
    """
    name = tool_call.name

    try:
        arguments = json.loads(tool_call.arguments)

        if name == "search":
            result = search(**arguments)
        elif name == "get_page":
            result = get_page(**arguments)
        elif name == "count_page_characters":
            result = count_page_characters(**arguments)
        elif name == "count_titles_with_term":
            result = count_titles_with_term(**arguments)
        else:
            result = f'not found tool "{name}"'
    except Exception as exc:
        # Deliberately broad: whatever went wrong is surfaced to the model,
        # which can then retry with different arguments.
        result = {"error": f"{type(exc).__name__}: {exc}", "tool": name}

    return {
        "type": "function_call_output",
        "call_id": tool_call.call_id,
        "output": json.dumps(result),
    }









In [224]:
def _function_tool(name: str, description: str, properties: dict, required: list[str]) -> dict:
    """Assemble one function-tool schema entry in the layout used by `tools`."""
    return {
        "type": "function",
        "name": name,
        "description": description,
        "parameters": {
            "type": "object",
            "properties": properties,
            "required": required,
        },
    }


def _typed_property(json_type: str, description: str) -> dict:
    """Build a fresh JSON-schema property dict (never shared between tools)."""
    return {"type": json_type, "description": description}


# Description shared by both tools that accept a page title or URL.
_PAGE_TITLE_DESCRIPTION = "Wikipedia page title (for example: 'Capybara') or full URL"

# Schema for search(): only `query` is exposed to the model.
search_tool = _function_tool(
    "search",
    "Search Wikipedia and return a near-original API payload with query.searchinfo, query.search, and continue.",
    {"query": _typed_property("string", "Search query for Wikipedia")},
    ["query"],
)

# Schema for get_page().
get_page_tool = _function_tool(
    "get_page",
    "Fetch full raw content for a Wikipedia page title or URL.",
    {"page_title": _typed_property("string", _PAGE_TITLE_DESCRIPTION)},
    ["page_title"],
)

# Schema for count_page_characters().
count_page_characters_tool = _function_tool(
    "count_page_characters",
    "Get exact character count for a Wikipedia page title or URL.",
    {"page_title": _typed_property("string", _PAGE_TITLE_DESCRIPTION)},
    ["page_title"],
)

# Schema for count_titles_with_term(); `limit` is optional.
count_titles_with_term_tool = _function_tool(
    "count_titles_with_term",
    "Count how many returned search result titles contain a term as a whole word (case-insensitive).",
    {
        "query": _typed_property("string", "Search query to run on Wikipedia"),
        "term": _typed_property("string", "Word to match in titles"),
        "limit": _typed_property("integer", "Optional srlimit for search"),
    },
    ["query", "term"],
)

# Everything offered to the model; the names must match the dispatch in make_call.
tools = [search_tool, get_page_tool, count_page_characters_tool, count_titles_with_term_tool]








In [225]:
# System prompt for the research assistant. It names the tools defined above
# (get_page, count_page_characters, count_titles_with_term) and tells the model
# when each one is mandatory.
instructions = """
You're a Wikipedia research assistant.

Answer the user question using Wikipedia search results and page content.

Process:
1) Do at least 3 searches with different query phrasings.
2) Fetch at least one relevant page with get_page.
3) For any character-count or length question, you MUST call count_page_characters and use only its numeric output.
4) For title-only counting questions, you MUST call count_titles_with_term and use only its numeric output.
5) Synthesize a final answer from fetched data only.

Counting rule:
- If asked how many results were returned by a search call, use len(query.search).
- Use query.searchinfo.totalhits only when asked for total matches across all pages.

If the data is insufficient, say what is missing.
"""

# Alternative questions that exercise the different tools; swap one in for
# `question` below to try it.
# question = "How many characters are in the Wikipedia page for Capybara?"
# question = "Test the search function with the query 'capybara'. How many results were returned in this API response?"
# question = "How many of the results contain the word capybara (case-insensitive) in their title?"
# question = "What is this page about? https://en.wikipedia.org/wiki/Capybara"
question = "What are the main threats to capybara populations?"

# Conversation state passed to the model. Re-run this cell before the loop
# cell to start from a clean history, because the loop appends to this list.
message_history = [
    {"role": "system", "content": instructions},
    {"role": "user", "content": question},
]






In [226]:
# Agent loop: ask the model, execute any tool calls it requests, feed the
# outputs back, and repeat until the model answers without calling a tool
# (or max_iterations is reached).
iteration_number = 1
max_iterations = 12

# Keyword heuristics deciding whether a counting tool is mandatory for this
# question; if the model tries to answer without it, it gets a reminder below.
question_lower = question.lower()
needs_character_count = any(
    phrase in question_lower for phrase in ["how many characters", "character count", "length"]
)
has_count_call = False
needs_title_count = "in their title" in question_lower and "how many" in question_lower
has_title_count_call = False

while iteration_number <= max_iterations:
    response = openai_client.responses.create(
        model="gpt-4o-mini",
        input=message_history,
        tools=tools,
    )

    print(f"iteration {iteration_number}...")
    # Keep the model's own output items (including its function_call items) in
    # the history so the tool outputs appended below can be matched by call_id.
    message_history.extend(response.output)

    has_function_calls = False

    for message in response.output:
        if message.type == "function_call":
            print(f"executing {message.name}({message.arguments})...")
            if message.name == "count_page_characters":
                has_count_call = True
            if message.name == "count_titles_with_term":
                has_title_count_call = True
            tool_call_output = make_call(message)
            message_history.append(tool_call_output)
            has_function_calls = True

        elif message.type == "message":
            # A message can hold several content parts, and a refusal part
            # carries `.refusal` instead of `.text`; print whatever is there
            # rather than assuming content[0].text exists.
            text = "\n".join(
                piece
                for piece in (
                    getattr(part, "text", None) or getattr(part, "refusal", None)
                    for part in message.content
                )
                if piece
            )
            print("ASSISTANT:", text)

    print()

    if not has_function_calls:
        # The model produced a final answer. Accept it unless a mandatory
        # counting tool was skipped, in which case push it to try again.
        reminder = None
        if needs_character_count and not has_count_call:
            reminder = "You must call count_page_characters before answering this question."
        elif needs_title_count and not has_title_count_call:
            reminder = "You must call count_titles_with_term for title-only counting questions before answering."

        if reminder is None:
            break

        message_history.append({
            "role": "system",
            "content": reminder,
        })

    iteration_number += 1
else:
    # Runs only when the loop ended without `break`, i.e. the budget ran out.
    print(f"stopped after {max_iterations} iterations without a final answer")




iteration 1...
executing search({"query":"capybara threats to population"})...
executing search({"query":"capybara conservation status"})...
executing search({"query":"capybara habitat loss"})...
executing get_page({"page_title":"Capybara"})...

iteration 2...
executing count_page_characters({"page_title":"Capybara"})...

iteration 3...
ASSISTANT: The primary threats to capybara populations include:

1. **Habitat Loss**: Capybaras inhabit savannas and forests near bodies of water, which are increasingly threatened by agricultural expansion, urbanization, and deforestation. These activities lead to habitat fragmentation and loss, reducing the areas where capybaras can thrive.

2. **Hunting**: In some regions, capybaras are hunted for their meat and hide. While they are not considered a threatened species overall—with a stable population in many areas—hunting can significantly impact local populations, especially where regulations are lax or enforcement is weak.

3. **Human-Wildlife Conf

In [227]:
# Inspect the last item appended to the conversation history by the loop above.
message_history[-1]


ResponseOutputMessage(id='msg_08e877930d497123006993668a2d588199a0bb94c3e4544c5b', content=[ResponseOutputText(annotations=[], text='The primary threats to capybara populations include:\n\n1. **Habitat Loss**: Capybaras inhabit savannas and forests near bodies of water, which are increasingly threatened by agricultural expansion, urbanization, and deforestation. These activities lead to habitat fragmentation and loss, reducing the areas where capybaras can thrive.\n\n2. **Hunting**: In some regions, capybaras are hunted for their meat and hide. While they are not considered a threatened species overall—with a stable population in many areas—hunting can significantly impact local populations, especially where regulations are lax or enforcement is weak.\n\n3. **Human-Wildlife Conflict**: Capybaras are sometimes viewed as pests, competing with livestock for resources. This perception can lead to deliberate culling by farmers aiming to protect their agriculture.\n\n4. **Predation**: Natura