# Lesson 3: Chatbot Example

In [1]:
#!/usr/bin/env python3
# Copyright 2025 LA (c)
"""Chatbot Example MCP."""


'Chatbot Example MCP.'

In [2]:
import json
from collections.abc import Callable
from pathlib import Path
from typing import Any, Literal, Self, cast

import anthropic
import arxiv  # pyright: ignore[reportMissingTypeStubs]
import pydantic
from dotenv import load_dotenv


In [3]:
# Display the installed Anthropic SDK version (the recorded output below shows the value at run time).
anthropic.__version__

'0.60.0'

In [4]:
# 2. Constants
# Root directory for saved search results: search_papers creates one
# sub-directory per topic below it, and extract_info scans those
# sub-directories for "papers_info.json" files.
PAPER_DIR = Path("papers")

In [5]:
# 3. Pydantic Models


class SearchPapersArgs(pydantic.BaseModel):
    """Defines the arguments required for the 'search_papers' tool.

    This model validates the input for searching papers, ensuring that a
    topic is provided and the number of results is a valid integer.

    Attributes:
        topic: The topic to search for on arXiv.
        max_results: The maximum number of papers to retrieve.
    """

    # NOTE(review): pydantic presumably copies the class docstring into the
    # "description" of model_json_schema(), which Tool.from_function forwards
    # as the tool's input schema — keep the docstring accurate.

    # Required: no default, so validation fails when the topic is missing.
    topic: str
    # Optional: defaults to 5, the same default as search_papers(max_results=...).
    max_results: int = 5


class ExtractInfoArgs(pydantic.BaseModel):
    """Defines the argument required for the 'extract_info' tool.

    Attributes:
        paper_id: The ID of the paper to look for.
    """

    # NOTE(review): pydantic presumably copies the class docstring into the
    # "description" of model_json_schema(), which Tool.from_function forwards
    # as the tool's input schema — keep the docstring accurate.

    # Required: the identifier used as the key in the saved papers_info.json files.
    paper_id: str


class Tool(pydantic.BaseModel):
    """A model for defining a tool for the Anthropic API.

    Attributes:
        name: The name of the tool.
        description: A short description of what the tool does.
        input_schema: The JSON schema defining the tool's input parameters.
    """

    name: str
    description: str
    input_schema: dict[str, Any]

    @classmethod
    def from_function(
        cls,
        func: Callable[..., Any],
        arg_model: type[pydantic.BaseModel],
    ) -> Self:
        """Creates a Tool instance from a function and a Pydantic model.

        The tool name is the function's ``__name__``, the description is the
        first non-blank line of its docstring (empty when there is no
        docstring), and the input schema is the JSON schema generated from
        ``arg_model``.

        Args:
            func: The function to be converted into a tool.
            arg_model: The Pydantic model that defines the function's arguments.

        Returns:
            An instance of the Tool class, configured with the function's
            metadata and argument schema.
        """
        # Strip every line before testing it: a docstring whose summary sits
        # on its own (indented) line would otherwise yield a description with
        # leading whitespace.
        stripped_lines = (line.strip() for line in (func.__doc__ or "").splitlines())
        description = next((line for line in stripped_lines if line), "")

        return cls(
            name=func.__name__,
            description=description,
            input_schema=arg_model.model_json_schema(),
        )


In [6]:
# Type Aliases
# ToolArgs: any validated argument model that execute_tool accepts.
ToolArgs = SearchPapersArgs | ExtractInfoArgs
# ToolName: the closed set of tool names; these match the keys of
# mapping_tool_function and the names of the tool functions.
ToolName = Literal["search_papers", "extract_info"]

In [7]:
@pydantic.validate_call
def search_papers(topic: str, max_results: int = 5) -> list[str]:
    """Search for papers on arXiv based on a topic and store their information.

    The results are merged into ``<PAPER_DIR>/<topic folder>/papers_info.json``,
    keyed by the paper's short ID. Entries already in that file are kept;
    entries with the same ID are overwritten.

    The first line of this docstring is also used as the tool description
    by ``Tool.from_function``.

    Args:
        topic: The topic to search for.
        max_results: Maximum number of results to retrieve. Defaults to 5.

    Returns:
        List of paper IDs found in the search.
    """
    # Search for the most relevant articles matching the queried topic.
    arxiv_client = arxiv.Client()
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )
    papers = arxiv_client.results(search)

    # Map the topic to exactly ONE directory name below PAPER_DIR.
    # Path separators are replaced as well as spaces: a topic such as "cs/ai"
    # would otherwise create nested directories that extract_info's
    # "*/papers_info.json" glob never visits, and ".." would write outside
    # PAPER_DIR. Topics without separators map to the same folder as before.
    folder_name = topic.lower().translate(
        str.maketrans({" ": "_", "/": "_", "\\": "_"}),
    )
    if folder_name in {"", ".", ".."}:
        folder_name = "_"

    path: Path = Path(PAPER_DIR) / folder_name
    path.mkdir(parents=True, exist_ok=True)

    file_path: Path = path / "papers_info.json"

    # Load what was saved by earlier searches; start fresh when the file is
    # missing or unreadable as JSON.
    try:
        with file_path.open(mode="r", encoding="utf-8") as json_file:
            papers_info = json.load(fp=json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}

    # Valid JSON that is not an object (e.g. a list) cannot be updated by
    # paper ID; treat it like a corrupt file instead of crashing below.
    if not isinstance(papers_info, dict):
        papers_info = {}

    # Process each paper and add it to papers_info.
    paper_ids: list[str] = []
    for paper in papers:
        short_id = paper.get_short_id()
        paper_ids.append(short_id)
        papers_info[short_id] = {
            "title": paper.title,
            "authors": [author.name for author in paper.authors],
            "summary": paper.summary,
            "pdf_url": paper.pdf_url,
            "published": str(paper.published.date()),
        }

    # Save the updated papers_info back to the JSON file.
    with file_path.open(mode="w", encoding="utf-8") as json_file:
        json.dump(obj=papers_info, fp=json_file, indent=2)

    print(f"Results are saved in: {file_path}")

    return paper_ids

In [8]:
# Example run: search arXiv for "computer" with the default max_results and save the results.
search_papers(topic="computer")

Results are saved in: papers\computer\papers_info.json


['1310.7911v2',
 'math/9711204v1',
 '2208.00733v1',
 '2504.07020v1',
 '2403.03925v1']

## Extract Info

The second tool looks for information about a specific paper across all topic directories inside the papers directory.

In [9]:
# Refactor using a guard clause


@pydantic.validate_call
def extract_info(paper_id: str) -> str:
    """Search and retrieve information about a specific paper.

    Every "papers_info.json" file found one directory level below the main
    paper directory is parsed in turn; the first one that contains
    ``paper_id`` supplies the answer. Files that are not valid JSON are
    reported on stdout and skipped.

    Args:
        paper_id: ID of the paper to find.

    Returns:
        JSON-formatted string of the paper's information if found.
        Otherwise a message that the paper was not found.
    """
    for info_file in Path(PAPER_DIR).glob("*/papers_info.json"):
        # Only the parse can raise JSONDecodeError, so guard just that step.
        try:
            catalog = json.loads(info_file.read_text(encoding="utf-8"))
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON from {info_file}: {e}")
            continue

        # Guard clause: stop at the first file that knows this paper.
        if paper_id in catalog:
            return json.dumps(catalog[paper_id], indent=2)

    return f"There's no saved information related to paper {paper_id}."

In [10]:
# Example run: look up one of the paper IDs returned by the search above.
extract_info(paper_id="1310.7911v2")

'{\n  "title": "Compact manifolds with computable boundaries",\n  "authors": [\n    "Zvonko Iljazovic"\n  ],\n  "summary": "We investigate conditions under which a co-computably enumerable closed set\\nin a computable metric space is computable and prove that in each locally\\ncomputable computable metric space each co-computably enumerable compact\\nmanifold with computable boundary is computable. In fact, we examine the notion\\nof a semi-computable compact set and we prove a more general result: in any\\ncomputable metric space each semi-computable compact manifold with computable\\nboundary is computable. In particular, each semi-computable compact\\n(boundaryless) manifold is computable.",\n  "pdf_url": "http://arxiv.org/pdf/1310.7911v2",\n  "published": "2013-10-29"\n}'

## Tools Schema

The schema of each tool, which you will provide to the LLM.

In [11]:
# Hand-written tool definitions: one dict per tool with its name, a
# description, and a JSON-schema "input_schema" describing its arguments.
# NOTE: the name `tools` is rebound later in this file to a list generated
# from the Pydantic argument models via Tool.from_function.
tools = [
    {
        "name": "search_papers",
        "description": "Search for papers on arXiv based on a topic and store their information.",
        "input_schema": {
            "type": "object",
            "properties": {
                "topic": {
                    "type": "string",
                    "description": "The topic to search for",
                },
                "max_results": {
                    "type": "integer",
                    "description": "Maximum number of results to retrieve",
                    "default": 5,
                },
            },
            "required": ["topic"],
        },
    },
    {
        "name": "extract_info",
        "description": "Search for information about a specific paper across all topic directories.",
        "input_schema": {
            "type": "object",
            "properties": {
                "paper_id": {
                    "type": "string",
                    "description": "The ID of the paper to look for",
                },
            },
            "required": ["paper_id"],
        },
    },
]

## Tool Mapping

This code handles tool mapping and execution.

In [12]:
# Dispatch table used by execute_tool: maps each tool name to the function
# that implements it. The keys match the ToolName literal values.
mapping_tool_function = {
    "search_papers": search_papers,
    "extract_info": extract_info,
}

In [13]:
# NOTE: not wrapped in pydantic.validate_call — nothing in this function
# validates its inputs; the caller is expected to pass an already-validated
# argument model.
def execute_tool(tool_name: ToolName, tool_args: ToolArgs) -> str:
    """Selects and executes a tool, returning its result as a string.

    This function acts as a dispatcher: it looks the tool up in
    ``mapping_tool_function``, calls it with the fields of ``tool_args`` as
    keyword arguments, and normalizes the output into a string.

    Args:
        tool_name: The name of the tool to execute.
        tool_args: A Pydantic model containing the validated
            arguments for the specified tool.

    Returns:
        A formatted string representation of the tool's result: list items
        joined with ", ", dictionaries as indented JSON, anything else via
        ``str()``.

    Raises:
        KeyError: If ``tool_name`` is not a key of ``mapping_tool_function``.
    """
    # Look up the function to call (search_papers or extract_info).
    func_to_call = mapping_tool_function[tool_name]

    # Convert the Pydantic model back to a dict and call the function.
    result = func_to_call(**tool_args.model_dump())

    if isinstance(result, list):
        # str() each item so a list with non-string elements cannot make
        # join() raise TypeError.
        return ", ".join(map(str, result))

    if isinstance(result, dict):
        # Convert dictionaries to formatted JSON strings.
        return json.dumps(result, indent=2)

    # For any other type, convert using str().
    return str(result)

## ChatBot Code

The chatbot handles the user's queries one at a time and does not retain any conversation history between queries.

In [14]:
# Load environment variables from a .env file (python-dotenv) before creating
# the client. NOTE(review): the client is constructed without arguments, so it
# presumably picks up its API key from the environment — confirm.
load_dotenv()
# Module-level Anthropic client used by process_query.
client = anthropic.Anthropic()

### Query Processing

In [15]:
# Automatically generate the correctly typed tools list
tools = [
    Tool.from_function(search_papers, SearchPapersArgs),
    Tool.from_function(extract_info, ExtractInfoArgs),
]

# Convert the list of Tool objects to a list of dicts for the API
tools_for_api: list[anthropic.types.ToolParam] = [
    cast("anthropic.types.ToolParam", tool.model_dump()) for tool in tools
]

In [16]:
def process_query(
    query: str,
    *,
    model: str = "claude-3-sonnet-20240229",
    max_tokens: int = 2024,
) -> None:
    """Sends a query to the model and handles the tool use conversation.

    The conversation starts from scratch on every call. While the model
    stops with ``"tool_use"``, each requested tool is executed and its result
    is sent back; once the model stops for any other reason, the text blocks
    of that final response are printed.

    Args:
        query: The user's input string.
        model: The model identifier passed to the Messages API.
            NOTE(review): the default is a dated model snapshot — confirm it
            is still served before relying on it.
        max_tokens: The maximum number of tokens to generate per response.

    Raises:
        pydantic.ValidationError: If the model supplies arguments that do
            not match the requested tool's argument model.
    """
    messages: list[anthropic.types.MessageParam] = [
        {
            "role": "user",
            "content": query,
        },
    ]

    while True:
        response = client.messages.create(
            max_tokens=max_tokens,
            model=model,
            tools=tools_for_api,
            messages=messages,
        )

        # Keep the assistant turn in the history so the tool results sent
        # next can refer to its tool_use blocks.
        messages.append({"role": response.role, "content": response.content})

        # If the model's response does not require a tool, break the loop.
        if response.stop_reason != "tool_use":
            break

        # If the model requires a tool, prepare the results.
        tool_results: list[anthropic.types.ToolResultBlockParam] = []
        for content_block in response.content:
            if content_block.type != "tool_use":
                continue

            tool_name = content_block.name
            tool_args_dict = content_block.input
            tool_id = content_block.id

            print(f"Calling tool {tool_name} with args {tool_args_dict}")

            # Validate the arguments against the correct Pydantic model.
            # The branches stay separate so the type checker can narrow
            # tool_name to the literal execute_tool expects.
            is_error = False
            if tool_name == "search_papers":
                validated_args = SearchPapersArgs.model_validate(tool_args_dict)
                result = execute_tool(tool_name=tool_name, tool_args=validated_args)
            elif tool_name == "extract_info":
                validated_args = ExtractInfoArgs.model_validate(tool_args_dict)
                result = execute_tool(tool_name=tool_name, tool_args=validated_args)
            else:
                result = f"Error: Unknown tool {tool_name}."
                is_error = True

            tool_result: anthropic.types.ToolResultBlockParam = {
                "type": "tool_result",
                "tool_use_id": tool_id,
                "content": result,
            }
            if is_error:
                # Mark the failure explicitly instead of relying on the
                # model to parse the message text.
                tool_result["is_error"] = True
            tool_results.append(tool_result)

        # Add the tool results to the conversation history for the next turn.
        messages.append({"role": "user", "content": tool_results})

    # Print the final text response from the assistant.
    for content in response.content:
        if content.type == "text":
            print(content.text)

## Chat Loop

In [17]:
def chat_loop() -> None:
    """Initiates a chat loop for interacting with the user.

    Reads queries from standard input and passes each one to
    ``process_query``. The loop ends when the user types 'quit' (any case),
    presses Ctrl-C, or the input stream ends. Blank input is ignored, and a
    ``pydantic.ValidationError`` raised while processing a query is reported
    without ending the loop.
    """
    print("Type your query or 'quit' to exit.")

    while True:
        try:
            query = input("\nQuery: ").strip()

            if query.lower() == "quit":
                break

            # Nothing to send: prompt again instead of calling the model
            # with empty content.
            if not query:
                continue

            process_query(query=query)
            print("\n")

        # 1. Exit requests: Ctrl-C, or end of input (Ctrl-D / closed pipe).
        #    The break is required — without it the loop would announce the
        #    exit and then prompt again.
        except (KeyboardInterrupt, EOFError):
            print("\nExiting chat loop.")
            break

        # 2. Expected error: report it and keep the session alive.
        except pydantic.ValidationError as e:
            print(f"\nValidation Error: The model returned invalid arguments. \n{e}")
