Reference: MCP – Build apps with Anthropic

#### Importing Libraries

In [25]:
import arxiv
import json
import os
from typing import List
from dotenv import load_dotenv
import anthropic

In [26]:
PAPER_DIR = "papers"

The function below searches arXiv for papers on the given topic and saves their metadata to a JSON file.

In [27]:
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.

    The metadata of every result is merged into
    ``<PAPER_DIR>/<topic_dir>/papers_info.json``, keyed by the paper's short
    ID. ``<topic_dir>`` is the lower-cased topic with spaces replaced by
    underscores; path separators are replaced as well so that every topic maps
    to exactly one directory directly under ``PAPER_DIR``.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search
    """

    # Use arxiv to find papers
    client = arxiv.Client()

    # Search for the most relevant articles matching the queried topic
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance
    )

    papers = client.results(search)

    # Build the directory name. extract_info only scans first-level
    # subdirectories of PAPER_DIR, so a topic such as "ai/ml" or ".." must not
    # produce a nested path or escape PAPER_DIR.
    topic_dir = topic.lower().replace(" ", "_")
    for separator in (os.sep, os.altsep):
        if separator:
            topic_dir = topic_dir.replace(separator, "_")
    if topic_dir in ("", ".", ".."):
        topic_dir = "_"

    # Create directory to store papers
    path = os.path.join(PAPER_DIR, topic_dir)
    os.makedirs(path, exist_ok=True)

    file_path = os.path.join(path, "papers_info.json")

    # Try to load existing papers so earlier searches are kept
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            papers_info = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}

    # A file holding valid JSON that is not an object cannot be merged into
    if not isinstance(papers_info, dict):
        papers_info = {}

    # Process each paper and add it to papers_info
    paper_ids = []
    for paper in papers:
        short_id = paper.get_short_id()
        paper_ids.append(short_id)
        papers_info[short_id] = {
            "title": paper.title,
            "summary": paper.summary,
            "authors": [author.name for author in paper.authors],
            "published": paper.published.isoformat(),
            "updated": paper.updated.isoformat(),
            "pdf_url": paper.pdf_url,
            "doi": paper.doi
        }

    # Save papers info to json file
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(papers_info, f, indent=2)

    print(f"Results are saved in {file_path}")

    return paper_ids

In [28]:
search_papers("machine learning")

Results are saved in papers/machine_learning/papers_info.json


['1909.03550v1',
 '1811.04422v1',
 '1707.04849v1',
 '1909.09246v1',
 '2301.09753v1']

In [29]:
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    Every first-level subdirectory of ``PAPER_DIR`` is checked for a
    ``papers_info.json`` file; the first file that contains ``paper_id`` as a
    key wins. Files that cannot be read or parsed are reported with ``print``
    and skipped.

    Args:
        paper_id: The ID of the paper to look for

    Returns:
        JSON string with paper information if found, error message if not found
    """

    not_found = f"There's is no saved information related to paper ID {paper_id}."

    # Nothing has been saved yet: os.listdir would raise on a missing directory
    if not os.path.isdir(PAPER_DIR):
        return not_found

    for item in os.listdir(PAPER_DIR):
        file_path = os.path.join(PAPER_DIR, item, "papers_info.json")
        if not os.path.isfile(file_path):
            continue

        try:
            with open(file_path, "r") as json_file:
                papers_info = json.load(json_file)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and undecodable bytes
            print(f"Error reading {file_path}: {str(e)}")
            continue

        # Only a JSON object can map paper IDs to their information
        if isinstance(papers_info, dict) and paper_id in papers_info:
            return json.dumps(papers_info[paper_id], indent=2)

    return not_found
            

In [30]:
extract_info('2301.09753v1')

'{\n  "title": "Towards Modular Machine Learning Solution Development: Benefits and Trade-offs",\n  "summary": "Machine learning technologies have demonstrated immense capabilities in\\nvarious domains. They play a key role in the success of modern businesses.\\nHowever, adoption of machine learning technologies has a lot of untouched\\npotential. Cost of developing custom machine learning solutions that solve\\nunique business problems is a major inhibitor to far-reaching adoption of\\nmachine learning technologies. We recognize that the monolithic nature\\nprevalent in today\'s machine learning applications stands in the way of\\nefficient and cost effective customized machine learning solution development.\\nIn this work we explore the benefits of modular machine learning solutions and\\ndiscuss how modular machine learning solutions can overcome some of the major\\nsolution engineering limitations of monolithic machine learning solutions. We\\nanalyze the trade-offs between modular

#### Tool Schema

In [31]:
# Tool definitions passed to the model: one entry per callable tool, each with
# a name, a description and a schema describing the arguments it accepts.
tools = [
    dict(
        name="search_papers",
        description="Search for papers on arXiv based on a topic and store their information.",
        input_schema=dict(
            type="object",
            properties=dict(
                topic=dict(
                    type="string",
                    description="The topic to search for",
                ),
                max_results=dict(
                    type="integer",
                    description="Maximum number of results to retrieve",
                    default=5,
                ),
            ),
            required=["topic"],
        ),
    ),
    dict(
        name="extract_info",
        description="Search for information about a specific paper across all topic directories.",
        input_schema=dict(
            type="object",
            properties=dict(
                paper_id=dict(
                    type="string",
                    description="The ID of the paper to look for",
                ),
            ),
            required=["paper_id"],
        ),
    ),
]

#### Tool Mapping

In [32]:
# Lookup table from the tool name used in the tool schema to the Python
# function that implements it.
mapping_tool_function = dict(
    search_papers=search_papers,
    extract_info=extract_info,
)

def execute_tool(tool_name, tool_args):
    """
    Run a registered tool and return its result as a string.

    Args:
        tool_name: Key of the tool in ``mapping_tool_function``
        tool_args: Dict of keyword arguments forwarded to the tool function

    Returns:
        The tool result converted to text: a fixed message for ``None``,
        a comma-separated string for lists, formatted JSON for dicts and
        ``str(result)`` for anything else

    Raises:
        KeyError: If ``tool_name`` is not a registered tool
    """

    try:
        tool_function = mapping_tool_function[tool_name]
    except KeyError:
        available = ", ".join(sorted(mapping_tool_function))
        raise KeyError(
            f"Unknown tool '{tool_name}'. Available tools: {available}"
        ) from None

    result = tool_function(**tool_args)

    if result is None:
        return "operation completed but no result found"

    if isinstance(result, list):
        # Items are converted individually so non-string entries do not
        # make str.join raise a TypeError
        return ", ".join(str(item) for item in result)

    if isinstance(result, dict):
        # Convert to formatted json
        return json.dumps(result, indent=2)

    return str(result)

#### Chatbot code

In [33]:
# Load variables from a local .env file into the process environment (python-dotenv).
load_dotenv()
# Shared Anthropic API client used by process_query; created with no explicit arguments.
# NOTE(review): presumably it picks up its API key from the environment loaded above — confirm against the SDK docs.
client = anthropic.Anthropic()

#### Query processing

In [34]:
def process_query(query, model='claude-3-7-sonnet-20250219', max_tokens=2020):
    """
    Send a user query to the model and resolve any tool calls it requests.

    The conversation is replayed to the model until a reply contains no
    ``tool_use`` block. For every reply, text blocks are printed and each
    requested tool is executed with ``execute_tool``; the results of all tool
    calls from one reply are sent back together in a single follow-up message.

    Args:
        query: The user's question or instruction
        model: Model identifier passed to the API
        max_tokens: Maximum number of tokens per model reply

    Returns:
        None. Model text and tool-call notices are printed.
    """

    messages = [{'role': 'user', 'content': query}]

    while True:
        response = client.messages.create(max_tokens=max_tokens,
                                          model=model,
                                          tools=tools,
                                          messages=messages)

        tool_results = []

        for content in response.content:
            if content.type == "text":
                print(content.text)

            elif content.type == "tool_use":
                print(f"calling tool {content.name} with args {content.input}")
                result = execute_tool(content.name, content.input)

                tool_results.append({
                    'type': "tool_result",
                    "tool_use_id": content.id,
                    "content": result
                })

        # A reply without tool calls is the final answer
        if not tool_results:
            break

        # Record the complete assistant turn once, then answer every tool call
        # in one message. Tool results travel in a 'user' message; the
        # Messages API has no 'tool' role.
        messages.append({'role': 'assistant', 'content': list(response.content)})
        messages.append({'role': 'user', 'content': tool_results})

In [35]:
def chat_loop():
    """
    Run an interactive prompt that forwards each query to ``process_query``.

    The loop ends when the user types 'quit' (case-insensitive) or when the
    input stream is closed. Blank queries are ignored. Errors raised while
    processing a query are printed and the loop continues with the next query.
    """
    print("Type your queries or 'quit' to exit.")
    while True:
        # Reading input stays outside the generic handler below: a failing
        # input() would otherwise be caught and retried forever.
        try:
            query = input("\nQuery: ").strip()
        except EOFError:
            break

        if query.lower() == 'quit':
            break

        # Nothing to send for a blank line
        if not query:
            continue

        try:
            process_query(query)
            print("\n")
        except Exception as e:
            print(f"\nError: {str(e)}")

In [36]:
chat_loop()

Type your queries or 'quit' to exit.



Query:  hi



Error: Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', 'message': 'Your credit balance is too low to access the Anthropic API. Please go to Plans & Billing to upgrade or purchase credits.'}}



Query:  quit
