In [1]:
!pip install arxiv

[0mLooking in indexes: https://mirrors.aliyun.com/pypi/simple/


In [6]:
!pip install anthropic

Looking in indexes: https://mirrors.cloud.aliyuncs.com/pypi/simple
Collecting anthropic
  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/5c/49/b570250e36471effbc146d22ffb111e775f11ff2d8b503b32526f25a8f23/anthropic-0.82.0-py3-none-any.whl (456 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m456.3/456.3 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
[33mDEPRECATION: pytorch-lightning 1.7.7 has a non-standard dependency specifier torch>=1.9.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0mInstalling collected packages: anthropic
Successfully installed anthropic-0.82.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49

In [7]:
import arxiv
import json
import os 
from typing import List
from dotenv import load_dotenv
import anthropic

In [1]:
PAPER_DIR="papers"

In [21]:
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search
    """
    client = arxiv.Client()
    
    search = arxiv.Search(
        query=topic,
        max_results = max_results,
        sort_by = arxiv.SortCriterion.Relevance
    )
    papers = client.results(search)
    path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
    # 如果目录存在，跳过
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, "papers_info.json")
    
    try:
        with open(file_path, "r") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}
    
    paper_ids = []
    for paper in papers:
        paper_ids.append(paper.get_short_id())
        paper_info = {
            'title': paper.title,
            # 列表推倒式
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date())
        }
        papers_info[paper.get_short_id()] = paper_info
        
    with open(file_path, "w") as json_file:
        json.dump(papers_info, json_file, indent=2)
        
    print(f"Results are saved in : {file_path}")
    return paper_ids
        
        

In [9]:
serarch_papers("computers")

Results are saved in : papers/computers/papers_info.json


['1312.3300v1', '2207.05241v1', '2601.11095v1', '2012.10468v1', '2009.00041v1']

In [18]:
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    Args:
        paper_id: The ID of the paper to look for

    Returns:
        JSON string with paper information if found, error message if not found
    """
    for item in os.listdir(PAPER_DIR):
        print(item)
        item_path = os.path.join(PAPER_DIR, item)
        # if os.path.isdir(item_path)
        print(item_path)
        if os.path.isdir(item_path):
            file_path = os.path.join(item_path, "papers_info.json")
            if os.path.isfile(file_path):
                try:
                    with open(file_path, "r") as json_file:
                        papers_info = json.load(json_file)
                        if paper_id in papers_info:
                            return json.dumps(papers_info[paper_id], indent=2)
                except (FileNotFoundError, json.JSONDecodeError) as e:
                    print(f"Error reading {file_path}: {str(e)}")
                    continue
    
    return f"There's no saved information related to paper {paper_id}."
    
extract_info("1312.3300v1")
    

computers
papers/computers


'{\n  "title": "Numerical Reproducibility and Parallel Computations: Issues for Interval Algorithms",\n  "authors": [\n    "Nathalie Revol",\n    "Philippe Th\\u00e9veny"\n  ],\n  "summary": "What is called \\"numerical reproducibility\\" is the problem of getting the same result when the scientific computation is run several times, either on the same machine or on different machines, with different types and numbers of processing units, execution environments, computational loads etc. This problem is especially stringent for HPC numerical simulations. In what follows, the focus is on parallel implementations of interval arithmetic using floating-point arithmetic. For interval computations, numerical reproducibility is of course an issue for testing and debugging purposes. However, as long as the computed result encloses the exact and unknown result, the inclusion property, which is the main property of interval arithmetic, is satisfied and getting bit for bit identical results may not

In [22]:
tools = [
    {
        "name": "search_papers",
        "description": "Search for papers on arXiv based on a topic and store their information.",
        "input_schema": {
            "type": "object",
            "properties": {
                "topic": {
                    "type": "string",
                    "description": "The topic to search for"
                },
                "max_results": {
                    "type": "integer",
                    "description": "Maximum number of results to retrieve",
                    "default": 5
                }
            },
            "required": ["topic"]
        }
    },
    {
        "name": "extract_info",
        "description": "Search for information about a specific paper across all topic directories.",
        "input_schema": {
            "type": "object",
            "properties": {
                "paper_id": {
                    "type": "string",
                    "description": "The ID of the paper to look for"
                }
            },
            "required": ["paper_id"]
        }
    }
]

In [23]:
mapping_tool_function = {
    "search_papers": search_papers,
    "extract_info": extract_info
}

def execute_tool(tool_name, tool_args):
    result = mapping_tool_function[tool_name](**tool_args)
    
    if result is None:
        result = "The operation completed but didn't return any results."
    elif isinstance(result, list):
        result = ", ".join(result)
    elif isinstance(result, dict):
        result = json.dumps(result, indent=2)
    else:
        result = str(result)
    return result

In [24]:
client = anthropic.Anthropic(
    api_key='sk-gU0ZENytPGeVavwgBBrIbk2P2mb3WrjaWwerTjtJciFPc74l',
    base_url='https://api.302.ai/v1'
)

In [25]:
def process_query(query):

    messages = [{'role': 'user', 'content': query}]

    response = client.messages.create(max_tokens = 2024,
                                      model = 'claude-3-7-sonnet-20250219',
                                      tools = tools,
                                      messages = messages)

    process_query = True
    while process_query:
        assistant_content = []

        for content in response.content:
            if content.type == 'text':

                print(content.text)
                assistant_content.append(content)

                if len(response.content) == 1:
                    process_query = False

            elif content.type == 'tool_use':

                assistant_content.append(content)
                messages.append({'role': 'assistant', 'content': assistant_content})

                tool_id = content.id
                tool_args = content.input
                tool_name = content.name
                print(f"Calling tool {tool_name} with args {tool_args}")

                result = execute_tool(tool_name, tool_args)
                messages.append({"role": "user",
                 "content": [
                     {
                         "type": "tool_result",
                         "tool_use_id": tool_id,
                         "content": result
                     }
                 ]})

                response = client.messages.create(max_tokens = 2024,
                                                  model = 'claude-3-7-sonnet-20250219',
                                                  tools = tools,
                                                  messages = messages)

                if len(response.content) == 1 and response.content[0].type == "text":
                    print(response.content[0].text)
                    process_query = False

In [27]:
def chat_loop():
    print("Type your queries or 'quit' to exit.")
    while True:
        try:
            query = input("\nQuery: ").strip()
            if query.lower() == 'quit':
                break

            process_query(query)
            print("\n")
        except Exception as e:
            print(f"\nError: {str(e)}")

In [None]:
chat_loop()

Type your queries or 'quit' to exit.



Query:  hi


Please migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.
  response = client.messages.create(max_tokens = 2024,


Hello! I'm here to help you find and learn about research papers on arXiv. I can search for papers on specific topics or look up information about individual papers if you have their arXiv IDs.

What would you like to explore today? For example, I could search for recent papers on topics like quantum computing, machine learning, climate science, or any other research area you're interested in.





Query:  search for papers on algebra


I'll help you search for papers on algebra. I can use the search_papers function to find recent papers on this topic. 
Calling tool search_papers with args {'topic': 'algebra'}
Results are saved in : papers/algebra/papers_info.json

Error: argument 'by_alias': 'NoneType' object cannot be converted to 'PyBool'


Please migrate to a newer model. Visit https://docs.anthropic.com/en/docs/resources/model-deprecations for more information.
  response = client.messages.create(max_tokens = 2024,



Query:  The IDs are here 2411.11095v3


I'll help you look up information about this specific paper. I'll use the paper ID you provided to extract information about it.
Calling tool extract_info with args {'paper_id': '2411.11095v3'}
algebra
papers/algebra

Error: argument 'by_alias': 'NoneType' object cannot be converted to 'PyBool'



Query:  The IDS are 1312.3300v1


I'll search for information about this specific paper ID for you. Let me extract the details about arXiv paper 1312.3300v1.
Calling tool extract_info with args {'paper_id': '1312.3300v1'}
algebra
papers/algebra
computers
papers/computers

Error: argument 'by_alias': 'NoneType' object cannot be converted to 'PyBool'
