# Making Chatbot

## Import Libraries

In [48]:
import arxiv
import json
import os
from typing import List
from dotenv import load_dotenv
import anthropic

# Tool functions

In [50]:
PAPER_DIR = "papers"

In [51]:
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.
    
    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)
        
    Returns:
        List of paper IDs found in the search
    """
    
    # Use arxiv to find the papers 
    client = arxiv.Client()

    # Search for the most relevant articles matching the queried topic
    search = arxiv.Search(
        query = topic,
        max_results = max_results,
        sort_by = arxiv.SortCriterion.Relevance
    )

    papers = client.results(search)
    
    # Create directory for this topic
    path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
    os.makedirs(path, exist_ok=True)
    
    file_path = os.path.join(path, "papers_info.json")

    # Try to load existing papers info
    try:
        with open(file_path, "r") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}

    # Process each paper and add to papers_info  
    paper_ids = []
    for paper in papers:
        paper_ids.append(paper.get_short_id())
        paper_info = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date())
        }
        papers_info[paper.get_short_id()] = paper_info
    
    # Save updated papers_info to json file
    with open(file_path, "w") as json_file:
        json.dump(papers_info, json_file, indent=2)
    
    print(f"Results are saved in: {file_path}")
    
    return paper_ids

In [52]:
search_papers("Chaeeun Ryu")

Results are saved in: papers/chaeeun_ryu/papers_info.json


['2407.20234v1',
 '2407.06506v2',
 '2504.13674v1',
 '2505.02722v1',
 '2305.18952v5']

In [53]:
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.
    
    Args:
        paper_id: The ID of the paper to look for
        
    Returns:
        JSON string with paper information if found, error message if not found
    """
 
    for item in os.listdir(PAPER_DIR):
        item_path = os.path.join(PAPER_DIR, item)
        if os.path.isdir(item_path):
            file_path = os.path.join(item_path, "papers_info.json")
            if os.path.isfile(file_path):
                try:
                    with open(file_path, "r") as json_file:
                        papers_info = json.load(json_file)
                        if paper_id in papers_info:
                            return json.dumps(papers_info[paper_id], indent=2)
                except (FileNotFoundError, json.JSONDecodeError) as e:
                    print(f"Error reading {file_path}: {str(e)}")
                    continue
    
    return f"There's no saved information related to paper {paper_id}."

In [54]:
ret_ids = ['2407.20234v1',
 '2407.06506v2',
 '2504.13674v1',
 '2505.02722v1',
 '2305.18952v5']

In [55]:
for paper_id in ret_ids:
    info = extract_info(paper_id)
    print(f"Information for {paper_id}:\n{info}\n")

Information for 2407.20234v1:
{
  "title": "Exploring Factors Affecting Student Learning Satisfaction during COVID-19 in South Korea",
  "authors": [
    "Jiwon Han",
    "Chaeeun Ryu",
    "Gayathri Nadarajan"
  ],
  "summary": "Understanding students' preferences and learning satisfaction during COVID-19\nhas focused on learning attributes such as self-efficacy, performance, and\nengagement. Although existing efforts have constructed statistical models\ncapable of accurately identifying significant factors impacting learning\nsatisfaction, they do not necessarily explain the complex relationships among\nthese factors in depth. This study aimed to understand several facets related\nto student learning preferences and satisfaction during the pandemic such as\nindividual learner characteristics, instructional design elements and social\nand environmental factors. Responses from 302 students from Sungkyunkwan\nUniversity, South Korea were collected between 2021 and 2022. Information\ngat

# Tool Schema

In [56]:
tools = [
    {
        "name": "search_papers",
        "description": "Search for papers on arXiv based on a topic and store their information.",
        "input_schema":{
            "type": "object",
            "proprties": {
                "topic":{
                    "type": "string",
                    "description": "The topic to search for"
                },
                "max_results":{
                    "type": "integer",
                    "description": "Maximum number of results to retrieve (default: 5)",
                    "default": 5
                }
            },
            "required": ["topic"]
        }
    },
    {
        "name": "extract_info",
        "description": "Search for information about a specific paper across all topic directories.",
        "input_schema": {
            "type": "object",
            "properties": {
                "paper_id": {
                    "type": "string",
                    "description": "The ID of the paper to look for"
                }
            },
            "required": ["paper_id"]
        }
    }
]

# Tool Mapping

In [57]:
mapping_tool_functions = {
    "search_papers": search_papers,
    "extract_info": extract_info
}

In [58]:
def execute_tool(tool_name, tool_args):
    result = mapping_tool_functions[tool_name](**tool_args)
    
    if result is None:
        return "The operation completed but didn't return any results."
    elif isinstance(result, list):
        return ', '.join(result)
    elif isinstance(result, dict):
        return json.dumps(result, indent=2)
    else:
        return str(result)

# Build Chatbot

In [59]:
# load_dotenv()

In [60]:
# api_key = os.getenv("ANTHROPIC_API_KEY")
if not api_key:
    raise ValueError("API key is missing. Please check your .env file.")
else:
    print("API key loaded successfully.")

API key loaded successfully.


In [61]:
client = anthropic.Anthropic(api_key=api_key)

In [62]:
import anthropic

# api_key = "your_actual_api_key_here"  # Replace with your key
# client = anthropic.Anthropic(api_key=api_key)

try:
    response = client.messages.create(
        max_tokens=10,
        model="claude-3-7",
        messages=[{"role": "user", "content": "Test"}]
    )
    print("Test successful:", response)
except Exception as e:
    print("Error:", e)

Error: Error code: 404 - {'type': 'error', 'error': {'type': 'not_found_error', 'message': 'model: claude-3-7'}}


In [71]:
def process_query(query):
    messages = [
        {
            "role": "user",
            "content": query
        }
    ]
    response = client.messages.create(max_tokens=2024,
                                      model = "claude-3-7-sonnet-20250219",
                                      tools = tools,
                                      messages=messages)
    
    process_query = True
    while process_query:
        assistant_content = []
        
        for content in response.content:
            if content.type == 'text':
                print(content.text)
                assistant_content.append(content)

                if len(response.content) == 1:
                    process_query = False
            elif content.type == 'tool_use':
                assistant_content.append(content)
                messages.append({
                    "role": "assistant",
                    "content": assistant_content
                })
                
                tool_id = content.id
                tool_args = content.input
                tool_name = content.name
                print(f"Calling tool {tool_name} with args {tool_args}")
                
                result = execute_tool(tool_name, tool_args)
                messages.append({
                    "role": "user",
                    "content":[
                        {
                            "type": "tool_result",
                            "tool_use_id": tool_id,
                            "content": result
                        }
                    ]
                })
                response = client.messages.create(
                    max_tokens=2024,
                    model="claude-3-7-sonnet-20250219",
                    tools = tools,
                    messages=messages
                )
                
                if len(response.content) == 1 and response.content[0].type == 'text':
                    print(response.content[0].text)
                    process_query = False

# Chat Loop

In [72]:
def chat_loop():
    print("Welcome to the Paper Search Assistant!")
    print("Type your queries or 'quit' to exit.")
    
    while True:
        try:
            query = input("You: ")
            if query.lower() == 'quit':
                break
            process_query(query)
            print()
        except Exception as e:
            print(f"An error occurred: {str(e)}")
            continue

In [73]:
chat_loop()

Welcome to the Paper Search Assistant!
Type your queries or 'quit' to exit.
Hello! I'd be happy to help you today. Is there something specific you'd like to know about? For example, I can search for scientific papers on arXiv on a particular topic or help you find information about a specific paper if you have its ID.

If you're interested in research papers, just let me know what topic you're curious about, and I can search for relevant publications for you. How can I assist you today?

I'd be happy to search for papers on model context protocol for you. Let me do that search right away.
Calling tool search_papers with args {'topic': 'model context protocol'}
Results are saved in: papers/model_context_protocol/papers_info.json
I've found 5 papers related to "model context protocol". Let me extract more information about each of these papers so you can see what they're about:
Calling tool extract_info with args {'paper_id': '2505.02279v1'}
Calling tool extract_info with args {'paper_id