In [9]:
%%capture

%pip install -U arxiv dotenv google google-generativeai

In [7]:
import arxiv
import json
import os
from typing import List
from dotenv import load_dotenv

In [34]:
PAPER_DIR = "papers"

In [24]:
def search_papers(topic: str, max_results: int= 5) -> List[str]:
  """
  Search for papers on arXiv based on a topic and store their information.

  Args:
    topic: The topic to search for
    max_results : Maximum number of results to retrieve(default:5)

    Returns:
      List of paper IDs found in the search
    """

  #Use arxiv to find the papers
  client = arxiv.Client()

  #Search for the most relevant articles matching the queried topic
  search = arxiv.Search(
      query = topic,
      max_results = max_results,
      sort_by = arxiv.SortCriterion.Relevance
  )

  papers = client.results(search)

  #Create directory for this topic
  path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
  os.makedirs(path, exist_ok=True)

  file_path = os.path.join(path, "papers_info.json")

  #Try to load existing papers info
  try:
    with open(file_path, "r") as json_file:
      papers_info = json.load(json_file)
  except (FileNotFoundError, json.JSONDecodeError):
    papers_info = {}

  #Process each paper and add to papers_info
  paper_ids = []
  for paper in papers:
    paper_ids.append(paper.get_short_id())
    paper_info = {
        'title': paper.title,
        'authors': [author.name for author in paper.authors],
        'summary': paper.summary,
        'pdf_url': paper.pdf_url,
        'published': str(paper.published.date())
    }
    papers_info[paper.get_short_id()] = paper_info

  #Save updated papers_info to json file
  with open(file_path, "w") as json_file:
    json.dump(papers_info, json_file, indent=2)

  print(f"Results are saved in: {file_path}")

  return paper_ids

In [35]:
search_papers("alphaevolve")

Results are saved in: papers/alphaevolve/papers_info.json


['2505.16105v1', '2103.16196v2']

In [44]:
def extract_info(paper_id: str) -> str:
  """
  Search for information about a specific paper across all topic directories.

  Args:
    paper_id: The ID of the paper to look for

  Returns:
    JSON string with paper information if found, error message if not found
  """

  print(os.listdir(PAPER_DIR))
  for item in os.listdir(PAPER_DIR):
    item_path = os.path.join(PAPER_DIR, item)
    if os.path.isdir(item_path):
      file_path = os.path.join(item_path, "papers_info.json")
      if os.path.isfile(file_path):
        try:
          with open(file_path, "r") as json_file:
            papers_info = json.load(json_file)
            if paper_id in papers_info:
              return json.dumps(papers_info[paper_id], indent=2)
        except (FileNotFoundError, json.JSONDecodeError) as e:
          print(f"Error reading {file_path}: {str(e)}")
          continue

  return f"There is no saved information related to paper {paper_id}."

In [45]:
extract_info('2505.16105v1')

['llm', 'alphaevolve']


'{\n  "title": "Sums and differences of sets (improvement over AlphaEvolve)",\n  "authors": [\n    "Robert Gerbicz"\n  ],\n  "summary": "On May 14, 2025, DeepMind announced that AlphaEvolve, a large language model\\napplied to a set of mathematical problems, had matched or exceeded the best\\nknown bounds on several problems. In the case of the sum and difference of sets\\nproblem, AlphaEvolve, using a set of $54265$ integers, improved the known lower\\nbound of $\\\\theta=1.14465$ to $\\\\theta=1.1584$. In this paper, we present an\\nimproved bound $\\\\theta=1.173050$ using an explicit construction of a U set that\\ncontains more than $10^{43546}$ elements. For fast integer and floating-point\\narithmetic, we used the (free) GMP library.",\n  "pdf_url": "http://arxiv.org/pdf/2505.16105v1",\n  "published": "2025-05-22"\n}'

In [55]:
!source ~/.zshrc

In [66]:
import google.generativeai as genai

# Configure your API key
import json

# Load config
with open('config.json', 'r') as f:
    config = json.load(f)

api_key = config['api_key']
genai.configure(api_key=api_key) # Replace with your actual API key

from google.generativeai.protos import FunctionDeclaration, Tool, Schema, Type
tools = [
    Tool(
        function_declarations=[
            FunctionDeclaration(
                name="search_papers",
                description="Search for papers on arXiv based on a topic and store their information.",
                parameters=Schema(
                    type=Type.OBJECT,
                    properties={
                        "topic": Schema(
                            type=Type.STRING,
                            description="The topic to search for"
                        ),
                        "max_results": Schema(
                            type=Type.INTEGER,
                            description="Maximum number of results to retrieve",
                            # default=5
                        )
                    },
                    required=["topic"]
                ),
            )
        ]
    ),
    Tool(
        function_declarations=[
            FunctionDeclaration(
                name="extract_info",
                description="Search for information about a specific paper across all topic directories.",
                parameters=Schema(
                    type=Type.OBJECT,
                    properties={
                        "paper_id": Schema(
                            type=Type.STRING,
                            description="The ID of the paper to look for"
                        )
                    },
                    required=["paper_id"]
                ),
            )
        ]
    ),
]


def process_query(query):

  #Define the Gemini model
  model = genai.GenerativeModel('gemini-2.5-flash-preview-05-20', tools=tools)

  #Start a chat session
  chat_session = model.start_chat(history=[])

  #Send the user query
  response = chat_session.send_message(query)

  # print(f"Intial responses from chatbot: {response}")

  process_query=True
  while process_query:
    assistant_content = []
    tool_calls_made = False

    #Process the response
    for part in response.candidates[0].content.parts:
      if part.text:
        print(part.text)
        assistant_content.append({"text": part.text})
        if len(response.candidates[0].content.parts) == 1 and not part.function_call:
          process_query = False

      if part.function_call:
        tool_calls_made = True
        tool_name = part.function_call.name
        tool_args = part.function_call.args

        print(f"Calling tool {tool_name} with args {tool_args}")

        #Execute tool
        result = execute_tool(tool_name, tool_args)

        #Send the tool result back to the model
        response = chat_session.send_message(genai.protos.Content(
            parts=[genai.protos.Part(
                function_response=genai.protos.FunctionResponse(
                    name=tool_name,
                    response={'content': result}
                )
            )]
        ))

      if not tool_calls_made and len(response.candidates[0].content.parts) == 1 and response.candidates[0].content.parts[0].text:
            process_query = False

      if tool_calls_made and not response.candidates[0].content.parts[0].text and not response.candidates[0].content.parts[0].function_call:
             process_query = False

mapping_tool_function = {
    "search_papers": search_papers,
    "extract_info": extract_info
}

def execute_tool(tool_name, tool_args):

  result = mapping_tool_function[tool_name](**tool_args)

  if result is None:
      result = "The operation completed but didn't return any results."

  elif isinstance(result, list):
      result = ', '.join(result)

  elif isinstance(result, dict):
      # Convert dictionaries to formatted JSON strings
      result = json.dumps(result, indent=2)

  else:
      # For any other type, convert using str()
      result = str(result)
  return result

# Example usage:
# process_query_gemini("Find me 3 recent papers on large language models.", tools, execute_tool)
# process_query_gemini("Extract information for paper ID 2305.01234.", tools, execute_tool)
# process_query_gemini("Tell me a simple joke.", tools, execute_tool)

In [67]:
def chat_loop():
  print("Type your queries or 'quit' to exit.")
  while True:
    try:
      query = input("\nQuery: ").strip()
      if query.lower() == 'quit':
        break

      process_query(query)
      print("\n")
    except Exception as e:
      print(f"\nError: {str(e)}")

In [68]:
chat_loop()

Type your queries or 'quit' to exit.
Hello! I'm a bot that can help you with your academic research. I can search for papers on arXiv and extract information about them. What would you like to do?



