<a href="https://colab.research.google.com/github/muskan469/mcp_chatbot/blob/main/MCP_Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install arxiv

Collecting arxiv
  Downloading arxiv-2.2.0-py3-none-any.whl.metadata (6.3 kB)
Collecting feedparser~=6.0.10 (from arxiv)
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting sgmllib3k (from feedparser~=6.0.10->arxiv)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading arxiv-2.2.0-py3-none-any.whl (11 kB)
Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: sgmllib3k
  Building wheel for sgmllib3k (setup.py) ... [?25l[?25hdone
  Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6046 sha256=0664bdc7765a8a7faed4aa6f5fa997f01055189a50e038ea21e7dd279a72ea6b
  Stored in directory: /root/.cache/pip/wheels/3b/25/2a/105d6a15df6914f4d15047691c6c28f9052cc1173e40285d03
Successfully built sgmllib3k
Installing collected packag

In [None]:
pip install dotenv

Collecting dotenv
  Downloading dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)
Collecting python-dotenv (from dotenv)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading dotenv-0.9.9-py2.py3-none-any.whl (1.9 kB)
Downloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv, dotenv
Successfully installed dotenv-0.9.9 python-dotenv-1.1.0


In [None]:
pip install google-generativeai



In [None]:
pip install anthropic

Collecting anthropic
  Downloading anthropic-0.52.0-py3-none-any.whl.metadata (25 kB)
Downloading anthropic-0.52.0-py3-none-any.whl (286 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/286.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m286.1/286.1 kB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anthropic
Successfully installed anthropic-0.52.0


In [None]:
import arxiv
import json
import os
from typing import List
from dotenv import load_dotenv
import google.generativeai as genai

In [None]:
# Fetch the Gemini API key stored under the name 'GEMINI_API_KEY' in Colab's
# `userdata` store and pass it to the google.generativeai SDK, so the key is
# never written into the notebook itself.
from google.colab import userdata
GOOGLE_API_KEY=userdata.get('GEMINI_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
# Root directory for saved search results; search_papers writes one
# sub-directory per topic beneath it and extract_info scans those sub-directories.
PAPER_DIR = "papers"

In [None]:
def search_papers(topic: str, max_results: int = 5) -> List[str]:
  """
  Search for papers on arXiv based on a topic and store their information.

  The metadata of every hit is merged into
  ``<PAPER_DIR>/<topic directory>/papers_info.json`` (keyed by the paper's
  short arXiv ID), so repeated searches on the same topic accumulate entries.

  Args:
    topic: The topic to search for
    max_results: Maximum number of results to retrieve (default: 5)

  Returns:
    List of paper IDs found in the search
  """
  # Values that originate from a model tool call may arrive as floats
  # (e.g. 5.0); the arXiv client needs a real integer limit.
  max_results = int(max_results)

  # Use arxiv to find the most relevant articles matching the queried topic
  client = arxiv.Client()
  search = arxiv.Search(
      query=topic,
      max_results=max_results,
      sort_by=arxiv.SortCriterion.Relevance
  )

  # Fetch everything up front so a failed request does not leave an empty
  # topic directory behind.
  papers = list(client.results(search))

  # Build the directory name for this topic. Path separators are replaced so
  # the topic always maps to exactly one directory directly under PAPER_DIR,
  # which is the only level extract_info scans.
  topic_dir = topic.lower().replace(" ", "_")
  for separator in ("/", "\\"):
    topic_dir = topic_dir.replace(separator, "_")
  if topic_dir in ("", ".", ".."):
    topic_dir = "_"

  path = os.path.join(PAPER_DIR, topic_dir)
  os.makedirs(path, exist_ok=True)

  file_path = os.path.join(path, "papers_info.json")

  # Try to load existing papers info; start fresh if missing or corrupt
  try:
    with open(file_path, "r", encoding="utf-8") as json_file:
      papers_info = json.load(json_file)
  except (FileNotFoundError, json.JSONDecodeError):
    papers_info = {}

  # Process each paper and add to papers_info
  paper_ids = []
  for paper in papers:
    short_id = paper.get_short_id()
    paper_ids.append(short_id)
    papers_info[short_id] = {
        'title': paper.title,
        'authors': [author.name for author in paper.authors],
        'summary': paper.summary,
        'pdf_url': paper.pdf_url,
        'published': str(paper.published.date())
    }

  # Save updated papers_info to json file
  with open(file_path, "w", encoding="utf-8") as json_file:
    json.dump(papers_info, json_file, indent=2)

  print(f"Results are saved in: {file_path}")

  return paper_ids

In [None]:
# Smoke test: run a search for the "alphaevolve" topic and persist the results.
search_papers("alphaevolve")

Results are saved in: papers/alphaevolve/papers_info.json


['2505.16105v1', '2103.16196v2']

In [None]:
def extract_info(paper_id: str) -> str:
  """
  Search for information about a specific paper across all topic directories.

  Every ``<PAPER_DIR>/<topic>/papers_info.json`` file is checked in turn; the
  first one containing ``paper_id`` wins. Unreadable or malformed files are
  reported on stdout and skipped.

  Args:
    paper_id: The ID of the paper to look for

  Returns:
    JSON string with paper information if found, error message if not found
  """
  not_found = f"There is no saved information related to paper {paper_id}."

  # Nothing has been saved yet if the root directory does not exist.
  if not os.path.isdir(PAPER_DIR):
    return not_found

  # Sorted for a deterministic lookup order across runs.
  for item in sorted(os.listdir(PAPER_DIR)):
    item_path = os.path.join(PAPER_DIR, item)
    if not os.path.isdir(item_path):
      continue

    file_path = os.path.join(item_path, "papers_info.json")
    if not os.path.isfile(file_path):
      continue

    try:
      with open(file_path, "r") as json_file:
        papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError) as e:
      print(f"Error reading {file_path}: {str(e)}")
      continue

    if paper_id in papers_info:
      return json.dumps(papers_info[paper_id], indent=2)

  # Only give up after every topic directory has been checked.
  return not_found


In [None]:
# Smoke test: look up the stored metadata for one paper ID.
extract_info("2505.16105v1")

'{\n  "title": "Sums and differences of sets (improvement over AlphaEvolve)",\n  "authors": [\n    "Robert Gerbicz"\n  ],\n  "summary": "On May 14, 2025, DeepMind announced that AlphaEvolve, a large language model\\napplied to a set of mathematical problems, had matched or exceeded the best\\nknown bounds on several problems. In the case of the sum and difference of sets\\nproblem, AlphaEvolve, using a set of $54265$ integers, improved the known lower\\nbound of $\\\\theta=1.14465$ to $\\\\theta=1.1584$. In this paper, we present an\\nimproved bound $\\\\theta=1.173050$ using an explicit construction of a U set that\\ncontains more than $10^{43546}$ elements. For fast integer and floating-point\\narithmetic, we used the (free) GMP library.",\n  "pdf_url": "http://arxiv.org/pdf/2505.16105v1",\n  "published": "2025-05-22"\n}'

# Tool schema

In [None]:
from google.generativeai.protos import FunctionDeclaration, Tool, Schema, Type

# Schema advertised to the model for the `search_papers` function.
# `max_results` is optional and no default is declared in the schema; the
# Python function's own default applies when the model omits it.
_search_papers_declaration = FunctionDeclaration(
    name="search_papers",
    description="Search for papers on arXiv based on a topic and store their information.",
    parameters=Schema(
        type=Type.OBJECT,
        properties={
            "topic": Schema(
                type=Type.STRING,
                description="The topic to search for",
            ),
            "max_results": Schema(
                type=Type.INTEGER,
                description="Maximum number of results to retrieve",
            ),
        },
        required=["topic"],
    ),
)

# Schema advertised to the model for the `extract_info` function.
_extract_info_declaration = FunctionDeclaration(
    name="extract_info",
    description="Search for information about a specific paper across all topic directories.",
    parameters=Schema(
        type=Type.OBJECT,
        properties={
            "paper_id": Schema(
                type=Type.STRING,
                description="The ID of the paper to look for",
            ),
        },
        required=["paper_id"],
    ),
)

# One Tool per function declaration, in the order the model is given them.
tools = [
    Tool(function_declarations=[_search_papers_declaration]),
    Tool(function_declarations=[_extract_info_declaration]),
]

# Tool Mapping

In [None]:
# Dispatch table: tool name requested by the model -> Python callable that implements it.
mapping_tool_function = dict(
    search_papers=search_papers,
    extract_info=extract_info,
)

def execute_tool(tool_name, tool_args):
  """
  Run a registered tool and normalise its result to a string.

  Args:
    tool_name: Key of the tool in ``mapping_tool_function``
    tool_args: Mapping of keyword arguments for the tool (may be empty or None)

  Returns:
    The tool's result as text: a fixed notice for ``None``, a comma-separated
    line for lists, indented JSON for dicts, and ``str(result)`` otherwise.

  Raises:
    KeyError: If ``tool_name`` is not a registered tool.
  """
  if tool_name not in mapping_tool_function:
    available = ', '.join(sorted(mapping_tool_function))
    raise KeyError(f"Unknown tool '{tool_name}'. Available tools: {available}")

  # Copy into a plain dict so any mapping-like argument container can be
  # expanded as keyword arguments; tolerate a missing argument set.
  kwargs = dict(tool_args) if tool_args else {}
  result = mapping_tool_function[tool_name](**kwargs)

  if result is None:
    return "The operation completed but didn't return any results."

  if isinstance(result, list):
    # str() each item so lists of non-string values do not break the join.
    return ', '.join(str(item) for item in result)

  if isinstance(result, dict):
    # Convert dictionaries to formatted JSON strings
    return json.dumps(result, indent=2)

  # For any other type, convert using str()
  return str(result)

# Chatbot Code

In [None]:
# load_dotenv()
# client = anthropic.Anthropic()

In [None]:
#Define the Gemini model

def trial_function():
  """Send one fixed prompt to the tool-enabled Gemini model and print the raw response."""
  gemini = genai.GenerativeModel('gemini-2.5-flash-preview-05-20', tools=tools)

  # A fresh chat with no prior history.
  session = gemini.start_chat(history=[])

  reply = session.send_message("What is United States Of America?")
  print(reply)

trial_function()


response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=protos.GenerateContentResponse({
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": "I cannot answer general knowledge questions. My purpose is to assist with tasks related to searching and extracting information about research papers."
              }
            ],
            "role": "model"
          },
          "finish_reason": "STOP",
          "index": 0
        }
      ],
      "usage_metadata": {
        "prompt_token_count": 137,
        "candidates_token_count": 24,
        "total_token_count": 161
      },
      "model_version": "models/gemini-2.5-flash-preview-05-20"
    }),
)


# Query processing

In [None]:
def process_query(query, chat_session=None):
  """
  Send a query to the Gemini model and resolve any tool calls it makes.

  Text parts of each model response are printed. Every function call in a
  response is executed through ``execute_tool`` and all results of that turn
  are sent back together in one message; this repeats until the model answers
  without requesting a tool.

  Args:
    query: The user's message
    chat_session: Optional existing chat session to continue, so context
      carries over between queries. When omitted, a new tool-enabled session
      with empty history is created for this query.

  Returns:
    None. Output is printed.
  """
  if chat_session is None:
    # Define the Gemini model and start a fresh chat session
    model = genai.GenerativeModel('gemini-2.5-flash-preview-05-20', tools=tools)
    chat_session = model.start_chat(history=[])

  # Send the user query
  response = chat_session.send_message(query)

  while True:
    # A response without candidates/parts has nothing to print or execute;
    # treat it as the end of the turn instead of looping on it.
    candidates = response.candidates
    parts = candidates[0].content.parts if candidates else []

    function_responses = []
    for part in parts:
      if part.text:
        print(part.text)

      if part.function_call:
        tool_name = part.function_call.name
        # Plain dict so the log line shows the actual arguments.
        tool_args = dict(part.function_call.args or {})

        print(f"Calling tool {tool_name} with args {tool_args}")

        # Execute tool
        result = execute_tool(tool_name, tool_args)

        function_responses.append(genai.protos.Part(
            function_response=genai.protos.FunctionResponse(
                name=tool_name,
                response={'content': result}
            )
        ))

    # No tool was requested in this turn: the model's answer is final.
    if not function_responses:
      break

    # Send every tool result of this turn back to the model in one message,
    # only after all parts of the current response have been processed.
    response = chat_session.send_message(
        genai.protos.Content(parts=function_responses)
    )

# Chat loop

In [None]:
def chat_loop():
  """
  Interactive prompt: read queries from stdin and pass each to process_query.

  The loop ends when the user types 'quit' (case-insensitive) or when input is
  exhausted. Blank lines are ignored. Errors raised while handling a query are
  printed and the loop continues with the next prompt.
  """
  print("Type your queries or 'quit' to exit.")
  while True:
    try:
      query = input("\nQuery: ").strip()

      # Nothing to send to the model.
      if not query:
        continue

      if query.lower() == 'quit':
        break

      process_query(query)
      print("\n")
    except EOFError:
      # Input is closed; prompting again would fail forever, so stop.
      break
    except Exception as e:
      print(f"\nError: {str(e)}")


In [None]:
# Start the interactive session; type 'quit' at the prompt to stop.
chat_loop()

Type your queries or 'quit' to exit.

Query: give me paper on nuclear physics
Calling tool search_papers with args <proto.marshal.collections.maps.MapComposite object at 0x78a7f9100310>
Results are saved in: papers/nuclear_physics/papers_info.json
I found some papers on nuclear physics. Would you like to know more about any of them?




Query: Yes, tell me the ids of the paper
Please specify the topic you want to search for.



Query: Tell me about the first paper on nuclear physics
I cannot tell you which was the very first paper on nuclear physics without more specific information. Can you tell me what aspects of nuclear physics you are interested in, or perhaps the author or time period?



Query: papers for nuclear physics
Calling tool search_papers with args <proto.marshal.collections.maps.MapComposite object at 0x78a7f903a0d0>
Results are saved in: papers/nuclear_physics/papers_info.json
Here are some papers for nuclear physics: 1701.03564v1, 1701.02756v1, 0902.4560v1, 1802.07478