Authors: Licong Xu and Boris Bolliet (Cambridge)

In [8]:
from pathlib import Path

from mcp import ClientSession, StdioServerParameters
from mcp.client.sse import sse_client
from mcp.client.stdio import stdio_client

from autogen import LLMConfig
from autogen.agentchat import AssistantAgent
from autogen.mcp import create_toolkit
import asyncio
import json
import sys

import anyio

# Only needed for Jupyter notebooks
import nest_asyncio
nest_asyncio.apply()

from autogen.agentchat.group import (
    AgentNameTarget,
    AgentTarget,
    AskUserTarget,
    ContextExpression,
    ContextStr,
    ContextStrLLMCondition,
    ContextVariables,
    ExpressionAvailableCondition,
    ExpressionContextCondition,
    GroupChatConfig,
    GroupChatTarget,
    Handoffs,
    NestedChatTarget,
    OnCondition,
    OnContextCondition,
    ReplyResult,
    RevertToUserTarget,
    SpeakerSelectionResult,
    StayTarget,
    StringAvailableCondition,
    StringContextCondition,
    StringLLMCondition,
    TerminateTarget,
)

from autogen.agentchat.group.patterns import (
    DefaultPattern,
    ManualPattern,
    AutoPattern,
    RandomPattern,
    RoundRobinPattern,
)


from autogen import ConversableAgent, UpdateSystemMessage
from autogen.agents.experimental import DocAgent
import os
import copy
from typing import Any, Dict, List
from pydantic import BaseModel, Field


from autogen.agentchat import initiate_group_chat, a_initiate_group_chat

In [9]:
# Path to the arxiv MCP server script. The path is relative; it is later
# converted to a string and passed as the first argument of the command that
# launches the server.
mcp_server_path = Path("mcp_arxiv.py")


In [12]:
# System-message template for the "Scientist" agent.
# The {joke_constraints} placeholder has the same name as the key stored in
# `workflow_context`; presumably UpdateSystemMessage fills it in from the
# context variables before each reply -- TODO confirm against the AG2 docs.
# `.strip()` removes the leading and trailing newline of the literal.
joker_message = """
You are an expert physics researcher who is summarising paper results for a new grad student. 
{joke_constraints}
""".strip()



from pydantic import BaseModel,Field

# Structured-output schema assigned as `response_format` for the "Scientist"
# agent. The field names are joke-themed, but the field descriptions ask the
# model for ranked paper data and an explanation of the papers' significance.
# NOTE: documented with `#` comments on purpose -- pydantic copies a class
# docstring into the generated JSON schema, which would change what is sent
# to the model.
class JokeResponse(BaseModel):
    # Instructions / the relevant papers as an array of JSON objects (per the description).
    joke_instructions: str = Field(..., description="Return the most relevant papers in an array, with each paper in JSON format i.e. { title: '...', ..., keywords: [...] }")
    # The papers, ordered by relevance to the query (per the description).
    joke: str = Field(..., description="JSON papers in order of relevance to the query")
    # Why the listed papers matter (per the description).
    joke_explanation: str = Field(..., description="explanation of significance of papers")

    def format(self) -> str:
        """Render the three fields as a Markdown report.

        Each field is emitted under its own bold header, with blank lines
        between headers and content.

        Returns:
            The report as a single newline-joined string.
        """
        return "\n".join([
            "**Instructions:**",
            "",
            self.joke_instructions,
            "",
            "**Paper Array (ranked by importance):**",
            "",
            self.joke,
            "",
            # No space after the opening `**`: with a space the delimiter is
            # not a valid Markdown strong-emphasis opener and the header
            # renders as literal asterisks.
            "**Explanation:**",
            "",
            self.joke_explanation
        ])


# Baseline LLM settings; the API key is read from the environment.
default_llm_config = {
    "cache_seed": 42,
    "temperature": 1.0,
    "top_p": 0.05,
    "config_list": [
        {
            "model": "gpt-4o-mini",
            "api_key": os.getenv("OPENAI_API_KEY"),
            "api_type": "openai",
        },
    ],
    "timeout": 1200,
}

# Deep copy, so attaching the response schema to the first config entry
# leaves `default_llm_config` itself untouched.
joker_config_list = copy.deepcopy(default_llm_config)
joker_config_list["config_list"][0]["response_format"] = JokeResponse

# The "Scientist" agent: same template is used as the initial system message
# and as the template handed to UpdateSystemMessage.
joker = ConversableAgent(
    name="Scientist",
    system_message=joker_message,
    llm_config=joker_config_list,
    update_agent_state_before_reply=[UpdateSystemMessage(joker_message)],
)

# Context variables for the group chat; the key matches the placeholder
# name used in `joker_message`.
workflow_context = ContextVariables(
    data={"joke_constraints": "Stop requests if one fails."},
)

# Topic to search for, interpolated into the task prompt below.
query = "supergraph perturbation theory"

task = f"""
Find and summarise papers relevant to {query}. Use your physics expertise to stylize only 3 search queries relevant to the topic yielding 2 articles each. Only download or get_info on at most 5 papers. Return the most relevant papers (with arxiv link) in order with a summary of their abstract.
If a paper download or request fails, make no more attempts to download papers. Query for papers always in "double quotes" to ensure it is read as a string. Never make the same query twice.
"""

initial_agent = joker

In [13]:
async def create_toolkit_and_run(session: ClientSession, clear_cache: bool = False) -> None:
    """Run the paper-search group chat using the tools exposed by an MCP session.

    Builds a toolkit from ``session``, creates a tool-calling ``mcp_agent``,
    wires the hand-offs between it and the module-level ``joker`` agent,
    resets both agents and then runs the group chat on the module-level
    ``task`` with ``workflow_context`` as context variables.

    Args:
        session: MCP ``ClientSession`` handed to ``create_toolkit``.
        clear_cache: When true, delete the ``.cache`` folder in the current
            working directory before the chat starts. Defaults to ``False``,
            which matches the previous behaviour (the deletion call used to
            be commented out).

    Returns:
        None. The value returned by ``a_initiate_group_chat`` is discarded.
    """
    # Create a toolkit with available MCP tools
    toolkit = await create_toolkit(session=session)

    # tool_choice="required" is forwarded in the LLM config; with the OpenAI
    # API this presumably forces a tool call on every turn -- confirm.
    mcp_agent = ConversableAgent(
        name="mcp_agent",
        system_message=r"""
Find arxiv papers relevant to the query. 
                             """,
        llm_config=LLMConfig(
            model="gpt-4o-mini",
            api_type="openai",
            tool_choice="required",
        ),
    )

    # Register MCP tools with the agent (both for suggesting and executing calls)
    toolkit.register_for_llm(mcp_agent)
    toolkit.register_for_execution(mcp_agent)

    # After-work targets: joker -> terminate, mcp_agent -> joker.
    joker.handoffs.set_after_work(TerminateTarget())
    mcp_agent.handoffs.set_after_work(AgentTarget(joker))

    # LLM-evaluated condition that also routes mcp_agent to joker.
    mcp_agent.handoffs.add_llm_conditions([
        OnCondition(
            target=AgentTarget(joker),
            condition=StringLLMCondition(prompt="3 papers have been downloaded, or 5 papers have been looked at. Also exit if any download or query fails. Use the data found so far."),
        ),
    ])

    # One list, reused for the reset loop and for the pattern below.
    agents = [joker, mcp_agent]
    for agent in agents:
        agent.reset()
    print("all agents reset")

    if clear_cache:
        # Imported here because it is only needed on this optional path.
        import shutil

        cache_path = Path.cwd() / ".cache"
        if cache_path.is_dir():
            shutil.rmtree(cache_path)
            print(".cache folder deleted.")
        else:
            print("No .cache folder found in current directory.")

    # Create the pattern; the chat starts with mcp_agent.
    agent_pattern = DefaultPattern(
        agents=agents,
        initial_agent=mcp_agent,
        context_variables=workflow_context,
    )

    await a_initiate_group_chat(
        pattern=agent_pattern,
        messages=task,
        max_rounds=40,
    )

    # TODO: if max_rounds is reached, hand over to the scientist (not implemented).
    



server_params = StdioServerParameters(
    command="python",
    args=[str(mcp_server_path), "stdio", "--storage-path", "arxiv_papers"]
)

async with stdio_client(server_params) as (read, write), ClientSession(read, write) as session:
    # Initialize the connection
    await session.initialize()
    await create_toolkit_and_run(session)

all agents reset
[33m_User[0m (to chat_manager):


Find and summarise papers relevant to supergraph perturbation theory. Use your physics expertise to stylize only 3 search queries relevant to the topic yielding 2 articles each. Only download or get_info on at most 5 papers. Return the most relevant papers (with arxiv link) in order with a summary of their abstract.
If a paper download or request fails, make no more attempts to download papers. Query for papers always in "double quotes" to ensure it is read as a string. Never make the same query twice.


--------------------------------------------------------------------------------
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mmcp_agent[0m (to chat_manager):

[32m***** Suggested tool call (call_udRMnFX6xYiTRkwGb85lEhT7): search_arxiv *****[0m
Arguments: 
{"query": "\"supergraph perturbation theory\"", "max_results": 2}
[32m*********************************************************************