In [1]:
import autogen
import os
from dotenv import load_dotenv
load_dotenv()

# Import AgentNeo components
from agentneo import AgentNeo, Tracer, Evaluation, launch_dashboard

# Initialize the AgentNeo session, attach it to the "groupchat" project, and start tracing.
neo_session = AgentNeo(session_name="autogen_testing")
try:
    neo_session.create_project(project_name="groupchat")
except Exception:
    # Creation failed -- presumably because the project already exists (TODO confirm
    # which exception AgentNeo raises) -- so connect to the existing project instead.
    # `Exception` rather than a bare `except:` lets KeyboardInterrupt / SystemExit
    # propagate instead of being silently turned into a connect attempt.
    neo_session.connect_project(project_name="groupchat")
tracer = Tracer(session=neo_session)
tracer.start()



# Single OpenAI endpoint; the API key is read from the OPENAI_API_KEY environment variable.
config_list = [dict(model="gpt-4o-mini", api_key=os.getenv("OPENAI_API_KEY"))]

# Shared LLM settings: temperature 0, the endpoint list above, and a timeout of 120.
gpt4_config = dict(
    temperature=0,
    config_list=config_list,
    timeout=120,
)
# Admin proxy: code execution is disabled and no human_input_mode is passed,
# so the autogen default applies.
user_proxy = autogen.UserProxyAgent(
    name="Admin",
    code_execution_config=False,
    system_message=(
        "A human admin. Interact with the planner to discuss the plan. "
        "Plan execution needs to be approved by this admin."
    ),
)

# Engineer: LLM-backed agent instructed to write the python/shell code.
engineer = autogen.AssistantAgent(
    name="Engineer",
    system_message=(
        "Engineer. You follow an approved plan. You write python/shell code to solve tasks. "
        "Wrap the code in a code block that specifies the script type. "
        "The user can't modify your code. "
        "So do not suggest incomplete code which requires others to modify. "
        "Don't use a code block if it's not intended to be executed by the executor.\n"
        "Don't include multiple code blocks in one response. "
        "Do not ask others to copy and paste the result. "
        "Check the execution result returned by the executor.\n"
        "If the result indicates there is an error, fix the error and output the code again. "
        "Suggest the full code instead of partial code or code changes. "
        "If the error can't be fixed or if the task is not solved even after the code is executed successfully, "
        "analyze the problem, revisit your assumption, collect additional info you need, "
        "and think of a different approach to try.\n"
    ),
    llm_config=gpt4_config,
)

# Scientist: LLM-backed agent instructed to categorize papers, not to write code.
scientist = autogen.AssistantAgent(
    name="Scientist",
    system_message=(
        "Scientist. You follow an approved plan. "
        "You are able to categorize papers after seeing their abstracts printed. "
        "You don't write code."
    ),
    llm_config=gpt4_config,
)

# Planner: LLM-backed agent instructed to propose and revise the plan.
planner = autogen.AssistantAgent(
    name="Planner",
    llm_config=gpt4_config,
    system_message=(
        "Planner. Suggest a plan. "
        "Revise the plan based on feedback from admin and critic, until admin approval.\n"
        "The plan may involve an engineer who can write code and a scientist who doesn't write code.\n"
        "Explain the plan first. "
        "Be clear which step is performed by an engineer, and which step is performed by a scientist.\n"
    ),
)

# Executor: non-interactive proxy (human_input_mode="NEVER") with code execution enabled.
executor = autogen.UserProxyAgent(
    name="Executor",
    human_input_mode="NEVER",
    system_message="Executor. Execute the code written by the engineer and report the result.",
    # Please set use_docker=True if docker is available to run the generated code.
    # Using docker is safer than running the generated code directly.
    code_execution_config=dict(
        last_n_messages=3,
        work_dir="paper",
        use_docker=False,
    ),
)

# Critic: LLM-backed agent instructed to review the plan, claims and code.
critic = autogen.AssistantAgent(
    name="Critic",
    llm_config=gpt4_config,
    system_message=(
        "Critic. Double check plan, claims, code from other agents and provide feedback. "
        "Check whether the plan includes adding verifiable info such as source URL."
    ),
)
# Put all six agents into one group chat (max_round=50, empty initial history).
groupchat = autogen.GroupChat(
    max_round=50,
    messages=[],
    agents=[user_proxy, engineer, scientist, planner, executor, critic],
)

# The manager for the group chat, configured with gpt4_config.
manager = autogen.GroupChatManager(
    llm_config=gpt4_config,
    groupchat=groupchat,
)

# Wrap the main interaction in a trace
@tracer.trace_agent(name='GroupChatInteraction')
def run_group_chat():
    """Have the Admin proxy open the chat with the manager using the arXiv survey task.

    Returns nothing; the result of ``initiate_chat`` is discarded.
    """
    user_proxy.initiate_chat(
        manager,
        message=(
            "\n"
            "find papers on LLM applications from arxiv in the last week, "
            "create a markdown table of different domains."
            "\n"
        ),
    )

# Run the group chat. The tracer is stopped in `finally` so that it is always
# shut down, even when the chat raises or the cell is interrupted;
# otherwise a failed run would leave the tracer active.
try:
    run_group_chat()
finally:
    # Stop the tracer
    tracer.stop()

flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.


Project 'groupchat' found.
Tracing Started.
[33mAdmin[0m (to chat_manager):


find papers on LLM applications from arxiv in the last week, create a markdown table of different domains.


--------------------------------------------------------------------------------
[32m
Next speaker: Planner
[0m
[33mPlanner[0m (to chat_manager):

**Plan to Find Papers on LLM Applications from arXiv**

**Objective:** To gather and present recent research papers on Large Language Model (LLM) applications from arXiv, specifically from the last week, and organize them into a markdown table categorized by different domains.

### Steps:

1. **Data Collection (Engineer)**
   - The engineer will write a script to scrape the arXiv API for papers published in the last week that are tagged with relevant keywords related to LLM applications. This will include filtering by categories such as Natural Language Processing, Machine Learning, and Artificial Intelligence.

2. **Data Analysis (Scientist)**
   - Th

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32m
Next speaker: Scientist
[0m


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[33mScientist[0m (to chat_manager):

Great! The script executed successfully, and the markdown table has been created. You should now have a file named `llm_papers.md` containing the recent papers on LLM applications from arXiv, organized into a structured table with the specified columns.

### Next Steps:
1. **Review the Markdown File:** Open `llm_papers.md` to check the contents and ensure that the information is accurate and well-organized.
2. **Share or Present the Findings:** You can now share the markdown file with your team or use it for further analysis or presentations.
3. **Consider Future Updates:** If this is a recurring task, think about how often you want to run the script to gather new papers and whether you need to automate this process.

If you need any further assistance or modifications, feel free to ask!

--------------------------------------------------------------------------------


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32m
Next speaker: Admin
[0m


DEBUG:agentneo.tracing.agent_tracer:Successfully updated and committed AgentCallModel with id 1


Tracing Completed.
Data saved to the database and JSON file.



In [2]:
# Execute metrics
from agentneo import Execution

# Evaluate the trace recorded by `tracer` rather than a hard-coded id: once the
# notebook has been run more than once, a fixed id points at a stale trace.
# NOTE(review): assumes this AgentNeo version exposes `tracer.trace_id` -- confirm.
# Falls back to 1 (the previous hard-coded value) when it is missing or unset.
trace_id = getattr(tracer, "trace_id", None) or 1

exe = Execution(session=neo_session, trace_id=trace_id)
exe.execute(
    metric_list=[
        'goal_decomposition_efficiency',
        'goal_fulfillment_rate',
        'tool_correctness_metric',
        'tool_call_success_rate_metric',
    ]
)
metric_results = exe.get_results()

[92m14:51:26 - LiteLLM:INFO[0m: utils.py:2931 - 
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[92m14:51:29 - LiteLLM:INFO[0m: utils.py:1008 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m14:51:29 - LiteLLM:INFO[0m: utils.py:2931 - 
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[92m14:51:32 - LiteLLM:INFO[0m: utils.py:1008 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m14:51:32 - LiteLLM:INFO[0m: utils.py:2931 - 
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:LiteLLM:
LiteLL

In [5]:
# Bare expression so the notebook renders the metric results as this cell's output.
metric_results

[{'metric_name': 'goal_decomposition_efficiency',
  'score': 0.85,
  'reason': 'The sub-tasks generated are relevant and effectively cover the main aspects of the original goal. Each sub-task is clearly defined and can be executed independently, contributing to the overall goal of managing and presenting information. The logical sequence is maintained, and the tasks are granular enough to allow for focused execution. However, there could be more emphasis on the review process to ensure accuracy before sharing, which would enhance completeness.',
  'result_detail': {'metric_name': 'goal_fulfillment_rate',
   'config': {},
   'result': {'originalGoal': 'Manage and facilitate a structured approach to categorizing and presenting information on recent papers in the field of LLM applications.',
    'subtasks': ['Review the Markdown File: Open `llm_papers.md` to check the contents and ensure that the information is accurate and well-organized.',
     'Share or Present the Findings: Share the 

In [6]:
# Launch the AgentNeo dashboard, requesting port 3000.
# NOTE: in the recorded run port 3000 was busy and the dashboard came up on 3004,
# so use the URL printed in the log rather than assuming the requested port.
launch_dashboard(port=3000)

INFO:root:Node.js version: v20.17.0
INFO:root:npm version: 10.8.2
INFO:root:Attempting to install React dependencies...
INFO:root:Port 3000 is busy. Finding an available port...
INFO:root:Using port 3004
INFO:root:Dashboard output: 



up to date, audited 606 packages in 897ms

168 packages are looking for funding
  run `npm fund` for details

found 0 vulnerabilities


INFO:root:Dashboard launched successfully. Access it at: http://localhost:3004
