In [1]:
import autogen
import os
from dotenv import load_dotenv
load_dotenv()

import os
os.chdir('../..')

# Import Curestry components
from curestry import Curestry, Tracer, launch_dashboard

# Initialize the Curestry session, attach it to the "groupchat" project,
# and start a tracer bound to that session.
neo_session = Curestry(session_name="autogen_testing")
try:
    neo_session.create_project(project_name="groupchat")
except Exception:
    # create_project failed (presumably because the project already exists —
    # TODO confirm the exact exception type and narrow this further), so fall
    # back to connecting to it. `Exception` is used instead of a bare
    # `except:` so KeyboardInterrupt / SystemExit are not swallowed.
    neo_session.connect_project(project_name="groupchat")
tracer = Tracer(session=neo_session)
tracer.start()



# Model endpoint(s) available to the agents; the API key is read from the
# OPENAI_API_KEY environment variable (None when it is not set).
config_list = [
    dict(model="gpt-4o-mini", api_key=os.getenv("OPENAI_API_KEY")),
]

# LLM settings shared by the agents and the group chat manager.
gpt4_config = dict(
    temperature=0,
    config_list=config_list,
    timeout=120,
)
# --- System prompts (kept separate from the agent wiring below) -------------
ADMIN_SYSTEM_MESSAGE = "A human admin. Interact with the planner to discuss the plan. Plan execution needs to be approved by this admin."

ENGINEER_SYSTEM_MESSAGE = """Engineer. You follow an approved plan. You write python/shell code to solve tasks. Wrap the code in a code block that specifies the script type. The user can't modify your code. So do not suggest incomplete code which requires others to modify. Don't use a code block if it's not intended to be executed by the executor.
Don't include multiple code blocks in one response. Do not ask others to copy and paste the result. Check the execution result returned by the executor.
If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.
"""

SCIENTIST_SYSTEM_MESSAGE = """Scientist. You follow an approved plan. You are able to categorize papers after seeing their abstracts printed. You don't write code."""

PLANNER_SYSTEM_MESSAGE = """Planner. Suggest a plan. Revise the plan based on feedback from admin and critic, until admin approval.
The plan may involve an engineer who can write code and a scientist who doesn't write code.
Explain the plan first. Be clear which step is performed by an engineer, and which step is performed by a scientist.
"""

EXECUTOR_SYSTEM_MESSAGE = "Executor. Execute the code written by the engineer and report the result."

CRITIC_SYSTEM_MESSAGE = "Critic. Double check plan, claims, code from other agents and provide feedback. Check whether the plan includes adding verifiable info such as source URL."

# Code-execution settings for the Executor agent.
# Please set use_docker=True if docker is available to run the generated code.
# Using docker is safer than running the generated code directly.
executor_code_config = {
    "last_n_messages": 3,
    "work_dir": "paper",
    "use_docker": False,
}

# --- Agents -----------------------------------------------------------------
# Admin: user proxy with code execution disabled.
user_proxy = autogen.UserProxyAgent(
    name="Admin",
    system_message=ADMIN_SYSTEM_MESSAGE,
    code_execution_config=False,
)

# Engineer: LLM-backed assistant that writes the code.
engineer = autogen.AssistantAgent(
    name="Engineer",
    system_message=ENGINEER_SYSTEM_MESSAGE,
    llm_config=gpt4_config,
)

# Scientist: LLM-backed assistant that categorizes papers and writes no code.
scientist = autogen.AssistantAgent(
    name="Scientist",
    system_message=SCIENTIST_SYSTEM_MESSAGE,
    llm_config=gpt4_config,
)

# Planner: LLM-backed assistant that proposes and revises the plan.
planner = autogen.AssistantAgent(
    name="Planner",
    system_message=PLANNER_SYSTEM_MESSAGE,
    llm_config=gpt4_config,
)

# Executor: user proxy that never asks for human input and runs code with
# the settings in executor_code_config.
executor = autogen.UserProxyAgent(
    name="Executor",
    system_message=EXECUTOR_SYSTEM_MESSAGE,
    human_input_mode="NEVER",
    code_execution_config=executor_code_config,
)

# Critic: LLM-backed assistant that reviews plans, claims and code.
critic = autogen.AssistantAgent(
    name="Critic",
    system_message=CRITIC_SYSTEM_MESSAGE,
    llm_config=gpt4_config,
)
# Gather all six agents into one group chat (empty history, at most 50
# rounds) and create the manager that is given that chat and the shared
# LLM config.
chat_agents = [user_proxy, engineer, scientist, planner, executor, critic]
groupchat = autogen.GroupChat(
    agents=chat_agents,
    messages=[],
    max_round=50,
)
manager = autogen.GroupChatManager(
    groupchat=groupchat,
    llm_config=gpt4_config,
)

# Task prompt used when run_group_chat() is called without arguments.
DEFAULT_TASK_MESSAGE = """
find papers on LLM applications from arxiv in the last week, create a markdown table of different domains.
"""


# Wrap the main interaction in a trace
@tracer.trace_agent(name='GroupChatInteraction')
def run_group_chat(message=DEFAULT_TASK_MESSAGE):
    """Start the group chat by having the Admin agent message the manager.

    Args:
        message: Task prompt sent by ``user_proxy`` to ``manager``. Defaults
            to the arXiv LLM-applications task, so calling the function with
            no arguments behaves as before.
    """
    user_proxy.initiate_chat(
        manager,
        message=message,
    )

# Run the group chat. The tracer is stopped in `finally` so that it is always
# shut down, even if the chat raises or is interrupted part-way through.
try:
    run_group_chat()
finally:
    # Stop the tracer
    tracer.stop()

flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.
INFO:httpx:HTTP Request: GET https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json "HTTP/1.1 200 OK"


Project 'groupchat' found.
Tracing Started.
[33mAdmin[0m (to chat_manager):


find papers on LLM applications from arxiv in the last week, create a markdown table of different domains.


--------------------------------------------------------------------------------
[32m
Next speaker: Planner
[0m
[33mPlanner[0m (to chat_manager):

**Plan to Find Papers on LLM Applications from arXiv**

**Objective:** To gather and present recent research papers on Large Language Model (LLM) applications from arXiv, specifically from the last week, and organize them into a markdown table categorized by different domains.

### Steps:

1. **Data Collection (Performed by Engineer):**
   - The engineer will write a script to scrape the arXiv website for papers published in the last week that focus on LLM applications. This will involve:
     - Accessing the arXiv API to filter papers by date and keywords related to LLM applications.
     - Collecting relevant metadata such as title, authors, abstract

DEBUG:curestry.tracing.agent_tracer:Successfully updated and committed AgentCallModel with id 25


Tracing Completed.
Data saved to the database and JSON file.



In [2]:
# Evaluate the recorded trace against a fixed set of metrics.
from curestry import Evaluation

# Metrics requested from the evaluation run.
metric_names = [
    'goal_decomposition_efficiency',
    'goal_fulfillment_rate',
    'tool_call_correctness_rate',
    'tool_call_success_rate',
]

exe = Evaluation(session=neo_session, trace_id=tracer.trace_id)
exe.evaluate(metric_list=metric_names)
metric_results = exe.get_results()

[92m13:25:10 - LiteLLM:INFO[0m: utils.py:2740 - 
LiteLLM completion() model= gpt-4o-mini; provider = openai


INFO:LiteLLM:
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[92m13:25:13 - LiteLLM:INFO[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m13:25:13 - LiteLLM:INFO[0m: utils.py:2740 - 
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[92m13:25:13 - LiteLLM:INFO[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m13:25:13 - LiteLLM:INFO[0m: utils.py:2740 - 
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/cha

Error extracting query: list index out of range


In [3]:
# Display the value returned by exe.get_results() as this cell's output.
metric_results

[{'metric_name': 'goal_decomposition_efficiency',
  'score': 0.0,
  'reason': 'No subtasks detected. Without any generated sub-tasks or a clear original goal, it is impossible to evaluate the efficiency or effectiveness of the goal decomposition process.',
  'result_detail': {'metric_name': 'goal_fulfillment_rate',
   'config': {},
   'result': {'originalGoal': 'No original goal provided',
    'subtasks': [],
    'score': 0.0,
    'reason': 'No subtasks detected. Without any generated sub-tasks or a clear original goal, it is impossible to evaluate the efficiency or effectiveness of the goal decomposition process.'}},
  'config': {},
  'start_time': '2024-10-22T13:25:10.417898',
  'end_time': '2024-10-22T13:25:15.651641',
  'duration': 5.233743},
 {'metric_name': 'goal_fulfillment_rate',
  'score': 0.0,
  'reason': 'The user query indicates that no specific query was found, which suggests that the user did not provide any input or request for information. Similarly, the system response

In [4]:
# Launch dashboard
DASHBOARD_PORT = 3000  # port number passed to launch_dashboard
launch_dashboard(port=DASHBOARD_PORT)

INFO:root:Dashboard launched successfully. Access it at: http://localhost:3000
