In [1]:
import os
from dotenv import load_dotenv
from autogen import ConversableAgent, config_list_from_json

import os
# Move the working directory two levels up before the `curestry` import below.
# NOTE(review): presumably this targets the repository root so the local
# `curestry` package (and any .env file) can be found — confirm.
# The path is relative, so re-running this cell moves up two more levels each time.
os.chdir('../..')


from curestry import Curestry, Tracer, launch_dashboard

# Load environment variables (e.g. OPENAI_API_KEY, read further down)
load_dotenv()

# Initialize Curestry session
neo_session = Curestry(session_name="autogen_testing")
try:
    neo_session.create_project(project_name="calculator")
except Exception:
    # Creating the project failed (presumably because it already exists —
    # confirm), so connect to it instead. Catch `Exception` rather than using a
    # bare `except` so KeyboardInterrupt / SystemExit are not swallowed here.
    neo_session.connect_project(project_name="calculator")

# Initialize Tracer and begin recording; `tracer` is used by the decorators below
tracer = Tracer(session=neo_session)
tracer.start()

# Define the calculator function
@tracer.trace_tool(name='calculator')
def calculator(a: int, b: int, operator: str) -> int:
    """Apply a basic arithmetic operator to two numbers.

    Args:
        a: Left operand.
        b: Right operand.
        operator: One of "+", "-", "*" or "/".

    Returns:
        The result of ``a <operator> b``. Division is truncated toward zero
        (``7 / 2 -> 3``, ``-7 / 2 -> -3``).

    Raises:
        ValueError: If ``operator`` is not one of the supported symbols.
        ZeroDivisionError: If ``operator`` is "/" and ``b`` is zero.
    """
    if operator == "+":
        return a + b
    elif operator == "-":
        return a - b
    elif operator == "*":
        return a * b
    elif operator == "/":
        if isinstance(a, int) and isinstance(b, int):
            # Stay in integer arithmetic: going through float (`int(a / b)`)
            # silently loses precision once operands exceed 2**53.
            quotient = abs(a) // abs(b)
            # Floor division of magnitudes + sign fix-up == truncation toward zero.
            return quotient if (a < 0) == (b < 0) else -quotient
        # Non-integer operands keep the original float-then-truncate behavior.
        return int(a / b)
    else:
        raise ValueError("Invalid operator")

# LLM endpoint settings: the model name plus the API key taken from the
# OPENAI_API_KEY environment variable (None when the variable is unset)
config_list = [
    dict(model="gpt-4o-mini", api_key=os.getenv("OPENAI_API_KEY")),
]


# Factory for the LLM-backed assistant agent
@tracer.trace_agent(name='AssistantAgent')
def create_assistant():
    """Build the ``ConversableAgent`` named "Assistant", configured with ``config_list``."""
    assistant_prompt = (
        "You are a helpful AI assistant. "
        "You can help with simple calculations. "
        "Return 'TERMINATE' when the task is done."
    )
    assistant_llm_config = {"config_list": config_list}
    return ConversableAgent(
        name="Assistant",
        system_message=assistant_prompt,
        llm_config=assistant_llm_config,
    )

# Factory for the user proxy agent (no LLM, no human input)
@tracer.trace_agent(name='UserProxyAgent')
def create_user_proxy():
    """Build the ``ConversableAgent`` named "User" that ends the chat on "TERMINATE"."""

    def _signals_termination(msg):
        # A message terminates the chat when its content is present and
        # contains the literal "TERMINATE".
        return msg.get("content") is not None and "TERMINATE" in msg["content"]

    return ConversableAgent(
        name="User",
        llm_config=False,
        is_termination_msg=_signals_termination,
        human_input_mode="NEVER",
    )

# Main function to run the conversation
@tracer.trace_agent(name='run_conversation')
def run_conversation(message: str = "What is (1423 - 123) / 3 + (32 + 23) * 5?"):
    """Run one user-proxy -> assistant chat with the calculator tool attached.

    Args:
        message: Opening message sent by the user proxy to the assistant.
            Defaults to the arithmetic question this notebook demonstrates.

    Returns:
        Whatever ``user_proxy.initiate_chat`` returns, so callers can inspect
        the conversation instead of relying only on printed output.
    """
    assistant = create_assistant()
    user_proxy = create_user_proxy()

    # The assistant may *propose* calculator calls; the user proxy *executes* them.
    assistant.register_for_llm(name="calculator", description="A simple calculator")(calculator)
    user_proxy.register_for_execution(name="calculator")(calculator)

    # Start the conversation
    return user_proxy.initiate_chat(assistant, message=message)

# Run the conversation. The tracer is stopped in `finally` so that tracing is
# shut down even when the chat raises (e.g. an API or tool error).
try:
    run_conversation()
finally:
    # Stop tracing
    tracer.stop()


flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.
INFO:httpx:HTTP Request: GET https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json "HTTP/1.1 200 OK"
DEBUG:curestry.tracing.agent_tracer:Successfully updated and committed AgentCallModel with id 27
DEBUG:curestry.tracing.agent_tracer:Successfully updated and committed AgentCallModel with id 28


Project 'calculator' found.
Tracing Started.
[33mUser[0m (to Assistant):

What is (1423 - 123) / 3 + (32 + 23) * 5?

--------------------------------------------------------------------------------
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mAssistant[0m (to User):

[32m***** Suggested tool call (call_N9QFCFvQdP4mQk1cDuv4zf5w): calculator *****[0m
Arguments: 
{"a": 1423, "b": 123, "operator": "-"}
[32m***************************************************************************[0m
[32m***** Suggested tool call (call_wxRKjONkuZBFHvRRinNhtqrS): calculator *****[0m
Arguments: 
{"a": 32, "b": 23, "operator": "+"}
[32m***************************************************************************[0m

--------------------------------------------------------------------------------
[35m
>>>>>>>> EXECUTING FUNCTION calculator...[0m
[35m
>>>>>>>> EXECUTING FUNCTION calculator...[0m
[33mUser[0m (to Assistant):

[33mUser[0m (to Assistant):

[32m***** Response from calling tool (call

DEBUG:curestry.tracing.agent_tracer:Successfully updated and committed AgentCallModel with id 26


Tracing Completed.
Data saved to the database and JSON file.



In [2]:
# Score the recorded trace with the selected evaluation metrics
from curestry import Evaluation

exe = Evaluation(session=neo_session, trace_id=tracer.trace_id)
exe.evaluate(
    metric_list=[
        'goal_decomposition_efficiency',
        'goal_fulfillment_rate',
        'tool_call_correctness_rate',
        'tool_call_success_rate',
    ]
)
metric_results = exe.get_results()

[92m13:25:50 - LiteLLM:INFO[0m: utils.py:2740 - 
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[92m13:25:51 - LiteLLM:INFO[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m13:25:51 - LiteLLM:INFO[0m: utils.py:2740 - 
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:LiteLLM:
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[92m13:25:52 - LiteLLM:INFO[0m: utils.py:938 - Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
[92m13:25:52 - LiteLLM:INFO[0m: utils.py:2740 - 
LiteLLM completion() model= gpt-4o-mini; provider = openai
INFO:LiteLLM:
LiteLLM 

Error extracting query: list index out of range


In [3]:
# Display the evaluation results returned by `exe.get_results()`
metric_results

[{'metric_name': 'goal_decomposition_efficiency',
  'score': 0.0,
  'reason': 'No sub-tasks detected. The AI did not generate any coherent sub-goals to guide the execution of the arithmetic operations, which indicates a failure in goal decomposition. Without defined sub-tasks, it is impossible to evaluate the efficiency or effectiveness of the process.',
  'result_detail': {'metric_name': 'goal_fulfillment_rate',
   'config': {},
   'result': {'originalGoal': 'Perform a series of mathematical calculations using a calculator function.',
    'subtasks': [],
    'score': 0.0,
    'reason': 'No sub-tasks detected. The AI did not generate any coherent sub-goals to guide the execution of the arithmetic operations, which indicates a failure in goal decomposition. Without defined sub-tasks, it is impossible to evaluate the efficiency or effectiveness of the process.'}},
  'config': {},
  'start_time': '2024-10-22T13:25:50.105213',
  'end_time': '2024-10-22T13:25:53.861823',
  'duration': 3.756

In [4]:
# Launch dashboard, requesting port 3000. The captured log output shows the
# launcher switching to another free port (3001) when 3000 is busy.
launch_dashboard(port=3000)

INFO:root:Port 3000 is busy. Finding an available port...
INFO:root:Using port 3001
INFO:root:Dashboard launched successfully. Access it at: http://localhost:3001
