In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# add root to sys path
import os
import sys

# Put the parent of the current working directory on sys.path (once), so
# imports are also resolved relative to that folder.
current_dir = os.getcwd()
root = os.path.abspath(os.path.join(current_dir, os.pardir))
if sys.path.count(root) == 0:
    sys.path.append(root)

In [3]:
# Imports, API-key setup, and agent handles

# Read the API keys from the environment. Each value is None when the
# corresponding variable is unset.
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')
openai_api_key = os.getenv('OPENAI_API_KEY')

# os.environ only accepts strings: assigning None raises TypeError, which would
# abort this cell before the imports below run and leave their names undefined
# for later cells. Write the key back only when it is actually present.
if ANTHROPIC_API_KEY is not None:
    os.environ['ANTHROPIC_API_KEY'] = ANTHROPIC_API_KEY
else:
    print("Warning: ANTHROPIC_API_KEY is not set; agent calls that need it may fail.")

import agent
from langchain_core.messages import AIMessage, HumanMessage

# Bind the agent module's graph and selected attributes to notebook-level
# names so later cells can refer to them without the `agent.` prefix.
graph = agent.graph

(
    orchestrator,
    run_control_flow,
    generate_answer,
    create_sql_query_or_queries,
    extract_analytical_intent,
) = (
    agent.orchestrator,
    agent.run_control_flow,
    agent.generate_answer,
    agent.create_sql_query_or_queries,
    agent.extract_analytical_intent,
)

# Import initialization components
from src.init.initialization import (
    llm, llm_fast, create_config, tracer,
    objects_documentation, sql_dialect, connection_string
)

In [4]:
# Evaluation Questions with Expected Outcomes
# Each pair is (question sent to the agent, description of the expected
# behaviour); the comprehension expands them into the dict records that the
# evaluation loop reads via the 'question' and 'expected_outcome' keys.
eval_questions = [
    {'question': question_text, 'expected_outcome': expected_text}
    for question_text, expected_text in (
        (
            "aum by practice segments",
            "Query for assets under management and not aggregate it over time.",
        ),
        (
            "Payments associated with advisors from firm Cedar Capital LLC",
            "Enter disambiguation node. Payment can mean net revenue (Revenue retained by Capital Partners) or payout (Dollar amount paid to advisor). Ask user which one they prefer.",
        ),
        (
            "net revenue associated with Cedar Capital advisors",
            "Filter for sum of net revenue for advisors belonging to the firm Cedar Capital LLC",
        ),
        (
            "distribution of advisor ID 8",
            "Enter disambiguation node. Distribution (advisor payout after tech fees are deducted) is not in tables. Offer net revenue (Revenue retained by Capital Partners) or payout (Dollar amount paid to advisor), ask which one user prefers.",
        ),
        (
            "producing assets by practice segments",
            "Query for advisory assets. Say that advisory assets is Assets in Managed Portfolio and SMA business lines.",
        ),
        (
            "liquid assets by practice segments",
            "Say it doesn't have access to liquid assets (assets easily converted to cash) but here are the results for advisory assets (Assets in Managed Portfolio and SMA business lines).",
        ),
        (
            "For Oak Wealth, what was their EOM asset value and payout?",
            "Query for EOM assets and payout for Oak Wealth firm.",
        ),
        (
            "What is distinct count of active advisors?",
            "Query for distinct count of advisors with advisor_status = Active and to_date = 9999-12-31.",
        ),
        (
            "List all advisors affiliated with firm Crescent Wealth LLC.",
            "Query for advisors belonging to firm Crescent Wealth LLC.",
        ),
        (
            "Group advisors by tenure group. Compare average total assets, net revenue across these groups. Which group shows the highest revenue per advisor?",
            "Group by tenure, calculate avg assets and net revenue, identify highest revenue per advisor group.",
        ),
    )
]

In [5]:
# Run Evaluations
from datetime import datetime

# Run every evaluation question through the agent graph, store one result dict
# per question in `eval_results`, then print a summary of the run.
import traceback
from collections import Counter

# Initialize results list
eval_results = []

# Run each evaluation question
for i, eval_item in enumerate(eval_questions):
    question = eval_item['question']
    expected_outcome = eval_item['expected_outcome']

    print(f"\n{'='*80}")
    print(f"Running eval {i+1}/{len(eval_questions)}: {question}")
    print(f"{'='*80}")

    # Build a fresh input state for each question so nothing carries over
    # from the previous one.
    test_state = {
        'objects_documentation': objects_documentation,
        'sql_dialect': sql_dialect,
        'messages_log': [],
        'intermediate_steps': [],
        'analytical_intent': [],
        'current_question': question,
        'current_sql_queries': [],
        'generate_answer_details': {
            'key_assumptions': [],
            'agent_questions': [],
            'ambiguity_explanation': ''
        },
        'llm_answer': AIMessage(content=''),
        'scenario': '',
        'search_terms_output': []
    }

    # Create config with "Run Evals" prefix. This stays outside the try block
    # so `thread_id` is always bound when the error record is built.
    # NOTE(review): the meaning of the second positional argument (True) is
    # defined by create_config — confirm and document.
    config, thread_id = create_config(f'Run Evals - Q{i+1}', True)

    try:
        # Invoke graph
        result = graph.invoke(test_state, config=config)
        llm_answer = result.get('llm_answer')

        # Extract relevant state details
        eval_result = {
            'eval_number': i + 1,
            'timestamp': datetime.now().isoformat(),
            'question': question,
            'expected_outcome': expected_outcome,
            'scenario': result.get('scenario', ''),
            'analytical_intent': result.get('analytical_intent', []),
            'current_sql_queries': result.get('current_sql_queries', []),
            'generate_answer_details': result.get('generate_answer_details', {}),
            'agent_answer': llm_answer.content if llm_answer else '',
            'thread_id': thread_id,
            'status': 'success'
        }

        # Print summary
        print(f"✓ Completed - Scenario: {eval_result['scenario']}")
        print(f"  Answer preview: {eval_result['agent_answer'][:100]}...")

    except Exception as e:
        # Record the failure and keep going so one bad question does not stop
        # the remaining evals. The formatted traceback is stored alongside the
        # message because str(e) alone drops the exception type and location.
        eval_result = {
            'eval_number': i + 1,
            'timestamp': datetime.now().isoformat(),
            'question': question,
            'expected_outcome': expected_outcome,
            'scenario': '',
            'analytical_intent': [],
            'current_sql_queries': [],
            'generate_answer_details': {},
            'agent_answer': '',
            'thread_id': thread_id,
            'status': 'error',
            'error_message': str(e),
            'traceback': traceback.format_exc()
        }
        print(f"✗ Error: {str(e)}")

    eval_results.append(eval_result)

# Display summary
status_counts = Counter(r['status'] for r in eval_results)
print(f"\n{'='*80}")
print("✓ Evaluations complete!")
print(f"{'='*80}")
print("\nSummary:")
print(f"  Total questions: {len(eval_results)}")
print(f"  Successful: {status_counts['success']}")
print(f"  Errors: {status_counts['error']}")
print("\nScenario distribution:")
# Count every scenario label that actually occurred instead of a fixed A-D
# list, so an unexpected label is reported rather than silently omitted.
# Empty scenarios (e.g. from error records) are skipped.
scenario_counts = Counter(
    str(r['scenario']) for r in eval_results if r.get('scenario')
)
for scenario in sorted(scenario_counts):
    print(f"  Scenario {scenario}: {scenario_counts[scenario]}")

# eval_results is now available for further analysis in subsequent cells


Running eval 1/10: aum by practice segments


NameError: name 'objects_documentation' is not defined

In [None]:
# Inspect individual eval results
import json

def display_eval(eval_number, results=None):
    """Print a detailed report for a single evaluation result.

    Args:
        eval_number: 1-based position of the eval to display.
        results: Optional list of result dicts to read from. When omitted, the
            notebook-level ``eval_results`` list is used.

    Raises:
        IndexError: If there are no results, or ``eval_number`` is not between
            1 and ``len(results)``. Zero and negative numbers are rejected
            explicitly; plain list indexing would otherwise wrap around and
            show an eval from the end of the list.
    """
    if results is None:
        results = eval_results
    if not results:
        raise IndexError("No eval results available to display")
    if not 1 <= eval_number <= len(results):
        raise IndexError(
            f"eval_number must be between 1 and {len(results)}, got {eval_number}"
        )

    result = results[eval_number - 1]

    print(f"{'='*80}")
    print(f"EVAL #{result['eval_number']}")
    print(f"{'='*80}\n")
    print(f"Question: {result['question']}\n")
    print(f"Expected Outcome: {result['expected_outcome']}\n")
    print(f"{'-'*80}")
    print(f"Scenario: {result['scenario']}")
    print(f"Status: {result['status']}")

    if result['status'] == 'error':
        print(f"\nError: {result.get('error_message', 'Unknown error')}")
    else:
        print(f"\nAnalytical Intent:")
        for intent in result['analytical_intent']:
            print(f"  - {intent}")

        print(f"\nSQL Queries: {len(result['current_sql_queries'])} query(ies)")
        for position, query in enumerate(result['current_sql_queries'], 1):
            # Entries are expected to be dicts carrying the SQL under 'query';
            # anything else is shown through str() instead of raising.
            sql_text = query.get('query', '') if isinstance(query, dict) else query
            print(f"  Query {position}: {str(sql_text or '')[:100]}...")

        # Fall back to an empty dict when the details are missing or None.
        details = result.get('generate_answer_details') or {}
        assumptions = details.get('key_assumptions', [])
        print(f"\nGenerate Answer Details:")
        print(f"  Key Assumptions: {len(assumptions)}")
        for assumption in assumptions:
            print(f"    - {assumption}")
        print(f"  Agent Questions: {details.get('agent_questions', [])}")
        print(f"  Ambiguity Explanation: {details.get('ambiguity_explanation', '')}")

        print(f"\nAgent Answer:")
        print(f"  {result['agent_answer']}")

    print(f"\n{'='*80}\n")

# Example usage: display_eval(1) to see results for first eval