# Agent Framework and LLM API Comparison

This notebook demonstrates and compares various agent frameworks and LLM APIs for the same basic use case.
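We'll measure performance metrics like:
- Time to first token
- Overall completion time
- Total tokens used
- Success rate

> **Note:** The cells below currently record only the overall completion time and success/failure of each run; time to first token and token usage are not captured yet.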

## Use Case: Customer Research Agent
We'll create an agent that researches a company and provides key insights for B2B sales preparation.

In [None]:
import os
import time
import json
import asyncio
from datetime import datetime
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, asdict
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from dotenv import load_dotenv
from pathlib import Path
import yaml

# Load environment variables (API keys) from the env file.
# NOTE: both relative paths below resolve against the kernel's working directory.
env_local_path = '../../.env.local'
if not load_dotenv(env_local_path):
    # load_dotenv() returns False when the file is missing or defines no
    # variables; report it here instead of failing later with an auth error.
    print(f"⚠️ No environment variables loaded from {env_local_path}")

# Load model configuration
config_path = Path('../../config.yml')
# Explicit encoding: the platform default is not UTF-8 everywhere.
with open(config_path, 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Extract each provider's default model ID
# (reads config['models'][<provider>]['default']; a missing key raises KeyError).
MODEL_IDS = {
    provider: config['models'][provider]['default']
    for provider in ('openai', 'anthropic', 'xai', 'google', 'strands')
}

print("Loaded model configuration:")
for provider, model_id in MODEL_IDS.items():
    print(f"  {provider}: {model_id}")

In [13]:
@dataclass
class PerformanceMetrics:
    """Timing and outcome record for a single framework/model run.

    ``start_time`` and ``completion_time`` are timestamps in seconds taken
    from the same clock. ``completion_time`` keeps its ``0.0`` default until
    the run has finished, whether it succeeded or failed.
    """
    framework: str                # label of the framework/API under test
    model: str                    # model identifier the run is attributed to
    start_time: float             # timestamp taken just before the call
    completion_time: float = 0.0  # timestamp taken when the call returned or raised
    success: bool = False         # True only when the call returned without raising
    error_message: str = ""       # text of the exception when the call failed
    response_content: str = ""    # value returned by the call on success

    def total_time(self) -> float:
        """Return elapsed seconds, or ``0.0`` if the run has not completed yet.

        Without the guard, a record whose ``completion_time`` is still at its
        default would report ``0 - start_time``, a large negative duration.
        """
        if not self.completion_time:
            return 0.0
        return self.completion_time - self.start_time

# Collected results: one PerformanceMetrics per decorated call, success or failure.
# NOTE: re-running this cell resets the list.
metrics = []

def track_performance(framework: str, model: str):
    """Decorator factory that times an async callable and records the outcome.

    Args:
        framework: Label stored on the metric and shown in the status line.
        model: Model identifier stored on the metric.

    Returns:
        A decorator. The decorated coroutine function forwards its arguments
        to the original, appends exactly one ``PerformanceMetrics`` to
        ``metrics`` per call, prints a status line and returns ``None``.
        Exceptions raised by the wrapped callable are caught, stored in
        ``error_message`` and printed rather than propagated, so one failing
        framework does not stop the comparison.
    """
    from functools import wraps

    def decorator(func):
        @wraps(func)  # keep the wrapped function's __name__ and __doc__
        async def wrapper(*args, **kwargs):
            metric = PerformanceMetrics(framework=framework, model=model, start_time=time.time())
            try:
                response = await func(*args, **kwargs)
            except Exception as e:
                metric.completion_time = time.time()
                metric.error_message = str(e)
                metrics.append(metric)
                print(f"❌ {framework} failed: {e}")
                return

            # Recording and printing happen outside the try block so that an
            # error while printing cannot append the same run a second time.
            metric.completion_time = time.time()
            metric.response_content = response
            metric.success = True
            metrics.append(metric)
            print(f"✅ {framework} completed in {metric.total_time():.2f}s\n")
            # Strands responses are not echoed here
            # (presumably the agent prints its own output; confirm).
            if framework != "Strands Agents":
                print(response)
        return wrapper
    return decorator

In [14]:
# Report which providers have a credential set in the environment.
# Only presence is recorded; the key values themselves are never printed.
api_keys = {
    provider: any(os.getenv(var_name) for var_name in env_var_names)
    for provider, env_var_names in (
        ('OpenAI', ('OPENAI_API_KEY',)),
        ('Anthropic', ('ANTHROPIC_API_KEY',)),
        ('XAI', ('XAI_API_KEY',)),
        # Either variable name counts for Google.
        ('Google', ('GOOGLE_API_KEY', 'GEMINI_API_KEY')),
    )
}

print(
    "API Key Availability:",
    {name: ("✅" if present else "❌") for name, present in api_keys.items()},
)

API Key Availability: {'OpenAI': '✅', 'Anthropic': '✅', 'XAI': '✅', 'Google': '✅'}


In [15]:
# Shared prompt: every framework receives exactly the same request text
RESEARCH_PROMPT = "\n".join([
    'Research the company "Glean" and provide:',
    "1. Company overview (3-4 sentences)",
    "2. Key products/services",
    "3. Recent developments",
    "4. B2B sales opportunities",
    "",
    "Keep it concise.",
])

## 1. Direct OpenAI API Implementation

In [None]:
@track_performance("OpenAI Direct", MODEL_IDS['openai'])
async def test_openai_direct():
    from openai import AsyncOpenAI
    
    async with AsyncOpenAI() as client:
        response = await client.chat.completions.create(
            model=MODEL_IDS['openai'],
            messages=[{"role": "user", "content": RESEARCH_PROMPT}]
        )
        return response.choices[0].message.content

await test_openai_direct()

## 2. Direct Anthropic API Implementation

In [None]:
@track_performance("Anthropic Direct", MODEL_IDS['anthropic'])
async def test_anthropic_direct():
    from anthropic import AsyncAnthropic
    
    async with AsyncAnthropic() as client:
        response = await client.messages.create(
            model=MODEL_IDS['anthropic'],
            max_tokens=1000,
            messages=[{"role": "user", "content": RESEARCH_PROMPT}]
        )
        return response.content[0].text

await test_anthropic_direct()

## 3. LangChain Implementation

In [None]:
@track_performance("LangChain", MODEL_IDS['openai'])
async def test_langchain():
    from langchain_openai import ChatOpenAI
    
    llm = ChatOpenAI(model=MODEL_IDS['openai'])
    response = await llm.ainvoke(RESEARCH_PROMPT)
    return response.content

await test_langchain()

## 4. Strands Agents Implementation

In [None]:
@track_performance("Strands Agents", MODEL_IDS['strands'])
async def test_strands_agents():
    from strands import Agent
    
    agent = Agent(system_prompt="You are a helpful B2B sales research assistant.")
    response = agent(RESEARCH_PROMPT)
    return str(response.data if hasattr(response, 'data') else response)

await test_strands_agents()

## 5. Direct xAI API Implementation

In [None]:
@track_performance("xAI Direct", MODEL_IDS['xai'])
async def test_xai_direct():
    from openai import AsyncOpenAI
    
    client = AsyncOpenAI(
        api_key=os.getenv('XAI_API_KEY'),
        base_url="https://api.x.ai/v1"
    )
    
    response = await client.chat.completions.create(
        model=MODEL_IDS['xai'],
        messages=[{"role": "user", "content": RESEARCH_PROMPT}]
    )
    return response.choices[0].message.content

await test_xai_direct()

## 6. Direct Gemini API Implementation

In [None]:
@track_performance("Gemini Direct", MODEL_IDS['google'])
async def test_gemini_direct():
    from google import genai
    
    client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY') or os.getenv('GEMINI_API_KEY'))
    response = client.models.generate_content(
        model=MODEL_IDS['google'],
        contents=RESEARCH_PROMPT
    )
    return response.text

await test_gemini_direct()

## 7. Performance Comparison and Analysis

In [None]:
# Display results
if not metrics:
    # An empty list would otherwise fail further down with an opaque pandas error.
    raise RuntimeError("No metrics recorded - run at least one framework cell first.")

df = pd.DataFrame([vars(m) for m in metrics])
# Plain column arithmetic; no row-wise apply needed.
df['total_time'] = df['completion_time'] - df['start_time']

print("\n📊 Performance Results:")
print(df[['framework', 'model', 'success', 'total_time']].to_string(index=False))

# Save results to configured path
results_path = Path(config['comparison']['results_path'])
# to_csv does not create missing directories.
results_path.mkdir(parents=True, exist_ok=True)
results_file = results_path / 'agent_framework_comparison_results.csv'
df.to_csv(results_file, index=False)
print(f"\n💾 Results saved to: {results_file}")

# Quick visualization (successful runs only)
if df['success'].any():
    successful = df[df['success']]
    # Explicit figure/axes instead of the implicit pyplot state.
    fig, ax = plt.subplots()
    successful.plot.bar(x='framework', y='total_time', legend=False, ax=ax)
    ax.set_title('Response Time Comparison')
    ax.set_ylabel('Time (seconds)')
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    plt.show()