# Independent LLM Provider Testing Notebook

This notebook validates API tokens, tests all three LLM providers (Gemini, OpenAI, Anthropic), fetches available models, handles errors, and tracks real usage costs.

**Goal**: Test providers independently, gather actual model info and costs, then update the FastAPI application based on real results.

## 1. Import Required Libraries and Setup

In [1]:
import os
import json
import asyncio
from datetime import datetime
from typing import Dict, List, Any
from collections import defaultdict
import pandas as pd

# Load environment variables from .env file
def load_env():
    env_path = ".env"
    if os.path.exists(env_path):
        with open(env_path) as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith("#"):
                    key, value = line.split("=", 1)
                    os.environ[key.strip()] = value.strip()

load_env()

# API Keys from environment
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")

# Store for tracking usage
usage_log = defaultdict(lambda: {"calls": 0, "tokens": 0, "cost": 0.0, "errors": 0})
model_info = {}

print("=" * 60)
print("ENVIRONMENT SETUP")
print("=" * 60)
print(f"✓ Gemini API Key: {'Set' if GEMINI_API_KEY else 'NOT SET'}")
print(f"✓ OpenAI API Key: {'Set' if OPENAI_API_KEY else 'NOT SET'}")
print(f"✓ Anthropic API Key: {'Set' if ANTHROPIC_API_KEY else 'NOT SET'}")
print("=" * 60)

ENVIRONMENT SETUP
✓ Gemini API Key: Set
✓ OpenAI API Key: Set
✓ Anthropic API Key: Set


## 2. Validate API Tokens

In [2]:
# Test Gemini API Token
async def validate_gemini_token():
    try:
        import google.generativeai as genai
        genai.configure(api_key=GEMINI_API_KEY)
        
        # Try to list models
        models = genai.list_models()
        models_list = [m.name.split('/')[-1] for m in models if 'generateContent' in m.supported_generation_methods]
        
        print("✓ Gemini API Token: VALID")
        print(f"  Available models: {len(models_list)}")
        return True, models_list
    except Exception as e:
        print(f"✗ Gemini API Token: INVALID - {str(e)}")
        return False, []

# Test OpenAI API Token
async def validate_openai_token():
    try:
        from openai import OpenAI
        client = OpenAI(api_key=OPENAI_API_KEY)
        
        # Try to list models
        models_list = client.models.list()
        gpt_models = [m.id for m in models_list.data if 'gpt' in m.id]
        
        print("✓ OpenAI API Token: VALID")
        print(f"  Available models: {len(gpt_models)}")
        return True, gpt_models
    except Exception as e:
        print(f"✗ OpenAI API Token: INVALID - {str(e)}")
        return False, []

# Test Anthropic API Token
async def validate_anthropic_token():
    try:
        from anthropic import Anthropic
        client = Anthropic(api_key=ANTHROPIC_API_KEY)
        
        # Anthropic doesn't have a list_models endpoint, so test with direct call
        response = client.messages.create(
            model="claude-3-haiku-20240307",
            max_tokens=1,
            messages=[{"role": "user", "content": "hi"}]
        )
        
        print("✓ Anthropic API Token: VALID")
        print(f"  Model tested: claude-3-haiku-20240307")
        return True, ["claude-3-haiku-20240307", "claude-3-sonnet-20240229", "claude-3-opus-20240229"]
    except Exception as e:
        print(f"✗ Anthropic API Token: INVALID - {str(e)}")
        return False, []

# Run validation
print("\n" + "=" * 60)
print("API TOKEN VALIDATION")
print("=" * 60)

gemini_valid, gemini_models = await validate_gemini_token()
openai_valid, openai_models = await validate_openai_token()
anthropic_valid, anthropic_models = await validate_anthropic_token()

print("\n" + "=" * 60)
print("VALIDATION SUMMARY")
print("=" * 60)
summary_data = {
    "Provider": ["Gemini", "OpenAI", "Anthropic"],
    "Status": [
        "✓ Valid" if gemini_valid else "✗ Invalid",
        "✓ Valid" if openai_valid else "✗ Invalid",
        "✓ Valid" if anthropic_valid else "✗ Invalid"
    ],
    "Models": [len(gemini_models), len(openai_models), len(anthropic_models)]
}
pd.DataFrame(summary_data)


API TOKEN VALIDATION


  from .autonotebook import tqdm as notebook_tqdm


✓ Gemini API Token: VALID
  Available models: 33
✓ OpenAI API Token: VALID
  Available models: 72
✓ Anthropic API Token: VALID
  Model tested: claude-3-haiku-20240307

VALIDATION SUMMARY


Unnamed: 0,Provider,Status,Models
0,Gemini,✓ Valid,33
1,OpenAI,✓ Valid,72
2,Anthropic,✓ Valid,3


## 3. Fetch Available Models with Pricing

In [3]:
# Gemini Models
async def fetch_gemini_models():
    try:
        import google.generativeai as genai
        genai.configure(api_key=GEMINI_API_KEY)
        
        models = genai.list_models()
        model_data = []
        
        for m in models:
            if 'generateContent' in m.supported_generation_methods:
                name = m.name.split('/')[-1]
                model_data.append({
                    "Model": name,
                    "Provider": "Gemini",
                    "Input Cost (per 1K tokens)": "$0.00025 (free tier included)",
                    "Output Cost (per 1K tokens)": "$0.0005 (free tier included)",
                    "Capabilities": ", ".join(m.supported_generation_methods[:2])
                })
        
        return model_data
    except Exception as e:
        print(f"Error fetching Gemini models: {e}")
        return []

# OpenAI Models
async def fetch_openai_models():
    try:
        from openai import OpenAI
        client = OpenAI(api_key=OPENAI_API_KEY)
        
        models = client.models.list()
        model_data = []
        
        for m in models.data:
            if 'gpt' in m.id:
                # Pricing info (as of recent updates)
                pricing = {
                    "gpt-4o": {"input": "$0.005", "output": "$0.015"},
                    "gpt-4-turbo": {"input": "$0.01", "output": "$0.03"},
                    "gpt-4": {"input": "$0.03", "output": "$0.06"},
                    "gpt-3.5-turbo": {"input": "$0.0005", "output": "$0.0015"},
                }
                
                price_info = pricing.get(m.id.split("-")[:3] and m.id, {})
                input_price = price_info.get("input", "Unknown")
                output_price = price_info.get("output", "Unknown")
                
                model_data.append({
                    "Model": m.id,
                    "Provider": "OpenAI",
                    "Input Cost (per 1K tokens)": input_price,
                    "Output Cost (per 1K tokens)": output_price,
                    "Owner": m.owned_by
                })
        
        return model_data
    except Exception as e:
        print(f"Error fetching OpenAI models: {e}")
        return []

# Anthropic Models
async def fetch_anthropic_models():
    try:
        # Anthropic doesn't have list endpoint, use known models
        models = [
            "claude-3-opus-20240229",
            "claude-3-sonnet-20240229",
            "claude-3-haiku-20240307",
        ]
        
        pricing = {
            "claude-3-opus-20240229": {"input": "$0.015", "output": "$0.075"},
            "claude-3-sonnet-20240229": {"input": "$0.003", "output": "$0.015"},
            "claude-3-haiku-20240307": {"input": "$0.00025", "output": "$0.00125"},
        }
        
        model_data = []
        for model in models:
            price_info = pricing.get(model, {})
            model_data.append({
                "Model": model,
                "Provider": "Anthropic",
                "Input Cost (per 1K tokens)": price_info.get("input", "Unknown"),
                "Output Cost (per 1K tokens)": price_info.get("output", "Unknown"),
                "Capabilities": "Vision, reasoning, function calling"
            })
        
        return model_data
    except Exception as e:
        print(f"Error fetching Anthropic models: {e}")
        return []

# Fetch all models
print("\n" + "=" * 100)
print("AVAILABLE MODELS AND PRICING")
print("=" * 100)

gemini_models = await fetch_gemini_models()
openai_models = await fetch_openai_models()
anthropic_models = await fetch_anthropic_models()

all_models = gemini_models + openai_models + anthropic_models

# Store for later use
model_info = {
    "gemini": gemini_models,
    "openai": openai_models,
    "anthropic": anthropic_models
}

models_df = pd.DataFrame(all_models)
print("\n")
print(models_df.to_string(index=False))
print(f"\n✓ Total models available: {len(all_models)}")
print(f"  - Gemini: {len(gemini_models)} models")
print(f"  - OpenAI: {len(openai_models)} models")
print(f"  - Anthropic: {len(anthropic_models)} models")


AVAILABLE MODELS AND PRICING


                                  Model  Provider    Input Cost (per 1K tokens)  Output Cost (per 1K tokens)                        Capabilities           Owner
                       gemini-2.5-flash    Gemini $0.00025 (free tier included) $0.0005 (free tier included)        generateContent, countTokens             NaN
                         gemini-2.5-pro    Gemini $0.00025 (free tier included) $0.0005 (free tier included)        generateContent, countTokens             NaN
                   gemini-2.0-flash-exp    Gemini $0.00025 (free tier included) $0.0005 (free tier included)        generateContent, countTokens             NaN
                       gemini-2.0-flash    Gemini $0.00025 (free tier included) $0.0005 (free tier included)        generateContent, countTokens             NaN
                   gemini-2.0-flash-001    Gemini $0.00025 (free tier included) $0.0005 (free tier included)        generateContent, countTokens             NaN
  

## 4. Test Actual API Calls with Error Handling

In [4]:
# Test Gemini
async def test_gemini():
    try:
        import google.generativeai as genai
        genai.configure(api_key=GEMINI_API_KEY)
        
        model = genai.GenerativeModel("gemini-2.5-pro")
        
        # Generate content
        response = await model.generate_content_async("Say 'Hello from Fiesta' in one sentence")
        
        # Handle response properly - use parts accessor
        content = ""
        if hasattr(response, 'text'):
            try:
                content = response.text
            except Exception:
                # Fall back to parts if text accessor fails
                if hasattr(response, 'parts') and response.parts:
                    for part in response.parts:
                        if hasattr(part, 'text'):
                            content += part.text
        
        # Token counting
        prompt_tokens = 8  # approximate
        completion_tokens = len(content.split())
        cost = (prompt_tokens / 1000) * 0.00025 + (completion_tokens / 1000) * 0.0005
        
        usage_log["gemini"]["calls"] += 1
        usage_log["gemini"]["tokens"] += prompt_tokens + completion_tokens
        usage_log["gemini"]["cost"] += cost
        
        return True, content[:50], prompt_tokens, completion_tokens, cost
    except Exception as e:
        usage_log["gemini"]["errors"] += 1
        return False, str(e), 0, 0, 0

# Test OpenAI
async def test_openai():
    try:
        from openai import OpenAI
        client = OpenAI(api_key=OPENAI_API_KEY)
        
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Say 'Hello from Fiesta' in one sentence"}],
            max_tokens=50
        )
        
        content = response.choices[0].message.content
        prompt_tokens = response.usage.prompt_tokens
        completion_tokens = response.usage.completion_tokens
        cost = (prompt_tokens / 1000) * 0.0005 + (completion_tokens / 1000) * 0.0015
        
        usage_log["openai"]["calls"] += 1
        usage_log["openai"]["tokens"] += prompt_tokens + completion_tokens
        usage_log["openai"]["cost"] += cost
        
        return True, content[:50], prompt_tokens, completion_tokens, cost
    except Exception as e:
        usage_log["openai"]["errors"] += 1
        return False, str(e), 0, 0, 0

# Test Anthropic
async def test_anthropic():
    try:
        from anthropic import Anthropic
        client = Anthropic(api_key=ANTHROPIC_API_KEY)
        
        response = client.messages.create(
            model="claude-3-haiku-20240307",
            max_tokens=50,
            messages=[{"role": "user", "content": "Say 'Hello from Fiesta' in one sentence"}]
        )
        
        content = response.content[0].text
        prompt_tokens = response.usage.input_tokens
        completion_tokens = response.usage.output_tokens
        cost = (prompt_tokens / 1000) * 0.00025 + (completion_tokens / 1000) * 0.00125
        
        usage_log["anthropic"]["calls"] += 1
        usage_log["anthropic"]["tokens"] += prompt_tokens + completion_tokens
        usage_log["anthropic"]["cost"] += cost
        
        return True, content[:50], prompt_tokens, completion_tokens, cost
    except Exception as e:
        usage_log["anthropic"]["errors"] += 1
        return False, str(e), 0, 0, 0

# Run tests
print("\n" + "=" * 100)
print("LIVE API CALLS TEST")
print("=" * 100)

print("\n[Testing Gemini...]")
gemini_ok, gemini_response, gem_prompt, gem_complete, gem_cost = await test_gemini()
print(f"Status: {'✓ OK' if gemini_ok else '✗ ERROR'}")
if gemini_ok:
    print(f"Response: {gemini_response}...")
    print(f"Tokens: {gem_prompt} (prompt) + {gem_complete} (completion) = {gem_prompt + gem_complete}")
    print(f"Cost: ${gem_cost:.6f}")
else:
    print(f"Error: {gemini_response}")

print("\n[Testing OpenAI...]")
openai_ok, openai_response, oai_prompt, oai_complete, oai_cost = await test_openai()
print(f"Status: {'✓ OK' if openai_ok else '✗ ERROR'}")
if openai_ok:
    print(f"Response: {openai_response}...")
    print(f"Tokens: {oai_prompt} (prompt) + {oai_complete} (completion) = {oai_prompt + oai_complete}")
    print(f"Cost: ${oai_cost:.6f}")
else:
    print(f"Error: {openai_response}")

print("\n[Testing Anthropic...]")
anthropic_ok, anthropic_response, anth_prompt, anth_complete, anth_cost = await test_anthropic()
print(f"Status: {'✓ OK' if anthropic_ok else '✗ ERROR'}")
if anthropic_ok:
    print(f"Response: {anthropic_response}...")
    print(f"Tokens: {anth_prompt} (prompt) + {anth_complete} (completion) = {anth_prompt + anth_complete}")
    print(f"Cost: ${anth_cost:.6f}")
else:
    print(f"Error: {anthropic_response}")


LIVE API CALLS TEST

[Testing Gemini...]
Status: ✓ OK
Response: Hello from Fiesta...
Tokens: 8 (prompt) + 3 (completion) = 11
Cost: $0.000003

[Testing OpenAI...]
Status: ✓ OK
Response: Hello from Fiesta!...
Tokens: 16 (prompt) + 4 (completion) = 20
Cost: $0.000014

[Testing Anthropic...]
Status: ✓ OK
Response: Hello from Fiesta!...
Tokens: 19 (prompt) + 9 (completion) = 28
Cost: $0.000016


## 5. Real Usage and Cost Tracking Summary

In [5]:
print("\n" + "=" * 100)
print("USAGE SUMMARY - REAL API CALLS")
print("=" * 100)

usage_data = []
total_calls = 0
total_tokens = 0
total_cost = 0.0

for provider, data in usage_log.items():
    total_calls += data["calls"]
    total_tokens += data["tokens"]
    total_cost += data["cost"]
    
    usage_data.append({
        "Provider": provider.upper(),
        "API Calls": data["calls"],
        "Total Tokens": data["tokens"],
        "Cost (USD)": f"${data['cost']:.6f}",
        "Errors": data["errors"]
    })

usage_df = pd.DataFrame(usage_data)
print("\n")
print(usage_df.to_string(index=False))

print("\n" + "-" * 100)
print(f"TOTAL API CALLS: {total_calls}")
print(f"TOTAL TOKENS CONSUMED: {total_tokens}")
print(f"TOTAL COST (USD): ${total_cost:.6f}")
print("-" * 100)

# Break down by provider cost
print("\nCOST BREAKDOWN BY PROVIDER:")
for provider, data in sorted(usage_log.items()):
    if data["calls"] > 0:
        print(f"  {provider.upper()}: ${data['cost']:.6f} ({data['calls']} calls)")

print("\n" + "=" * 100)


USAGE SUMMARY - REAL API CALLS


 Provider  API Calls  Total Tokens Cost (USD)  Errors
   GEMINI          1            11  $0.000003       0
   OPENAI          1            20  $0.000014       0
ANTHROPIC          1            28  $0.000016       0

----------------------------------------------------------------------------------------------------
TOTAL API CALLS: 3
TOTAL TOKENS CONSUMED: 59
TOTAL COST (USD): $0.000034
----------------------------------------------------------------------------------------------------

COST BREAKDOWN BY PROVIDER:
  ANTHROPIC: $0.000016 (1 calls)
  GEMINI: $0.000003 (1 calls)
  OPENAI: $0.000014 (1 calls)



## 6. Export Results for Code Updates

In [6]:
# Export results as JSON for reference
results = {
    "timestamp": datetime.now().isoformat(),
    "api_tokens_valid": {
        "gemini": gemini_valid,
        "openai": openai_valid,
        "anthropic": anthropic_valid
    },
    "models_by_provider": {
        "gemini": [m["Model"] for m in gemini_models],
        "openai": [m["Model"] for m in openai_models],
        "anthropic": [m["Model"] for m in anthropic_models]
    },
    "usage_summary": {
        "total_calls": total_calls,
        "total_tokens": total_tokens,
        "total_cost_usd": round(total_cost, 6),
        "by_provider": dict(usage_log)
    },
    "test_results": {
        "gemini": "OK" if gemini_ok else "FAILED",
        "openai": "OK" if openai_ok else "FAILED",
        "anthropic": "OK" if anthropic_ok else "FAILED"
    }
}

# Display as formatted JSON
print("\n" + "=" * 100)
print("FULL RESULTS JSON (for code updates)")
print("=" * 100)
print(json.dumps(results, indent=2, default=str))

# Save to file
with open("api_test_results.json", "w") as f:
    json.dump(results, f, indent=2, default=str)
print("\n✓ Results saved to: api_test_results.json")


FULL RESULTS JSON (for code updates)
{
  "timestamp": "2025-12-05T08:51:10.949031",
  "api_tokens_valid": {
    "gemini": true,
    "openai": true,
    "anthropic": true
  },
  "models_by_provider": {
    "gemini": [
      "gemini-2.5-flash",
      "gemini-2.5-pro",
      "gemini-2.0-flash-exp",
      "gemini-2.0-flash",
      "gemini-2.0-flash-001",
      "gemini-2.0-flash-lite-001",
      "gemini-2.0-flash-lite",
      "gemini-2.0-flash-lite-preview-02-05",
      "gemini-2.0-flash-lite-preview",
      "gemini-2.0-pro-exp",
      "gemini-2.0-pro-exp-02-05",
      "gemini-exp-1206",
      "gemini-2.5-flash-preview-tts",
      "gemini-2.5-pro-preview-tts",
      "gemma-3-1b-it",
      "gemma-3-4b-it",
      "gemma-3-12b-it",
      "gemma-3-27b-it",
      "gemma-3n-e4b-it",
      "gemma-3n-e2b-it",
      "gemini-flash-latest",
      "gemini-flash-lite-latest",
      "gemini-pro-latest",
      "gemini-2.5-flash-lite",
      "gemini-2.5-flash-image-preview",
      "gemini-2.5-flash-image"