## Notebook 00: Setup & Verification


**Prerequisites:**
- Install dependencies: `pip install -r requirements.txt` or `uv sync`


In [1]:
# Setup imports and environment
import sys
sys.path.append('..')

from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()

# Verify API keys
providers = {
            "OpenAI": os.getenv("OPENAI_API_KEY"),
            "Google Gemini": os.getenv("GEMINI_API_KEY"),
            "Groq": os.getenv("GROQ_API_KEY"),
            }

print("API Key Status:")
print("-" * 50)
for provider, key in providers.items():
    status = "Found" if key else "Missing"
    preview = f"(...{key[-8:]})" if key else ""
    print(f"{provider:20s} {status:10s} {preview}")


API Key Status:
--------------------------------------------------
OpenAI               Found      (...esp_VQoA)
Google Gemini        Found      (...ukLZ55NY)
Groq                 Found      (...L0uHiohh)


### Import Utilities

Import our custom utilities for token counting, logging, and LLM client abstraction.


In [2]:
from utils.prompts import render, PROMPTS, list_prompts
from utils.llm_client import LLMClient
from utils.logging_utils import log_llm_call, get_log_summary
from utils.router import pick_model
from utils.token_utils import count_messages_tokens, reconcile_usage

print(f"Available prompts: {len(list_prompts())}")
print(f"Prompts: {', '.join(list_prompts())}")


Available prompts: 11
Prompts: skeleton.v1, zero_shot.v1, few_shot.v1, cot_reasoning.v1, tot_reasoning.v1, json_extract.v1, tool_call.v1, overflow_summarize.v1, rate_limit_retry.v1, style_persona.v1, router_classify.v1


### Test 1: Hello World with Token Tracking

Let's send a simple hello-world request to each provider and compare:
- **Estimated tokens** (via tiktoken)
- **Actual tokens** (from provider API)


In [4]:
import pandas as pd

def test_provider(provider_name):
    """Test a provider with a simple hello world."""
    print(f"\n{'='*60}")
    print(f"Testing: {provider_name}")
    print('='*60)
    
    # Pick appropriate model
    model = pick_model(provider_name.lower(), "general")
    print(f"Model: {model}")
    
    # Create client
    client = LLMClient(provider=provider_name.lower(), model=model)
    
    # Simple message
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say 'Hello, World!' and nothing else."}
    ]
    
    # Call API
    try:
        response = client.chat(messages, max_tokens=20, temperature=0.0)
        
        print(f"\nResponse: {response['text']}")
        print(f"Latency: {response['latency_ms']} ms")
        print(f"\nToken Usage:")
        print(f"  Estimated Input:  {response['usage']['input_tokens_est']:>6} tokens")
        print(f"  Actual Prompt:    {response['usage']['prompt_tokens_actual']:>6} tokens")
        print(f"  Actual Completion:{response['usage']['completion_tokens_actual']:>6} tokens")
        print(f"  Total (actual):   {response['usage']['total_tokens_actual']:>6} tokens")
        
        # Calculate accuracy
        if response['usage']['prompt_tokens_actual']:
            est = response['usage']['input_tokens_est']
            act = response['usage']['prompt_tokens_actual']
            accuracy = (1 - abs(est - act) / act) * 100
            print(f"\nEstimation Accuracy: {accuracy:.1f}%")
        
        # Log to CSV
        log_llm_call(
            provider=provider_name.lower(),
            model=model,
            technique="hello_world",
            latency_ms=response['latency_ms'],
            usage=response['usage'],
            retry_count=response['meta']['retry_count'],
            backoff_ms_total=response['meta']['backoff_ms_total'],
        )
        
        return True
    except Exception as e:
        print(f" Error: {e}")
        return False

# Test all providers that have keys
results = {}
for provider, key in providers.items():
    if key:
        provider_name = provider.split()[0].lower()  # "Google Gemini" -> "google"
        if provider_name == "google":
            provider_name = "google"
        results[provider] = test_provider(provider_name)
    else:
        print(f"\n{'='*60}")
        print(f"Skipping {provider} (no API key)")
        print('='*60)



Testing: openai
Model: gpt-4o-mini

Response: Hello, World!
Latency: 1256 ms

Token Usage:
  Estimated Input:      24 tokens
  Actual Prompt:        27 tokens
  Actual Completion:     4 tokens
  Total (actual):       31 tokens

Estimation Accuracy: 88.9%

Testing: google
Model: gemini-2.0-flash-exp
 Error: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.0-flash-exp\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.0-flash-exp\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, li

In [6]:
# View recent logs
try:
    logs_df = pd.read_csv("../logs/runs.csv")
    print(f"\nTotal LLM calls logged: {len(logs_df)}")
    print("\nMost recent calls:")
    print(logs_df.tail(3).to_string())
    
    # Summary statistics
    summary = get_log_summary()
    print(f"\n{'='*60}")
    print("Session Summary:")
    print('='*60)
    for key, value in summary.items():
        if isinstance(value, dict):
            print(f"{key}:")
            for k, v in value.items():
                print(f"  {k}: {v}")
        else:
            print(f"{key}: {value}")
except FileNotFoundError:
    print("No logs found yet. Run some LLM calls first!")



Total LLM calls logged: 4

Most recent calls:
                    timestamp provider                 model    technique  latency_ms  input_tokens_est  context_tokens_est  total_est  prompt_tokens_actual  completion_tokens_actual  total_tokens_actual  retry_count  backoff_ms_total  overflow_handled cost_estimate_usd  notes
1  2026-01-15T00:13:04.292217     groq  llama-3.1-8b-instant  hello_world         436                24                   0         27                    51                         5                   56            0                 0             False        ~$0.000003    NaN
2  2026-01-15T00:13:48.328290   openai           gpt-4o-mini  hello_world        1256                24                   0         27                    27                         4                   31            0                 0             False        ~$0.000006    NaN
3  2026-01-15T00:13:53.013981     groq  llama-3.1-8b-instant  hello_world         193                24                