# AI Config AI Metrics - Cookbook

Comprehensive tests for all code in SKILL.md and references.

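Prerequisites: `LAUNCHDARKLY_SDK_KEY`, `LAUNCHDARKLY_API_TOKEN`, and an AI Config with key `content-assistant` in project `support-ai` (see `aiconfig-create`). Optional: `OPENAI_API_KEY` enables the live OpenAI and streaming tests; without it those cells only define the helper functions.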

In [1]:
%pip install launchdarkly-server-sdk launchdarkly-server-sdk-ai openai anthropic tiktoken requests python-dotenv -q

import os
from pathlib import Path
from dotenv import load_dotenv

def find_repo_root(start_path: Path = None) -> Path:
    """Locate the enclosing git repository root.

    Checks ``start_path`` (the current working directory when omitted) and
    then each of its ancestors, nearest first, and returns the first directory
    that contains a ``.git`` entry. When none does, the starting directory
    itself is returned.
    """
    origin = start_path or Path.cwd()
    candidates = (origin, *origin.parents)
    return next((folder for folder in candidates if (folder / '.git').exists()), origin)

# Load credentials from the repository-level .env file.
repo_root = find_repo_root()
env_path = repo_root / '.env'
# load_dotenv() returns False when the file is missing or defines no
# variables, so only report success when something was actually read.
if load_dotenv(env_path):
    print(f"[OK] Loaded environment from {env_path}")
else:
    print(f"[WARN] No variables loaded from {env_path}; using the existing process environment")


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m26.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/opt/homebrew/opt/python@3.11/bin/python3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[OK] Loaded environment from /Users/ld_scarlett/Documents/Github/agent-skills/.env


In [2]:
# SDK initialization (see aiconfig-sdk for details)
from ldclient import Context
from ldai.client import LDAIClient, AICompletionConfigDefault
import ldclient
from ldclient.config import Config

# Build the LaunchDarkly client from the SDK key found in the environment,
# then wrap it in the AI client used to fetch AI Configs.
SDK_KEY = os.getenv("LAUNCHDARKLY_SDK_KEY")
ldclient.set_config(Config(SDK_KEY))
ld_client = ldclient.get()
ai_client = LDAIClient(ld_client)

print(f"[OK] SDK initialized: {ld_client.is_initialized()}")

# Fetch the AI Config whose tracker the following sections exercise.
# The fallback passed here is a disabled config, and the final argument is
# an empty dict.
context = Context.builder("cookbook-test-user").build()
fallback = AICompletionConfigDefault(enabled=False)
config = ai_client.completion_config("content-assistant", context, fallback, {})
model_label = config.model.name if config.model else 'N/A'
print(f"[OK] Got config: enabled={config.enabled}, model={model_label}")

[OK] SDK initialized: True
[OK] Got config: enabled=True, model=gpt-4


---
## Test Tracker Methods
From: `SKILL.md` lines 69-82 (Quick Reference table)

In [3]:
from ldai.tracker import TokenUsage
import time

tracker = config.tracker
print(f"[OK] Got tracker: {type(tracker).__name__}")

# Outcome events: track_success() is SKILL.md line 81, track_error() line 82
tracker.track_success()
print("[OK] track_success()")
tracker.track_error()
print("[OK] track_error()")

# Token counts are reported through a TokenUsage object - SKILL.md line 78
tokens = TokenUsage(total=30, input=10, output=20)
tracker.track_tokens(tokens)
print("[OK] track_tokens(TokenUsage(total=30, input=10, output=20))")

# Timing values are passed as ints: track_duration(int) is SKILL.md line 79,
# track_time_to_first_token(int) is line 80
tracker.track_duration(150)
print("[OK] track_duration(150)")
tracker.track_time_to_first_token(25)
print("[OK] track_time_to_first_token(25)")

# Test track_duration_of(fn) - SKILL.md line 77
def slow_fn():
    """Sleep for a tenth of a second, then hand back the string "result"."""
    pause_seconds = 0.1
    time.sleep(pause_seconds)
    outcome = "result"
    return outcome

# track_duration_of returns whatever the wrapped callable returned
result = tracker.track_duration_of(slow_fn)
print(f"[OK] track_duration_of(fn) -> {result}")

# Exercised in later sections rather than here:
#   track_openai_metrics(fn)            - SKILL.md line 75 (OpenAI section)
#   track_bedrock_converse_metrics(res) - SKILL.md line 76 (Bedrock section)

ld_client.flush()
print("[OK] Flushed metrics")

[OK] Got tracker: LDAIConfigTracker
[OK] track_success()
[OK] track_error()
[OK] track_tokens(TokenUsage(total=30, input=10, output=20))
[OK] track_duration(150)
[OK] track_time_to_first_token(25)
[OK] track_duration_of(fn) -> result
[OK] Flushed metrics


---
## OpenAI Tracking
From: `references/openai-tracking.md`

In [4]:
# From: references/openai-tracking.md lines 13-31
import openai

def call_with_tracking(config, user_prompt: str):
    """OpenAI call with automatic metrics tracking.

    Args:
        config: AI Config object; this function reads ``enabled``,
            ``model.name``, ``messages`` and ``tracker`` from it.
        user_prompt: Text sent as the user message.

    Returns:
        The content of the first completion choice, or ``None`` when the
        config is disabled.
    """
    if not config.enabled:
        return None

    # Only prepend a system message when the config actually carries one;
    # indexing messages[0] unconditionally fails on a config without messages.
    messages = []
    if config.messages:
        messages.append({"role": "system", "content": config.messages[0].content})
    messages.append({"role": "user", "content": user_prompt})

    # The request is wrapped in a lambda so that the tracker invokes it.
    response = config.tracker.track_openai_metrics(
        lambda: openai.chat.completions.create(
            model=config.model.name,
            messages=messages,
        )
    )

    return response.choices[0].message.content

print("[OK] call_with_tracking() defined (openai-tracking.md:13-31)")

[OK] call_with_tracking() defined (openai-tracking.md:13-31)


In [5]:
# From: references/openai-tracking.md lines 35-55
def call_with_tracking_safe(config, user_prompt: str):
    """OpenAI call with metrics and error tracking.

    Args:
        config: AI Config object; this function reads ``enabled``,
            ``model.name``, ``messages`` and ``tracker`` from it.
        user_prompt: Text sent as the user message.

    Returns:
        The content of the first completion choice, or ``None`` when the
        config is disabled.

    Raises:
        Exception: Anything raised while making or reading the call is
            re-raised after ``track_error()`` has been recorded.
    """
    if not config.enabled:
        return None

    # Only prepend a system message when the config actually carries one;
    # indexing messages[0] unconditionally fails on a config without messages.
    messages = []
    if config.messages:
        messages.append({"role": "system", "content": config.messages[0].content})
    messages.append({"role": "user", "content": user_prompt})

    try:
        response = config.tracker.track_openai_metrics(
            lambda: openai.chat.completions.create(
                model=config.model.name,
                messages=messages,
            )
        )
        return response.choices[0].message.content

    except Exception:
        # NOTE(review): track_openai_metrics may already record an error when
        # the wrapped call raises, depending on the SDK version - confirm this
        # does not count the same failure twice.
        config.tracker.track_error()
        raise

print("[OK] call_with_tracking_safe() defined (openai-tracking.md:35-55)")

[OK] call_with_tracking_safe() defined (openai-tracking.md:35-55)


In [6]:
# Test OpenAI tracking
print("=== Testing OpenAI Tracking ===")

openai_key = os.environ.get("OPENAI_API_KEY")
if not (openai_key and config.enabled):
    print("[INFO] OPENAI_API_KEY not set or config disabled - skipping live test")
    print("[OK] Functions defined and ready for use")
else:
    openai.api_key = openai_key

    # Basic helper first (openai-tracking.md:13-31), then the
    # error-tracking helper (openai-tracking.md:35-55).
    live_checks = (
        (call_with_tracking, "Say hello in one word"),
        (call_with_tracking_safe, "Say goodbye in one word"),
    )
    for tracked_call, prompt in live_checks:
        result = tracked_call(config, prompt)
        if result:
            print(f"[OK] {tracked_call.__name__}: {result[:50]}")

    ld_client.flush()
    print("[OK] Flushed OpenAI metrics")

=== Testing OpenAI Tracking ===
[OK] call_with_tracking: "Hi"
[OK] call_with_tracking_safe: "Farewell"
[OK] Flushed OpenAI metrics


---
## Anthropic Tracking
From: `references/anthropic-tracking.md`

In [7]:
# From: references/anthropic-tracking.md lines 13-45
from ldai.tracker import TokenUsage

def anthropic_call_with_tracking(config, user_prompt: str):
    """Anthropic call with manual metrics tracking.

    Args:
        config: AI Config object; this function reads ``enabled``,
            ``model.name`` and ``tracker`` from it.
        user_prompt: Text sent as the single user message.

    Returns:
        The text of the first content block, or ``None`` when the config is
        disabled.
    """
    # Check the flag first so a disabled config never needs the anthropic
    # package to be importable or a client to be constructed.
    if not config.enabled:
        return None

    import anthropic
    client = anthropic.Anthropic()

    tracker = config.tracker

    # Track duration of the call
    response = tracker.track_duration_of(
        lambda: client.messages.create(
            model=config.model.name,
            max_tokens=1024,
            messages=[{"role": "user", "content": user_prompt}]
        )
    )

    # Manually track tokens using a TokenUsage object; skipped when the
    # response carries no usage data.
    usage = getattr(response, 'usage', None)
    if usage is not None:
        tokens = TokenUsage(
            total=usage.input_tokens + usage.output_tokens,
            input=usage.input_tokens,
            output=usage.output_tokens
        )
        tracker.track_tokens(tokens)

    tracker.track_success()
    return response.content[0].text

print("[OK] anthropic_call_with_tracking() defined (anthropic-tracking.md:13-45)")

[OK] anthropic_call_with_tracking() defined (anthropic-tracking.md:13-45)


In [8]:
# From: references/anthropic-tracking.md lines 49-76
def anthropic_call_with_system_prompt(config, user_prompt: str):
    """Anthropic call using system prompt from AI Config.

    Args:
        config: AI Config object; this function reads ``enabled``,
            ``model.name``, ``messages`` and ``tracker`` from it.
        user_prompt: Text sent as the single user message.

    Returns:
        The text of the first content block, or ``None`` when the config is
        disabled.
    """
    # Check the flag first so a disabled config never needs the anthropic
    # package to be importable or a client to be constructed.
    if not config.enabled:
        return None

    import anthropic
    client = anthropic.Anthropic()

    tracker = config.tracker
    system_content = config.messages[0].content if config.messages else ""

    request = {
        "model": config.model.name,
        "max_tokens": 1024,
        "messages": [{"role": "user", "content": user_prompt}],
    }
    # Leave the system parameter out entirely instead of sending an empty prompt.
    if system_content:
        request["system"] = system_content

    response = tracker.track_duration_of(lambda: client.messages.create(**request))

    # Token counts are recorded only when the response carries usage data.
    usage = getattr(response, 'usage', None)
    if usage is not None:
        tokens = TokenUsage(
            total=usage.input_tokens + usage.output_tokens,
            input=usage.input_tokens,
            output=usage.output_tokens
        )
        tracker.track_tokens(tokens)

    tracker.track_success()
    return response.content[0].text

print("[OK] anthropic_call_with_system_prompt() defined (anthropic-tracking.md:49-76)")

[OK] anthropic_call_with_system_prompt() defined (anthropic-tracking.md:49-76)


In [9]:
# From: references/anthropic-tracking.md lines 80-110
def anthropic_call_with_tracking_safe(config, user_prompt: str):
    """Anthropic call with metrics and error tracking.

    Args:
        config: AI Config object; this function reads ``enabled``,
            ``model.name`` and ``tracker`` from it.
        user_prompt: Text sent as the single user message.

    Returns:
        The text of the first content block, or ``None`` when the config is
        disabled.

    Raises:
        Exception: Anything raised while making or reading the call is
            re-raised after ``track_error()`` has been recorded.
    """
    # Check the flag first so a disabled config never needs the anthropic
    # package to be importable or a client to be constructed.
    if not config.enabled:
        return None

    import anthropic
    client = anthropic.Anthropic()

    tracker = config.tracker

    try:
        response = tracker.track_duration_of(
            lambda: client.messages.create(
                model=config.model.name,
                max_tokens=1024,
                messages=[{"role": "user", "content": user_prompt}]
            )
        )

        # Token counts are recorded only when the response carries usage data.
        usage = getattr(response, 'usage', None)
        if usage is not None:
            tokens = TokenUsage(
                total=usage.input_tokens + usage.output_tokens,
                input=usage.input_tokens,
                output=usage.output_tokens
            )
            tracker.track_tokens(tokens)

        tracker.track_success()
        return response.content[0].text

    except Exception:
        tracker.track_error()
        raise

print("[OK] anthropic_call_with_tracking_safe() defined (anthropic-tracking.md:80-110)")

[OK] anthropic_call_with_tracking_safe() defined (anthropic-tracking.md:80-110)


In [10]:
# Test Anthropic tracking
print("=== Testing Anthropic Tracking ===")

anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
# No live request is made in either case; only the status line differs.
status_line = (
    "[INFO] ANTHROPIC_API_KEY found - live test would require Anthropic model in config"
    if anthropic_key and config.enabled
    else "[INFO] ANTHROPIC_API_KEY not set or config disabled - skipping live test"
)
print(status_line)
print("[OK] Functions defined and ready for use")

=== Testing Anthropic Tracking ===
[INFO] ANTHROPIC_API_KEY found - live test would require Anthropic model in config
[OK] Functions defined and ready for use


---
## Bedrock Tracking
From: `references/bedrock-tracking.md`

In [11]:
# From: references/bedrock-tracking.md lines 12-35
def bedrock_call_with_tracking(config, user_prompt: str):
    """Bedrock Converse call with automatic metrics tracking.

    Args:
        config: AI Config object; this function reads ``enabled``,
            ``model.name`` and ``tracker`` from it.
        user_prompt: Text sent as the single user message.

    Returns:
        The text of the first content block of the output message, or
        ``None`` when the config is disabled.
    """
    # Check the flag first so a disabled config never needs boto3 to be
    # importable or a Bedrock client to be constructed.
    if not config.enabled:
        return None

    import boto3
    bedrock = boto3.client("bedrock-runtime")

    tracker = config.tracker

    # Make the Bedrock call
    # NOTE(review): track_bedrock_converse_metrics may also record a duration
    # from the response, depending on the SDK version - confirm that wrapping
    # the call in track_duration_of does not report it twice.
    response = tracker.track_duration_of(
        lambda: bedrock.converse(
            modelId=config.model.name,
            messages=[{"role": "user", "content": [{"text": user_prompt}]}]
        )
    )

    # Track Bedrock-specific metrics from response
    tracker.track_bedrock_converse_metrics(response)

    return response["output"]["message"]["content"][0]["text"]

print("[OK] bedrock_call_with_tracking() defined (bedrock-tracking.md:12-35)")

[OK] bedrock_call_with_tracking() defined (bedrock-tracking.md:12-35)


In [12]:
# From: references/bedrock-tracking.md lines 39-58
def bedrock_call_with_system_prompt(config, user_prompt: str):
    """Bedrock call using system prompt from AI Config.

    Args:
        config: AI Config object; this function reads ``enabled``,
            ``model.name``, ``messages`` and ``tracker`` from it.
        user_prompt: Text sent as the single user message.

    Returns:
        The text of the first content block of the output message, or
        ``None`` when the config is disabled.
    """
    # Check the flag first so a disabled config never needs boto3 to be
    # importable or a Bedrock client to be constructed.
    if not config.enabled:
        return None

    import boto3
    bedrock = boto3.client("bedrock-runtime")

    tracker = config.tracker
    system_content = config.messages[0].content if config.messages else ""

    request = {
        "modelId": config.model.name,
        "messages": [{"role": "user", "content": [{"text": user_prompt}]}],
    }
    # Converse requires system text blocks to be non-empty, so the system
    # parameter is only sent when the config supplies a prompt.
    if system_content:
        request["system"] = [{"text": system_content}]

    response = tracker.track_duration_of(lambda: bedrock.converse(**request))

    tracker.track_bedrock_converse_metrics(response)

    return response["output"]["message"]["content"][0]["text"]

print("[OK] bedrock_call_with_system_prompt() defined (bedrock-tracking.md:39-58)")

[OK] bedrock_call_with_system_prompt() defined (bedrock-tracking.md:39-58)


In [13]:
# From: references/bedrock-tracking.md lines 62-83
def bedrock_call_with_tracking_safe(config, user_prompt: str):
    """Bedrock call with metrics and error tracking.

    Args:
        config: AI Config object; this function reads ``enabled``,
            ``model.name`` and ``tracker`` from it.
        user_prompt: Text sent as the single user message.

    Returns:
        The text of the first content block of the output message, or
        ``None`` when the config is disabled.

    Raises:
        Exception: Anything raised while making or reading the call is
            re-raised after ``track_error()`` has been recorded.
    """
    # Check the flag first so a disabled config never needs boto3 to be
    # importable or a Bedrock client to be constructed.
    if not config.enabled:
        return None

    import boto3
    bedrock = boto3.client("bedrock-runtime")

    tracker = config.tracker

    try:
        response = tracker.track_duration_of(
            lambda: bedrock.converse(
                modelId=config.model.name,
                messages=[{"role": "user", "content": [{"text": user_prompt}]}]
            )
        )

        tracker.track_bedrock_converse_metrics(response)
        return response["output"]["message"]["content"][0]["text"]

    except Exception:
        tracker.track_error()
        raise

print("[OK] bedrock_call_with_tracking_safe() defined (bedrock-tracking.md:62-83)")

[OK] bedrock_call_with_tracking_safe() defined (bedrock-tracking.md:62-83)


In [14]:
# Test Bedrock tracking
print("=== Testing Bedrock Tracking ===")

# Only checks whether boto3 can be imported; no Bedrock request is made.
try:
    import boto3
except ImportError:
    print("[INFO] boto3 not installed - skipping Bedrock tests")
else:
    print("[INFO] boto3 available - live test would require AWS credentials and Bedrock model in config")
print("[OK] Functions defined and ready for use")

=== Testing Bedrock Tracking ===
[INFO] boto3 available - live test would require AWS credentials and Bedrock model in config
[OK] Functions defined and ready for use


---
## Streaming Tracking
From: `references/streaming-tracking.md`

In [15]:
# From: references/streaming-tracking.md lines 14-59
import time
import openai
from ldai.tracker import TokenUsage

def call_streaming_with_tracking(config, user_prompt: str):
    """Streaming call with TTFT and duration tracking.

    Args:
        config: AI Config object; this function reads ``enabled``,
            ``model.name`` and ``tracker`` from it.
        user_prompt: Text sent as the single user message.

    Returns:
        The concatenated streamed text, or ``None`` when the config is
        disabled.
    """
    if not config.enabled:
        return None

    tracker = config.tracker
    # perf_counter is monotonic, so wall-clock adjustments cannot skew timings.
    started = time.perf_counter()
    first_token_seen = False

    stream = openai.chat.completions.create(
        model=config.model.name,
        messages=[{"role": "user", "content": user_prompt}],
        stream=True
    )

    pieces = []
    for chunk in stream:
        # A chunk can arrive with an empty choices list (for example a
        # usage-only chunk); indexing choices[0] on it would raise IndexError.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if not text:
            continue

        if not first_token_seen:
            first_token_seen = True
            ttft_ms = int((time.perf_counter() - started) * 1000)
            tracker.track_time_to_first_token(ttft_ms)

        pieces.append(text)

    response_text = "".join(pieces)

    # Track final metrics (milliseconds)
    duration_ms = int((time.perf_counter() - started) * 1000)
    tracker.track_duration(duration_ms)
    tracker.track_success()

    # Rough estimate of two tokens per whitespace-separated word
    # (use tiktoken for accuracy).
    estimated_input = len(user_prompt.split()) * 2
    estimated_output = len(response_text.split()) * 2
    tokens = TokenUsage(
        total=estimated_input + estimated_output,
        input=estimated_input,
        output=estimated_output
    )
    tracker.track_tokens(tokens)

    return response_text

print("[OK] call_streaming_with_tracking() defined (streaming-tracking.md:14-59)")

[OK] call_streaming_with_tracking() defined (streaming-tracking.md:14-59)


In [16]:
# From: references/streaming-tracking.md lines 63-115
import tiktoken

def call_streaming_accurate_tokens(config, user_prompt: str):
    """Streaming with accurate token counting using tiktoken.

    Args:
        config: AI Config object; this function reads ``enabled``,
            ``model.name`` and ``tracker`` from it.
        user_prompt: Text sent as the single user message.

    Returns:
        The concatenated streamed text, or ``None`` when the config is
        disabled.
    """
    if not config.enabled:
        return None

    tracker = config.tracker
    # perf_counter is monotonic, so wall-clock adjustments cannot skew timings.
    started = time.perf_counter()
    first_token_seen = False

    # Get encoder for the model, falling back to cl100k_base when tiktoken
    # does not recognise the model name.
    try:
        enc = tiktoken.encoding_for_model(config.model.name)
    except KeyError:
        enc = tiktoken.get_encoding("cl100k_base")

    stream = openai.chat.completions.create(
        model=config.model.name,
        messages=[{"role": "user", "content": user_prompt}],
        stream=True
    )

    pieces = []
    for chunk in stream:
        # A chunk can arrive with an empty choices list (for example a
        # usage-only chunk); indexing choices[0] on it would raise IndexError.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if not text:
            continue

        if not first_token_seen:
            first_token_seen = True
            ttft_ms = int((time.perf_counter() - started) * 1000)
            tracker.track_time_to_first_token(ttft_ms)

        pieces.append(text)

    response_text = "".join(pieces)

    # Track final metrics
    duration_ms = int((time.perf_counter() - started) * 1000)
    tracker.track_duration(duration_ms)
    tracker.track_success()

    # Token counts come from encoding the raw prompt and response text only.
    input_tokens = len(enc.encode(user_prompt))
    output_tokens = len(enc.encode(response_text))
    tokens = TokenUsage(
        total=input_tokens + output_tokens,
        input=input_tokens,
        output=output_tokens
    )
    tracker.track_tokens(tokens)

    return response_text

print("[OK] call_streaming_accurate_tokens() defined (streaming-tracking.md:63-115)")

[OK] call_streaming_accurate_tokens() defined (streaming-tracking.md:63-115)


In [17]:
# From: references/streaming-tracking.md lines 119-154
def call_streaming_safe(config, user_prompt: str):
    """Streaming call with error tracking.

    Args:
        config: AI Config object; this function reads ``enabled``,
            ``model.name`` and ``tracker`` from it.
        user_prompt: Text sent as the single user message.

    Returns:
        The concatenated streamed text, or ``None`` when the config is
        disabled.

    Raises:
        Exception: Anything raised while opening or consuming the stream is
            re-raised after ``track_error()`` has been recorded.
    """
    if not config.enabled:
        return None

    tracker = config.tracker
    # perf_counter is monotonic, so wall-clock adjustments cannot skew timings.
    started = time.perf_counter()
    first_token_seen = False

    try:
        stream = openai.chat.completions.create(
            model=config.model.name,
            messages=[{"role": "user", "content": user_prompt}],
            stream=True
        )

        pieces = []
        for chunk in stream:
            # A chunk can arrive with an empty choices list (for example a
            # usage-only chunk); indexing choices[0] on it would raise
            # IndexError and be misreported as a generation error.
            if not chunk.choices:
                continue
            text = chunk.choices[0].delta.content
            if not text:
                continue

            if not first_token_seen:
                first_token_seen = True
                ttft_ms = int((time.perf_counter() - started) * 1000)
                tracker.track_time_to_first_token(ttft_ms)

            pieces.append(text)

        duration_ms = int((time.perf_counter() - started) * 1000)
        tracker.track_duration(duration_ms)
        tracker.track_success()

        return "".join(pieces)

    except Exception:
        tracker.track_error()
        raise

print("[OK] call_streaming_safe() defined (streaming-tracking.md:119-154)")

[OK] call_streaming_safe() defined (streaming-tracking.md:119-154)


In [18]:
# Test streaming tracking
print("=== Testing Streaming Tracking ===")

openai_key = os.environ.get("OPENAI_API_KEY")
if not (openai_key and config.enabled):
    print("[INFO] OPENAI_API_KEY not set or config disabled - skipping live test")
    print("[OK] Functions defined and ready for use")
else:
    openai.api_key = openai_key

    # In order: basic streaming (streaming-tracking.md:14-59), accurate
    # token streaming (63-115), safe streaming (119-154).
    live_checks = (
        (call_streaming_with_tracking, "Say hello in one word"),
        (call_streaming_accurate_tokens, "Say goodbye in one word"),
        (call_streaming_safe, "Say thanks in one word"),
    )
    for streaming_call, prompt in live_checks:
        result = streaming_call(config, prompt)
        if result:
            print(f"[OK] {streaming_call.__name__}: {result[:50]}")

    ld_client.flush()
    print("[OK] Flushed streaming metrics")

=== Testing Streaming Tracking ===
[OK] call_streaming_with_tracking: Hello
[OK] call_streaming_accurate_tokens: Goodbye
[OK] call_streaming_safe: Thanks!
[OK] Flushed streaming metrics


---
## Metrics API
From: `references/metrics-api.md`

In [19]:
# From: references/metrics-api.md lines 24-69
import requests
import time
import os

def get_ai_config_metrics(project_key: str, config_key: str, env: str = "production", hours: int = 24, timeout: float = 10.0):
    """Get AI Config metrics for the last N hours.

    Args:
        project_key: LaunchDarkly project key used in the request URL.
        config_key: AI Config key used in the request URL.
        env: Environment key sent as the ``env`` query parameter.
        hours: Size of the look-back window ending now.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON metrics dict on HTTP 200. ``None`` when the API
        token is missing, the request fails, or a non-200 status comes back;
        an ``[ERROR]`` line is printed in each of those cases.
    """
    api_token = os.environ.get("LAUNCHDARKLY_API_TOKEN")
    if not api_token:
        # Without a token the request cannot be authenticated; say so
        # directly instead of sending a header with no value.
        print("[ERROR] LAUNCHDARKLY_API_TOKEN is not set")
        return None

    # NOTE(review): these bounds are in whole seconds since the epoch -
    # confirm against the API reference whether the endpoint expects
    # milliseconds; all-zero results can be a symptom of a unit mismatch.
    now = int(time.time())
    start = now - (hours * 3600)

    url = f"https://app.launchdarkly.com/api/v2/projects/{project_key}/ai-configs/{config_key}/metrics"

    params = {
        "from": start,
        "to": now,
        "env": env
    }

    headers = {
        "Authorization": api_token,
        "LD-API-Version": "beta"
    }

    try:
        # A timeout keeps the cell from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"[ERROR] Failed to get metrics: {exc}")
        return None

    if response.status_code != 200:
        print(f"[ERROR] Failed to get metrics: {response.status_code}")
        return None

    metrics = response.json()

    def value(key):
        # A key that is present but null would break the numeric format
        # specs below, so treat it as zero for display purposes.
        return metrics.get(key) or 0

    print(f"[OK] Metrics for {config_key} (last {hours} hours, {env}):")
    print(f"     Generations: {value('generationCount'):,}")
    print(f"     Success: {value('generationSuccessCount'):,}")
    print(f"     Errors: {value('generationErrorCount'):,}")
    print(f"     Input Tokens: {value('inputTokens'):,}")
    print(f"     Output Tokens: {value('outputTokens'):,}")
    print(f"     Total Tokens: {value('totalTokens'):,}")
    print(f"     Input Cost: ${value('inputCost'):.4f}")
    print(f"     Output Cost: ${value('outputCost'):.4f}")
    print(f"     Duration (ms): {value('durationMs'):,}")
    print(f"     TTFT (ms): {value('timeToFirstTokenMs'):,}")
    print(f"     Thumbs Up: {value('thumbsUp')}")
    print(f"     Thumbs Down: {value('thumbsDown')}")
    return metrics

print("[OK] get_ai_config_metrics() defined (metrics-api.md:24-69)")

[OK] get_ai_config_metrics() defined (metrics-api.md:24-69)


In [20]:
# Test metrics API
print("=== Testing Metrics API ===")

api_token = os.environ.get("LAUNCHDARKLY_API_TOKEN")
if not api_token:
    print("[INFO] LAUNCHDARKLY_API_TOKEN not set - skipping API test")
    print("[OK] Function defined and ready for use")
else:
    # Exercise get_ai_config_metrics (metrics-api.md:24-69)
    metrics = get_ai_config_metrics("support-ai", "content-assistant", hours=24)
    outcome = (
        "[OK] Metrics retrieved successfully"
        if metrics
        else "[INFO] No metrics available (endpoint may not be active yet)"
    )
    print(outcome)

=== Testing Metrics API ===
[OK] Metrics for content-assistant (last 24 hours, production):
     Generations: 0
     Success: 0
     Errors: 0
     Input Tokens: 0
     Output Tokens: 0
     Total Tokens: 0
     Input Cost: $0.0000
     Output Cost: $0.0000
     Duration (ms): 0
     TTFT (ms): 0
     Thumbs Up: 0
     Thumbs Down: 0
[OK] Metrics retrieved successfully


In [21]:
# Cleanup: close the LaunchDarkly client created in the SDK initialization cell.
ld_client.close()
print("[OK] SDK client closed")

[OK] SDK client closed
