# AI Config Online Evals - Cookbook

Prerequisites: `LAUNCHDARKLY_SDK_KEY`, `OPENAI_API_KEY`, judges enabled via LaunchDarkly UI

In [16]:
# Install the LaunchDarkly SDK packages, openai and python-dotenv into the kernel's environment (-q keeps pip quiet).
# NOTE(review): no versions are pinned, so a fresh install may resolve to newer releases than the ones
# this notebook was last run with - consider pinning once known-good versions are confirmed.
%pip install launchdarkly-server-sdk launchdarkly-server-sdk-ai launchdarkly-server-sdk-ai-openai openai python-dotenv -q

import os
from pathlib import Path
from dotenv import load_dotenv

def find_repo_root(start_path: Path = None) -> Path:
    """Locate the nearest enclosing directory that contains a ``.git`` entry.

    The search begins at ``start_path`` (the current working directory when it
    is omitted) and walks upward through each parent directory.

    Returns:
        The first directory, nearest first, that has a ``.git`` entry; the
        starting directory itself when no such directory is found.
    """
    origin = start_path or Path.cwd()
    # Nearest-first: the starting directory, then each ancestor up to the filesystem root.
    candidates = (origin, *origin.parents)
    return next(
        (directory for directory in candidates if (directory / '.git').exists()),
        origin,
    )

# Load credentials from the .env file at the repository root.
repo_root = find_repo_root()
env_file = repo_root / '.env'
# load_dotenv() returns False when it set no variables (e.g. the file is missing or empty),
# so only report success when something was actually loaded.
if load_dotenv(env_file):
    print(f"[OK] Loaded environment from {env_file}")
else:
    print(f"[WARN] No variables loaded from {env_file} - relying on the existing environment")


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m26.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/opt/homebrew/opt/python@3.11/bin/python3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[OK] Loaded environment from /Users/ld_scarlett/Documents/Github/agent-skills/.env


In [17]:
# SDK initialization (see aiconfig-sdk for details)
from ldclient import Context
from ldclient.config import Config
from ldai.client import LDAIClient, AICompletionConfigDefault
import ldclient

# Read the server-side SDK key from the environment.
SDK_KEY = os.environ.get("LAUNCHDARKLY_SDK_KEY")
if not SDK_KEY:
    # Fail fast with an actionable message instead of handing None to the SDK,
    # which would only surface later as an uninitialized client.
    raise RuntimeError(
        "LAUNCHDARKLY_SDK_KEY is not set - add it to your .env file or export it before running this notebook"
    )

# Configure the shared LaunchDarkly client, then wrap it with the AI client used by the cells below.
ldclient.set_config(Config(SDK_KEY))
ld_client = ldclient.get()
ai_client = LDAIClient(ld_client)

# Report the real initialization state rather than an unconditional "[OK]".
sdk_ready = ld_client.is_initialized()
print(f"[{'OK' if sdk_ready else 'WARN'}] SDK initialized: {sdk_ready}")

[OK] SDK initialized: True


---
## SDK: Check Judge Configuration
From: `SKILL.md` lines 29-60

In [18]:
def check_judges(ai_client, config_key: str, user_id: str):
    """Report which judges are attached to an AI Config for a given user.

    Evaluates ``config_key`` for a context built from ``user_id``, using a
    disabled default config as the fallback, then prints each attached judge's
    key and sampling rate (or a notice when there are none).

    Args:
        ai_client: Client exposing ``completion_config`` (an ``LDAIClient`` in this notebook).
        config_key: Key of the AI Config to evaluate.
        user_id: Key used to build the evaluation context.

    Returns:
        The ``judge_configuration`` attribute of the evaluated config. It may be
        falsy or carry no judges when nothing is configured.
    """
    context = Context.builder(user_id).build()
    config = ai_client.completion_config(
        config_key,
        context,
        AICompletionConfigDefault(enabled=False),
        {}  # empty mapping - presumably the prompt template variables; none are needed here
    )

    if config.judge_configuration and config.judge_configuration.judges:
        print("[OK] Judges attached:")
        for judge in config.judge_configuration.judges:
            # round() rather than int(): binary float error makes e.g. 0.29 * 100 evaluate to
            # 28.999999999999996, which int() would truncate to a misleading 28%.
            print(f"     - {judge.key}: {round(judge.sampling_rate * 100)}% sampling")
    else:
        print("[INFO] No judges configured")

    return config.judge_configuration

print("[OK] check_judges() defined")

[OK] check_judges() defined


In [19]:
# Smoke test for check_judges: list the judges attached to the "content-assistant"
# AI Config as evaluated for the "cookbook-user" context. The returned judge
# configuration is kept in judge_config.
print("=== Testing check_judges ===")
judge_config = check_judges(ai_client, "content-assistant", "cookbook-user")

=== Testing check_judges ===
[OK] Judges attached:
     - ld-ai-judge-accuracy-1770164301356: 100% sampling
     - ld-ai-judge-relevance-1770164301550: 100% sampling
     - ld-ai-judge-toxicity-1770164301695: 100% sampling


---
## SDK: Automatic Evaluation with create_chat
From: `SKILL.md` lines 62-97

**Important:** `create_chat()` passes model parameters directly to the provider. LaunchDarkly uses `maxTokens` (camelCase), but OpenAI expects `max_tokens` (snake_case). For this test, we use a variation that does not set `maxTokens`: `cookbook-user` is targeted to the `cookbook-test` variation.

In [20]:
# Note: asyncio import needed for notebook execution
import asyncio
from ldai.client import AICompletionConfigDefault, ModelConfig, ProviderConfig, LDMessage

async def generate_with_automatic_evaluation(ai_client, config_key: str, user_id: str, prompt: str):
    """Send ``prompt`` through a LaunchDarkly-managed chat and return the reply text.

    A chat is created with ``create_chat`` for a context built from ``user_id``.
    The fallback config passed to it is an enabled OpenAI ``gpt-4`` config with a
    generic system message.

    Returns:
        The content of the response message, or ``None`` when ``create_chat``
        returns a falsy value.
    """
    ld_context = Context.builder(user_id).build()
    fallback_config = AICompletionConfigDefault(
        enabled=True,
        model=ModelConfig("gpt-4"),
        provider=ProviderConfig("openai"),
        messages=[LDMessage(role='system', content='You are a helpful assistant.')],
    )

    chat_session = await ai_client.create_chat(config_key, ld_context, fallback_config)
    if chat_session:
        # Invoking the chat is what triggers the attached judges (results lag by 1-2 min).
        # Scores show up in the Monitoring tab as:
        # $ld:ai:judge:accuracy, $ld:ai:judge:relevance, $ld:ai:judge:toxicity
        reply = await chat_session.invoke(prompt)
        return reply.message.content

    return None

print("[OK] generate_with_automatic_evaluation() defined")

[OK] generate_with_automatic_evaluation() defined


In [21]:
# Test generate_with_automatic_evaluation
# NOTE: cookbook-user is targeted to cookbook-test variation (no maxTokens parameter)
print("=== Testing generate_with_automatic_evaluation ===")
openai_key = os.environ.get("OPENAI_API_KEY")
if openai_key:
    result = await generate_with_automatic_evaluation(ai_client, "content-assistant", "cookbook-user", "Say hello")
    if result:
        print(f"[OK] Response: {result[:100]}...")
        ld_client.flush()
        print("[OK] Events flushed - check Monitoring tab in 1-2 minutes")
    else:
        print("[INFO] Config not enabled or create_chat returned None")
else:
    print("[INFO] OPENAI_API_KEY not set - skipping live test")

=== Testing generate_with_automatic_evaluation ===
[OK] Response: Hello! How can I assist you today?
...
[OK] Events flushed - check Monitoring tab in 1-2 minutes


---
## Viewing Results

Judge evaluation results appear in the **Monitoring tab** of your AI Config:

1. Go to **AI Configs** in LaunchDarkly
2. Select your config (`content-assistant`)
3. Click the **Monitoring** tab
4. View judge scores: `$ld:ai:judge:accuracy`, `$ld:ai:judge:relevance`, `$ld:ai:judge:toxicity`

Results take 1-2 minutes to appear after generation.