# Offline Evals via Logs

## Initialize Maxim SDK

In [None]:
import os
from typing import Dict, List
from maxim import Maxim
from maxim.models import (
    LocalData,
    YieldedOutput,
    QueryBuilder,
)
from maxim.models.dataset import DataStructure

# Create the Maxim client; prompt management must be on to fetch prompts later
maxim = Maxim(
    {
        "api_key": os.environ.get("MAXIM_API_KEY"),
        "prompt_management": True,
    }
)

# Identifiers read from the environment (each is None when its variable is unset)
WORKSPACE_ID = os.environ.get("MAXIM_WORKSPACE_ID")
DATASET_ID = os.environ.get("MAXIM_DATASET_ID")  # hosted dataset examples
PROMPT_ID = os.environ.get("MAXIM_PROMPT_ID")  # prompt management example

## Local Dataset and Local Agent

Define your test data and agent logic entirely in code.

In [None]:
# Test rows kept in code: each (question, expected answer) pair becomes one entry
local_dataset: List[LocalData] = [
    {"input": question, "expected_output": answer}
    for question, answer in (
        ("What is the capital of France?", "Paris"),
        (
            "Explain photosynthesis in one sentence.",
            "A process where plants convert sunlight to energy",
        ),
    )
]

# Maps each column name used in the rows above to its role in the test run
data_structure: DataStructure = {"input": "INPUT", "expected_output": "EXPECTED_OUTPUT"}

In [None]:
from openai import OpenAI

# OpenAI client used by the local agent; the key is read from OPENAI_API_KEY
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

def run_local_agent(data: LocalData) -> YieldedOutput:
    """
    Answer one dataset entry with the OpenAI chat completions API.

    Called for each entry in the test dataset.

    Args:
        data: A dataset row. The question is read from the "Input" key,
            falling back to "input", and finally to an empty string.

    Returns:
        A YieldedOutput wrapping the model's reply text. An empty string is
        used when the reply carries no text content.
    """
    user_input = data.get("Input") or data.get("input") or ""

    response = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant. Be concise."},
            {"role": "user", "content": user_input}
        ],
        max_tokens=100,
    )

    # The message content is optional in the OpenAI response, so normalise
    # None to "" rather than handing a non-string to YieldedOutput.
    reply = response.choices[0].message.content
    return YieldedOutput(data=reply or "")

In [None]:
# Run test with local dataset + local agent
result = (
    maxim.create_test_run(
        name="Local Dataset + Local Agent",
        in_workspace_id=WORKSPACE_ID,
    )
    .with_data(local_dataset)
    .with_data_structure(data_structure)
    .yields_output(run_local_agent)
    .with_evaluators("Clarity", "Output Relevance")
    .run()
)

# NOTE(review): guard against run() yielding no result (e.g. a failed or
# timed-out run) before dereferencing it; confirm against the SDK's return type.
if result is not None:
    print(f"View results: {result.test_run_result.link}")
else:
    print("Test run did not return a result; check the SDK logs for errors.")

## Hosted Dataset on Maxim and Local Agent

Use a dataset stored on Maxim's platform: pass its dataset ID to `with_data()` instead of a local list. This example does not call `with_data_structure()`.

In [None]:
# Run test with hosted dataset
result = (
    maxim.create_test_run(
        name="Hosted Dataset + Local Agent",
        in_workspace_id=WORKSPACE_ID,
    )
    .with_data(DATASET_ID)  # Pass dataset ID from Maxim
    .yields_output(run_local_agent)
    .with_evaluators("Clarity", "Output Relevance")
    .run()
)

# NOTE(review): guard against run() yielding no result (e.g. a failed or
# timed-out run) before dereferencing it; confirm against the SDK's return type.
if result is not None:
    print(f"View results: {result.test_run_result.link}")
else:
    print("Test run did not return a result; check the SDK logs for errors.")

## Fetch Prompt from Maxim

Use `maxim.get_prompt()` to fetch a deployed prompt and run it.

In [None]:
# Look up the prompt whose deployment variable "Environment" is set to "prod"
prompt = maxim.get_prompt(
    id=PROMPT_ID,
    rule=QueryBuilder().and_().deployment_var("Environment", "prod").build(),
)

# NOTE(review): assumes the returned prompt exposes name/model/provider
# attributes; confirm against the SDK's prompt type.
if not prompt:
    print("No matching prompt found")
else:
    print(f"Fetched prompt: {prompt.name}")
    print(f"Model: {prompt.model}")
    print(f"Provider: {prompt.provider}")

In [None]:
def run_maxim_prompt(data: LocalData) -> YieldedOutput:
    """
    Produce the output for one dataset row using the prompt fetched from Maxim.

    The question is taken from the row's "Input" key, then "input", and
    defaults to an empty string. It is passed to the module-level `prompt`
    object, and the first choice's message content is returned wrapped in a
    YieldedOutput.
    """
    question = data.get("Input")
    if not question:
        question = data.get("input") or ""

    # prompt.run() uses the model/provider configured in Maxim
    completion = prompt.run(question)
    first_choice = completion.choices[0]
    return YieldedOutput(data=first_choice.message.content)

In [None]:
# Run test using the fetched prompt (skipped when no prompt was found)
if prompt:
    result = (
        maxim.create_test_run(
            name="Maxim Prompt Test",
            in_workspace_id=WORKSPACE_ID,
        )
        .with_data(local_dataset)
        .with_data_structure(data_structure)
        .yields_output(run_maxim_prompt)
        .with_evaluators("Clarity", "Output Relevance")
        .run()
    )

    # NOTE(review): guard against run() yielding no result (e.g. a failed or
    # timed-out run) before dereferencing it; confirm against the SDK's
    # return type.
    if result is not None:
        print(f"View results: {result.test_run_result.link}")
    else:
        print("Test run did not return a result; check the SDK logs for errors.")

If you already know the prompt version ID, use `with_prompt_version_id()` instead of `yields_output()`.

In [None]:
# ID of the prompt version stored on Maxim (None when the variable is unset)
PROMPT_VERSION_ID = os.getenv("MAXIM_PROMPT_VERSION_ID")

# Run test with a specific prompt version
result = (
    maxim.create_test_run(
        name="Prompt Version Test",
        in_workspace_id=WORKSPACE_ID,
    )
    .with_data(local_dataset)
    .with_data_structure(data_structure)
    .with_prompt_version_id(PROMPT_VERSION_ID)  # Use prompt stored on Maxim
    .with_evaluators("Clarity", "Output Relevance")
    .run()
)

# NOTE(review): guard against run() yielding no result (e.g. a failed or
# timed-out run) before dereferencing it; confirm against the SDK's return type.
if result is not None:
    print(f"View results: {result.test_run_result.link}")
else:
    print("Test run did not return a result; check the SDK logs for errors.")

## Hosted Dataset and Prompt

The simplest configuration: both the dataset and the prompt are hosted on Maxim, so no local data or agent function is needed.

In [None]:
# Everything hosted on Maxim
result = (
    maxim.create_test_run(
        name="Fully Hosted Test",
        in_workspace_id=WORKSPACE_ID,
    )
    .with_data(DATASET_ID)                      # Hosted dataset
    .with_prompt_version_id(PROMPT_VERSION_ID)  # Hosted prompt
    .with_evaluators("Clarity", "Bias")         # Built-in evaluators
    .run()
)

# NOTE(review): guard against run() yielding no result (e.g. a failed or
# timed-out run) before dereferencing it; confirm against the SDK's return type.
if result is not None:
    print(f"View results: {result.test_run_result.link}")
else:
    print("Test run did not return a result; check the SDK logs for errors.")

## Related Resources

- [Local Agent Testing](https://www.getmaxim.ai/docs/offline-evals/via-sdk/local-agent)
- [Prompt Management](https://www.getmaxim.ai/docs/offline-evals/via-sdk/prompts/prompt-management)
- [Datasets](https://www.getmaxim.ai/docs/library/datasets/import-or-create-datasets)
- [Pre-built Evaluators](https://www.getmaxim.ai/docs/library/evaluators/pre-built-evaluators/overview)