# LLAMATOR MCP Server Tutorial

Examples for calling:
- the HTTP API
- the MCP Streamable HTTP endpoint (JSON-RPC)

All examples use `curl` and assume the service is already running.

## 0) Setup

This cell loads configuration from environment variables.

Supported variables:
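- `LLAMATOR_MCP_BASE_URL` (optional; defaults to `http://localhost:<port>`)
- `LLAMATOR_MCP_HTTP_PUBLIC_PORT` / `LLAMATOR_MCP_HTTP_PORT` (optional; the public port takes precedence, the default is `8000`; only used to build the default base URL)
- `LLAMATOR_MCP_API_KEY` (optional; when set, it is sent as the `X-API-Key` header)
- `LLAMATOR_MCP_MCP_MOUNT_PATH` (optional; defaults to `/mcp`)
- `LLAMATOR_MCP_TEST_MCP_PROTOCOL_VERSION` (optional; defaults to `2025-03-26`)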

If a local `.env` file exists, it will be loaded automatically.

In [None]:
from __future__ import annotations

import json
import os
import subprocess
import time
import urllib.parse
from dataclasses import dataclass
from typing import Any

from dotenv import load_dotenv

load_dotenv()


def env_str(name: str, fallback: str) -> str:
    """
    Look up an environment variable and trim surrounding whitespace.

    :param name: Environment variable name.
    :param fallback: Value used when the variable is not set.
    :return: The variable's value (or the fallback) without leading/trailing whitespace.
    """
    raw_value = os.environ.get(name, fallback)
    return raw_value.strip()


def build_auth_header_args(api_key: str) -> list[str]:
    """
    Produce the curl arguments that send the API key as an X-API-Key header.

    :param api_key: API key value; an empty value means no header is sent.
    :return: ``["-H", "X-API-Key: <key>"]``, or an empty list when no key is given.
    """
    return ["-H", f"X-API-Key: {api_key}"] if api_key else []


def run_cmd(cmd: list[str], input_text: str | None) -> str:
    """
    Run a subprocess command and return stdout.

    The command runs in text mode with stdout and stderr captured.

    :param cmd: Command list (executable followed by its arguments).
    :param input_text: Optional stdin payload.
    :return: Stdout text.
    :raises RuntimeError: If the command exits with a non-zero status or
        cannot be started at all (for example, the executable is missing).
    """
    try:
        proc = subprocess.run(
            cmd,
            input=input_text,
            text=True,
            capture_output=True,
            check=True,
        )
    except subprocess.CalledProcessError as exc:
        # Surface whatever the process printed so the failure can be diagnosed.
        stderr = (exc.stderr or "").strip()
        stdout = (exc.stdout or "").strip()
        details = "\n".join([x for x in [stdout, stderr] if x])
        raise RuntimeError(f"Command failed: {' '.join(cmd)}\n{details}") from exc
    except OSError as exc:
        # Raised when the process cannot be spawned (executable not found,
        # not executable, ...). Report it through the documented error type.
        raise RuntimeError(f"Command could not be started: {' '.join(cmd)}\n{exc}") from exc
    return proc.stdout


def parse_json(text: str) -> Any:
    """
    Decode a JSON document.

    :param text: JSON text.
    :return: The decoded Python value.
    :raises ValueError: If the text is not valid JSON.
    """
    decoded: Any = json.loads(text)
    return decoded


def require_non_empty(value: str, label: str) -> str:
    """
    Validate that a string has content once surrounding whitespace is removed.

    :param value: Input string.
    :param label: Name used in the error message.
    :return: The input with leading/trailing whitespace stripped.
    :raises ValueError: If the stripped string is empty.
    """
    trimmed = value.strip()
    if trimmed:
        return trimmed
    raise ValueError(f"{label} must be non-empty")


@dataclass(frozen=True, slots=True)
class TutorialConfig:
    """
    Resolved tutorial configuration.

    :param base_url: HTTP base URL.
    :param api_key: Optional API key.
    :param auth_header_args: curl args for auth header.
    :param mcp_mount_path: MCP mount path.
    :param mcp_endpoint: Full MCP endpoint URL.
    :param mcp_protocol_version: MCP protocol version.
    """

    base_url: str
    api_key: str
    auth_header_args: list[str]
    mcp_mount_path: str
    mcp_endpoint: str
    mcp_protocol_version: str


# Port used for the default base URL: the public port takes precedence over
# the HTTP port, and 8000 is used when neither variable is set.
HTTP_PUBLIC_PORT: str = env_str("LLAMATOR_MCP_HTTP_PUBLIC_PORT", "")
HTTP_PORT: str = env_str("LLAMATOR_MCP_HTTP_PORT", "")
PORT: str = HTTP_PUBLIC_PORT or HTTP_PORT or "8000"

# Trailing slashes are removed so paths can be appended as f"{BASE_URL}/...".
BASE_URL: str = env_str("LLAMATOR_MCP_BASE_URL", f"http://localhost:{PORT}").rstrip("/")
BASE_URL = require_non_empty(BASE_URL, "BASE_URL")

API_KEY: str = env_str("LLAMATOR_MCP_API_KEY", "")
AUTH_HEADER_ARGS: list[str] = build_auth_header_args(API_KEY)

MCP_MOUNT_PATH: str = env_str("LLAMATOR_MCP_MCP_MOUNT_PATH", "/mcp") or "/mcp"
# Build the endpoint from the slash-trimmed mount path so that values such as
# "mcp", "/mcp" and "/mcp/" all resolve to "<base>/mcp/". A bare "/" resolves
# to "<base>/". Without this, a mount path lacking its leading slash would be
# glued directly onto the host/port part of the base URL.
_MCP_MOUNT_SEGMENT: str = MCP_MOUNT_PATH.strip("/")
MCP_ENDPOINT: str = f"{BASE_URL}/{_MCP_MOUNT_SEGMENT}/" if _MCP_MOUNT_SEGMENT else f"{BASE_URL}/"

MCP_PROTOCOL_VERSION: str = env_str("LLAMATOR_MCP_TEST_MCP_PROTOCOL_VERSION", "2025-03-26")

CFG = TutorialConfig(
    base_url=BASE_URL,
    api_key=API_KEY,
    auth_header_args=AUTH_HEADER_ARGS,
    mcp_mount_path=MCP_MOUNT_PATH,
    mcp_endpoint=MCP_ENDPOINT,
    mcp_protocol_version=MCP_PROTOCOL_VERSION,
)

# Only report whether an API key is configured; never print the key itself.
print(f"Base URL: {CFG.base_url}")
print(f"MCP endpoint: {CFG.mcp_endpoint}")
print(f"API key enabled: {bool(CFG.api_key)}")
print(f"MCP protocol version: {CFG.mcp_protocol_version}")


## 1) HTTP API

### 1.1 Healthcheck

Use this endpoint to confirm the server is reachable.

In [None]:
# Healthcheck: GET /v1/health, with -i so the response headers are printed too.
cmd: list[str] = ["curl", "-sS", "-i", "-X", "GET", f"{CFG.base_url}/v1/health"]
cmd += ["-H", "Accept: application/json"]
cmd += CFG.auth_header_args

print("Running: " + " ".join(cmd))
health_response: str = run_cmd(cmd, input_text=None)
print(health_response)


### 1.2 Create a run (preset)

A run request contains:
- `tested_model`: the model to test
- `plan`: which tests to run

This example uses `plan.preset_name`.

In [None]:
# Description of the model under test; connection details come from the
# environment, with fallbacks used when the variables are not set.
tested_model_obj: dict[str, object] = {
    "kind": "openai",
    "base_url": env_str("LLAMATOR_MCP_TEST_TESTED_BASE_URL", "http://host.docker.internal:1234/v1"),
    "model": env_str("LLAMATOR_MCP_TEST_TESTED_MODEL", "llm"),
    "api_key": env_str("LLAMATOR_MCP_TEST_TESTED_API_KEY", "lm-studio") or None,
    "temperature": 0.2,
    "system_prompts": ["You are a helpful assistant."],
    "model_description": "Example tested model",
}

# An empty API key is omitted from the request instead of being sent as null.
if tested_model_obj["api_key"] is None:
    del tested_model_obj["api_key"]

payload_preset: dict[str, object] = {
    "tested_model": tested_model_obj,
    "run_config": {"enable_reports": False},
    "plan": {"preset_name": "owasp:llm10", "num_threads": 1},
}

print("Request payload:")
pretty_payload: str = json.dumps(payload_preset, ensure_ascii=False, indent=2, sort_keys=True)
print(pretty_payload)


In [None]:
# Submit the preset run: POST /v1/tests/runs with the JSON body fed via stdin.
payload_text: str = json.dumps(payload_preset, ensure_ascii=False)

cmd: list[str] = ["curl", "-sS", "-X", "POST", f"{CFG.base_url}/v1/tests/runs"]
cmd += ["-H", "Accept: application/json"]
cmd += ["-H", "Content-Type: application/json"]
cmd += CFG.auth_header_args
cmd += ["--data-binary", "@-"]  # "@-" makes curl read the request body from stdin

print("Running: " + " ".join(cmd))
out: str = run_cmd(cmd, input_text=payload_text)

created_preset: object = parse_json(out)
if not isinstance(created_preset, dict):
    raise ValueError("Expected a JSON object from /v1/tests/runs.")

print("Response:")
print(json.dumps(created_preset, ensure_ascii=False, indent=2, sort_keys=True))

# The job id is needed by the polling and artifact cells below.
raw_job_id: str = str(created_preset.get("job_id", ""))
JOB_ID_PRESET: str = require_non_empty(raw_job_id, "job_id")
print(f"Created job_id: {JOB_ID_PRESET}")


### 1.3 Poll job status (preset)

Polling loop:
- request `/v1/tests/runs/{job_id}`
- wait until the job becomes `succeeded` or `failed`

The final response contains `result` and/or `error` fields.

In [None]:
# Poll GET /v1/tests/runs/{job_id} until the job reaches a final status.
job_id: str = JOB_ID_PRESET
timeout_s: float = 3600.0
poll_interval_s: float = 0.5

# The deadline is tracked on the monotonic clock: unlike time.time(), it does
# not jump when the system clock is adjusted, so the timeout stays reliable.
deadline: float = time.monotonic() + timeout_s
last_status: str | None = None
final_job: dict[str, object] | None = None

# The status request is the same on every iteration, so it is built once.
cmd = [
    "curl",
    "-sS",
    "-X",
    "GET",
    f"{CFG.base_url}/v1/tests/runs/{job_id}",
    "-H",
    "Accept: application/json",
    *CFG.auth_header_args,
]

while time.monotonic() < deadline:
    out = run_cmd(cmd, input_text=None)
    payload = parse_json(out)
    if not isinstance(payload, dict):
        raise ValueError("Expected a JSON object from /v1/tests/runs/{job_id}.")

    status: str = str(payload.get("status", "")).strip().lower()
    updated_at: str = str(payload.get("updated_at", "")).strip()

    # Only report transitions to keep the output short during long runs.
    if status != last_status:
        print(f"Status changed: {status} (updated_at={updated_at})")
        last_status = status

    if status in ("succeeded", "failed"):
        final_job = payload
        break

    time.sleep(poll_interval_s)

if final_job is None:
    raise TimeoutError(f"Job did not finish within {timeout_s} seconds: job_id={job_id}")

print("Final job response:")
print(json.dumps(final_job, ensure_ascii=False, indent=2, sort_keys=True))


### 1.4 List artifacts (preset)

Artifacts are files produced by a run. The API returns metadata for each file.

In [None]:
# List the artifacts of the preset run: GET /v1/tests/runs/{job_id}/artifacts.
job_id: str = JOB_ID_PRESET

cmd: list[str] = ["curl", "-sS", "-X", "GET", f"{CFG.base_url}/v1/tests/runs/{job_id}/artifacts"]
cmd += ["-H", "Accept: application/json"]
cmd += CFG.auth_header_args

print("Running: " + " ".join(cmd))
out: str = run_cmd(cmd, input_text=None)

artifacts_list: object = parse_json(out)
if not isinstance(artifacts_list, dict):
    raise ValueError("Expected a JSON object from /artifacts.")

print("Artifacts list response:")
pretty_artifacts: str = json.dumps(artifacts_list, ensure_ascii=False, indent=2, sort_keys=True)
print(pretty_artifacts)


### 1.5 Resolve a download link for the artifact (preset)

The server returns a JSON object with a temporary `download_url`.

In [None]:
# Resolve a download link for the first artifact listed for the preset run.
job_id: str = JOB_ID_PRESET

# Keep only the dict entries of the "files" list; anything else is ignored.
files_val: object = None
if isinstance(artifacts_list, dict):
    files_val = artifacts_list.get("files")

files: list[dict[str, object]] = []
if isinstance(files_val, list):
    files = [entry for entry in files_val if isinstance(entry, dict)]

if not files:
    print("No artifacts were produced for this job.")
elif not (first_path := str(files[0].get("path", "")).strip()):
    print("The first artifact entry does not contain a valid path.")
    print(json.dumps(files[0], ensure_ascii=False, indent=2, sort_keys=True))
else:
    # Percent-encode the path but keep "/" so nested paths stay readable in the URL.
    quoted: str = urllib.parse.quote(first_path, safe="/")
    url: str = f"{CFG.base_url}/v1/tests/runs/{job_id}/artifacts/{quoted}"

    cmd = ["curl", "-sS", "-X", "GET", url, "-H", "Accept: application/json", *CFG.auth_header_args]

    print("Running: " + " ".join(cmd))
    out = run_cmd(cmd, input_text=None)

    payload = parse_json(out)
    if not isinstance(payload, dict):
        raise ValueError("Expected a JSON object from the artifact download resolver.")

    print("Download link response:")
    print(json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True))


### 1.6 Create a run (explicit basic tests)

Instead of a preset, you can pass `plan.basic_tests`.

Each test has:
- `code_name`
- `params`: a list of `{name, value}` objects

In [None]:
# Tests to run, as (code_name, params) pairs; params are expanded below into
# the {name, value} objects used in the request.
basic_tests: list[tuple[str, dict[str, object]]] = [
    ("repetition_token", {"num_attempts": 1, "repeat_count": 3}),
    ("system_prompt_leakage", {"custom_dataset": None, "multistage_depth": 3, "num_attempts": 1}),
]

tested_model_obj2: dict[str, object] = {
    "kind": "openai",
    "base_url": env_str("LLAMATOR_MCP_TEST_TESTED_BASE_URL", "http://host.docker.internal:1234/v1"),
    "model": env_str("LLAMATOR_MCP_TEST_TESTED_MODEL", "llm"),
    "api_key": env_str("LLAMATOR_MCP_TEST_TESTED_API_KEY", "lm-studio") or None,
    "temperature": 0.3,
    "system_prompts": ["You are a helpful assistant."],
    "model_description": "Example tested model",
}

# An empty API key is omitted from the request instead of being sent as null.
if tested_model_obj2["api_key"] is None:
    del tested_model_obj2["api_key"]

plan_tests: list[dict[str, object]] = []
for code_name, params in basic_tests:
    param_entries = [{"name": param_name, "value": param_value} for param_name, param_value in params.items()]
    plan_tests.append({"code_name": code_name, "params": param_entries})

payload_basic_tests: dict[str, object] = {
    "tested_model": tested_model_obj2,
    "run_config": {"enable_reports": False},
    "plan": {"num_threads": 1, "basic_tests": plan_tests},
}

print("Request payload:")
print(json.dumps(payload_basic_tests, ensure_ascii=False, indent=2, sort_keys=True))


In [None]:
# Submit the explicit-tests run: POST /v1/tests/runs with the JSON body via stdin.
payload_text: str = json.dumps(payload_basic_tests, ensure_ascii=False)

request_headers: list[str] = ["Accept: application/json", "Content-Type: application/json"]

cmd: list[str] = ["curl", "-sS", "-X", "POST", f"{CFG.base_url}/v1/tests/runs"]
for header in request_headers:
    cmd.extend(["-H", header])
cmd.extend(CFG.auth_header_args)
cmd.extend(["--data-binary", "@-"])  # "@-" makes curl read the request body from stdin

print("Running: " + " ".join(cmd))
out: str = run_cmd(cmd, input_text=payload_text)

created_basic: object = parse_json(out)
if not isinstance(created_basic, dict):
    raise ValueError("Expected a JSON object from /v1/tests/runs.")

print("Response:")
print(json.dumps(created_basic, ensure_ascii=False, indent=2, sort_keys=True))

# The job id is needed by the polling cell below.
raw_job_id_basic: str = str(created_basic.get("job_id", ""))
JOB_ID_BASIC: str = require_non_empty(raw_job_id_basic, "job_id")
print(f"Created job_id: {JOB_ID_BASIC}")


### 1.7 Poll job status (basic tests)

Same polling logic as the preset run.

In [None]:
# Poll GET /v1/tests/runs/{job_id} until the job reaches a final status.
job_id: str = JOB_ID_BASIC
timeout_s: float = 3600.0
poll_interval_s: float = 0.5

# The deadline is tracked on the monotonic clock: unlike time.time(), it does
# not jump when the system clock is adjusted, so the timeout stays reliable.
deadline: float = time.monotonic() + timeout_s
last_status: str | None = None
final_job_basic: dict[str, object] | None = None

# The status request is the same on every iteration, so it is built once.
cmd = [
    "curl",
    "-sS",
    "-X",
    "GET",
    f"{CFG.base_url}/v1/tests/runs/{job_id}",
    "-H",
    "Accept: application/json",
    *CFG.auth_header_args,
]

while time.monotonic() < deadline:
    out = run_cmd(cmd, input_text=None)
    payload = parse_json(out)
    if not isinstance(payload, dict):
        raise ValueError("Expected a JSON object from /v1/tests/runs/{job_id}.")

    status: str = str(payload.get("status", "")).strip().lower()
    updated_at: str = str(payload.get("updated_at", "")).strip()

    # Only report transitions to keep the output short during long runs.
    if status != last_status:
        print(f"Status changed: {status} (updated_at={updated_at})")
        last_status = status

    if status in ("succeeded", "failed"):
        final_job_basic = payload
        break

    time.sleep(poll_interval_s)

if final_job_basic is None:
    raise TimeoutError(f"Job did not finish within {timeout_s} seconds: job_id={job_id}")

print("Final job response:")
print(json.dumps(final_job_basic, ensure_ascii=False, indent=2, sort_keys=True))


## 2) MCP (Streamable HTTP JSON-RPC)

The MCP endpoint uses JSON-RPC over HTTP POST.

Typical sequence:
1. `initialize`
2. `notifications/initialized`
3. `tools/list`
4. `tools/call`

### 2.1 Initialize

This call may return `Mcp-Session-Id` in response headers. If present, include it in subsequent calls.

In [None]:
# JSON-RPC "initialize" request announcing the protocol version and client identity.
init_params: dict[str, object] = {
    "protocolVersion": CFG.mcp_protocol_version,
    "capabilities": {},
    "clientInfo": {"name": "curl-notebook", "version": "1.0.0"},
}
init_msg: dict[str, object] = {"jsonrpc": "2.0", "id": 1, "method": "initialize", "params": init_params}

payload_text: str = json.dumps(init_msg, ensure_ascii=False)

# -i includes the response headers in the output; they are scanned below for the session id.
cmd: list[str] = ["curl", "-sS", "-i", "-X", "POST", CFG.mcp_endpoint]
for header in (
    "Accept: application/json, text/event-stream",
    "Content-Type: application/json",
    f"MCP-Protocol-Version: {CFG.mcp_protocol_version}",
    f"Origin: {CFG.base_url}",
):
    cmd += ["-H", header]
cmd += [*CFG.auth_header_args, "--data-binary", "@-"]

print("Running: " + " ".join(cmd))
raw: str = run_cmd(cmd, input_text=payload_text)

print("Raw response (headers + body):")
print(raw[:4000])

# Everything before the first blank line (CRLF CRLF) is treated as the header block.
header_block: str = raw.partition("\r\n\r\n")[0]

# Header names are matched case-insensitively; the first match wins.
session_id: str | None = None
for line in header_block.splitlines():
    header_name, header_sep, header_value = line.partition(":")
    if header_sep and header_name.lower() == "mcp-session-id":
        session_id = header_value.strip()
        break

print(f"Session id: {session_id}")


### 2.2 Send `notifications/initialized`

Standard follow-up after `initialize`.

In [None]:
# JSON-RPC notification (it carries no "id") sent after "initialize".
notif_msg: dict[str, object] = {"jsonrpc": "2.0", "method": "notifications/initialized"}

payload_text: str = json.dumps(notif_msg, ensure_ascii=False)

# The session header is sent only when the initialize response provided a session id.
session_header_args: list[str] = []
if session_id:
    session_header_args = ["-H", f"Mcp-Session-Id: {session_id}"]

cmd: list[str] = ["curl", "-sS", "-i", "-X", "POST", CFG.mcp_endpoint]
cmd += ["-H", "Accept: application/json, text/event-stream"]
cmd += ["-H", "Content-Type: application/json"]
cmd += ["-H", f"MCP-Protocol-Version: {CFG.mcp_protocol_version}"]
cmd += ["-H", f"Origin: {CFG.base_url}"]
cmd += session_header_args
cmd += CFG.auth_header_args
cmd += ["--data-binary", "@-"]

print("Running: " + " ".join(cmd))
notif_response: str = run_cmd(cmd, input_text=payload_text)
print(notif_response[:2000])


### 2.3 List available tools

The response contains MCP tools and their input schemas.

In [None]:
# JSON-RPC "tools/list" request.
tools_list_msg: dict[str, object] = {"jsonrpc": "2.0", "id": 2, "method": "tools/list", "params": {}}

payload_text: str = json.dumps(tools_list_msg, ensure_ascii=False)

# The session header is sent only when a session id is known.
session_header_args = []
if session_id:
    session_header_args = ["-H", f"Mcp-Session-Id: {session_id}"]

mcp_headers: list[str] = [
    "Accept: application/json, text/event-stream",
    "Content-Type: application/json",
    f"MCP-Protocol-Version: {CFG.mcp_protocol_version}",
    f"Origin: {CFG.base_url}",
]

cmd: list[str] = ["curl", "-sS", "-X", "POST", CFG.mcp_endpoint]
for header in mcp_headers:
    cmd.extend(["-H", header])
cmd.extend(session_header_args)
cmd.extend(CFG.auth_header_args)
cmd.extend(["--data-binary", "@-"])

print("Running: " + " ".join(cmd))
out: str = run_cmd(cmd, input_text=payload_text)

# The body is parsed as a single JSON document.
tools_payload: object = parse_json(out)
if not isinstance(tools_payload, dict):
    raise ValueError("Expected a JSON object from tools/list.")

pretty_tools: str = json.dumps(tools_payload, ensure_ascii=False, indent=2, sort_keys=True)
print(pretty_tools)


### 2.4 Call `create_llamator_run`

This tool submits a run and waits for completion.

The output includes:
- `job_id`
- `aggregated`
- `artifacts_download_url` (optional)
- `error_notice` (optional)

In [None]:
# JSON-RPC "tools/call" request invoking create_llamator_run with the preset payload.
tool_call_params: dict[str, object] = {
    "name": "create_llamator_run",
    "arguments": {"req": payload_preset},
}
mcp_create_msg: dict[str, object] = {
    "jsonrpc": "2.0",
    "id": 3,
    "method": "tools/call",
    "params": tool_call_params,
}

payload_text: str = json.dumps(mcp_create_msg, ensure_ascii=False)

# The session header is sent only when a session id is known.
session_header_args = []
if session_id:
    session_header_args = ["-H", f"Mcp-Session-Id: {session_id}"]

cmd: list[str] = ["curl", "-sS", "-X", "POST", CFG.mcp_endpoint]
cmd += ["-H", "Accept: application/json, text/event-stream"]
cmd += ["-H", "Content-Type: application/json"]
cmd += ["-H", f"MCP-Protocol-Version: {CFG.mcp_protocol_version}"]
cmd += ["-H", f"Origin: {CFG.base_url}"]
cmd += [*session_header_args, *CFG.auth_header_args]
cmd += ["--data-binary", "@-"]

print("Running: " + " ".join(cmd))
out: str = run_cmd(cmd, input_text=payload_text)

# The body is parsed as a single JSON document.
mcp_created_payload: object = parse_json(out)
if not isinstance(mcp_created_payload, dict):
    raise ValueError("Expected a JSON object from tools/call.")

pretty_result: str = json.dumps(mcp_created_payload, ensure_ascii=False, indent=2, sort_keys=True)
print(pretty_result)