# Agent Memory DB — Evaluation Analysis

Load `eval_results.json` and explore success rates, latency, and task-type breakdown.

**Prerequisites:** Run `make eval` from repo root to generate `eval_results.json`.

In [None]:
import json
from pathlib import Path

# Locate the results file two directories above the current working
# directory, which is expected to be the agent_mem_db/ repo root.
root = Path.cwd().parent.parent  # agent_mem_db/
data_path = root / "eval_results.json"
if not data_path.is_file():
    # Fail with an actionable hint rather than a bare "No such file" error.
    raise FileNotFoundError(
        f"{data_path} not found; run `make eval` from the repo root to generate it."
    )
# Read as UTF-8 (the standard JSON encoding) instead of relying on the
# platform's locale default, which differs between operating systems.
data = json.loads(data_path.read_text(encoding="utf-8"))

# A variant is any top-level key whose value is a dict, except the
# "failure_cases_naive_wins" entry, which is excluded by name.
variants = [
    name
    for name, entry in data.items()
    if isinstance(entry, dict) and name != "failure_cases_naive_wins"
]
print("Variants:", variants)

## Success Rate Comparison

In [None]:
# One aligned row per variant: name padded to 14 columns, then the
# "success_rate" value (0 when the key is absent) with one decimal place.
for name in variants:
    rate = data[name].get("success_rate", 0)
    print(f"{name:14} {rate:5.1f}%")

## Latency (p50, p95, p99)

In [None]:
# One row per variant with its three latency percentiles in milliseconds.
# A percentile key that is absent from the results is shown as 0.
for name in variants:
    stats = data[name]
    p50 = stats.get("latency_p50_ms", 0)
    p95 = stats.get("latency_p95_ms", 0)
    p99 = stats.get("latency_p99_ms", 0)
    print(f"{name:14} p50={p50:.2f}ms  p95={p95:.2f}ms  p99={p99:.2f}ms")

## By Task Type (Short vs Long)

In [None]:
# One row per variant comparing the "short_success" and "long_success"
# values; either one defaults to 0 when the key is absent.
for name in variants:
    stats = data[name]
    short_rate = stats.get("short_success", 0)
    long_rate = stats.get("long_success", 0)
    print(f"{name:14} Short: {short_rate:5.1f}%  Long: {long_rate:5.1f}%")

## Optional: Bar Chart (requires matplotlib)

In [None]:
# Draw the per-variant success rates as a bar chart when matplotlib is available.
try:
    import matplotlib.pyplot as plt
except ImportError:
    # matplotlib is optional; show the install command instead of failing.
    print("Install matplotlib for charts: pip install matplotlib")
else:
    # Plotting lives outside the `try` so that an ImportError raised while
    # drawing is not misreported as "matplotlib is not installed".
    fig, ax = plt.subplots(figsize=(8, 4))
    success_rates = [data[v].get("success_rate", 0) for v in variants]
    ax.bar(variants, success_rates, color=["#e74c3c", "#3498db", "#2ecc71"])
    ax.set_ylabel("Success %")
    ax.set_title("Success Rate by Variant")
    # Leave headroom above 100 so a full-height bar is not clipped by the frame.
    ax.set_ylim(0, 105)
    plt.tight_layout()
    plt.show()