# Inspect trace trees

This notebook loads JSONL traces (one event per line) and helps you:

- list sessions (`trace_id`),
- inspect a single trace as a simple timeline,
- view a rough parent/child tree when `parent_span_id` (or the fallback key `parent_id`) is present.

It is designed to work with the synthetic traces under:

- `examples/synthetic_traces/simple_correction_loop.jsonl`
- `examples/synthetic_traces/noisy_mixed_sessions.jsonl`

You can also point it at your own JSONL logs with a similar structure.


In [None]:
import json
from collections import defaultdict
from pathlib import Path
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

# Folder holding the bundled example traces (a relative path; it is resolved
# against the current working directory only for the message below).
DATA_DIR = Path("examples", "synthetic_traces")
print(f"Using synthetic traces from: {DATA_DIR.resolve()}")


In [None]:
@dataclass
class Event:
    """Thin read-only view over one decoded JSONL trace record.

    The decoded JSON object is kept untouched in ``raw``; each property reads
    one key and normalises the value so callers do not have to handle missing
    or null fields themselves.
    """

    # Decoded JSON object for this event, exactly as read from the file.
    raw: Dict[str, Any]

    def _text(self, key: str) -> str:
        """Return ``raw[key]`` as a string, or ``""`` when absent or null."""
        value = self.raw.get(key)
        # An explicit JSON null would otherwise surface as the literal string
        # "None" and be indistinguishable from a real identifier.
        return "" if value is None else str(value)

    @property
    def trace_id(self) -> str:
        """Identifier of the trace this event belongs to (``""`` if unset)."""
        return self._text("trace_id")

    @property
    def span_id(self) -> str:
        """Identifier of this event's span (``""`` if unset)."""
        return self._text("span_id")

    @property
    def parent_span_id(self) -> Optional[str]:
        """Parent span identifier, or ``None`` when the event has no parent.

        ``parent_span_id`` is preferred; ``parent_id`` is the fallback key.
        """
        for key in ("parent_span_id", "parent_id"):
            value = self.raw.get(key)
            # Only null and empty values mean "not set"; a numeric id of 0 is
            # a legitimate parent and must not be dropped.
            if value is None or value == "":
                continue
            return str(value)
        return None

    @property
    def timestamp(self) -> Optional[str]:
        """Timestamp rendered as a string, or ``None`` when absent or null."""
        ts = self.raw.get("timestamp")
        return str(ts) if ts is not None else None

    @property
    def event_type(self) -> str:
        """Value of the ``event_type`` field (``""`` if unset)."""
        return self._text("event_type")

    @property
    def component(self) -> str:
        """Value of the ``component`` field (``""`` if unset)."""
        return self._text("component")

    @property
    def payload(self) -> Dict[str, Any]:
        """The ``payload`` mapping, or an empty dict when it is not a dict."""
        obj = self.raw.get("payload")
        return obj if isinstance(obj, dict) else {}


In [None]:
def load_jsonl(path: Path) -> List[Event]:
    """Read a JSONL file and wrap each JSON-object line in an ``Event``.

    Loading is best-effort: blank lines, lines that are not valid JSON, and
    lines whose top-level JSON value is not an object are skipped silently.

    Args:
        path: Location of the JSONL file; it is opened as UTF-8 text.

    Returns:
        The events, in file order.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
    """
    events: List[Event] = []
    with path.open("r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object (a list, number, string, ...)
            # cannot back an Event: its accessors call ``raw.get`` and would
            # fail later, far from the line that caused it.
            if not isinstance(obj, dict):
                continue
            events.append(Event(obj))
    return events

def group_by_trace(events: List[Event]):
    """Bucket events by their ``trace_id``, preserving input order.

    Returns a ``defaultdict(list)`` mapping each trace id to the events that
    carry it.
    """
    buckets = defaultdict(list)
    for event in events:
        bucket = buckets[event.trace_id]
        bucket.append(event)
    return buckets


In [None]:
# Read one of the bundled example files and bucket its events per trace.
file_path = DATA_DIR.joinpath("simple_correction_loop.jsonl")
events = load_jsonl(file_path)
traces = group_by_trace(events)
print(f"Loaded {len(events)} events across {len(traces)} sessions from {file_path}")
# Last expression of the cell: the notebook displays the trace ids.
list(traces)

In [None]:
def print_timeline(events: List[Event], max_rows: int = 40) -> None:
    """Print a header and then one line per event, in the order given.

    Each line shows the timestamp (``?`` when missing), the component, and the
    event type, followed by ``latency_ms`` when the payload carries it. Events
    are not re-sorted.

    Args:
        events: Events to list; the header uses the first event's trace id.
        max_rows: Maximum number of events to print.
    """
    if not events:
        # There is no first event to take a trace id from; indexing
        # events[0] would raise IndexError.
        print("No events to show.")
        return
    print(f"Timeline for trace_id={events[0].trace_id} (showing up to {max_rows} events)\n")
    for ev in events[:max_rows]:
        ts = ev.timestamp or "?"
        et = ev.event_type
        comp = ev.component
        extra = ""
        if "latency_ms" in ev.payload:
            extra = f" latency_ms={ev.payload['latency_ms']}"
        print(f"{ts}  [{comp}]  {et}{extra}")


In [None]:
# Pick a trace_id to inspect: the first one encountered while loading.
# A default is passed to next() so an empty input file yields a message
# instead of a StopIteration traceback.
trace_id = next(iter(traces), None)
if trace_id is None:
    print("No traces loaded; nothing to inspect.")
else:
    print_timeline(traces[trace_id])

In [None]:
def build_tree(events: List[Event]):
    """Split one trace's events into roots and a parent -> children index.

    An event becomes a child only when its ``parent_span_id`` matches the
    ``span_id`` of some event in ``events``. Everything else is a root:

    - events with no parent;
    - events whose parent is absent from ``events`` (otherwise they would be
      unreachable from any root and silently missing from the tree);
    - events that name their own span as parent (otherwise a recursive walk
      over ``children`` would never terminate).

    Returns:
        ``(roots, children)``: ``roots`` is a list of events in input order,
        ``children`` is a ``defaultdict(list)`` keyed by parent span id.
    """
    known_spans = {ev.span_id for ev in events}
    children = defaultdict(list)
    roots = []
    for ev in events:
        parent = ev.parent_span_id
        if parent and parent in known_spans and parent != ev.span_id:
            children[parent].append(ev)
        else:
            roots.append(ev)
    return roots, children

def print_tree(ev: Event, children, depth: int = 0) -> None:
    """Print ``ev`` and, recursively, its descendants as an indented list.

    Args:
        ev: Event to print at this level.
        children: Mapping from a span id to the events parented by it.
        depth: Nesting level; each level indents by two spaces.
    """
    payload = ev.payload
    suffix = f"  latency_ms={payload['latency_ms']}" if "latency_ms" in payload else ""
    print(f"{'  ' * depth}- {ev.event_type} ({ev.span_id}){suffix}")
    # .get() rather than indexing, so a defaultdict gains no empty entries.
    for kid in children.get(ev.span_id, ()):
        print_tree(kid, children, depth + 1)


In [None]:
# Render the selected trace as a tree, but only if at least one event was
# attached to a parent; otherwise say so instead of printing a flat list.
roots, children = build_tree(traces[trace_id])
if children:
    for root in roots:
        print_tree(root, children)
else:
    print("No parent_span_id relationships detected in this trace.")
