In [1]:
import json, re
from pathlib import Path
import numpy as np
import pandas as pd

# Notebook configuration. NOTE(review): repo_root is a hard-coded absolute path on one
# machine; edit it to point at your own checkout before running.
repo_root = Path("/Users/wanchunni/Documents/code/rlaif-llm-translator")  # set to your repo root
scene_slug = "37_millennium"

# Inputs for the selected scene: the directory scanned for metrics_*.json files
# and the JSONL file holding one scene context per line.
metrics_dir = repo_root / f"sim/results/{scene_slug}/metrics"
context_path = repo_root / f"preprocess/preprocessed_scene/{scene_slug}/preprocessed.jsonl"

# Fail fast with a clear message. is_file()/is_dir() (rather than exists()) also
# reject a path of the wrong kind, e.g. a directory where the JSONL file should be,
# which would otherwise fail later with a less helpful error or an empty result.
if not context_path.is_file():
    raise FileNotFoundError(f"Context file not found: {context_path}")
if not metrics_dir.is_dir():
    raise FileNotFoundError(f"Metrics dir not found: {metrics_dir}")

# Load context rows keyed by scene_index.
# Each non-blank line of the JSONL file is parsed as one JSON object; if two lines
# share a scene_index, the later one replaces the earlier one.
contexts = {}
# JSON text is UTF-8 by specification; do not rely on the platform default encoding.
with context_path.open(encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. extra trailing newlines) instead of
            # crashing in json.loads.
            continue
        ctx = json.loads(line)
        contexts[int(ctx["scene_index"])] = ctx

# Obstacle counter (first array in the npz by default; pass `key` to pick another)
_obstacle_cache = {}
def count_obstacles(npz_path: Path, key=None) -> int:
    """Return the length of one array stored in an ``.npz`` archive.

    The result is memoised in ``_obstacle_cache`` per (resolved path, key), so each
    archive is opened at most once per requested key.

    Args:
        npz_path: Path (``Path`` or ``str``) to the ``.npz`` file.
        key: Name of the array to measure. When ``None`` (the default), the first
            array listed in the archive is used.

    Returns:
        ``len()`` of the selected array, i.e. the size of its first axis
        (presumably one entry per obstacle -- confirm against the file format).

    Raises:
        ValueError: If ``key`` is ``None`` and the archive contains no arrays.
        KeyError: If ``key`` is given but not present in the archive.
    """
    npz_path = Path(npz_path).resolve()
    cache_key = (npz_path, key)
    if cache_key in _obstacle_cache:
        return _obstacle_cache[cache_key]
    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
    # execute arbitrary code. Only use this on archives you trust; switch to
    # allow_pickle=False if the obstacle arrays are plain numeric arrays.
    with np.load(npz_path, allow_pickle=True) as data:
        if key is None:
            if not data.files:
                # Previously surfaced as a bare StopIteration from next(iter(...)).
                raise ValueError(f"No arrays found in obstacle archive: {npz_path}")
            key = data.files[0]
        count = len(data[key])
    _obstacle_cache[cache_key] = count
    return count

# Columns taken from the scene context (plus the derived obstacle count); every
# other column in the final frame comes from the metrics JSON.
CONTEXT_COLS = [
    "scene_name", "context_id", "category", "crowd_size", "scenario",
    "goal_cnt", "groups", "toward_event_center", "map_obstacle_count",
]

# Build one row per metrics file: context fields joined with that run's metrics.
rows = []
for mfile in sorted(metrics_dir.glob("metrics_*.json")):
    # The context id is the integer between "metrics_" and the next underscore
    # in the file name; files that do not match this pattern are skipped.
    m = re.search(r"metrics_(\d+)_", mfile.name)
    if not m:
        continue
    context_id = int(m.group(1))
    # A metrics file without a matching context still yields a row; its context
    # columns are then None / 0.
    ctx = contexts.get(context_id, {})
    metrics_blob = json.loads(mfile.read_text(encoding="utf-8"))

    # `or {}` / `or []` below also cover keys that are present but null in the JSON,
    # which `.get(key, default)` alone does not.
    assets = ctx.get("assets") or {}
    obstacle_path = assets.get("anchored_obstacles")
    map_obstacle_count = count_obstacles(repo_root / obstacle_path) if obstacle_path else None

    rows.append({
        "scene_name": ctx.get("scene_id"),
        "context_id": context_id,
        "category": ctx.get("category"),
        "crowd_size": ctx.get("crowd_size"),
        "scenario": ctx.get("scenario"),
        "goal_cnt": len(ctx.get("goals_px") or []),
        "groups": len(ctx.get("groups") or []),
        "toward_event_center": ctx.get("towards_event"),
        "map_obstacle_count": map_obstacle_count,
        # Metrics are spread last, so a metric sharing a name with a context
        # column above would override it.
        **(metrics_blob.get("metrics") or {}),
    })

df = pd.DataFrame(rows)
metric_cols = [c for c in df.columns if c not in CONTEXT_COLS]
if metric_cols:
    # Force metric columns to numeric; values that cannot be parsed become NaN.
    df[metric_cols] = df[metric_cols].apply(pd.to_numeric, errors="coerce")
df


Unnamed: 0,scene_name,context_id,category,crowd_size,scenario,goal_cnt,groups,toward_event_center,map_obstacle_count,ADE,...,CollisionRate,VD,DDS,GoalRate,goal_achieve_as_long_as_pass_by,SocialDistanceViolations,towards_event_achieve_rate,away_event_achieve_rate,group_stick_together,group_goal_achievement
0,37_Millennium_Park_simplified_obstacle_anchored,0,Ambulatory,192,Late-morning strolls at Millennium Park: visit...,6,29,random,88,,...,0.005828,,,0.619792,0.692708,0.021537,,,0.0,0.0
1,37_Millennium_Park_simplified_obstacle_anchored,1,Ambulatory,62,Warm afternoon foot traffic: small groups wand...,5,7,random,88,,...,0.017381,,,0.741935,0.758065,0.048964,,,0.0,0.0
2,37_Millennium_Park_simplified_obstacle_anchored,2,Ambulatory,296,Evening promenade: visitors take photos and me...,6,45,random,88,,...,0.006559,,,0.655405,0.722973,0.019196,,,0.061115,0.088889
3,37_Millennium_Park_simplified_obstacle_anchored,3,Ambulatory,80,Weekend leisure walkers spread through the ope...,5,12,random,88,,...,0.00927,,,0.65,0.7,0.05121,,,0.08028,0.083333
4,37_Millennium_Park_simplified_obstacle_anchored,4,Ambulatory,160,Morning walkers and joggers circulate gently t...,6,28,random,88,,...,0.010409,,,0.59375,0.7,0.020483,,,0.092819,0.107143
5,37_Millennium_Park_simplified_obstacle_anchored,5,Ambulatory,82,Light sightseeing flow: families and friends w...,5,14,random,88,,...,0.013226,,,0.707317,0.743902,0.057417,,,0.174741,0.214286
6,37_Millennium_Park_simplified_obstacle_anchored,6,Ambulatory,66,Photography enthusiasts wander slowly through ...,5,12,random,88,,...,0.013922,,,0.590909,0.636364,0.045884,,,0.0,0.083333
7,37_Millennium_Park_simplified_obstacle_anchored,7,Ambulatory,44,"Casual visitors spread thinly across paths, lo...",5,7,random,88,,...,0.024336,,,0.977273,0.977273,0.165072,,,0.0,1.0
8,37_Millennium_Park_simplified_obstacle_anchored,8,Ambulatory,90,Lunchtime drift: office workers cross the park...,6,12,random,88,,...,0.008479,,,0.588889,0.677778,0.030117,,,0.0,0.0
9,37_Millennium_Park_simplified_obstacle_anchored,9,Ambulatory,315,Early evening ambient flow: couples and small ...,6,53,random,88,,...,0.007758,,,0.714286,0.812698,0.019123,,,0.101605,0.150943


In [2]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# One subplot per x variable; each subplot shows the mean of every metric in
# y_metrics after grouping the runs in `df` by that x variable.
x_cols = ["category", "crowd_size", "goal_cnt", "toward_event_center", "map_obstacle_count"]
y_metrics = [
    "goal_achieve_as_long_as_pass_by",
    "away_event_achieve_rate",
    "towards_event_achieve_rate",
    "CollisionRate",
    "group_goal_achievement",
    "SocialDistanceViolations",
]

palette = px.colors.qualitative.Set2
fig = make_subplots(rows=len(x_cols), cols=1, shared_xaxes=False,
                    subplot_titles=[f"{xc} vs metrics" for xc in x_cols])

for i, xcol in enumerate(x_cols, start=1):
    grouped = df.groupby(xcol)[y_metrics].mean().reset_index()
    # Numeric x values sort naturally; anything else is ordered by its string form.
    if pd.api.types.is_numeric_dtype(grouped[xcol]):
        grouped = grouped.sort_values(by=xcol)
    else:
        grouped = grouped.sort_values(by=xcol, key=lambda s: s.astype(str))

    for j, m in enumerate(y_metrics):
        fig.add_trace(
            go.Scatter(
                x=grouped[xcol],
                y=grouped[m],
                mode="lines+markers",
                # Always name the trace. Passing name=None does not hide it from
                # the legend; Plotly lists it as "trace N". Use showlegend to show
                # each metric once, and legendgroup so that one legend entry
                # toggles that metric in every subplot.
                name=m,
                legendgroup=m,
                showlegend=(i == 1),
                line=dict(color=palette[j % len(palette)], width=2),
                marker=dict(size=7),
                hovertemplate=f"{xcol}=%{{x}}<br>{m}=%{{y:.3f}}<extra></extra>",
            ),
            row=i, col=1,
        )
    fig.update_xaxes(title_text=xcol, row=i, col=1)
    fig.update_yaxes(title_text="mean metric", row=i, col=1)

fig.update_layout(
    height=300 * len(x_cols),  # 300 px of height per subplot
    title="Metrics vs X variables",
    legend_title="Metric",
    hovermode="x unified",
    template="plotly_white",
)
fig.show()
