In [39]:
# AndroidWorld 轨迹可视化

# 这个 notebook 用于可视化 `runs/xxx` 下的轨迹：

# - 展示 **Goal(指令)** + **整条轨迹**
# - 每个 step：展示 `screenshot_step{n}.png`，并 `print` 对应的 `model_response`（即 `result.json` 里的 `trajectory[n].response`）
# - 若该 step 的 tool call 是 `click` 或 `long_press`，在截图上用 **红色圆圈** 标注点击点


from __future__ import annotations

import json
import os
import re
from pathlib import Path

from IPython.display import display, Markdown
from PIL import Image, ImageDraw

# Root directory holding the run outputs that every helper below reads by default.
# Set the ANDROIDWORLD_RUNS_DIR environment variable to point the notebook at another
# location without editing it; when the variable is unset or empty the original path is used.
RUNS_DIR = Path(
    os.environ.get("ANDROIDWORLD_RUNS_DIR")
    or "/Users/chengkanzhi/Desktop/ScaleCUA/evaluation/AndroidWorld/runs/diy_0208"
).expanduser()

_TOOL_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)


def _extract_click_actions(response_text: str):
    """从 response 里抽取 click/long_press。

    注意：AndroidWorld 的坐标是归一化到 [0, 1000] 的。
    """

    actions = []
    for m in _TOOL_RE.finditer(response_text or ""):
        try:
            call = json.loads(m.group(1))
        except Exception:
            continue

        args = (call or {}).get("arguments") or {}
        action = args.get("action")
        if action not in {"click", "long_press"}:
            continue

        coord = args.get("coordinate")
        if isinstance(coord, (list, tuple)) and len(coord) == 2:
            # 坐标归一化到 0-1000
            actions.append({"action": action, "x_norm": float(coord[0]), "y_norm": float(coord[1])})

    return actions


def _annotate_clicks(img: Image.Image, actions):
    if not actions:
        return img

    img = img.convert("RGBA")
    draw = ImageDraw.Draw(img)
    w, h = img.size
    r = max(10, int(min(w, h) * 0.03))
    stroke = max(3, r // 4)

    for a in actions:
        x = int(round(a["x_norm"] / 1000.0 * w))
        y = int(round(a["y_norm"] / 1000.0 * h))
        draw.ellipse((x - r, y - r, x + r, y + r), outline=(255, 0, 0, 255), width=stroke)

    return img


def _resize_for_display(img: Image.Image, scale: float = 0.2) -> Image.Image:
    if scale >= 1:
        return img
    w, h = img.size
    nw, nh = max(1, int(w * scale)), max(1, int(h * scale))
    resample = getattr(Image, "Resampling", Image).LANCZOS
    return img.resize((nw, nh), resample)


def show_trajectory(traj_dir: Path, max_steps: int | None = None, display_scale: float = 0.2):
    """Render one trajectory: the goal, then each step's screenshot and model response.

    Reads ``result.json`` from ``traj_dir``. For every entry of its ``trajectory`` list the
    first file matching ``screenshot_step{step}.*`` is shown, with click / long-press points
    circled in red, followed by the raw response text.

    Args:
        traj_dir: Directory of a single trajectory (a ``str`` is accepted as well).
        max_steps: If given, only the first ``max_steps`` steps are shown.
        display_scale: Shrink factor applied to screenshots before display (default 1/5).

    Raises:
        FileNotFoundError: If ``result.json`` does not exist in ``traj_dir``.
    """

    traj_dir = Path(traj_dir)
    data = json.loads((traj_dir / "result.json").read_text(encoding="utf-8"))
    goal = data.get("goal", "")
    display(Markdown(f"## {traj_dir.name}\n\n**Goal**: {goal}"))

    traj = data.get("trajectory") or []
    if max_steps is not None:
        traj = traj[:max_steps]

    for item in traj:
        step = item.get("step")
        response = item.get("response", "")
        actions = _extract_click_actions(response)

        display(Markdown(f"### Step {step}"))

        # The extension is not fixed, so take the first match in sorted order.
        img_path = next(iter(sorted(traj_dir.glob(f"screenshot_step{step}.*"))), None)

        if img_path is not None and img_path.exists():
            # Display inside the ``with`` block so the file handle is released
            # deterministically once the (possibly lazily loaded) image has been rendered.
            with Image.open(img_path) as raw:
                annotated = _annotate_clicks(raw, actions)
                display(_resize_for_display(annotated, display_scale))
        else:
            print(f"[缺少截图] step={step}")

        print(response)
        print("-" * 80)


In [None]:
import random


def list_trajectories(runs_dir: Path = RUNS_DIR):
    """Return, in sorted order, the sub-directories of ``runs_dir`` that hold a ``result.json``."""
    found = []
    for entry in runs_dir.iterdir():
        if entry.is_dir() and (entry / "result.json").exists():
            found.append(entry)
    found.sort()
    return found


def _is_success_response(response_text: str) -> bool:
    """与 process_trajs.py 一致：terminate(success) 或 answer 视为成功。"""
    for m in _TOOL_RE.finditer(response_text or ""):
        try:
            call = json.loads(m.group(1))
        except Exception:
            continue
        if (call or {}).get("name") != "mobile_use":
            continue
        args = (call or {}).get("arguments") or {}
        action = args.get("action")
        return action == "answer" or (action == "terminate" and args.get("status") == "success")
    return False


def _is_success_traj(traj_dir: Path) -> bool:
    data = json.loads((traj_dir / "result.json").read_text(encoding="utf-8"))
    traj = data.get("trajectory") or []
    return bool(traj) and _is_success_response(traj[-1].get("response", ""))


def list_success_trajectories(runs_dir: Path = RUNS_DIR):
    """Return the trajectory directories under ``runs_dir`` whose last step signals success."""
    successful = []
    for candidate in list_trajectories(runs_dir):
        if _is_success_traj(candidate):
            successful.append(candidate)
    return successful


def show_random_trajectory(
    runs_dir: Path = RUNS_DIR,
    max_steps: int | None = None,
    seed: int | None = None,
    display_scale: float = 0.2,
):
    """Pick one successful trajectory at random, display it, and return its directory.

    When ``seed`` is given the global ``random`` module is seeded with it first, which makes
    the pick repeatable. Raises ``FileNotFoundError`` when ``runs_dir`` has no successful
    trajectory.
    """
    if seed is not None:
        random.seed(seed)

    successful = list_success_trajectories(runs_dir)
    print(f"Success trajs: {len(successful)}")
    if len(successful) == 0:
        raise FileNotFoundError(f"未找到成功轨迹：{runs_dir}")

    chosen = random.choice(successful)
    show_trajectory(chosen, max_steps=max_steps, display_scale=display_scale)
    return chosen


# Each run displays one randomly chosen *successful* trajectory
# (screenshots are shrunk to 1/5 of their size by default).
show_random_trajectory()

In [None]:
# 从 data_merge.json 随机抽 1 条轨迹可视化
import random


def _annotate_bbox(img: Image.Image, bbox: dict | None):
    """如果 bbox 存在，则用绿色框标注出来（bbox 坐标为原始像素坐标）。"""
    if not bbox:
        return img

    try:
        x_min = int(bbox.get("x_min"))
        x_max = int(bbox.get("x_max"))
        y_min = int(bbox.get("y_min"))
        y_max = int(bbox.get("y_max"))
    except Exception:
        return img

    img = img.convert("RGBA")
    draw = ImageDraw.Draw(img)
    w, h = img.size

    # clamp（避免越界导致看不到框）
    x_min = max(0, min(w - 1, x_min))
    x_max = max(0, min(w - 1, x_max))
    y_min = max(0, min(h - 1, y_min))
    y_max = max(0, min(h - 1, y_max))

    stroke = max(3, int(min(w, h) * 0.004))
    draw.rectangle((x_min, y_min, x_max, y_max), outline=(0, 255, 0, 255), width=stroke)
    return img


def show_random_from_merged(
    runs_dir: Path = RUNS_DIR,
    seed: int | None = None,
    max_steps: int | None = None,
    display_scale: float = 0.2,
    only_has_unreasonable: bool = False,
    merge_name: str = "data_merge_0208_refine_conclusion_thinking_pattern_eval.json",
):
    """Display one random trajectory from a merged JSON file and return its entry.

    For each step the screenshot referenced by ``image`` (relative to ``runs_dir``) is shown
    with click points circled in red and the optional ``bbox`` outlined in green. The
    ``is_reasonable`` / ``conclusion`` / ``thinking_refine`` / ``thinking_pattern`` fields are
    then printed when present, followed by the raw response.

    Args:
        runs_dir: Directory containing the merged file and the screenshots.
        seed: If given, seeds the global ``random`` module so the pick is repeatable.
        max_steps: If given, only the first ``max_steps`` steps are shown.
        display_scale: Shrink factor applied to screenshots before display.
        only_has_unreasonable: Restrict the pick to trajectories that contain at least one
            step with ``is_reasonable`` equal to ``False``.
        merge_name: File name of the merged JSON inside ``runs_dir``.

    Returns:
        The selected entry of the merged list.

    Raises:
        ValueError: If the merged file is empty, or the filter leaves no trajectory.
    """
    if seed is not None:
        random.seed(seed)

    merge_path = runs_dir / merge_name
    merged = json.loads(merge_path.read_text(encoding="utf-8"))
    if not merged:
        raise ValueError(f"data_merge.json 为空：{merge_path}")

    # Keep only trajectories that contain a step flagged is_reasonable=False.
    if only_has_unreasonable:
        merged = [
            item for item in merged
            if any(step.get("is_reasonable") is False for step in (item.get("trajectory") or []))
        ]
        print(f"Num of trajectories with is_reasonable=False: {len(merged)}")
        if not merged:
            raise ValueError("没有包含 is_reasonable=False 的轨迹")

    # Optional manual filter for a specific app/task (left disabled):
    # print(f"Num of total tasks: {len(merged)}")
    # merged = [item for item in merged if "task" in item["save_dir"].lower()]
    # print(f"Num of filtered tasks: {len(merged)}")

    item = random.choice(merged)
    goal = item.get("goal", "")
    traj = item.get("trajectory") or []
    if max_steps is not None:
        traj = traj[:max_steps]

    display(Markdown(f"## {item.get('save_dir','')}\n\n**Goal**: {goal}"))

    for step_obj in traj:
        step = step_obj.get("step")
        response = step_obj.get("response", "")
        actions = _extract_click_actions(response)
        bbox = step_obj.get("bbox")

        display(Markdown(f"### Step {step}"))

        img_rel = step_obj.get("image")
        img_path = (runs_dir / img_rel) if img_rel else None
        if img_path and img_path.exists():
            # Render inside the ``with`` block so the file handle is closed right after
            # the image has been displayed.
            with Image.open(img_path) as raw:
                annotated = _annotate_clicks(raw, actions)
                annotated = _annotate_bbox(annotated, bbox)
                display(_resize_for_display(annotated, display_scale))
        else:
            print(f"[缺少截图] {img_rel}")

        # Extra evaluation fields, printed only when the step carries them.
        if "is_reasonable" in step_obj:
            print(f"is_reasonable: {step_obj.get('is_reasonable')}")
        if step_obj.get("conclusion"):
            print(f"conclusion: {step_obj.get('conclusion')}")
        if step_obj.get("thinking_refine"):
            print(f"thinking_refine: {step_obj.get('thinking_refine')}")
        if step_obj.get("thinking_pattern"):
            print(f"thinking_pattern: {step_obj.get('thinking_pattern')}")

        print(response)
        print("-" * 80)

    return item


# Run this: visualize one random trajectory from the merged JSON, restricted to
# trajectories that contain at least one step with is_reasonable=False.
_ = show_random_from_merged(only_has_unreasonable=True)
# Unfiltered alternative:
# _ = show_random_from_merged()

In [None]:
# 发现“前后截图完全无变化”的 step，并随机可视化 1 个（不改数据，仅展示）
import random
from PIL import ImageChops
from tqdm import tqdm


def _images_identical_pixels(p1: Path, p2: Path) -> bool:
    """严格像素级一致（RGB）。"""
    try:
        with Image.open(p1) as im1, Image.open(p2) as im2:
            im1 = im1.convert("RGB")
            im2 = im2.convert("RGB")
            if im1.size != im2.size:
                return False
            return ImageChops.difference(im1, im2).getbbox() is None
    except Exception:
        return False


def _render_static_step(step_obj: dict, runs_dir: Path, heading: str, display_scale: float) -> None:
    """Display one step: heading, annotated screenshot (if present), then the raw response."""
    # The annotation helpers are defined in other cells; look them up at call time so this
    # cell still renders (without annotations or resizing) when one of them is missing.
    helpers = globals()
    extract = helpers.get("_extract_click_actions")
    annotate_clicks = helpers.get("_annotate_clicks")
    annotate_bbox = helpers.get("_annotate_bbox")
    resize = helpers.get("_resize_for_display")

    response = step_obj.get("response", "")
    actions = extract(response) if extract else []
    bbox = step_obj.get("bbox")

    display(Markdown(heading))
    img_path = runs_dir / step_obj.get("image")
    if img_path.exists():
        # Render inside the ``with`` block so the file handle is closed after display.
        with Image.open(img_path) as raw:
            img = annotate_clicks(raw, actions) if annotate_clicks else raw
            if annotate_bbox:
                img = annotate_bbox(img, bbox)
            display(resize(img, display_scale) if resize else img)
    else:
        print(f"[缺少截图] {img_path}")
    print(response)


def show_random_static_step(
    runs_dir: Path = RUNS_DIR,
    seed: int | None = None,
    display_scale: float = 0.2,
    max_pairs: int | None = None,
    merge_name: str = "data_merge_0208.json",
):
    """Find steps whose screenshot is pixel-identical to the next step's and show one at random.

    Every pair of consecutive steps in every trajectory of the merged file is compared,
    skipping pairs whose screenshots are missing. Nothing is modified; the function only
    displays the sampled step together with its successor for comparison.

    Args:
        runs_dir: Directory containing the merged file and the screenshots.
        seed: If given, seeds the global ``random`` module so the pick is repeatable.
        display_scale: Shrink factor applied to screenshots before display.
        max_pairs: If given, stop scanning after this many pairs have been compared.
        merge_name: File name of the merged JSON inside ``runs_dir``.

    Returns:
        ``{"ep_idx", "step_i", "save_dir"}`` describing the sampled step.

    Raises:
        ValueError: If the merged file is empty or no identical pair is found.
    """
    if seed is not None:
        random.seed(seed)

    merge_path = runs_dir / merge_name
    merged = json.loads(merge_path.read_text(encoding="utf-8"))
    if not merged:
        raise ValueError(f"merged 为空：{merge_path}")

    candidates = []
    checked = 0

    # Wrap the list itself (not the enumerate object) so tqdm knows the total length.
    for ep_idx, ep in enumerate(tqdm(merged)):
        traj = ep.get("trajectory") or []
        for i in range(len(traj) - 1):
            if max_pairs is not None and checked >= max_pairs:
                break
            s1, s2 = traj[i], traj[i + 1]
            img1_rel = s1.get("image")
            img2_rel = s2.get("image")
            if not img1_rel or not img2_rel:
                continue
            p1 = runs_dir / img1_rel
            p2 = runs_dir / img2_rel
            if not (p1.exists() and p2.exists()):
                continue

            # Only pairs that are actually compared count against max_pairs.
            checked += 1
            if _images_identical_pixels(p1, p2):
                candidates.append((ep_idx, i))

        if max_pairs is not None and checked >= max_pairs:
            break

    print(f"Checked pairs: {checked}")
    print(f"Static-step candidates: {len(candidates)}")
    if not candidates:
        raise ValueError("未找到任何前后截图完全一致的 step")

    ep_idx, i = random.choice(candidates)
    ep = merged[ep_idx]
    traj = ep.get("trajectory") or []
    s1, s2 = traj[i], traj[i + 1]

    goal = ep.get("goal", "")
    save_dir = ep.get("save_dir", "")

    display(Markdown(f"## Static Step Sample: {save_dir}\n\n**Goal**: {goal}\n\n- step_idx_in_traj: {i} → {i+1}\n- step_id: {s1.get('step')} → {s2.get('step')}"))

    # The sampled step, then its successor as the comparison image.
    _render_static_step(s1, runs_dir, f"### Step {s1.get('step')}（当前）", display_scale)
    _render_static_step(s2, runs_dir, f"### Step {s2.get('step')}（下一步/对照）", display_scale)

    return {"ep_idx": ep_idx, "step_i": i, "save_dir": save_dir}


# Run: visualize one random step whose screenshot is pixel-identical to the next step's.
_ = show_random_static_step()