In [None]:
# run_gym_qt_route_a.py
import os
os.environ['QT_QPA_PLATFORM'] = 'offscreen'

import sys
import threading
import traceback

# Auto-detect the Food-Delivery-Bench repo root.
from pathlib import Path

# Walk from the working directory up to the filesystem root. At each level,
# test the directory itself first and then a "Food-Delivery-Bench" child;
# the first directory containing both expected sub-packages wins.
cwd = Path.cwd().resolve()
base_dir = None
for p in (cwd, *cwd.parents):
    candidate = p / "Food-Delivery-Bench"
    for root in (p, candidate):
        if all((root / sub).is_dir() for sub in ("vlm_delivery", "simworld")):
            base_dir = root
            break
    if base_dir is not None:
        break

if base_dir is None:
    raise RuntimeError("Cannot auto-detect Food-Delivery-Bench root.")

# Downstream code expects a plain string path. Prepend both import roots in
# one step so that "simworld" ends up ahead of the repo root on sys.path.
base_dir = str(base_dir)
sys.path[:0] = [os.path.join(base_dir, "simworld"), base_dir]

from PyQt5.QtCore import QTimer
from vlm_delivery.gym_like_interface.gym_like_interface import DeliveryBenchGymEnvQtRouteA


def main():
    """Run one short DeliveryBench episode with Qt on the calling thread.

    The environment is built and its Qt side is bootstrapped on the thread
    that calls ``main``. The reset/step loop runs on a daemon worker thread
    that is started from a zero-delay ``QTimer`` callback, and this function
    then blocks in ``env.run_qt_loop()``.

    The worker stops on the first step whose info dict carries an ``"error"``
    entry, when the episode is terminated or truncated, or when an exception
    escapes ``reset``/``step``. It always attempts to close the environment
    and to quit the Qt application afterwards.
    """
    env = DeliveryBenchGymEnvQtRouteA(
        base_dir=base_dir,
        ue_ip="127.0.0.1",
        ue_port=9099,
        sim_tick_ms=100,
        vlm_pump_ms=100,
        enable_viewer=True,  # If unstable, try False to isolate viewer issues.
        map_name="medium-city-22roads",
        max_steps=20,
    )

    # 1) Must run on the main thread: create QApplication + invoker.
    env.bootstrap_qt()

    def rl_loop():
        """Worker-thread body: reset, step until done or error, then clean up."""
        try:
            # 2) Run reset/step on a worker thread.
            obs, info = env.reset(seed=0)
            print("reset info:", info)
            print("obs:", obs)

            for step_i in range(1, 999999):
                # Only the flags and the info dict are used; observation and
                # reward are intentionally ignored by this smoke-test loop.
                _obs, _reward, term, trunc, step_info = env.step(None)
                print(f"[RL] step={step_i} info:", step_info)

                # Print any error with tracebacks.
                if step_info.get("error"):
                    print("STEP ERROR:", step_info["error"])
                    if step_info.get("dispatch_exc"):
                        print("DISPATCH TRACEBACK:\n", step_info["dispatch_exc"])
                    if step_info.get("enqueue_exc"):
                        print("ENQUEUE TRACEBACK:\n", step_info["enqueue_exc"])
                    break

                if term or trunc:
                    break

        except Exception as e:
            print("[RL] Exception:", e)
            traceback.print_exc()

        finally:
            # Cleanup stays best-effort: a failing close must not prevent the
            # quit request below. Failures are reported rather than silently
            # discarded, so shutdown problems remain visible when debugging.
            try:
                env.close()
            except Exception as e:
                print("[RL] env.close() failed:", e)
                traceback.print_exc()
            try:
                if getattr(env, "_app", None) is not None:
                    env._app.quit()
            except Exception as e:
                print("[RL] Qt application quit failed:", e)
                traceback.print_exc()

    # 3) Start the RL thread after the Qt loop is ready.
    QTimer.singleShot(0, lambda: threading.Thread(target=rl_loop, daemon=True).start())

    # 4) Run the Qt event loop on the main thread.
    env.run_qt_loop()


# Entry point: start the episode only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


  from .autonotebook import tqdm as notebook_tqdm
INFO:__init__:230:Got connection confirm: b'connected to gym_citynav'


Loaded bus route: route_bus_1 with 8 stops
Created bus bus_1 on route route_bus_1
=>Info: using ip-port socket


This plugin does not support propagateSizeHints()


reset info: {'sim_time': None, 'seed': 0, 'options': {}, 'sim_tick_ms': 100, 'vlm_pump_ms': 100, 'run_dir': '/home/lingjun/embodied-agent-framework/DeliveryBench-release/outputs/trajectories/run_20260130_193400'}
obs: {'state': array([-116.918,  227.4  ,    0.   ,    0.   ,    0.   ], dtype=float32)}
[GRAB] py= MainThread qt_id= 140589922228032 has_app= True is_gui= True
invoker elapsed: 5.774644136428833 ok= True exc= None


2026-01-30 19:34:11 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "No active orders. Energy and scooter battery sufficient. To maximize earnings, I should promptly check available orders as per rules.",
"action": "VIEW_ORDERS()",
"future_plan": "Accept the best-paying nearby pickup, ride to the restaurant, pick up, bag items properly, and deliver. Monitor battery; if it drops below ~20%, plan a quick charge en route."
}
2026-01-30 19:34:11 - delivery_system.agent_DeliveryMan1 - INFO - [VLM] parsed action: view_orders 
2026-01-30 19:34:11 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] view orders


Starting action: DMAction(kind=<DMActionKind.VIEW_ORDERS: 'view_orders'>, data={}, on_done=None)
DMAction(kind=<DMActionKind.VIEW_ORDERS: 'view_orders'>, data={}, on_done=None)
[RL] step=1 info: {'sim_time': None, 'elapsed_steps': 1, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'vlm_decision', 'done0': 0, 'done1': 1}
[GRAB] py= MainThread qt_id= 140589922228032 has_app= True is_gui= True
invoker elapsed: 5.489554405212402 ok= True exc= None


2026-01-30 19:34:26 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "We have no active orders; battery 50% and energy 100%. Must accept nearby profitable orders. From current location near center, the closest pickups are restaurant 1 at (-288,-17) for orders #5 and #6, and restaurant 4 at (-269,175) for #7/#8, and restaurant 6 far north for #3/#4/#9. Best quick money with short distance/time is Order #6 (220.8 m, $2.54) plus possibly #5 (same pickup) to stack. I'll accept #6 and #5 together to maximize efficiency from same restaurant.",
"action": "ACCEPT_ORDER([6, 5])",
"future_plan": "Scoot to restaurant 1 at (-288.53m, -16.58m), pick up both orders, bag with heat/cold separation as needed; deliver #6 first to (-176.97,-125.79), then #5 to (-304.10,-224.68). Monitor battery; charge if below 20% at nearby stations."}
2026-01-30 19:34:26 - delivery_system.agent_DeliveryMan1 - INFO - [VLM] parsed action: accept_order {'oids': [6, 5]}

Starting action: DMAction(kind=<DMActionKind.ACCEPT_ORDER: 'accept_order'>, data={'oids': [6, 5]}, on_done=None)
DMAction(kind=<DMActionKind.ACCEPT_ORDER: 'accept_order'>, data={'oids': [6, 5]}, on_done=None)


2026-01-30 19:34:27 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] order #6 relative score = 4.70
2026-01-30 19:34:27 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] order #5 relative score = 2.78
2026-01-30 19:34:27 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] accept orders: accepted #6 #5


[RL] step=2 info: {'sim_time': None, 'elapsed_steps': 2, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'vlm_decision', 'done0': 1, 'done1': 2}
[GRAB] py= MainThread qt_id= 140589922228032 has_app= True is_gui= True
invoker elapsed: 5.633758068084717 ok= True exc= None


2026-01-30 19:34:38 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "We just accepted two orders from the same restaurant; both are still preparing (3–5 min). Best move is to head to the pickup now to be there when ready. Battery 50% is ample for the ~320 m ride plus deliveries; no need to charge.",
"action": "MOVE(-288.53m, -16.58m)",
"future_plan": "On arrival, wait near pickup door and monitor readiness. PICKUP both when ready, arrange in bag, then deliver order #6 first to (-176.97,-125.79), followed by #5 to (-304.10,-224.68). Keep energy and battery monitored; avoid accel pace."
}
2026-01-30 19:34:38 - delivery_system.agent_DeliveryMan1 - INFO - [VLM] parsed action: move_to {'tx': -28853.0, 'ty': -1658.0, 'use_route': True, 'snap_cm': 120.0}
2026-01-30 19:34:38 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] move from (-3.52m, 6.85m) to (-288.53m, -16.58m) [mode=e-scooter, speed=600.0 cm/s, pace=normal]
