In [None]:
# run_gym_qt_route_a.py
import os
import sys
import threading
import traceback

# Put the DeliveryBench checkout and its vlm_delivery subdirectory at the front
# of the import search path so the project imports below resolve.
# Original author's note: normally only the base directory is needed.
# NOTE(review): these paths are hard-coded and do not follow the
# DELIVERYBENCH_BASE_DIR environment variable that main() reads.
sys.path.insert(0, r"D:\DeliveryBench-gym")
sys.path.insert(0, r"D:\DeliveryBench-gym\vlm_delivery")

from PyQt5.QtCore import QTimer
from gym_like_interface.gym_like_interface import DeliveryBenchGymEnvQtRouteA


def _report_step_error(info):
    """Print the error (and any tracebacks) carried by a step's ``info`` dict.

    Args:
        info: The ``info`` mapping returned by ``env.step``.

    Returns:
        True when ``info`` holds a truthy ``"error"`` entry (which is then
        printed together with the optional ``"dispatch_exc"`` and
        ``"enqueue_exc"`` entries), False otherwise.
    """
    error = info.get("error")
    if not error:
        return False

    # Do not match one specific error string: print whatever error was
    # reported, plus every traceback that came with it.
    print("STEP ERROR:", error)
    for key, label in (
        ("dispatch_exc", "DISPATCH TRACEBACK:\n"),
        ("enqueue_exc", "ENQUEUE TRACEBACK:\n"),
    ):
        detail = info.get(key)
        if detail:
            print(label, detail)
    return True


def _shutdown(env):
    """Close ``env`` and ask its Qt application (if any) to quit.

    Both steps are best-effort: a failure in one is reported with its
    traceback and does not prevent the other from running.
    """
    try:
        env.close()
    except Exception:
        # Previously swallowed silently; report it so a broken close is visible.
        print("[RL] env.close() failed:")
        traceback.print_exc()

    try:
        app = getattr(env, "_app", None)
        if app is not None:
            # NOTE(review): this runs on the RL worker thread, not the thread
            # running the Qt loop -- confirm that is safe for the Qt version
            # in use.
            app.quit()
    except Exception:
        print("[RL] Qt application quit failed:")
        traceback.print_exc()


def main() -> None:
    """Run the DeliveryBench gym environment with the Qt loop on the main thread.

    The environment is built from ``DELIVERYBENCH_BASE_DIR`` (falling back to
    ``D:\\DeliveryBench-gym``), Qt is bootstrapped on the calling thread, and a
    daemon worker thread drives ``reset``/``step`` until the episode
    terminates, truncates, or a step reports an error. The worker then closes
    the environment and asks the Qt application to quit.
    """
    base = os.environ.get("DELIVERYBENCH_BASE_DIR", r"D:\DeliveryBench-gym")

    env = DeliveryBenchGymEnvQtRouteA(
        base_dir=base,
        ue_ip="127.0.0.1",
        ue_port=9000,
        sim_tick_ms=100,
        vlm_pump_ms=100,
        enable_viewer=True,   # if it still crashes, set to False first to check stability
        map_name="medium-city-22roads",
        max_steps=20,
    )

    # 1) Must happen on the Python main thread (per the original author's
    #    note, this creates the QApplication + invoker).
    env.bootstrap_qt()

    def rl_loop():
        """Drive reset/step from the worker thread, then shut everything down."""
        try:
            # 2) reset/step run on the worker thread.
            obs, info = env.reset(seed=0)
            print("reset info:", info)
            print("obs:", obs)

            for step_i in range(1, 999999):
                obs, r, term, trunc, info2 = env.step(None)
                print(f"[RL] step={step_i} info:", info2)

                if _report_step_error(info2):
                    break

                if term or trunc:
                    break

        except Exception as e:
            print("[RL] Exception:", e)
            traceback.print_exc()

        finally:
            # Always release the environment and stop the Qt loop, even when
            # the episode ended because of an exception.
            _shutdown(env)

    # 3) Start the RL thread only once the Qt loop is running: the zero-delay
    #    timer fires after the event loop has started.
    QTimer.singleShot(0, lambda: threading.Thread(target=rl_loop, daemon=True).start())

    # 4) The main thread runs the Qt loop (fine when run as a script; do not
    #    run it this way inside Jupyter).
    env.run_qt_loop()

# Script entry point: run the demo only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()


  from .autonotebook import tqdm as notebook_tqdm
INFO:__init__:230:Got connection confirm: b'connected to gym_citynav'


Loaded bus route: route_bus_1 with 8 stops
Created bus bus_1 on route route_bus_1
=>Info: using ip-port socket
reset info: {'sim_time': None, 'seed': 0, 'options': {}, 'sim_tick_ms': 100, 'vlm_pump_ms': 100, 'run_dir': 'D:\\DeliveryBench-gym\\outputs\\trajectories\\run_20260109_164531'}
obs: {'state': array([-0.344,  0.199,  0.   ,  0.   ,  0.   ], dtype=float32)}


2026-01-09 16:45:37 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "No active orders; energy and scooter battery are sufficient for multiple short trips. Per rules, I should promptly view and accept new orders to avoid idle time.",
"action": "VIEW_ORDERS()",
"future_plan": "Accept profitable nearby orders, ride to pickup, wait only if necessary, organize bag, deliver efficiently. Monitor battery (charge if <20% before long trip) and energy; buy/use items only if needed."
}
2026-01-09 16:45:37 - delivery_system.agent_DeliveryMan1 - INFO - [VLM] parsed action: view_orders 
2026-01-09 16:45:37 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] view orders


Starting action: DMAction(kind=<DMActionKind.VIEW_ORDERS: 'view_orders'>, data={}, on_done=None)
DMAction(kind=<DMActionKind.VIEW_ORDERS: 'view_orders'>, data={}, on_done=None)
[RL] step=1 info: {'sim_time': None, 'elapsed_steps': 1, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'vlm_decision', 'done0': 0, 'done1': 1}


2026-01-09 16:45:44 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "I have no active orders and full energy with 50% scooter battery. Must accept orders quickly. Evaluate proximity: nearest pickups are restaurant 1 (-288.53,-16.58) orders #5 and #6, and restaurant 4 (-268.64,175.38) orders #7 and #8. Short distance and good time windows favor #6 (220.8m, 4 min, $2.54) and #5 (366.7m, 6 min, $2.79). Can stack both from same pickup to maximize efficiency. Temperature needs are simple (tea hot; poke cold) manageable with bag compartments.",
"action": "ACCEPT_ORDER([6, 5])",
"future_plan": "Ride to restaurant 1 pickup at (-288.53m, -16.58m), pick up both orders, arrange hot/cold in separate compartments, deliver #6 first to (-176.97m, -125.79m) within 4 min, then deliver #5 to (-304.10m, -224.68m). Monitor battery; charge if it drops below ~20% after deliveries."}
2026-01-09 16:45:44 - delivery_system.agent_DeliveryMan1 - INFO - [VLM

Starting action: DMAction(kind=<DMActionKind.ACCEPT_ORDER: 'accept_order'>, data={'oids': [6, 5]}, on_done=None)
DMAction(kind=<DMActionKind.ACCEPT_ORDER: 'accept_order'>, data={'oids': [6, 5]}, on_done=None)


2026-01-09 16:45:45 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] order #6 relative score = 4.70
2026-01-09 16:45:45 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] order #5 relative score = 2.78
2026-01-09 16:45:45 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] accept orders: accepted #6 #5


[RL] step=2 info: {'sim_time': None, 'elapsed_steps': 2, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'vlm_decision', 'done0': 1, 'done1': 2}


2026-01-09 16:45:51 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "We have two active orders from the same restaurant, both still preparing (3–5 min). Best to head to the pickup now to be ready. Battery 50% is sufficient for the round trip; no need to charge. Energy 100%. Prioritize timely arrival to minimize wait and start pickup ASAP.",
"action": "MOVE(-288.53m, -16.58m)",
"future_plan": "On arrival, if food still not ready, WAIT briefly; as each becomes ready, PICKUP both orders and PLACE_FOOD_IN_BAG keeping items stable. Then ride to drop off #6 first, then #5. Keep pace normal to avoid damage; monitor time limits and battery, charging only if <20% after deliveries."
}
2026-01-09 16:45:51 - delivery_system.agent_DeliveryMan1 - INFO - [VLM] parsed action: move_to {'tx': -28853.0, 'ty': -1658.0, 'use_route': True, 'snap_cm': 120.0}
2026-01-09 16:45:51 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] move from (-0.00m, 0

Starting action: DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -28853.0, 'ty': -1658.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -28853.0, 'ty': -1658.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
