In [None]:
# run_gym_qt_route_a.py
import os
# Select Qt's "offscreen" platform plugin. This is set before PyQt5 is imported
# further down in this file; presumably so the script can run without a
# display — TODO confirm.
os.environ['QT_QPA_PLATFORM'] = 'offscreen'

import json
import sys
import threading
import traceback

# Auto-detect the Food-Delivery-Bench repo root.
from pathlib import Path

def _has_repo_layout(path):
    """Return True when *path* holds both a ``vlm_delivery`` and a ``simworld`` directory."""
    return all((path / sub).is_dir() for sub in ("vlm_delivery", "simworld"))


# Walk from the working directory up through its parents. At each level the
# directory itself is tested first, then a child named "Food-Delivery-Bench";
# the first location with the expected layout wins.
cwd = Path.cwd().resolve()
base_dir = next(
    (
        root
        for ancestor in (cwd, *cwd.parents)
        for root in (ancestor, ancestor / "Food-Delivery-Bench")
        if _has_repo_layout(root)
    ),
    None,
)

if base_dir is None:
    raise RuntimeError("Cannot auto-detect Food-Delivery-Bench root.")

# Prepend both import roots in one step: the simworld directory ends up first
# on sys.path, immediately followed by the repo root.
base_dir = str(base_dir)
sys.path[:0] = [os.path.join(base_dir, "simworld"), base_dir]

from PyQt5.QtCore import QTimer
from vlm_delivery.gym_like_interface.gym_like_interface import DeliveryBenchGymEnvQtRouteA


def _print_step_error(info):
    """Print the error stored in a step's *info* dict plus any attached tracebacks."""
    print("STEP ERROR:", info["error"])
    if info.get("dispatch_exc"):
        print("DISPATCH TRACEBACK:\n", info["dispatch_exc"])
    if info.get("enqueue_exc"):
        print("ENQUEUE TRACEBACK:\n", info["enqueue_exc"])


def main():
    """Run one episode of the Qt-based gym environment.

    Reads the ``gym_env`` section of ``vlm_delivery/input/experiment_config.json``
    under ``base_dir``, builds a ``DeliveryBenchGymEnvQtRouteA``, bootstraps Qt
    on the calling thread, drives ``reset``/``step`` from a daemon worker
    thread, and then blocks in the environment's Qt event loop.

    Raises:
        OSError: if the experiment config file cannot be opened.
        json.JSONDecodeError: if the experiment config is not valid JSON.
    """
    exp_cfg_path = os.path.join(base_dir, "vlm_delivery", "input", "experiment_config.json")
    with open(exp_cfg_path, "r", encoding="utf-8") as f:
        exp_cfg = json.load(f) or {}
    # A missing or null "gym_env" section falls back to the defaults below.
    gym_env_cfg = exp_cfg.get("gym_env", {}) or {}

    env = DeliveryBenchGymEnvQtRouteA(
        base_dir=base_dir,
        ue_ip=gym_env_cfg.get("ue_ip", "127.0.0.1"),
        ue_port=int(gym_env_cfg.get("ue_port", 9015)),
        sim_tick_ms=100,
        vlm_pump_ms=100,
        enable_viewer=True,  # If unstable, try False to isolate viewer issues.
        map_name=gym_env_cfg.get("map_name", "medium-city-22"),
        max_steps=20,
    )

    # 1) Must run on the main thread: create QApplication + invoker.
    env.bootstrap_qt()

    def rl_loop():
        """Worker-thread body: reset, step until done or error, then clean up."""
        try:
            # 2) Run reset/step on a worker thread.
            obs, info = env.reset(seed=0)
            print("reset info:", info)
            print("obs:", obs)

            # The upper bound is only a safety cap; the loop ends earlier on a
            # reported error or when the env signals termination/truncation.
            for step_i in range(1, 999999):
                obs, r, term, trunc, info2 = env.step(None)
                print(f"[RL] step={step_i} info:", info2)

                # Print any error with tracebacks.
                if info2.get("error"):
                    _print_step_error(info2)
                    break

                if term or trunc:
                    break

        except Exception as e:
            print("[RL] Exception:", e)
            traceback.print_exc()

        finally:
            # Cleanup stays best-effort (one failing step must not block the
            # next), but failures are reported instead of silently discarded
            # so a hung Qt loop can be diagnosed.
            try:
                env.close()
            except Exception:
                print("[RL] env.close() failed:")
                traceback.print_exc()
            try:
                app = getattr(env, "_app", None)
                if app is not None:
                    # NOTE(review): quit() is invoked from the worker thread,
                    # not the Qt main thread — confirm this is safe for the
                    # Qt version in use.
                    app.quit()
            except Exception:
                print("[RL] stopping the Qt application failed:")
                traceback.print_exc()

    # 3) Start the RL thread after the Qt loop is ready.
    QTimer.singleShot(0, lambda: threading.Thread(target=rl_loop, daemon=True).start())

    # 4) Run the Qt event loop on the main thread.
    env.run_qt_loop()


# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


  from .autonotebook import tqdm as notebook_tqdm
INFO:__init__:230:Got connection confirm: b'connected to gym_citynav'


Loaded bus route: route_bus_1 with 8 stops
Created bus bus_1 on route route_bus_1
=>Info: using ip-port socket


This plugin does not support propagateSizeHints()


reset info: {'sim_time': None, 'seed': 0, 'options': {}, 'sim_tick_ms': 100, 'vlm_pump_ms': 100, 'run_dir': '/home/lingjun/embodied-agent-framework/DeliveryBench-release/outputs/trajectories/run_20260131_005615'}
obs: {'state': array([ -8.115, -12.354,   0.   ,   0.   ,   0.   ], dtype=float32)}
[GRAB] py= MainThread qt_id= 140407724197696 has_app= True is_gui= True
invoker elapsed: 6.0973145961761475 ok= True exc= None


2026-01-31 00:56:28 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "Scooter at origin with 50% battery and full energy. No active orders; to maximize earnings I should immediately look for available orders rather than moving or charging.",
"action": "VIEW_ORDERS()",
"future_plan": "Accept the best nearby multi-stop order(s), ride to the pickup, manage temperature with packs if needed, arrange items in bag, then navigate efficiently to drop-offs. Monitor energy and scooter battery; charge only if necessary between runs."
}
2026-01-31 00:56:28 - delivery_system.agent_DeliveryMan1 - INFO - [VLM] parsed action: view_orders 
2026-01-31 00:56:28 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] view orders


Starting action: DMAction(kind=<DMActionKind.VIEW_ORDERS: 'view_orders'>, data={}, on_done=None)
DMAction(kind=<DMActionKind.VIEW_ORDERS: 'view_orders'>, data={}, on_done=None)
[RL] step=1 info: {'sim_time': None, 'elapsed_steps': 1, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'vlm_decision', 'done0': 0, 'done1': 1}
[GRAB] py= MainThread qt_id= 140407724197696 has_app= True is_gui= True
invoker elapsed: 5.582418441772461 ok= True exc= None


2026-01-31 00:56:42 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "We’re idle with scooter at center, 50% battery—enough for a run. Need to accept profitable nearby orders with manageable temps. Orders 5 and 6 pick up at restaurant 1 (-288,-17), relatively close and short distances; combined payout $5.33 and similar route south-west. Accept both to batch efficiently.",
"action": "ACCEPT_ORDER([5, 6])",
"future_plan": "Ride to restaurant 1 at (-288.53m, -16.58m), PICKUP both when ready, then PLACE_FOOD_IN_BAG setting heat/ice as needed (Order 5 cold 5°C; Order 6 hot 65°C) using packs if required. Deliver Order 6 first (-176.97,-125.79), then Order 5 (-304.10,-224.68). Monitor battery; charge after if needed."
}
2026-01-31 00:56:42 - delivery_system.agent_DeliveryMan1 - INFO - [VLM] parsed action: accept_order {'oids': [5, 6]}


Starting action: DMAction(kind=<DMActionKind.ACCEPT_ORDER: 'accept_order'>, data={'oids': [5, 6]}, on_done=None)
DMAction(kind=<DMActionKind.ACCEPT_ORDER: 'accept_order'>, data={'oids': [5, 6]}, on_done=None)


2026-01-31 00:56:43 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] order #5 relative score = 3.07
2026-01-31 00:56:43 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] order #6 relative score = 4.70
2026-01-31 00:56:43 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] accept orders: accepted #5 #6


[RL] step=2 info: {'sim_time': None, 'elapsed_steps': 2, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'vlm_decision', 'done0': 1, 'done1': 2}
[GRAB] py= MainThread qt_id= 140407724197696 has_app= True is_gui= True
invoker elapsed: 5.7915332317352295 ok= True exc= None


2026-01-31 00:56:57 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "We have two active orders from the same restaurant ~289 m west; prep times 3–5 min. Energy 100%, scooter battery 50% (sufficient). Best to ride to the pickup door now so we’re there when ready. No need to charge yet.",
"action": "MOVE(-288.53m, -16.58m)",
"future_plan": "Wait near the pickup door; when orders become ready, PICKUP both, then PLACE_FOOD_IN_BAG using heat pack for order 6 (hot) and ice pack for order 5 (cold) if needed. Deliver order 6 first to (-176.97,-125.79), then order 5 to (-304.10,-224.68). Monitor battery; charge afterward if low."
}
2026-01-31 00:56:57 - delivery_system.agent_DeliveryMan1 - INFO - [VLM] parsed action: move_to {'tx': -28853.0, 'ty': -1658.0, 'use_route': True, 'snap_cm': 120.0}
2026-01-31 00:56:57 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] move from (-0.08m, -0.15m) to (-288.53m, -16.58m) [mode=e-scooter, speed=

Starting action: DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -28853.0, 'ty': -1658.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -28853.0, 'ty': -1658.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
[RL] step=3 info: {'sim_time': None, 'elapsed_steps': 3, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'vlm_decision', 'done0': 2, 'done1': 3}
[GRAB] py= MainThread qt_id= 140407724197696 has_app= True is_gui= True
invoker elapsed: 5.31400990486145 ok= True exc= None


2026-01-31 00:57:32 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "I’m at the pickup door for orders #5 and #6. Both are still being prepared (#6 ~2 min, #5 ~4 min). Battery 38% is adequate for these deliveries; energy 97% is fine. Best move is to wait at the pickup so I can grab food immediately when ready.",
"action": "WAIT(seconds=120)",
"future_plan": "When #6 becomes ready, PICKUP it (and #5 if ready). Arrange items in bag and add heat/ice packs if required. Deliver #6 first to (-176.97,-125.79), then return along route to drop off #5 at (-304.10,-224.68). Charge scooter afterward if battery dips below ~20%."
}
2026-01-31 00:57:32 - delivery_system.agent_DeliveryMan1 - INFO - [VLM] parsed action: wait {'duration_s': 120.0}
2026-01-31 00:57:32 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] start waiting: 120.0s (~2.0 min) @virtual


Starting action: DMAction(kind=<DMActionKind.WAIT: 'wait'>, data={'duration_s': 120.0}, on_done=None)
DMAction(kind=<DMActionKind.WAIT: 'wait'>, data={'duration_s': 120.0}, on_done=None)
[RL] step=4 info: {'sim_time': None, 'elapsed_steps': 4, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'vlm_decision', 'done0': 3, 'done1': 4}
[GRAB] py= MainThread qt_id= 140407724197696 has_app= True is_gui= True
invoker elapsed: 5.207130670547485 ok= True exc= None


2026-01-31 00:58:24 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "We’re already at the pickup door. Order #6 is ready and time left is only ~1 min, so we must grab it immediately to avoid expiry. #5 is still being prepared; we can come back after dropping #6 if needed.",
"action": "PICKUP(orders=[6])",
"future_plan": "Immediately place food into bag; then ride toward dropoff (-176.97m,-125.79m) via S1 and 5th road. Deliver #6 (leave_at_door). Return to restaurant for #5 if prepared; then deliver #5. Monitor scooter battery and charge if below 20%."
}
2026-01-31 00:58:24 - delivery_system.agent_DeliveryMan1 - INFO - [VLM] parsed action: pickup {'orders': [Order(city_map=<vlm_delivery.map.map.Map object at 0x7fb3107f3370>, pickup_address=Vector(x=-28852.9, y=-7909.56), delivery_address=Vector(x=-15316.35, y=-12578.8), items=[FoodItem(name='Tea', category='HOT', odor='none', motion_sensitive=False, damage_level=0, nonthermal_time_

DEBUG: PICKUP orders = [6]
Starting action: DMAction(kind=<DMActionKind.PICKUP: 'pickup'>, data={'orders': [Order(city_map=<vlm_delivery.map.map.Map object at 0x7fb3107f3370>, pickup_address=Vector(x=-28852.9, y=-7909.56), delivery_address=Vector(x=-15316.35, y=-12578.8), items=[FoodItem(name='Tea', category='HOT', odor='none', motion_sensitive=False, damage_level=0, nonthermal_time_sensitive=False, prep_time_s=180, serving_temp_c=65.0, safe_min_c=50.0, safe_max_c=70.0, heat_capacity=1.1, temp_c=nan, prepared_at_sim=0.0, picked_at_sim=0.0, delivered_at_sim=0.0, odor_contamination=0.0)], special_note='You can leave it by the door, thank you', path_nodes=[Node(position=Vector(x=-28852.9, y=-1658.0885), type=door), Node(position=Vector(x=-28852.9, y=-1700.0), type=normal), Node(position=Vector(x=-26700.0, y=-1700.0), type=normal), Node(position=Vector(x=-21700.0, y=-1700.0), type=intersection), Node(position=Vector(x=-18300.0, y=-1700.0), type=intersection), Node(position=Vector(x=-18300.

2026-01-31 00:58:37 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "We picked up order #6 and must bag it before moving. Dropoff is 219 m away with 1 min left; scooter battery 38% is fine. No temperature requirement specified for tea; place into one compartment quickly.",
"action": "PLACE_FOOD_IN_BAG(bag_cmd=\"order 6: 1 -> A\")",
"future_plan": "Immediately ride toward S1 then south to 5th road left to (-176.97m,-125.79m) and DROP_OFF(oid=6, method=\"leave_at_door\"). Return to restaurant to pick up #5 when ready and deliver to (-304.10m,-224.68m). Charge scooter if battery drops below 20%."
}
2026-01-31 00:58:37 - delivery_system.agent_DeliveryMan1 - INFO - [VLM] parsed action: place_food_in_bag {'bag_cmd': 'order 6: 1 -> A'}
2026-01-31 00:58:37 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] placed pending food into bag for orders [6]


Starting action: DMAction(kind=<DMActionKind.PLACE_FOOD_IN_BAG: 'place_food_in_bag'>, data={'bag_cmd': 'order 6: 1 -> A'}, on_done=None)
DMAction(kind=<DMActionKind.PLACE_FOOD_IN_BAG: 'place_food_in_bag'>, data={'bag_cmd': 'order 6: 1 -> A'}, on_done=None)
[RL] step=6 info: {'sim_time': None, 'elapsed_steps': 6, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'vlm_decision', 'done0': 5, 'done1': 6}
[GRAB] py= MainThread qt_id= 140407724197696 has_app= True is_gui= True
invoker elapsed: 5.171876907348633 ok= True exc= None


2026-01-31 00:58:48 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "Order 6 is already in bag and due in 1 min; closest path is east to S1 then south to 5th road left to the dropoff at (-176.97,-125.79). Battery 38% is enough; energy fine. Prioritize quick delivery of #6, then return for #5 when ready.",
"action": "MOVE(-176.97m, -125.79m)",
"future_plan": "Upon arrival, DROP_OFF(oid=6, method=\"leave_at_door\"). Then ride back to restaurant at (-288.53m,-16.58m) to PICKUP order 5 when ready, bag it, and deliver to (-304.10m,-224.68m). Monitor battery; charge if below 20% after deliveries."
}
2026-01-31 00:58:48 - delivery_system.agent_DeliveryMan1 - INFO - [VLM] parsed action: move_to {'tx': -17697.0, 'ty': -12579.0, 'use_route': True, 'snap_cm': 120.0}
2026-01-31 00:58:48 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] move from (-287.18m, -16.71m) to (-176.97m, -125.79m) [mode=e-scooter, speed=600.0 cm/s, pace=normal]


Starting action: DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -17697.0, 'ty': -12579.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
DMAction(kind=<DMActionKind.MOVE_TO: 'move_to'>, data={'tx': -17697.0, 'ty': -12579.0, 'use_route': True, 'snap_cm': 120.0}, on_done=None)
[RL] step=7 info: {'sim_time': None, 'elapsed_steps': 7, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'vlm_decision', 'done0': 6, 'done1': 7}
[GRAB] py= MainThread qt_id= 140407724197696 has_app= True is_gui= True
invoker elapsed: 4.942969560623169 ok= True exc= None


2026-01-31 00:59:15 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] [VLM] raw output: {
"reasoning_and_reflection": "I arrived at order #6’s drop-off address with the food already bagged. Battery at 30% is sufficient for the short next leg. Order #5 is still being prepared but nearly ready. I should complete #6 immediately to avoid overtime penalties.",
"action": "DROP_OFF(oid=6, method=\"leave_at_door\")",
"future_plan": "Scoot back to the restaurant at (-288.53m,-16.58m) to pick up order #5 as soon as it’s ready, bag it, then head to its drop-off at (-304.10m,-224.68m). If scooter battery drops near 20%, stop by the nearby charger on 11th road to top up."}
2026-01-31 00:59:15 - delivery_system.agent_DeliveryMan1 - INFO - [VLM] parsed action: drop_off {'oid': 6, 'method': 'leave_at_door'}
2026-01-31 00:59:15 - delivery_system.agent_DeliveryMan1 - INFO - [Agent 1] dropped off order #6 (extra +1.50, stars=4) [time=4, food=4, method=5] [on_time=Y, temp=BAD, odor=OK, damage=OK]


Starting action: DMAction(kind=<DMActionKind.DROP_OFF: 'drop_off'>, data={'oid': 6, 'method': 'leave_at_door'}, on_done=None)
DMAction(kind=<DMActionKind.DROP_OFF: 'drop_off'>, data={'oid': 6, 'method': 'leave_at_door'}, on_done=None)
[RL] step=8 info: {'sim_time': None, 'elapsed_steps': 8, 'reward_info': {'base': 0.0}, 'termination_info': {}, 'mode': 'vlm_decision', 'done0': 7, 'done1': 8}
