# Simulación determinista en Lurigancho

Este cuaderno ejecuta una corrida única del checkpoint final (`ckpt_step3_lurigancho_fixed.pt`) sobre el escenario de validación completo. El render toma como referencia el estilo de `mapeado.ipynb`: se muestra la grilla completa, los POIs y la trayectoria de cada UAV, además de métricas agregadas y el video de toda la secuencia.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from IPython.display import display, Markdown, Image
import imageio.v2 as imageio

import sys

# Make the project packages importable no matter which folder the notebook is
# launched from: probe the working directory and its two nearest ancestors for
# an `envgen` directory and put the first match on `sys.path`.
PROJECT_ROOT = Path().resolve()
candidates = [PROJECT_ROOT, PROJECT_ROOT.parent, PROJECT_ROOT.parent.parent]

# When no candidate contains `envgen`, fall back to the working directory.
path_str = str(
    next((root for root in candidates if (root / 'envgen').exists()), PROJECT_ROOT)
)
if path_str not in sys.path:
    sys.path.append(path_str)

from envgen.config import load_config
from envgen.viz import plot_grid_with_base_pois
from marl.envs import load_lurigancho_map, load_lurigancho_fixed_data, build_lurigancho_fixed_episode
from marl.eval_marl import build_agent_for_scenarios, clone_instance, MarlActorPolicy
from marl.train_marl import RewardWeights, MarlEnv


In [None]:
# Input artefacts, resolved relative to the notebook's working directory.
CONFIG_PATH = Path("config.json")
SCENARIO_PATH = Path("lurigancho_scenario.json")
POIS_PATH = Path("lurigancho_pois_val.json")

# Seed for the NumPy generator handed to the episode builder.
SIM_SEED = 2025

# The checkpoint is read from, and the GIF written to, the same results folder.
_RESULTS_DIR = Path("results")
CHECKPOINT_PATH = _RESULTS_DIR / "ckpt_step3_lurigancho_fixed.pt"
VIDEO_PATH = _RESULTS_DIR / "lurigancho_single_run.gif"

# Create the output folder up front so the export cell cannot fail on it.
VIDEO_PATH.parent.mkdir(parents=True, exist_ok=True)


In [None]:
# Load the configuration, the map and the fixed validation POIs, then build
# the episode with a generator seeded from SIM_SEED.
cfg = load_config(CONFIG_PATH)
map_data = load_lurigancho_map(SCENARIO_PATH)
fixed_data = load_lurigancho_fixed_data(POIS_PATH)
rng = np.random.default_rng(SIM_SEED)
episode, ei_map, hooks = build_lurigancho_fixed_episode(
    map_data,
    fixed_data,
    cfg,
    rng,
    split="val",
)

n_pois = len(episode["pois"])
n_uavs = len(episode["uavs"])

# One Markdown bullet per line; every line (including the last) ends in "\n".
summary_lines = [
    "**Instancia determinista lista**",
    f"- Celdas: {map_data.rows} x {map_data.cols}",
    f"- Base: {map_data.base_xy}",
    f"- POIs cargados: {n_pois}",
    f"- UAVs disponibles: {n_uavs}",
]
summary = "".join(f"{line}\n" for line in summary_lines)
display(Markdown(summary))


In [None]:
# The plotting helper is given plain dicts, so copy the relevant attributes
# off each POI object (key order matches the tuple below).
_POI_FIELDS = ("x", "y", "priority", "dwell_ticks")
poi_dicts = [
    {field: getattr(poi, field) for field in _POI_FIELDS}
    for poi in episode["pois"]
]

# Static overview of the grid, the base and every validation POI.
plot_grid_with_base_pois(
    episode["grid"],
    episode["base_xy"],
    poi_dicts,
    title="Mapa + POIs de validación",
)


In [None]:
# Rebuild the agent from the saved checkpoint. The episode is passed to the
# builder as well (presumably to size the networks — confirm in marl.eval_marl).
agent = build_agent_for_scenarios(episode, str(CHECKPOINT_PATH))
# NOTE(review): deterministic=True presumably selects actions greedily instead
# of sampling them — confirm against MarlActorPolicy.
policy = MarlActorPolicy(agent, deterministic=True)
# Switch both networks to evaluation mode for inference
# (assumes actor/critic are torch.nn.Module instances — TODO confirm).
agent.actor.eval()
agent.critic.eval()


def make_env():
    """Create a fresh ``MarlEnv`` over an independent clone of ``episode``.

    Before the environment is built, the module-level ``ei_map`` has its
    dynamic state reset with the flattened index (``y * cols + x``) of every
    POI in the clone. The same ``ei_map`` and ``hooks`` objects are shared by
    every environment this function returns.

    Returns:
        A ``MarlEnv`` in ``"lurigancho_fixed"`` mode with ``ignore_horizon=True``.
    """
    instance = clone_instance(episode)

    n_cols = map_data.cols
    flat_poi_cells = []
    for poi in instance["pois"]:
        flat_poi_cells.append(poi.y * n_cols + poi.x)
    ei_map.reset_dynamic(flat_poi_cells)

    env_kwargs = {
        "global_obs": ei_map,
        "hooks": hooks,
        "env_mode": "lurigancho_fixed",
        "ignore_horizon": True,
    }
    return MarlEnv(instance, RewardWeights(), **env_kwargs)


# Smoke-build one environment and report the horizon it exposes.
env = make_env()
horizon_note = f"Ambiente inicializado. Horizon ticks: {env.horizon_ticks}."
display(Markdown(horizon_note))


In [None]:
def compute_status(env):
    """Summarise service progress and fleet energy for the current state.

    Args:
        env: Object exposing ``pois`` (items with a ``served`` flag and an
            optional ``violated`` flag) and ``uavs`` (items with an ``E``
            attribute).

    Returns:
        Dict with ``total_pois``, ``served``, ``coverage`` (served / total,
        ``0.0`` when there are no POIs), ``violations`` and ``avg_energy``
        (mean of ``E`` over the UAVs, ``0.0`` when there are none).
    """
    pois = env.pois
    n_total = len(pois)
    n_served = sum(bool(poi.served) for poi in pois)
    # POIs without a `violated` attribute count as not violated.
    n_violated = sum(bool(getattr(poi, "violated", False)) for poi in pois)

    if env.uavs:
        mean_energy = float(np.mean([uav.E for uav in env.uavs]))
    else:
        mean_energy = 0.0

    return {
        "total_pois": n_total,
        "served": n_served,
        # max(..., 1) avoids a division by zero on an empty POI list.
        "coverage": n_served / max(n_total, 1),
        "violations": n_violated,
        "avg_energy": mean_energy,
    }


def render_env_frame(env, step_idx, trajectories=None):
    """Render the current environment state into an RGB image array.

    Draws the grid, pending and served POIs, the base marker, every UAV with
    its label and, when provided, the path each UAV has flown so far.

    Args:
        env: Environment exposing ``grid``, ``pois``, ``uavs``, ``base_xy``
            and ``tick``.
        step_idx: Step counter shown in the frame title.
        trajectories: Optional mapping ``uid -> sequence of positions``. Each
            position is indexed like ``u.pos``: element 0 is plotted on the
            vertical axis and element 1 on the horizontal axis.

    Returns:
        A ``uint8`` array of shape ``(height, width, 3)`` that owns its data.
    """
    status = compute_status(env)
    H, W = env.grid.shape
    bg = np.where(env.grid, 0.25, 0.95)
    fig, ax = plt.subplots(figsize=(14, 6))
    # Close the figure even if drawing fails, so figures do not pile up in
    # pyplot's registry across a long episode.
    try:
        ax.imshow(bg, cmap="gray", origin="upper")

        # Pending POIs are drawn first so the legend order stays stable.
        poi_groups = (
            ("POI pendiente", "tomato", [p for p in env.pois if not p.served]),
            ("POI servido", "mediumseagreen", [p for p in env.pois if p.served]),
        )
        for label, color, group in poi_groups:
            if group:
                ax.scatter(
                    [p.x for p in group],
                    [p.y for p in group],
                    s=30,
                    c=color,
                    edgecolors="black",
                    linewidths=0.3,
                    label=label,
                )

        # NOTE(review): base_xy is unpacked as (row, col) here, mirroring how
        # u.pos is unpacked below — confirm against the env's convention.
        by, bx = env.base_xy
        ax.scatter(
            [bx],
            [by],
            marker="s",
            s=120,
            facecolors="none",
            edgecolors="deepskyblue",
            linewidths=2.2,
            label="Base",
        )

        colors = plt.cm.tab10.colors
        for idx, u in enumerate(env.uavs):
            uav_color = colors[idx % len(colors)]
            y, x = u.pos
            ax.scatter(
                [x],
                [y],
                marker="^",
                s=140,
                facecolors=uav_color,
                edgecolors="black",
                linewidths=0.6,
            )
            ax.text(x + 0.2, y + 0.2, f"U{u.uid}", color="white", fontsize=8, weight="bold")
            if trajectories and u.uid in trajectories:
                path = trajectories[u.uid]
                xs = [pos[1] for pos in path]
                ys = [pos[0] for pos in path]
                ax.plot(xs, ys, color=uav_color, linewidth=1.2, alpha=0.6)

        # Minor ticks on the cell borders drive the thin grid overlay; major
        # ticks are cleared so no tick labels are shown.
        ax.set_xticks(np.arange(-0.5, W, 1), minor=True)
        ax.set_yticks(np.arange(-0.5, H, 1), minor=True)
        ax.grid(which="minor", color="black", linewidth=0.15, alpha=0.25)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim(-0.5, W - 0.5)
        ax.set_ylim(H - 0.5, -0.5)
        ax.set_title(f"Paso {step_idx} | tick {env.tick} | cobertura {status['coverage']:.1%}")
        ax.legend(loc="upper right", frameon=True)
        fig.tight_layout()
        fig.canvas.draw()

        # `tostring_rgb()` was deprecated in Matplotlib 3.8 and removed in
        # 3.10. Read the RGBA buffer instead, which already carries the true
        # pixel dimensions, drop the alpha channel and copy so the frame does
        # not reference the canvas once the figure is closed.
        rgba = np.asarray(fig.canvas.buffer_rgba())
        return rgba[..., :3].copy()
    finally:
        plt.close(fig)


def run_logged_episode(env, policy, *, log_interval=10, max_steps=None):
    """Roll out ``policy`` on ``env``, recording frames, logs and trajectories.

    One frame is rendered before every step and one more after the last step,
    so ``frames`` holds ``steps + 1`` images. A progress line is printed and a
    log row stored every ``log_interval`` steps and on the final step.

    Args:
        env: Environment providing ``observations()``, ``step(actions)``
            (returning a 4-tuple whose third item is the done flag), ``uavs``,
            ``pois``, ``tick`` and ``rtb_count``.
        policy: Object providing ``reset()`` and ``select_actions(env, obs)``.
        log_interval: Positive number of steps between log rows.
        max_steps: Optional cap on the number of steps. ``None`` (default)
            runs until the environment reports done. Useful as a safety net
            when the environment has no horizon of its own.

    Returns:
        Tuple ``(logs, frames, trajectories)``: a list of log dicts, a list of
        RGB frames and a dict ``uid -> list of position tuples``.

    Raises:
        ValueError: If ``log_interval`` is smaller than 1 or ``max_steps`` is
            negative.
    """
    # Fail before any work is done instead of hitting a ZeroDivisionError
    # after the first step has already been simulated and rendered.
    if log_interval < 1:
        raise ValueError(f"log_interval must be >= 1, got {log_interval!r}")
    if max_steps is not None and max_steps < 0:
        raise ValueError(f"max_steps must be >= 0 or None, got {max_steps!r}")

    observations = env.observations()
    policy.reset()
    done = False
    step_idx = 0
    frames = []
    logs = []
    # Store positions as tuples so that an environment updating `u.pos` in
    # place cannot retroactively change the recorded path.
    trajectories = {u.uid: [tuple(u.pos)] for u in env.uavs}

    while not done:
        if max_steps is not None and step_idx >= max_steps:
            print(f"Simulación detenida: se alcanzó max_steps={max_steps} sin terminar el episodio.")
            break

        frames.append(render_env_frame(env, step_idx, trajectories=trajectories))
        actions = policy.select_actions(env, observations)
        observations, _, done, _ = env.step(actions)
        step_idx += 1
        for u in env.uavs:
            trajectories[u.uid].append(tuple(u.pos))

        # Always log the last simulated step, whether the episode finished or
        # the step cap was hit (`step_idx == None` is simply False).
        if step_idx % log_interval == 0 or done or step_idx == max_steps:
            status = compute_status(env)
            logs.append(
                {
                    "step": step_idx,
                    "tick": env.tick,
                    "served": status["served"],
                    "coverage": status["coverage"],
                    "violations": status["violations"],
                    "avg_uav_energy": status["avg_energy"],
                    "rtb_events": env.rtb_count,
                }
            )
            print(
                f"Paso {step_idx:03d} | tick={env.tick:05d} | servidos={status['served']}/{status['total_pois']} "
                f"| cobertura={status['coverage']:.1%} | energía_prom={status['avg_energy']:.1f} | rtb={env.rtb_count}"
            )

    # Closing frame showing the state after the last step.
    frames.append(render_env_frame(env, step_idx, trajectories=trajectories))
    return logs, frames, trajectories


def summarize_episode(env):
    """Collect end-of-episode metrics from ``env``.

    Args:
        env: Finished environment exposing ``pois``, ``uavs``,
            ``energy_spent``, ``steps_ortho``, ``steps_diag``, ``L_o``,
            ``L_d``, ``tick``, ``rtb_count`` and ``action_hist``.

    Returns:
        Tuple ``(metrics, extras)``. ``metrics`` is a flat dict of aggregate
        values; ``extras`` holds the per-UAV distance and energy breakdowns
        and the action histogram with ``int`` keys and values.
    """

    def _flown_distance(uid):
        # Orthogonal and diagonal steps each contribute their own length.
        return env.steps_ortho[uid] * env.L_o + env.steps_diag[uid] * env.L_d

    pois = env.pois
    uavs = env.uavs

    n_served = 0
    n_violated = 0
    tardiness = []
    for poi in pois:
        if poi.served:
            n_served += 1
        # Missing attributes default to "not violated" / zero tardiness.
        if getattr(poi, "violated", False):
            n_violated += 1
        tardiness.append(float(getattr(poi, "tardiness", 0.0)))

    mean_tardiness = float(np.mean(tardiness)) if tardiness else 0.0
    total_energy = sum(env.energy_spent.values())

    metrics = {
        "coverage": n_served / max(len(pois), 1),
        "served": n_served,
        "violations": float(n_violated),
        "avg_tardiness": mean_tardiness,
        "energy_per_uav": total_energy / max(len(uavs), 1),
        "distance_total_m": sum(_flown_distance(uav.uid) for uav in uavs),
        "duration_ticks": float(env.tick),
        "rtb_events": int(env.rtb_count),
    }
    extras = {
        "distance_per_uav": {uav.uid: _flown_distance(uav.uid) for uav in uavs},
        "energy_per_uav_detail": dict(env.energy_spent),
        "action_hist": {
            int(action): int(count) for action, count in env.action_hist.items()
        },
    }
    return metrics, extras


In [None]:
def _show_section(heading, payload):
    """Display a Markdown heading followed by the object it introduces."""
    display(Markdown(heading))
    display(payload)


# Run the episode on a fresh environment, then gather its metrics.
env = make_env()
log_rows, frames, trajectories = run_logged_episode(env, policy, log_interval=10)
metrics, extras = summarize_episode(env)

log_df = pd.DataFrame(log_rows)
_show_section("**Log cada 10 pasos**", log_df)

_show_section("**Métricas del episodio**", pd.DataFrame([metrics]).round(3))

# One row per UAV id; each column comes from one of the per-UAV breakdowns.
per_uav_sources = {
    "distance_m": extras["distance_per_uav"],
    "energy_spent": extras["energy_per_uav_detail"],
}
per_uav_df = pd.DataFrame(
    {column: pd.Series(values) for column, values in per_uav_sources.items()}
)
per_uav_df.index.name = "uav_id"
_show_section("**Detalle por UAV**", per_uav_df.round(2))

action_hist = pd.Series(extras["action_hist"]).sort_index()
_show_section("**Historial de acciones ejecutadas**", action_hist)


In [None]:
# Write the collected frames as an animated GIF and embed it in the notebook.
if not frames:
    display(Markdown("No se generaron cuadros para el video."))
else:
    # NOTE(review): recent imageio releases may interpret the GIF `duration`
    # in milliseconds rather than seconds — confirm that 0.45 yields the
    # intended per-frame delay with the installed version.
    imageio.mimsave(VIDEO_PATH, frames, duration=0.45)
    display(Markdown(f"Video guardado en `{VIDEO_PATH}`"))
    display(Image(filename=str(VIDEO_PATH)))
