In [None]:
## Multi-Agent Highway Platoon MDP — New simulations workbook

In [None]:
# Necessary imports
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation, PillowWriter
import sqlite3
import importlib.util, sys
from pathlib import Path
import mdp_module as mdp_mod
from highway_platoon import (
    HighwayMultiAgentClean, simulate_with_platoon,
    deterministic_policy_2actions, mixed_policy_2actions
)
import pandas as pd

In [None]:
def compare_groups(result: dict, *, N: int, platoon_indices: list):
    """
    Partition per-agent simulation metrics into a 'platoon' group and an 'others' group.

    Parameters
    ----------
    result : dict
        Must provide "labels_hist" and "rewards_hist", both convertible to 2-D
        arrays laid out as (time, agent). labels_hist must have exactly one more
        time row than rewards_hist; its first row is discarded before rates are
        computed. A label of 1 is counted as a success, 2 as a collision.
    N : int
        Total number of agents (number of columns expected in both histories).
    platoon_indices : list
        Agent indices belonging to the platoon. Duplicates are ignored; every
        index must satisfy 0 <= index < N. All remaining agents form 'others'.

    Returns
    -------
    dict
        {
          'cum_reward_distribution': {'platoon': 1D array, 'others': 1D array},
              per-agent reward summed over time,
          'timeseries': {
              'success':   {'platoon': 1D array, 'others': 1D array},
              'collision': {'platoon': 1D array, 'others': 1D array}
          },
              per-step fraction of the group's agents with that label
              (all zeros when the group is empty),
          'counts': {'platoon_size': int, 'others_size': int, 'T_steps': int}
        }

    Raises
    ------
    AssertionError
        If an index is out of range or the history shapes are inconsistent.
    """
    # Normalise the membership list: unique, ascending, integer.
    member_ids = np.array(sorted(set(platoon_indices)), dtype=int)
    in_range = (member_ids >= 0) & (member_ids < N)
    assert np.all(in_range), "platoon_indices out of range"
    other_ids = np.setdiff1d(np.arange(N), member_ids, assume_unique=True)

    label_hist = np.asarray(result["labels_hist"])
    reward_hist = np.asarray(result["rewards_hist"])

    # Shape validation before any slicing.
    assert label_hist.ndim == 2 and label_hist.shape[1] == N, f"labels_hist shape {label_hist.shape} inconsistent with N={N}"
    assert reward_hist.ndim == 2 and reward_hist.shape[1] == N, f"rewards_hist shape {reward_hist.shape} inconsistent with N={N}"
    assert label_hist.shape[0] == reward_hist.shape[0] + 1, "labels should have exactly one more time row than rewards"

    # The first label row is the initial (idle) state; rates only use the rest.
    stepped_labels = label_hist[1:]
    n_steps = stepped_labels.shape[0]

    def _share(group_labels, code):
        # Per-step fraction of agents carrying `code`; an empty group yields zeros
        # rather than the NaNs a mean over no columns would produce.
        if group_labels.size == 0:
            return np.zeros(n_steps)
        return (group_labels == code).mean(axis=1)

    groups = {"platoon": member_ids, "others": other_ids}
    group_labels = {name: stepped_labels[:, ids] for name, ids in groups.items()}
    group_rewards = {name: reward_hist[:, ids] for name, ids in groups.items()}

    return {
        "cum_reward_distribution": {
            name: block.sum(axis=0) for name, block in group_rewards.items()
        },
        "timeseries": {
            "success": {name: _share(block, 1) for name, block in group_labels.items()},
            "collision": {name: _share(block, 2) for name, block in group_labels.items()},
        },
        "counts": {
            "platoon_size": int(member_ids.size),
            "others_size": int(other_ids.size),
            "T_steps": int(n_steps),
        },
    }

In [None]:
# Parameters

N = 10       # total number of agents
T = 100      # forwarded to simulate_with_platoon as T (presumably the horizon in steps — confirm)
seed = 1     # forwarded to simulate_with_platoon as seed

# Geometry
num_lanes_total = 4
lane_directions = [1, 1, -1, -1]   # one entry per lane; presumably +1 / -1 encode travel direction — confirm
lane_width = 3.75                  # presumably metres — TODO confirm
interference_max_range = 500.0     # presumably metres — TODO confirm

# Radio / propagation
noise_mw = 1e-9
pathloss_n_los = 2.0
pathloss_n_nlos = 3.5
desired_link_distance_m = 10.0

# Initial spacing
min_initial_gap = 2.0


# BLER lookup table: a CSV file, handed to the simulator as bler_csv_path
bler_table_path = 'sinr_bler_lookup_table.csv'

platoon_idx = list(range(6))           # first 6 agents form the platoon

# Parameters to sweep
alphas = np.linspace(0.0, 1.0, 6)      # 6 evenly spaced values: 0.0, 0.2, ..., 1.0
results_platoon = []                   # one summary record per alpha (platoon group)
results_others = []                    # one summary record per alpha (remaining agents)

def _summarize_group(alpha, cum_rewards, succ_rates):
    """Condense one group's metrics for a single alpha into a flat record.

    Parameters
    ----------
    alpha : float
        Sweep value the record belongs to (stored unchanged).
    cum_rewards : array-like
        Per-agent cumulative rewards of the group.
    succ_rates : array-like
        Per-step success rate of the group.

    Returns
    -------
    dict
        Keys "alpha", "mean", "p10", "p90" (over cum_rewards) and
        "succ_mean", "succ_p10", "succ_p90" (over succ_rates); all statistics
        are plain Python floats.
    """
    return {
        "alpha": alpha,
        "mean": float(np.mean(cum_rewards)),
        "p10":  float(np.percentile(cum_rewards, 10)),
        "p90":  float(np.percentile(cum_rewards, 90)),
        "succ_mean": float(np.mean(succ_rates)),
        "succ_p10":  float(np.percentile(succ_rates, 10)),
        "succ_p90":  float(np.percentile(succ_rates, 90)),
    }


# Simulator arguments that do not depend on alpha; built once, reused every run.
# The same seed is used for every alpha, so runs differ only through the policy.
_sim_kwargs = dict(
    N=N, T=T, seed=seed,
    bler_csv_path=bler_table_path,
    num_lanes_total=num_lanes_total,
    lane_directions=lane_directions,
    lane_width=lane_width,
    interference_max_range=interference_max_range,
    noise_mw=noise_mw,
    pathloss_n_los=pathloss_n_los,
    pathloss_n_nlos=pathloss_n_nlos,
    desired_link_distance_m=desired_link_distance_m,
    min_initial_gap=min_initial_gap,
    platoon_indices=platoon_idx,
)

for alpha in alphas:
    # -----------------------------------------------
    # Define platoon and others policies for this α
    # -----------------------------------------------
    # Platoon: after success, weight alpha on res0 (stay) and (1-alpha) on res1.
    # After collision the weights are swapped (more switching when alpha is large).
    # NOTE(review): the entries are probability pairs, yet they are passed to
    # deterministic_policy_2actions while mixed_policy_2actions is imported and
    # unused — confirm which builder is intended for stochastic policies.
    pi_platoon = deterministic_policy_2actions({
        0: (1, 0),                 # idle → all weight on resource 0
        1: (alpha, 1 - alpha),     # after success → bias controlled by α
        2: (1 - alpha, alpha),     # after collision → reverse bias
    })
    # Others: equal weight on both resources in every state (no bias).
    pi_others = deterministic_policy_2actions({
        0: (0.5, 0.5),
        1: (0.5, 0.5),
        2: (0.5, 0.5),
    })

    # -------------------------------
    # Run simulation
    # -------------------------------
    result = simulate_with_platoon(
        **_sim_kwargs,
        pi_platoon=pi_platoon,
        pi_others=pi_others,
    )

    # Group comparison
    summary = compare_groups(result, N=N, platoon_indices=platoon_idx)

    # Pull cumulative reward distributions
    cum_rewards_platoon = summary["cum_reward_distribution"]["platoon"]
    cum_rewards_others  = summary["cum_reward_distribution"]["others"]

    # Pull success-rate time series
    ts = summary["timeseries"]
    succ_rates_platoon = ts["success"]["platoon"]
    succ_rates_others  = ts["success"]["others"]

    # One summary record per group for this α
    results_platoon.append(_summarize_group(alpha, cum_rewards_platoon, succ_rates_platoon))
    results_others.append(_summarize_group(alpha, cum_rewards_others, succ_rates_others))


In [None]:
# Summary tables (one row per alpha, ascending) reused by the plotting cell
df_platoon = (
    pd.DataFrame(results_platoon)
    .sort_values(by="alpha")
    .reset_index(drop=True)
)
df_others = (
    pd.DataFrame(results_others)
    .sort_values(by="alpha")
    .reset_index(drop=True)
)

# Plain-text dump of both tables
for heading, frame in (("Platoon summary:\n", df_platoon), ("Others summary:\n", df_others)):
    print(heading, frame)

In [None]:
# Mean success rate of each group as a function of α, drawn on one set of axes.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(
    df_platoon["alpha"], df_platoon["succ_mean"],
    marker="o", label="Platoon success rate",
)
ax.plot(
    df_others["alpha"], df_others["succ_mean"],
    marker="s", label="Others success rate",
)
ax.set_xlabel("Alpha (bias after success)")
ax.set_ylabel("Mean success rate")
ax.legend()
ax.set_title("Platoon vs Others success rate vs α")
plt.show()
