## MAB Experiments (Paper Runs)
This notebook compares SP2 variants and several TPG variants under shared erasure sequences.

In [None]:
import sys, os
if os.path.basename(os.getcwd()) == 'notebooks':
    sys.path.insert(0, os.path.join(os.getcwd(), '..'))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from runners import run_episodes_with_same_erasures
from models import (
    FEEDBACK_BEACON,
    FEEDBACK_ACK_SUCCESS,
    FEEDBACK_NONE,
    FEEDBACK_NACK_ERASE,
    FEEDBACK_ALL,
)

np.set_printoptions(precision=3, suppress=True)

In [None]:
def make_eps_vector(m, values):
    """Build a length-``m`` piecewise-constant eps vector from four levels.

    ``values`` must unpack into exactly four entries.  Each of the first
    three levels fills ``m // 4`` consecutive slots; the fourth level fills
    the rest, so it absorbs the remainder when ``m`` is not a multiple of 4.

    Returns:
        A NumPy array with the levels laid out in the order given.
    """
    level_a, level_b, level_c, level_d = values
    quarter = m // 4
    counts = (quarter, quarter, quarter, m - 3 * quarter)

    entries = []
    for level, count in zip((level_a, level_b, level_c, level_d), counts):
        entries.extend([level] * count)
    return np.array(entries)


def summarize(vars_dict, energy_fb=0.1, energy_tx=1.0):
    """Summarize final regret & energy for a dict returned by run_episodes_with_same_erasures.

    Args:
        vars_dict: Mapping whose values are per-algorithm result dicts with
            the keys ``'name'``, ``'regret'`` (summed over axis 1, then
            totalled), ``'avg_tx'`` and ``'avg_fb'`` (only the last element
            of each is reported).  The mapping's keys are not used.
        energy_fb: Weight applied to the final ``avg_fb`` value.
        energy_tx: Weight applied to the final ``avg_tx`` value.

    Returns:
        pandas.DataFrame with one row per entry of ``vars_dict`` and the
        columns ``alg``, ``R_T``, ``TX_T``, ``FB_T``, ``E_T`` where
        ``E_T = energy_tx * TX_T + energy_fb * FB_T``.
    """
    rows = []
    for data in vars_dict.values():
        per_round = np.sum(data['regret'], axis=1)
        tx_final = data['avg_tx'][-1]
        fb_final = data['avg_fb'][-1]
        rows.append({
            'alg': data['name'],
            # The total equals the last entry of the cumulative sum, but is
            # computed without materialising the whole cumulative array.
            'R_T': np.sum(per_round),
            'TX_T': tx_final,
            'FB_T': fb_final,
            'E_T': energy_tx * tx_final + energy_fb * fb_final,
        })
    return pd.DataFrame(rows)


def summarize_multiE(vars_dict, ratios, energy_tx=1.0):
    """Summarize regret + TX/FB + multiple energy ratios r = E_fb/E_tx.

    Args:
        vars_dict: Mapping whose values are per-algorithm result dicts with
            the keys ``'name'``, ``'regret'`` (summed over axis 1, then
            totalled), ``'avg_tx'`` and ``'avg_fb'`` (only the last element
            of each is reported).  The mapping's keys are not used.
        ratios: Iterable of ratio values ``r``; each one adds a column named
            ``E_T_r{r}`` holding ``energy_tx * TX_T + r * FB_T``.
        energy_tx: Weight applied to the final ``avg_tx`` value.

    Returns:
        pandas.DataFrame with one row per entry of ``vars_dict`` and the
        columns ``alg``, ``R_T``, ``TX_T``, ``FB_T`` followed by one energy
        column per ratio, in the order the ratios were given.
    """
    rows = []
    for data in vars_dict.values():
        per_round = np.sum(data['regret'], axis=1)
        tx_final = data['avg_tx'][-1]
        fb_final = data['avg_fb'][-1]
        row = {
            'alg': data['name'],
            # The total equals the last entry of the cumulative sum, but is
            # computed without materialising the whole cumulative array.
            'R_T': np.sum(per_round),
            'TX_T': tx_final,
            'FB_T': fb_final,
        }
        for r in ratios:
            row[f'E_T_r{r}'] = energy_tx * tx_final + r * fb_final
        rows.append(row)
    return pd.DataFrame(rows)

# Shared experiment constants
import pickle

ENERGY_TX = 1.0
RATIOS = [0.0, 0.1, 1.0]   # r = E_fb / E_tx values we may report in tables
RNG_SEED = 12345

# Seed the global NumPy random state so every np.random.* draw made later in
# the notebook is reproducible.
np.random.seed(RNG_SEED)

# When launched from a directory named 'notebooks', results go to a 'results'
# folder next to it; otherwise they go to './results' under the working directory.
if os.path.basename(os.getcwd()) == 'notebooks':
    RESULTS_DIR = os.path.join(os.path.dirname(os.getcwd()), 'results')
else:
    RESULTS_DIR = './results'
os.makedirs(RESULTS_DIR, exist_ok=True)
print(f"Results will be saved to {os.path.abspath(RESULTS_DIR)}")

In [None]:
## Epsilon sampling (nominal + hard scenarios)
# We sample heterogeneous eps vectors once, reproducibly, and reuse them.

# Values of M for which a nominal eps vector is sampled.
Ms = [4, 20, 40]
M_hard = 40  # we only use the hard scenario for M=40

# Dedicated Generator for eps sampling, seeded with RNG_SEED; it is a separate
# stream from the global np.random state seeded elsewhere in the notebook.
rng_eps = np.random.default_rng(RNG_SEED)

def sample_eps_nominal(M, rng):
    """
    Draw a sorted nominal eps vector of length M.

    With q = M // 4, values are drawn uniformly, band by band in this order:
      - q values from [0.1, 0.5]
      - q values from [0.5, 0.8]
      - q values from [0.8, 0.95]
      - M - 3*q values from [0.95, 0.999]  (this band absorbs the remainder)
    The concatenated draws are returned in ascending order.
    """
    quarter = M // 4
    bands = (
        (0.1, 0.5, quarter),
        (0.5, 0.8, quarter),
        (0.8, 0.95, quarter),
        (0.95, 0.999, M - 3 * quarter),
    )
    # The list is built front to back, so the generator is consumed in the
    # same band order on every call.
    draws = [rng.uniform(low, high, size=count) for low, high, count in bands]
    return np.sort(np.concatenate(draws))


def sample_eps_hard_from_nominal(M, eps_nominal, rng):
    """
    Derive the 'hard' eps vector from a nominal one.

    With q = M // 4:
      - sort a copy of the nominal eps and drop its q smallest entries,
      - draw q fresh values uniformly from [0.5, 0.8] to take their place,
      - return the combined values in ascending order.

    Raises ValueError when q is 0.  The input array is not modified.
    """
    n_replace = M // 4
    if n_replace == 0:
        raise ValueError("M too small for 'hard' construction.")

    # np.sort returns a copy, so the caller's array is left untouched.
    kept = np.sort(eps_nominal)[n_replace:]
    fresh = rng.uniform(0.5, 0.8, size=n_replace)
    return np.sort(np.concatenate([fresh, kept]))


# Precompute and print eps vectors
eps_nominal = {}
eps_hard = {}

print("=== Nominal epsilon vectors ===")
for M in Ms:
    nominal_vec = sample_eps_nominal(M, rng_eps)
    eps_nominal[M] = nominal_vec
    print(f"M={M}, nominal eps:", nominal_vec, sep="\n")

# The hard vector is derived from the nominal vector of the same M and drawn
# from the same generator, after all nominal vectors have been sampled.
print("\n=== Hard epsilon vector (M=40) ===")
hard_vec = sample_eps_hard_from_nominal(M_hard, eps_nominal[M_hard], rng_eps)
eps_hard[M_hard] = hard_vec
print(f"M={M_hard}, hard eps:", hard_vec, sep="\n")

In [None]:
# Algorithm list (clear mapping to modes)
# Using ACK-SUCCESS as the chosen feedback scheme for SP2-Feedback and TPG.
# Each entry is a tuple. An optional 4th element carries the feedback constant;
# the experiment cells substitute FEEDBACK_NONE when an entry has no 4th element.
# NOTE(review): the meaning of the first three fields is defined by the runner — confirm there.
algs = [
    ('SAE', True, 'Scheduled'),                      # SP2 baseline (no feedback)
    ('SAE', True, 'Feedback', FEEDBACK_ACK_SUCCESS), # SP2 with feedback (ACK)
    ('SAE', True, 'TPG', FEEDBACK_ACK_SUCCESS),      # main TPG (ACK)
]

In [None]:
# For plots: color/style mapping
# Keyed by the algorithm names that appear in the plot legends.
# NOTE(review): the plotting cells visible in this notebook use their own
# `clrs` / `lstyles` lists rather than these dicts — confirm whether these are still needed.
colors = {'SP2':'tab:blue', 'SP2-Feedback':'tab:orange', 'TPG':'tab:green'}
linestyles = {'SP2':'-', 'SP2-Feedback':':', 'TPG':'--'}

### Quick smoke test (small T)
Set `RUN_SMALL` to True to validate wiring without long runs.

In [None]:
## Quick smoke test (small T)
RUN_SMALL = True

if RUN_SMALL:
    # Tiny configuration; the keys match the runner's keyword arguments.
    cfg = {'k':5, 'm':3, 'iters':200, 'episodes':5, 'var':1, 'mu':'random'}
    eps_vec = np.array([0.7, 0.9, 0.99])

    # One base action per eps entry, drawn from the global NumPy random state.
    smoke_base_actions = np.random.randint(cfg['k'], size=(cfg['m'],))
    # Entries without a 4th element fall back to FEEDBACK_NONE.
    smoke_fb_modes = [a[3] if len(a)==4 else FEEDBACK_NONE for a in algs]

    vars_small = run_episodes_with_same_erasures(
        algs,
        eps=eps_vec,
        base_actions=smoke_base_actions,
        feedback_mode=smoke_fb_modes,
        rng_seed=RNG_SEED,
        **cfg,
    )

    print("Summary at E_fb=0.1:")
    display(summarize(vars_small, energy_fb=0.1, energy_tx=ENERGY_TX))
    print("Summary for multiple E_fb/E_tx ratios:")
    display(summarize_multiE(vars_small, RATIOS, energy_tx=ENERGY_TX))

### Full experiments
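Toggle `RUN_FULL` to execute the paper-scale experiments. To change the values of M or the epsilon scenarios, edit `Ms` and the sampling functions in the epsilon-sampling cell above, then re-run it before this cell.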

In [None]:
## Full experiments (paper-scale simulations)
# Run once, then reuse in plotting cells.

# Reset global seed so full experiments are reproducible regardless of smoke test
np.random.seed(RNG_SEED)

os.environ['RUN_MAB_PARALLEL'] = '1'  # if helper_methods uses this flag

RUN_FULL = True

T_FULL = 50000
EPISODES_FULL = 100
K_FULL = 10   # passed to the runner as `k`; also the exclusive upper bound for base actions
VAR = 1.0
MU = 'random'
M_hard = 40   # the hard scenario is only run for this M

# Global caches
full_results = {}      # (M, 'nominal' or 'hard') -> vars_out
full_summary_rows = [] # for summary table (at a default E_fb/E_tx, say 0.1)
# Defined up front so later cells can test it even when RUN_FULL is False.
full_summary_df = None


def _run_full_scenario(M, eps_vec, eps_tag, fb_list):
    """Run one (M, eps scenario) experiment and record its results and summary.

    Draws the base actions from the global NumPy random state, runs all
    entries of ``algs`` through the runner, stores the output in
    ``full_results[(M, eps_tag)]`` and appends the tagged summary table
    (at energy_fb=0.1) to ``full_summary_rows``.
    """
    base_actions = np.random.randint(K_FULL, size=(M,))
    print(f"Running {eps_tag}: M={M}")
    vars_out = run_episodes_with_same_erasures(
        algs,
        iters=T_FULL,
        k=K_FULL,
        episodes=EPISODES_FULL,
        m=M,
        var=VAR,
        mu=MU,
        eps=eps_vec,
        base_actions=base_actions,
        feedback_mode=fb_list,
        rng_seed=RNG_SEED,
    )
    full_results[(M, eps_tag)] = vars_out

    df = summarize(vars_out, energy_fb=0.1, energy_tx=ENERGY_TX)
    df.insert(0, 'M', M)
    df.insert(1, 'eps_tag', eps_tag)
    full_summary_rows.append(df)


if RUN_FULL:
    # Entries of `algs` without a 4th element fall back to FEEDBACK_NONE.
    fb_list = [a[3] if len(a)==4 else FEEDBACK_NONE for a in algs]

    # --- Nominal scenario for every M ---
    # The nominal runs come first and the hard run last, so the base-action
    # draws consume the global random state in a fixed order.
    for M in Ms:
        _run_full_scenario(M, eps_nominal[M], 'nominal', fb_list)

    # --- Hard scenario only for M_hard ---
    if M_hard in eps_hard:
        _run_full_scenario(M_hard, eps_hard[M_hard], 'hard', fb_list)

    if full_summary_rows:
        full_summary_df = pd.concat(full_summary_rows, ignore_index=True)
        print("=== Full summary at E_fb/E_tx = 0.1 ===")
        display(full_summary_df)

# Convenience caches for plotting
cache_nominal = {M: full_results[(M, 'nominal')] for M in Ms if (M, 'nominal') in full_results}
cache_hard = full_results.get((M_hard, 'hard'), None)

In [None]:
# Save full experiment results to disk
save_path = os.path.join(RESULTS_DIR, 'full_results.pkl')
with open(save_path, 'wb') as f:
    pickle.dump({
        'full_results': full_results,
        'eps_nominal': eps_nominal,
        'eps_hard': eps_hard,
        'config': {
            'T': T_FULL, 'episodes': EPISODES_FULL, 'K': 10,
            'VAR': VAR, 'MU': MU, 'RNG_SEED': RNG_SEED,
        }
    }, f)
print(f"Saved full results to {os.path.abspath(save_path)}")

# Also save summary table
# The summary can be None (no rows were produced) or never assigned at all
# (the full-experiment cell ran with RUN_FULL = False); skip the CSV in both
# cases instead of failing after the pickle has already been written.
summary_df = globals().get('full_summary_df')
if summary_df is not None:
    summary_df.to_csv(os.path.join(RESULTS_DIR, 'full_summary.csv'), index=False)
    print("Saved summary CSV")
else:
    print("No summary table available; skipped summary CSV")

### Feedback scheme sweep (TPG and SP2-Feedback)

In [None]:
### Feedback scheme sweep (SP2-Feedback and TPG)

M_sweep = 40
T_sweep = 20000      # shorter horizon for ablation; can set = T_FULL if desired
EPISODES_sweep = 20  # fewer episodes for ablation

# Use the same "hard" epsilon vector as in the main M=40 hard experiment
if 'eps_hard' not in globals() or M_sweep not in eps_hard:
    raise RuntimeError("eps_hard not defined for M_sweep. Run the eps sampling cell first.")
eps_vec_sweep = eps_hard[M_sweep]

# Reproducible base actions for this sweep (own generator, independent of the
# global np.random state)
rng_base = np.random.default_rng(RNG_SEED + 10)
base_actions_sweep = rng_base.integers(10, size=(M_sweep,))

feedback_modes = [FEEDBACK_ALL, FEEDBACK_BEACON, FEEDBACK_ACK_SUCCESS, FEEDBACK_NACK_ERASE]

# Algorithms: SP2-Feedback and TPG; feedback_mode overridden per sweep
alg_feedback = [
    ('SAE', True, 'Feedback', None),  # SP2-Feedback; fb override set below
    ('SAE', True, 'TPG', None),       # TPG; fb override set below
]

feedback_sweep_rows = []

for fb_mode in feedback_modes:
    # Attach the chosen feedback mode as the 4th element in each alg tuple
    algs_sweep = [(a[0], a[1], a[2], fb_mode) for a in alg_feedback]

    vars_sweep = run_episodes_with_same_erasures(
        algs_sweep,
        iters=T_sweep,
        k=10,
        episodes=EPISODES_sweep,
        m=M_sweep,
        var=VAR,
        mu=MU,
        eps=eps_vec_sweep,
        base_actions=base_actions_sweep,
        # Pass the per-algorithm modes explicitly, as the other runner calls
        # in this notebook do, rather than relying on the runner's default.
        # NOTE(review): confirm how the runner combines this with the tuples' 4th element.
        feedback_mode=[a[3] for a in algs_sweep],
        rng_seed=RNG_SEED,
    )

    # Summarize for multiple E_fb/E_tx ratios (RATIOS) at this sweep setting
    df = summarize_multiE(vars_sweep, RATIOS, energy_tx=ENERGY_TX)
    df.insert(0, 'feedback_mode', fb_mode)
    feedback_sweep_rows.append(df)

if feedback_sweep_rows:
    feedback_sweep_df = pd.concat(feedback_sweep_rows, ignore_index=True)
    print("=== Feedback sweep summary (multiple E_fb/E_tx ratios) ===")
    display(feedback_sweep_df)
else:
    feedback_sweep_df = None

### Regret vs time (M=4,20,40)

In [None]:
# --- Load saved results (skip this cell if you just ran the experiments above) ---
# Uncomment the block below to reload from disk instead of re-running experiments.

# load_path = os.path.join(RESULTS_DIR, 'full_results.pkl')
# with open(load_path, 'rb') as f:
#     saved = pickle.load(f)
# full_results = saved['full_results']
# eps_nominal = saved['eps_nominal']
# eps_hard = saved['eps_hard']
# cache_nominal = {M: full_results[(M, 'nominal')] for M in Ms if (M, 'nominal') in full_results}
# cache_hard = full_results.get((40, 'hard'), None)
# print(f"Loaded results from {load_path}")

In [None]:
### Regret vs time for M=4,20,40 (nominal eps)

if not cache_nominal:
    raise RuntimeError("cache_nominal is empty. Run the full experiments cell first.")

Ms_plot = [4, 20, 40]
subplot_names = ['(a)', '(b)', '(c)']

# --- Font size (ADJUST HERE) ---
FONT_SIZE = 14       # axis labels, legend
TICK_SIZE = 12       # tick labels
TITLE_SIZE = 14      # subplot titles (if used)

# Figure / axis settings
dpi = 300
linewidth = 2.5
x_upper = 1e4
y_upper = 8500

# Per-algorithm line appearance, aligned index-by-index with alg_order
clrs = ["tab:blue", "tab:orange", "tab:green"]
lstyles = ["solid", ":", "--"]
alg_order = ["SP2", "SP2-Feedback", "TPG"]

fig, axes = plt.subplots(1, 3, figsize=(15, 4.5), dpi=dpi)

for subplot_idx, M in enumerate(Ms_plot):
    ax = axes[subplot_idx]
    vars_plot = cache_nominal[M]

    # Re-key the results by their 'name' field so they can be drawn in alg_order.
    name_to_data = {vars_plot[alg]['name']: vars_plot[alg] for alg in algs}

    for alg_name, line_color, line_style in zip(alg_order, clrs, lstyles):
        # Sum the regret over axis 1, then accumulate along the remaining axis.
        cum_reg = np.cumsum(np.sum(name_to_data[alg_name]['regret'], axis=1))
        t_axis = np.arange(1, len(cum_reg) + 1)
        ax.plot(t_axis, cum_reg, linewidth=linewidth,
                color=line_color, linestyle=line_style, label=alg_name)

    # Only the left-most panel carries the y-axis label.
    if subplot_idx == 0:
        ax.set_ylabel("Regret ($R_t$)", fontsize=FONT_SIZE)

    ax.set_xlabel("Rounds ($t$)\n" + subplot_names[subplot_idx], fontsize=FONT_SIZE)
    ax.ticklabel_format(style='scientific', axis='both', scilimits=(0, 1))
    ax.tick_params(labelsize=TICK_SIZE)
    for axis_obj in (ax.xaxis, ax.yaxis):
        axis_obj.offsetText.set_fontsize(TICK_SIZE)
    ax.set_xlim([-10, x_upper])
    if y_upper is not None:
        ax.set_ylim([0, y_upper])
    ax.legend(fontsize=FONT_SIZE - 2)
    ax.grid(linewidth=0.15)

fig.tight_layout()
fig_path = os.path.join(RESULTS_DIR, 'k10_m4-20-40_nominal_jsait.png')
fig.savefig(fig_path, dpi=dpi, bbox_inches='tight')
print(f"Saved: {fig_path}")
plt.show()

### Regret vs time (hard instance, M=40, high eps)

In [None]:
### Regret vs time (M=40, hard eps)

if cache_hard is None:
    raise RuntimeError("cache_hard is empty. Run the full experiments cell first.")

# Reuse the font sizes from the nominal-plot cell when it has been run;
# otherwise fall back to the same defaults so this cell also works on its own.
FONT_SIZE = globals().get('FONT_SIZE', 14)   # axis labels, legend
TICK_SIZE = globals().get('TICK_SIZE', 12)   # tick labels

# Figure / axis settings
dpi = 300
linewidth = 2.5
x_upper = 1e4
y_upper = 8900

# Per-algorithm line appearance, aligned index-by-index with alg_order
clrs = ["tab:blue", "tab:orange", "tab:green"]
lstyles = ["solid", ":", "--"]
alg_order = ["SP2", "SP2-Feedback", "TPG"]

vars_plot = cache_hard

fig, ax = plt.subplots(figsize=(5.5, 4.5), dpi=dpi)

# Re-key the results by their 'name' field so they can be drawn in alg_order.
name_to_data = {}
for alg in algs:
    data = vars_plot[alg]
    name_to_data[data['name']] = data

for i, alg_name in enumerate(alg_order):
    data = name_to_data[alg_name]
    # Sum the regret over axis 1, then accumulate along the remaining axis.
    reg = np.sum(data['regret'], axis=1)
    cum_reg = np.cumsum(reg)
    t_axis = np.arange(1, cum_reg.shape[0] + 1)

    ax.plot(t_axis, cum_reg, linewidth=linewidth,
            color=clrs[i], linestyle=lstyles[i], label=alg_name)

ax.set_xlabel("Rounds ($t$)", fontsize=FONT_SIZE)
ax.set_ylabel("Regret ($R_t$)", fontsize=FONT_SIZE)
ax.ticklabel_format(style='scientific', axis='both', scilimits=(0, 1))
ax.tick_params(labelsize=TICK_SIZE)
ax.xaxis.offsetText.set_fontsize(TICK_SIZE)
ax.yaxis.offsetText.set_fontsize(TICK_SIZE)
ax.set_xlim([-10, x_upper])
if y_upper is not None:
    ax.set_ylim([0, y_upper])
ax.legend(fontsize=FONT_SIZE - 2)
ax.grid(linewidth=0.15)

fig.tight_layout()
fig_path = os.path.join(RESULTS_DIR, 'k10_m40_eps_hard_jsait.png')
fig.savefig(fig_path, dpi=dpi, bbox_inches='tight')
print(f"Saved: {fig_path}")
plt.show()