In [29]:
# =========================
# Summarize PPO training metrics at selected iterations
# =========================
import os, numpy as np, pandas as pd

def summarize_ppo_training(
    logdir,
    selected_iterations=(2,6,8,10,12,14,16,20,25),
    out_prefix="ppo_training_metrics_selected"
):
    progress_csv = os.path.join(logdir, "progress.csv")
    if not os.path.exists(progress_csv):
        raise FileNotFoundError(f"progress.csv not found at: {progress_csv}")

    df = pd.read_csv(progress_csv)

    # Column handles (present across SB3 versions; fallbacks if missing)
    it_col   = "time/iterations"      if "time/iterations"      in df.columns else None
    ts_col   = "time/total_timesteps" if "time/total_timesteps" in df.columns else None
    rew_col  = "rollout/ep_rew_mean"  if "rollout/ep_rew_mean"  in df.columns else None
    pol_col  = "train/policy_gradient_loss" if "train/policy_gradient_loss" in df.columns else None
    val_col  = "train/value_loss"            if "train/value_loss"            in df.columns else None
    ent_col  = "train/entropy_loss"          if "train/entropy_loss"          in df.columns else None
    kl_col   = "train/approx_kl"             if "train/approx_kl"             in df.columns else None

    # Fallbacks (keep table usable even if some keys are absent)
    if it_col is None:
        df["__iter__"] = np.arange(1, len(df)+1)
        it_col = "__iter__"
    if ts_col is None:
        df["__ts__"] = np.arange(1, len(df)+1)
        ts_col = "__ts__"
    if rew_col is None:
        df["__rew__"] = np.nan
        rew_col = "__rew__"

    def nearest_row_for_iter(target_it):
        idx = (df[it_col] - target_it).abs().idxmin()
        row = df.loc[idx]
        return {
            "Iteration":   int(round(row[it_col])) if pd.notnull(row[it_col]) else int(target_it),
            "Timesteps":   int(round(row[ts_col])) if pd.notnull(row[ts_col]) else int(idx+1),
            "Ep. Reward":  float(row[rew_col])     if pd.notnull(row[rew_col]) else np.nan,
            # Report entropy_loss under a single "Loss" column to match your example
            "Loss":        float(row[ent_col])     if ent_col and pd.notnull(row[ent_col]) else np.nan,
            "Policy Loss": float(row[pol_col])     if pol_col and pd.notnull(row[pol_col]) else np.nan,
            "Value Loss":  float(row[val_col])     if val_col and pd.notnull(row[val_col]) else np.nan,
            "KL Div.":     float(row[kl_col])      if kl_col  and pd.notnull(row[kl_col])  else np.nan,
        }

    rows = [nearest_row_for_iter(it) for it in selected_iterations]
    out = pd.DataFrame(rows)

    # Format like your sample
    def fmt(x, d=5):
        if pd.isna(x): return ""
        if isinstance(x, (int, np.integer)): return f"{x:d}"
        return f"{x:.{d}f}"
    out_fmt = out.copy()
    out_fmt["Iteration"]   = out_fmt["Iteration"].apply(lambda v: fmt(v, 0))
    out_fmt["Timesteps"]   = out_fmt["Timesteps"].apply(lambda v: fmt(v, 0))
    out_fmt["Ep. Reward"]  = out_fmt["Ep. Reward"].apply(lambda v: fmt(v, 2))
    out_fmt["Loss"]        = out_fmt["Loss"].apply(lambda v: fmt(v, 5))
    out_fmt["Policy Loss"] = out_fmt["Policy Loss"].apply(lambda v: fmt(v, 5))
    out_fmt["Value Loss"]  = out_fmt["Value Loss"].apply(lambda v: fmt(v, 2))
    out_fmt["KL Div."]     = out_fmt["KL Div."].apply(lambda v: fmt(v, 5))

    # Save
    csv_path = f"{out_prefix}.csv"
    tex_path = f"{out_prefix}.tex"
    out_fmt.to_csv(csv_path, index=False)

    # LaTeX (simple tabular with caption/label)
    latex = out_fmt.to_latex(
        index=False,
        caption="Summary of the proposed PPO Agent’s Training Metrics Across Selected Iterations.",
        label="tab:ppo_training_metrics",
        escape=False
    )
    with open(tex_path, "w") as f:
        f.write(latex)

    return out_fmt, csv_path, tex_path

# ==== Usage ====
# Use your actual log directory:
LOGDIR = "./logs/ppo_seq"   # or "./logs/ppo_static"
table, csv_path, tex_path = summarize_ppo_training(LOGDIR)

print(table)
print("Saved:", csv_path)
print("Saved:", tex_path)


  Iteration Timesteps Ep. Reward      Loss Policy Loss Value Loss  KL Div.
0         2      4096      -0.77  -0.66532    -0.12411       6.50  0.02943
1         6     12288       1.72  -0.04092    -0.00471       1.54  0.00117
2         8     16384       1.76  -0.01346    -0.00242       1.45  0.00060
3        10     20480       1.69  -0.00701    -0.00078       1.37  0.00012
4        12     24576       1.78  -0.00771    -0.00091       1.26  0.00049
5        14     28672       1.87  -0.00686    -0.00124       1.37  0.00059
6        16     32768       1.79  -0.00652    -0.00098       1.35  0.00044
7        20     40960       1.67  -0.00403    -0.00073       1.27  0.00042
8        25     51200       1.72  -0.00624    -0.00062       1.31  0.00104
Saved: ppo_training_metrics_selected.csv
Saved: ppo_training_metrics_selected.tex


In [66]:
import os
import pandas as pd
import numpy as np

# Path to your PPO log folder (where progress.csv is saved)
logdir = "./logs/ppo_seq"  # change to your PPO run directory
progress_csv = os.path.join(logdir, "progress.csv")

# Read progress.csv
df = pd.read_csv(progress_csv)

# Columns to extract (adjust names if SB3 version changes)
cols_map = {
    "Time-steps": "time/total_timesteps",
    "Ep. Len": "rollout/ep_len_mean",
    "Ep. Reward": "rollout/ep_rew_mean",
    "Policy Grad. Loss": "train/policy_gradient_loss",
    "Value Loss": "train/value_loss",
    "Entropy Loss": "train/entropy_loss",
    "KL Divergence": "train/approx_kl"
}

# Create output DataFrame
out_df = pd.DataFrame()
for pretty, col in cols_map.items():
    if col in df.columns:
        out_df[pretty] = df[col]
    else:
        out_df[pretty] = np.nan  # fill if missing

# Compute Total Loss (sum of policy, value, and entropy losses)
out_df["Total Loss"] = (
    out_df["Policy Grad. Loss"].fillna(0) +
    out_df["Value Loss"].fillna(0) +
    out_df["Entropy Loss"].fillna(0)
)

# Optionally pick specific iterations (example: every 2nd row up to 25 rows)
selected_rows = [2,6,8,10,12,14,16,20,25]
summary_df = out_df.iloc[selected_rows].reset_index(drop=True)

# Save CSV and LaTeX table
summary_df.to_csv("ppo_training_summary.csv", index=False)
with open("ppo_training_summary.tex", "w") as f:
    f.write(summary_df.to_latex(index=False, caption="PPO Training Metrics Summary", label="tab:ppo_metrics", escape=False))

print(summary_df)


   Time-steps  Ep. Len  Ep. Reward  Policy Grad. Loss  Value Loss  \
0        6144     1.39     -0.1759          -0.141333    5.414640   
1       14336     1.56      1.6464          -0.004921    1.482717   
2       18432     1.44      1.7356          -0.002416    1.528543   
3       22528     1.68      1.8552          -0.001066    1.382684   
4       26624     1.34      1.4866          -0.000679    1.237726   
5       30720     1.40      1.6060          -0.000026    1.355595   
6       34816     1.54      1.7146          -0.000293    1.224432   
7       43008     1.52      1.8148          -0.000605    1.222519   
8       53248     1.70      1.9430          -0.000484    1.315318   

   Entropy Loss  KL Divergence  Total Loss  
0     -0.565204       0.038563    4.708103  
1     -0.021416       0.002808    1.456380  
2     -0.009052       0.001057    1.517075  
3     -0.005384       0.000518    1.376234  
4     -0.006442       0.000410    1.230606  
5     -0.007462       0.000164    1.348

In [74]:
# ============================================
# Journal-quality plots from ppo_training_metrics_selected.csv
# - Episode Reward vs. Training Steps
# - KL Divergence vs. Training Steps
# Exports: PNG (300 DPI) + PDF (vector)
# ============================================
import os
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

CSV_PATH   = "ppo_training_metrics_selected.csv"  # <- set if different
OUT_PREFIX = "ppo_selected"                       # output filename prefix
SMOOTH_FRAC = 0.3   # 0 = raw only; 0.2–0.4 works well for sparse points
SHOW_POINTS = True  # draw small markers at actual points

# ---- Matplotlib defaults (journal style) ----
mpl.rcParams.update({
    "savefig.dpi": 300,
    "font.size": 9,
    "axes.labelsize": 9,
    "axes.titlesize": 9,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    "legend.fontsize": 8,
    "pdf.fonttype": 42,
    "ps.fonttype": 42,
    "font.family": "serif",
})

def _rolling_centered(series: pd.Series, frac: float):
    """Centered rolling mean with a safe window for few points."""
    if not frac or frac <= 0 or len(series) < 5:
        return series
    w = max(3, int(round(len(series) * frac)))
    return series.rolling(w, center=True, min_periods=max(1, w//2)).mean()

def _pick(df: pd.DataFrame, candidates):
    """Pick the first existing column from candidates."""
    for c in candidates:
        if c in df.columns:
            return c
    return None

def _format_thousands(ax):
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, pos: f"{int(x):,}" if x == int(x) else f"{x:,.0f}"))

def _style_axes(ax, xlabel, ylabel):
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    _format_thousands(ax)
    ax.grid(True, lw=0.4, alpha=0.45)
    for spine in ["top", "right"]:
        ax.spines[spine].set_visible(False)

def _plot_series(x, y, ylabel, out_suffix):
    # Clean NaNs and sort
    m = ~(x.isna() | y.isna())
    x, y = x[m], y[m]
    if len(x) == 0:
        print(f"[Warn] No data for {ylabel}; skipping.")
        return
    order = np.argsort(x.values)
    x = x.iloc[order].astype(float)
    y = y.iloc[order].astype(float)

    y_s = _rolling_centered(y, SMOOTH_FRAC)

    fig = plt.figure(figsize=(3.5, 2.4))  # single-column figure
    ax = fig.add_subplot(111)

    # Raw (faint) + smoothed (bold)
    if SHOW_POINTS:
        ax.plot(x, y, lw=0.9, alpha=0.35, marker='o', markersize=3, label="raw")
    else:
        ax.plot(x, y, lw=0.9, alpha=0.35, label="raw")

    ax.plot(x, y_s if len(y_s) else y, lw=1.9, label="smoothed")

    # Labels, grid, legend
    _style_axes(ax, "Training steps", ylabel)
    ax.legend(loc="best", frameon=False)

    fig.tight_layout()
    fig.savefig(f"{OUT_PREFIX}_{out_suffix}.png", bbox_inches="tight")
    fig.savefig(f"{OUT_PREFIX}_{out_suffix}.pdf", bbox_inches="tight")
    plt.close(fig)
    print(f"[Saved] {OUT_PREFIX}_{out_suffix}.png / .pdf")

# ======== Load and resolve columns ========
if not os.path.exists(CSV_PATH):
    raise FileNotFoundError(f"CSV not found: {CSV_PATH}")
df = pd.read_csv(CSV_PATH)

steps_col = _pick(df, ["Timesteps", "Time-steps", "time/total_timesteps", "Steps"])
rew_col   = _pick(df, ["Ep. Reward", "Episode Reward", "rollout/ep_rew_mean"])
kl_col    = _pick(df, ["KL Divergence", "KL Div.", "train/approx_kl"])

# Fallback: if no steps column, use row index
if steps_col is None:
    df["__steps__"] = np.arange(1, len(df) + 1)
    steps_col = "__steps__"

# Cast numeric
for c in [steps_col, rew_col, kl_col]:
    if c is not None:
        df[c] = pd.to_numeric(df[c], errors="coerce")

# ======== Plot figures ========
if rew_col is not None:
    _plot_series(df[steps_col], df[rew_col], ylabel="Episode reward (mean)", out_suffix="epreward")
else:
    print("[Warn] No episode reward column found.")

if kl_col is not None:
    _plot_series(df[steps_col], df[kl_col], ylabel="Approx. KL divergence", out_suffix="kl")
else:
    print("[Warn] No KL divergence column found.")


[Saved] ppo_selected_epreward.png / .pdf
[Saved] ppo_selected_kl.png / .pdf


In [76]:
# ============================================
# PPO plots from ppo_training_metrics_selected.csv
# - Episode Reward vs. Training Steps
# - KL Divergence vs. Training Steps
# No smoothing, smaller labels, PNG 300 DPI + PDF
# ============================================
import os
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

CSV_PATH   = "ppo_training_metrics_selected.csv"  # change if needed
OUT_PREFIX = "ppo_selected"                       # output file prefix

# --- Compact journal-ish style (smaller labels) ---
mpl.rcParams.update({
    "savefig.dpi": 300,
    "font.size": 8,
    "axes.labelsize": 8,
    "axes.titlesize": 8,
    "xtick.labelsize": 7,
    "ytick.labelsize": 7,
    "legend.fontsize": 7,
    "pdf.fonttype": 42,
    "ps.fonttype": 42,
    "font.family": "serif",
})

def pick_col(df: pd.DataFrame, candidates):
    for c in candidates:
        if c in df.columns:
            return c
    return None

def format_thousands(ax):
    ax.xaxis.set_major_formatter(
        FuncFormatter(lambda x, pos: f"{int(x):,}" if x == int(x) else f"{x:,.0f}")
    )

def style_axes(ax, xlabel, ylabel):
    ax.set_xlabel(xlabel, labelpad=2)
    ax.set_ylabel(ylabel, labelpad=2)
    ax.grid(True, lw=0.35, alpha=0.45)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    format_thousands(ax)

def plot_line(x, y, ylabel, suffix):
    m = ~(x.isna() | y.isna())
    x, y = x[m], y[m]
    if len(x) == 0:
        print(f"[Warn] No data for {suffix}; skipping.")
        return
    order = np.argsort(x.values)
    x = x.iloc[order].astype(float)
    y = y.iloc[order].astype(float)

    fig = plt.figure(figsize=(3.3, 2.2))  # a bit smaller than before
    ax = fig.add_subplot(111)
    ax.plot(x, y, lw=1.4)                 # single clean line, no markers
    style_axes(ax, "Training steps", ylabel)
    fig.tight_layout(pad=0.6)
    fig.savefig(f"{OUT_PREFIX}_{suffix}.png", dpi=300, bbox_inches="tight")
    fig.savefig(f"{OUT_PREFIX}_{suffix}.pdf", bbox_inches="tight")
    plt.close(fig)
    print(f"[Saved] {OUT_PREFIX}_{suffix}.png / .pdf")

# ---- Load CSV ----
if not os.path.exists(CSV_PATH):
    raise FileNotFoundError(f"CSV not found: {CSV_PATH}")
df = pd.read_csv(CSV_PATH)

# Resolve columns (robust to name variants)
steps_col = pick_col(df, ["Timesteps", "Time-steps", "time/total_timesteps", "Steps"])
rew_col   = pick_col(df, ["Ep. Reward", "Episode Reward", "rollout/ep_rew_mean"])
kl_col    = pick_col(df, ["KL Divergence", "KL Div.", "train/approx_kl"])

# Fallback for steps
if steps_col is None:
    df["__steps__"] = np.arange(1, len(df) + 1)
    steps_col = "__steps__"

# Cast numeric
for c in [steps_col, rew_col, kl_col]:
    if c is not None:
        df[c] = pd.to_numeric(df[c], errors="coerce")

# --- Plot Episode Reward ---
if rew_col is not None:
    plot_line(df[steps_col], df[rew_col], ylabel="Episode reward (mean)", suffix="epreward")
else:
    print("[Warn] Episode reward column not found; skipping.")

# --- Plot KL Divergence ---
if kl_col is not None:
    plot_line(df[steps_col], df[kl_col], ylabel="Approx. KL divergence", suffix="kl")
else:
    print("[Warn] KL divergence column not found; skipping.")


[Saved] ppo_selected_epreward.png / .pdf
[Saved] ppo_selected_kl.png / .pdf
