# PPO Telemetry Quick Look

Load PPO NDJSON logs (see `docs/samples/ppo_conflict_telemetry.jsonl`) and plot core metrics.

In [None]:
from __future__ import annotations

import json
from pathlib import Path

# Relative path: resolved against the notebook's current working directory.
LOG_PATH = Path('..') / 'samples' / 'ppo_conflict_telemetry.jsonl'

# NDJSON: one JSON object per line. Decode explicitly as UTF-8 instead of
# relying on the platform's locale encoding, which may not be UTF-8.
records = [
    json.loads(line)
    for line in LOG_PATH.read_text(encoding='utf-8').splitlines()
    if line.strip()  # skip blank / whitespace-only lines
]
records[:3]  # peek

In [None]:
def _required_series(field):
    """Collect ``field`` from every record as floats; a missing key raises KeyError."""
    return [float(row[field]) for row in records]


def _optional_series(field):
    """Collect ``field`` from every record as floats, using 0.0 where the key is absent."""
    return [float(row.get(field, 0.0)) for row in records]


# One list per metric, in record order.
epochs = _required_series('epoch')
loss_total = _required_series('loss_total')
kl = _required_series('kl_divergence')
grad_norm = _required_series('grad_norm')
# NOTE(review): reward_sum reads the same 'baseline_reward_sum' key as
# baseline_reward below, so the two lists hold identical values. Confirm
# whether a different telemetry field was intended here.
reward_sum = _optional_series('baseline_reward_sum')
rivalry_mean = _required_series('conflict.rivalry_max_mean_avg')
baseline_reward = _optional_series('baseline_reward_sum')
epochs, loss_total, kl, grad_norm, rivalry_mean, baseline_reward

In [None]:
# matplotlib is optional: without it the cell prints the same three metrics as
# plain text instead of plotting them.
try:
    import matplotlib.pyplot as plt
except ModuleNotFoundError:
    plt = None

if plt is not None:
    fig, axes = plt.subplots(3, 1, figsize=(8, 9), sharex=True)
    # One stacked panel per metric on a shared epoch axis. The first panel
    # passes no colour, so it keeps matplotlib's default line colour.
    panels = (
        ('loss_total', loss_total, {}),
        ('kl_divergence', kl, {'color': 'tab:orange'}),
        ('grad_norm', grad_norm, {'color': 'tab:green'}),
    )
    for axis, (label, series, line_style) in zip(axes, panels):
        axis.plot(epochs, series, marker='o', **line_style)
        axis.set_ylabel(label)
    # Only the bottom panel carries the x-axis label.
    axes[2].set_xlabel('epoch')
    fig.tight_layout()
    plt.show()
else:
    print('matplotlib not available; showing tabular summary instead.')
    for epoch, loss, kl_val, grad in zip(epochs, loss_total, kl, grad_norm):
        print(f'Epoch {epoch:>3.0f}: loss_total={loss:.4f}, kl_divergence={kl_val:.4f}, grad_norm={grad:.4f}')

In [None]:
# Summarise the most recent record. An empty log would make every [-1] lookup
# raise IndexError, so report that case explicitly instead.
if not records:
    print('No telemetry records loaded; nothing to summarise.')
else:
    latest = records[-1]
    print(f"Data mode (latest): {latest['data_mode']}")
    print(f"Cycle id (latest): {latest['cycle_id']}")
    # Coerce to float like the other numeric fields so the .4f format also
    # accepts a value that was serialised as an int or a numeric string.
    print(f"Epoch duration (sec): {float(latest['epoch_duration_sec']):.4f}")
    # NOTE(review): reward_sum is built from the 'baseline_reward_sum' key in
    # the extraction cell, so this line repeats the baseline value printed
    # below. Confirm which field "Total reward sum" should report.
    print(f"Total reward sum: {reward_sum[-1]:.4f}")
    baseline_reward_sum = baseline_reward[-1] if baseline_reward else 0.0
    print(f'Baseline reward sum (final epoch): {baseline_reward_sum:.4f}')
    print(f'Rivalry max mean (final epoch): {rivalry_mean[-1]:.4f}')