In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np
import shutil
import torch
from pathlib import Path
from runner import Runner
import interest_evolution
# from full_slate_q_agent import FullSlateQAgent
# from ppo  import PPOAgentWrapper
from bandit import BanditAgentWrapper
from random_agent import RandomAgent

In [3]:
# Run on the GPU when CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
# One seed shared by the random number generators and by the environment
# config, so a fresh-kernel "Run All" can reproduce the logged numbers.
seed = 0
np.random.seed(seed)
# torch keeps its own RNG state; seeding NumPy alone does not make
# torch-based sampling reproducible.
torch.manual_seed(seed)

# Settings passed to interest_evolution.create_environment() by the
# experiment cells below. 'seed' reuses the value set above.
env_config = {
    'num_candidates': 10,
    'slate_size': 2,
    'resample_documents': True,
    'seed': seed,
}

In [5]:
# Root folder for the random-agent run.
tmp_base_dir = './logs/random'

# Train and eval logs live in fixed sub-folders of the root.
train_log_dir, eval_log_dir = (
    os.path.join(tmp_base_dir, phase) for phase in ('train', 'eval')
)

# Remove leftovers from an earlier run so old logs do not mix with new ones.
for log_dir in (train_log_dir, eval_log_dir):
    if not os.path.exists(log_dir):
        continue
    shutil.rmtree(log_dir)

# Make sure the root folder itself exists (no error if it already does).
Path(tmp_base_dir).mkdir(parents=True, exist_ok=True)

def create_random_agent(env, **kwargs):
    """Agent factory for the Runner: a RandomAgent over ``env``'s action space.

    Any extra keyword arguments are accepted and ignored.
    """
    action_space = env.action_space
    return RandomAgent(action_space=action_space)
    
# --- Initialize training runner ---
# A new environment is built from the shared env_config for this run.
runner_random = Runner(
    env=interest_evolution.create_environment(env_config),
    create_agent_fn=create_random_agent,
    base_dir=tmp_base_dir,
)

# --- Run training + evaluation ---
# Train first, then evaluate.
runner_random.run_training(num_iterations=100, max_training_steps=2000)
runner_random.run_evaluation(max_eval_episodes=5)

Training Episode:   2%|▏         | 2/100 [00:00<00:14,  6.57it/s]

[TRAIN][Step 2010] AvgLen: 35.26 | AvgRew: 74.30 | StdRew: 9.23 | Time/Step: 0.0001
[TRAIN][Step 4034] AvgLen: 34.90 | AvgRew: 74.98 | StdRew: 8.64 | Time/Step: 0.0001


Training Episode:   4%|▍         | 4/100 [00:00<00:14,  6.54it/s]

[TRAIN][Step 6072] AvgLen: 37.05 | AvgRew: 75.36 | StdRew: 7.20 | Time/Step: 0.0001
[TRAIN][Step 8095] AvgLen: 38.17 | AvgRew: 77.62 | StdRew: 11.40 | Time/Step: 0.0001


Training Episode:   6%|▌         | 6/100 [00:00<00:14,  6.61it/s]

[TRAIN][Step 10106] AvgLen: 37.94 | AvgRew: 75.27 | StdRew: 8.77 | Time/Step: 0.0001
[TRAIN][Step 12109] AvgLen: 35.77 | AvgRew: 74.48 | StdRew: 8.12 | Time/Step: 0.0001


Training Episode:   8%|▊         | 8/100 [00:01<00:14,  6.56it/s]

[TRAIN][Step 14131] AvgLen: 38.88 | AvgRew: 75.98 | StdRew: 9.92 | Time/Step: 0.0001
[TRAIN][Step 16163] AvgLen: 35.65 | AvgRew: 75.76 | StdRew: 9.01 | Time/Step: 0.0001


Training Episode:  10%|█         | 10/100 [00:01<00:14,  6.33it/s]

[TRAIN][Step 18164] AvgLen: 37.06 | AvgRew: 76.28 | StdRew: 9.93 | Time/Step: 0.0001
[TRAIN][Step 20213] AvgLen: 37.94 | AvgRew: 73.45 | StdRew: 8.21 | Time/Step: 0.0001


Training Episode:  12%|█▏        | 12/100 [00:01<00:13,  6.34it/s]

[TRAIN][Step 22222] AvgLen: 36.53 | AvgRew: 75.44 | StdRew: 11.36 | Time/Step: 0.0001
[TRAIN][Step 24240] AvgLen: 36.69 | AvgRew: 77.48 | StdRew: 9.58 | Time/Step: 0.0001


Training Episode:  14%|█▍        | 14/100 [00:02<00:13,  6.36it/s]

[TRAIN][Step 26256] AvgLen: 36.65 | AvgRew: 74.22 | StdRew: 8.07 | Time/Step: 0.0001
[TRAIN][Step 28282] AvgLen: 37.52 | AvgRew: 76.87 | StdRew: 8.73 | Time/Step: 0.0001


Training Episode:  16%|█▌        | 16/100 [00:02<00:13,  6.42it/s]

[TRAIN][Step 30325] AvgLen: 36.48 | AvgRew: 75.56 | StdRew: 8.37 | Time/Step: 0.0001
[TRAIN][Step 32370] AvgLen: 37.18 | AvgRew: 75.63 | StdRew: 9.21 | Time/Step: 0.0001


Training Episode:  18%|█▊        | 18/100 [00:02<00:13,  6.24it/s]

[TRAIN][Step 34379] AvgLen: 40.18 | AvgRew: 74.66 | StdRew: 7.22 | Time/Step: 0.0001
[TRAIN][Step 36403] AvgLen: 36.80 | AvgRew: 74.07 | StdRew: 8.22 | Time/Step: 0.0001


Training Episode:  20%|██        | 20/100 [00:03<00:12,  6.22it/s]

[TRAIN][Step 38429] AvgLen: 36.84 | AvgRew: 75.93 | StdRew: 8.01 | Time/Step: 0.0001
[TRAIN][Step 40444] AvgLen: 35.98 | AvgRew: 72.94 | StdRew: 8.20 | Time/Step: 0.0001


Training Episode:  22%|██▏       | 22/100 [00:03<00:12,  6.41it/s]

[TRAIN][Step 42453] AvgLen: 35.88 | AvgRew: 75.46 | StdRew: 10.00 | Time/Step: 0.0001
[TRAIN][Step 44455] AvgLen: 37.07 | AvgRew: 74.80 | StdRew: 8.37 | Time/Step: 0.0001


Training Episode:  24%|██▍       | 24/100 [00:03<00:11,  6.47it/s]

[TRAIN][Step 46464] AvgLen: 36.53 | AvgRew: 72.48 | StdRew: 7.49 | Time/Step: 0.0001
[TRAIN][Step 48478] AvgLen: 35.33 | AvgRew: 76.94 | StdRew: 8.19 | Time/Step: 0.0001


Training Episode:  26%|██▌       | 26/100 [00:04<00:11,  6.50it/s]

[TRAIN][Step 50478] AvgLen: 37.74 | AvgRew: 73.99 | StdRew: 9.63 | Time/Step: 0.0001
[TRAIN][Step 52511] AvgLen: 37.65 | AvgRew: 75.68 | StdRew: 9.30 | Time/Step: 0.0001


Training Episode:  28%|██▊       | 28/100 [00:04<00:11,  6.47it/s]

[TRAIN][Step 54539] AvgLen: 36.21 | AvgRew: 74.70 | StdRew: 7.77 | Time/Step: 0.0001
[TRAIN][Step 56556] AvgLen: 36.02 | AvgRew: 74.80 | StdRew: 6.61 | Time/Step: 0.0001


Training Episode:  30%|███       | 30/100 [00:04<00:10,  6.43it/s]

[TRAIN][Step 58585] AvgLen: 37.57 | AvgRew: 74.97 | StdRew: 8.43 | Time/Step: 0.0001
[TRAIN][Step 60601] AvgLen: 37.33 | AvgRew: 75.47 | StdRew: 8.34 | Time/Step: 0.0001


Training Episode:  32%|███▏      | 32/100 [00:04<00:10,  6.54it/s]

[TRAIN][Step 62625] AvgLen: 35.51 | AvgRew: 73.60 | StdRew: 8.58 | Time/Step: 0.0001
[TRAIN][Step 64626] AvgLen: 38.48 | AvgRew: 78.73 | StdRew: 10.06 | Time/Step: 0.0001


Training Episode:  34%|███▍      | 34/100 [00:05<00:10,  6.36it/s]

[TRAIN][Step 66637] AvgLen: 36.56 | AvgRew: 76.74 | StdRew: 9.00 | Time/Step: 0.0001
[TRAIN][Step 68672] AvgLen: 37.00 | AvgRew: 72.48 | StdRew: 9.17 | Time/Step: 0.0001


Training Episode:  36%|███▌      | 36/100 [00:05<00:09,  6.44it/s]

[TRAIN][Step 70675] AvgLen: 38.52 | AvgRew: 75.07 | StdRew: 8.39 | Time/Step: 0.0001
[TRAIN][Step 72685] AvgLen: 37.92 | AvgRew: 75.95 | StdRew: 8.10 | Time/Step: 0.0001


Training Episode:  38%|███▊      | 38/100 [00:05<00:09,  6.41it/s]

[TRAIN][Step 74705] AvgLen: 36.07 | AvgRew: 75.42 | StdRew: 7.49 | Time/Step: 0.0001
[TRAIN][Step 76716] AvgLen: 37.24 | AvgRew: 76.72 | StdRew: 10.27 | Time/Step: 0.0001


Training Episode:  40%|████      | 40/100 [00:06<00:09,  6.45it/s]

[TRAIN][Step 78725] AvgLen: 35.88 | AvgRew: 73.72 | StdRew: 8.55 | Time/Step: 0.0001
[TRAIN][Step 80730] AvgLen: 36.45 | AvgRew: 74.02 | StdRew: 9.37 | Time/Step: 0.0001


Training Episode:  42%|████▏     | 42/100 [00:06<00:09,  6.40it/s]

[TRAIN][Step 82749] AvgLen: 36.05 | AvgRew: 74.89 | StdRew: 8.49 | Time/Step: 0.0001
[TRAIN][Step 84749] AvgLen: 36.36 | AvgRew: 73.74 | StdRew: 10.16 | Time/Step: 0.0001


Training Episode:  44%|████▍     | 44/100 [00:06<00:08,  6.41it/s]

[TRAIN][Step 86750] AvgLen: 37.75 | AvgRew: 76.62 | StdRew: 9.76 | Time/Step: 0.0001
[TRAIN][Step 88750] AvgLen: 36.36 | AvgRew: 76.47 | StdRew: 9.64 | Time/Step: 0.0001


Training Episode:  46%|████▌     | 46/100 [00:07<00:08,  6.34it/s]

[TRAIN][Step 90760] AvgLen: 37.22 | AvgRew: 73.64 | StdRew: 6.92 | Time/Step: 0.0001
[TRAIN][Step 92795] AvgLen: 38.40 | AvgRew: 74.87 | StdRew: 9.22 | Time/Step: 0.0001


Training Episode:  48%|████▊     | 48/100 [00:07<00:08,  6.29it/s]

[TRAIN][Step 94814] AvgLen: 36.71 | AvgRew: 75.55 | StdRew: 8.36 | Time/Step: 0.0001
[TRAIN][Step 96832] AvgLen: 39.57 | AvgRew: 75.89 | StdRew: 10.39 | Time/Step: 0.0001


Training Episode:  50%|█████     | 50/100 [00:07<00:07,  6.32it/s]

[TRAIN][Step 98853] AvgLen: 35.46 | AvgRew: 76.24 | StdRew: 7.96 | Time/Step: 0.0001
[TRAIN][Step 100875] AvgLen: 36.11 | AvgRew: 74.98 | StdRew: 8.32 | Time/Step: 0.0001


Training Episode:  52%|█████▏    | 52/100 [00:08<00:07,  6.33it/s]

[TRAIN][Step 102881] AvgLen: 35.82 | AvgRew: 74.22 | StdRew: 9.69 | Time/Step: 0.0001
[TRAIN][Step 104901] AvgLen: 36.07 | AvgRew: 75.11 | StdRew: 9.83 | Time/Step: 0.0001


Training Episode:  54%|█████▍    | 54/100 [00:08<00:07,  6.41it/s]

[TRAIN][Step 106911] AvgLen: 37.22 | AvgRew: 75.41 | StdRew: 8.26 | Time/Step: 0.0001
[TRAIN][Step 108917] AvgLen: 37.15 | AvgRew: 75.70 | StdRew: 8.90 | Time/Step: 0.0001


Training Episode:  56%|█████▌    | 56/100 [00:08<00:06,  6.53it/s]

[TRAIN][Step 110926] AvgLen: 37.91 | AvgRew: 74.06 | StdRew: 8.24 | Time/Step: 0.0001
[TRAIN][Step 112944] AvgLen: 38.08 | AvgRew: 76.03 | StdRew: 11.05 | Time/Step: 0.0001


Training Episode:  58%|█████▊    | 58/100 [00:09<00:06,  6.42it/s]

[TRAIN][Step 114965] AvgLen: 38.87 | AvgRew: 75.45 | StdRew: 11.24 | Time/Step: 0.0001
[TRAIN][Step 116986] AvgLen: 36.75 | AvgRew: 72.89 | StdRew: 8.50 | Time/Step: 0.0001


Training Episode:  60%|██████    | 60/100 [00:09<00:06,  6.47it/s]

[TRAIN][Step 119010] AvgLen: 38.19 | AvgRew: 76.53 | StdRew: 9.65 | Time/Step: 0.0001
[TRAIN][Step 121032] AvgLen: 37.44 | AvgRew: 73.84 | StdRew: 8.61 | Time/Step: 0.0001


Training Episode:  62%|██████▏   | 62/100 [00:09<00:05,  6.39it/s]

[TRAIN][Step 123039] AvgLen: 39.35 | AvgRew: 77.26 | StdRew: 10.90 | Time/Step: 0.0001
[TRAIN][Step 125072] AvgLen: 35.67 | AvgRew: 74.49 | StdRew: 8.24 | Time/Step: 0.0001


Training Episode:  64%|██████▍   | 64/100 [00:09<00:05,  6.37it/s]

[TRAIN][Step 127101] AvgLen: 37.57 | AvgRew: 75.92 | StdRew: 7.76 | Time/Step: 0.0001
[TRAIN][Step 129125] AvgLen: 37.48 | AvgRew: 73.31 | StdRew: 9.47 | Time/Step: 0.0001


Training Episode:  66%|██████▌   | 66/100 [00:10<00:05,  6.23it/s]

[TRAIN][Step 131138] AvgLen: 37.98 | AvgRew: 74.52 | StdRew: 7.69 | Time/Step: 0.0001
[TRAIN][Step 133158] AvgLen: 38.11 | AvgRew: 75.26 | StdRew: 8.63 | Time/Step: 0.0001


Training Episode:  68%|██████▊   | 68/100 [00:10<00:05,  6.38it/s]

[TRAIN][Step 135170] AvgLen: 35.93 | AvgRew: 73.66 | StdRew: 8.12 | Time/Step: 0.0001
[TRAIN][Step 137197] AvgLen: 36.85 | AvgRew: 72.87 | StdRew: 8.32 | Time/Step: 0.0001


Training Episode:  70%|███████   | 70/100 [00:10<00:04,  6.34it/s]

[TRAIN][Step 139218] AvgLen: 35.46 | AvgRew: 75.02 | StdRew: 10.14 | Time/Step: 0.0001
[TRAIN][Step 141241] AvgLen: 38.90 | AvgRew: 76.59 | StdRew: 8.53 | Time/Step: 0.0001


Training Episode:  72%|███████▏  | 72/100 [00:11<00:04,  6.36it/s]

[TRAIN][Step 143264] AvgLen: 35.49 | AvgRew: 76.75 | StdRew: 8.68 | Time/Step: 0.0001
[TRAIN][Step 145291] AvgLen: 38.25 | AvgRew: 76.21 | StdRew: 8.90 | Time/Step: 0.0001


Training Episode:  74%|███████▍  | 74/100 [00:11<00:04,  6.33it/s]

[TRAIN][Step 147311] AvgLen: 38.11 | AvgRew: 75.06 | StdRew: 9.05 | Time/Step: 0.0001
[TRAIN][Step 149321] AvgLen: 38.65 | AvgRew: 74.74 | StdRew: 8.60 | Time/Step: 0.0001


Training Episode:  76%|███████▌  | 76/100 [00:11<00:03,  6.29it/s]

[TRAIN][Step 151340] AvgLen: 38.09 | AvgRew: 77.11 | StdRew: 8.79 | Time/Step: 0.0001
[TRAIN][Step 153368] AvgLen: 35.58 | AvgRew: 75.99 | StdRew: 8.41 | Time/Step: 0.0001


Training Episode:  78%|███████▊  | 78/100 [00:12<00:03,  6.45it/s]

[TRAIN][Step 155383] AvgLen: 38.75 | AvgRew: 75.23 | StdRew: 9.27 | Time/Step: 0.0001
[TRAIN][Step 157404] AvgLen: 38.87 | AvgRew: 75.71 | StdRew: 8.68 | Time/Step: 0.0001


Training Episode:  80%|████████  | 80/100 [00:12<00:03,  6.28it/s]

[TRAIN][Step 159418] AvgLen: 39.49 | AvgRew: 75.35 | StdRew: 10.34 | Time/Step: 0.0001
[TRAIN][Step 161432] AvgLen: 38.00 | AvgRew: 75.96 | StdRew: 8.35 | Time/Step: 0.0001


Training Episode:  82%|████████▏ | 82/100 [00:12<00:02,  6.44it/s]

[TRAIN][Step 163438] AvgLen: 37.15 | AvgRew: 76.57 | StdRew: 9.45 | Time/Step: 0.0001
[TRAIN][Step 165457] AvgLen: 38.09 | AvgRew: 76.59 | StdRew: 8.54 | Time/Step: 0.0001


Training Episode:  84%|████████▍ | 84/100 [00:13<00:02,  6.56it/s]

[TRAIN][Step 167477] AvgLen: 37.41 | AvgRew: 76.79 | StdRew: 9.12 | Time/Step: 0.0001
[TRAIN][Step 169501] AvgLen: 38.19 | AvgRew: 76.88 | StdRew: 9.12 | Time/Step: 0.0001


Training Episode:  86%|████████▌ | 86/100 [00:13<00:02,  6.56it/s]

[TRAIN][Step 171507] AvgLen: 35.82 | AvgRew: 73.82 | StdRew: 7.78 | Time/Step: 0.0001
[TRAIN][Step 173546] AvgLen: 37.76 | AvgRew: 73.92 | StdRew: 7.28 | Time/Step: 0.0001


Training Episode:  88%|████████▊ | 88/100 [00:13<00:01,  6.58it/s]

[TRAIN][Step 175579] AvgLen: 37.65 | AvgRew: 75.46 | StdRew: 7.68 | Time/Step: 0.0001
[TRAIN][Step 177600] AvgLen: 36.09 | AvgRew: 76.44 | StdRew: 8.28 | Time/Step: 0.0001


Training Episode:  90%|█████████ | 90/100 [00:14<00:01,  6.48it/s]

[TRAIN][Step 179616] AvgLen: 37.33 | AvgRew: 76.52 | StdRew: 9.93 | Time/Step: 0.0001
[TRAIN][Step 181626] AvgLen: 37.22 | AvgRew: 75.66 | StdRew: 9.29 | Time/Step: 0.0001


Training Episode:  92%|█████████▏| 92/100 [00:14<00:01,  6.34it/s]

[TRAIN][Step 183645] AvgLen: 36.71 | AvgRew: 75.76 | StdRew: 7.83 | Time/Step: 0.0001
[TRAIN][Step 185662] AvgLen: 37.35 | AvgRew: 75.50 | StdRew: 8.87 | Time/Step: 0.0001


Training Episode:  94%|█████████▍| 94/100 [00:14<00:00,  6.29it/s]

[TRAIN][Step 187668] AvgLen: 37.15 | AvgRew: 75.40 | StdRew: 8.35 | Time/Step: 0.0001
[TRAIN][Step 189679] AvgLen: 36.56 | AvgRew: 75.21 | StdRew: 8.24 | Time/Step: 0.0001


Training Episode:  96%|█████████▌| 96/100 [00:14<00:00,  6.28it/s]

[TRAIN][Step 191698] AvgLen: 36.05 | AvgRew: 76.07 | StdRew: 10.08 | Time/Step: 0.0001
[TRAIN][Step 193718] AvgLen: 36.07 | AvgRew: 73.73 | StdRew: 7.41 | Time/Step: 0.0001


Training Episode:  98%|█████████▊| 98/100 [00:15<00:00,  6.02it/s]

[TRAIN][Step 195720] AvgLen: 35.75 | AvgRew: 75.47 | StdRew: 8.48 | Time/Step: 0.0001
[TRAIN][Step 197727] AvgLen: 36.49 | AvgRew: 75.38 | StdRew: 8.75 | Time/Step: 0.0001


Training Episode: 100%|██████████| 100/100 [00:15<00:00,  6.39it/s]

[TRAIN][Step 199741] AvgLen: 38.00 | AvgRew: 77.13 | StdRew: 7.96 | Time/Step: 0.0001
[TRAIN][Step 201745] AvgLen: 36.44 | AvgRew: 75.64 | StdRew: 7.64 | Time/Step: 0.0001
Training plot path:  ./logs/random/train/plot_data.json



Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 366.54it/s]

[EVAL] CurrentAgent | Episode 1 | Reward: 64.79 | Length: 34
[EVAL] CurrentAgent | Episode 2 | Reward: 69.68 | Length: 37
[EVAL] CurrentAgent | Episode 3 | Reward: 71.66 | Length: 37
[EVAL] CurrentAgent | Episode 4 | Reward: 66.32 | Length: 30
[EVAL] CurrentAgent | Episode 5 | Reward: 68.00 | Length: 41
[EVAL][Step 0] AvgLen: 35.80 | AvgRew: 68.09 | StdRew: 2.42 | Time/Step: 0.0000
Plot path:  ./logs/random/train/eval_plot_data.json





In [6]:
# UCB bandit run: its logs go under a dedicated root folder.
tmp_base_dir = './logs/UCB_bandit'


def create_bandit_agent(env, **kwargs):
    """Agent factory for the Runner: a BanditAgentWrapper sized to ``env``.

    Args:
        env: environment whose ``action_space.nvec[0]`` is used as the number
            of arms.
        **kwargs: accepted and ignored, so this factory can be called the same
            way as the other ``create_*_agent`` factories in this notebook.

    Returns:
        A ``BanditAgentWrapper`` built with ``n_arms=env.action_space.nvec[0]``.
    """
    return BanditAgentWrapper(n_arms=env.action_space.nvec[0])


# Delete and recreate directories
# (train/eval logs from a previous run are dropped; the root is kept/created).
train_log_dir, eval_log_dir = (
    os.path.join(tmp_base_dir, phase) for phase in ('train', 'eval')
)
for log_dir in (train_log_dir, eval_log_dir):
    if not os.path.exists(log_dir):
        continue
    shutil.rmtree(log_dir)
Path(tmp_base_dir).mkdir(parents=True, exist_ok=True)

# Run Bandit agent
runner_bandit = Runner(
    env=interest_evolution.create_environment(env_config),
    create_agent_fn=create_bandit_agent,
    base_dir=tmp_base_dir,
)

# Train first, then evaluate.
runner_bandit.run_training(num_iterations=50, max_training_steps=2000)
runner_bandit.run_evaluation(max_eval_episodes=5)

Training Episode:   4%|▍         | 2/50 [00:00<00:07,  6.40it/s]

[TRAIN][Step 2024] AvgLen: 47.07 | AvgRew: 68.83 | StdRew: 8.86 | Time/Step: 0.0001
[TRAIN][Step 4046] AvgLen: 53.21 | AvgRew: 70.72 | StdRew: 8.73 | Time/Step: 0.0001


Training Episode:   8%|▊         | 4/50 [00:00<00:07,  6.46it/s]

[TRAIN][Step 6089] AvgLen: 51.08 | AvgRew: 68.38 | StdRew: 7.98 | Time/Step: 0.0001
[TRAIN][Step 8098] AvgLen: 51.51 | AvgRew: 66.96 | StdRew: 8.91 | Time/Step: 0.0001


Training Episode:  12%|█▏        | 6/50 [00:00<00:06,  6.48it/s]

[TRAIN][Step 10149] AvgLen: 51.27 | AvgRew: 71.44 | StdRew: 10.03 | Time/Step: 0.0001
[TRAIN][Step 12169] AvgLen: 51.79 | AvgRew: 68.68 | StdRew: 8.51 | Time/Step: 0.0001


Training Episode:  16%|█▌        | 8/50 [00:01<00:06,  6.68it/s]

[TRAIN][Step 14171] AvgLen: 51.33 | AvgRew: 67.33 | StdRew: 7.53 | Time/Step: 0.0001
[TRAIN][Step 16232] AvgLen: 52.85 | AvgRew: 67.79 | StdRew: 7.53 | Time/Step: 0.0001


Training Episode:  20%|██        | 10/50 [00:01<00:06,  6.58it/s]

[TRAIN][Step 18257] AvgLen: 50.62 | AvgRew: 68.20 | StdRew: 8.03 | Time/Step: 0.0001
[TRAIN][Step 20289] AvgLen: 49.56 | AvgRew: 68.47 | StdRew: 8.75 | Time/Step: 0.0001


Training Episode:  24%|██▍       | 12/50 [00:01<00:05,  6.57it/s]

[TRAIN][Step 22329] AvgLen: 51.00 | AvgRew: 69.73 | StdRew: 8.66 | Time/Step: 0.0001
[TRAIN][Step 24335] AvgLen: 52.79 | AvgRew: 68.63 | StdRew: 10.50 | Time/Step: 0.0001


Training Episode:  28%|██▊       | 14/50 [00:02<00:05,  6.51it/s]

[TRAIN][Step 26380] AvgLen: 49.88 | AvgRew: 68.29 | StdRew: 7.37 | Time/Step: 0.0001
[TRAIN][Step 28387] AvgLen: 51.46 | AvgRew: 68.16 | StdRew: 9.03 | Time/Step: 0.0001


Training Episode:  32%|███▏      | 16/50 [00:02<00:05,  6.61it/s]

[TRAIN][Step 30407] AvgLen: 50.50 | AvgRew: 69.40 | StdRew: 7.28 | Time/Step: 0.0001
[TRAIN][Step 32431] AvgLen: 53.26 | AvgRew: 70.26 | StdRew: 8.88 | Time/Step: 0.0001


Training Episode:  36%|███▌      | 18/50 [00:02<00:04,  6.62it/s]

[TRAIN][Step 34460] AvgLen: 49.49 | AvgRew: 69.23 | StdRew: 8.63 | Time/Step: 0.0001
[TRAIN][Step 36489] AvgLen: 49.49 | AvgRew: 67.72 | StdRew: 8.93 | Time/Step: 0.0001


Training Episode:  40%|████      | 20/50 [00:03<00:04,  6.70it/s]

[TRAIN][Step 38506] AvgLen: 53.08 | AvgRew: 70.15 | StdRew: 8.38 | Time/Step: 0.0001
[TRAIN][Step 40536] AvgLen: 50.75 | AvgRew: 69.11 | StdRew: 9.80 | Time/Step: 0.0001


Training Episode:  44%|████▍     | 22/50 [00:03<00:04,  6.67it/s]

[TRAIN][Step 42564] AvgLen: 53.37 | AvgRew: 68.49 | StdRew: 8.89 | Time/Step: 0.0001
[TRAIN][Step 44596] AvgLen: 52.10 | AvgRew: 69.04 | StdRew: 9.53 | Time/Step: 0.0001


Training Episode:  48%|████▊     | 24/50 [00:03<00:03,  6.57it/s]

[TRAIN][Step 46671] AvgLen: 51.88 | AvgRew: 71.59 | StdRew: 10.11 | Time/Step: 0.0001
[TRAIN][Step 48689] AvgLen: 53.11 | AvgRew: 67.09 | StdRew: 7.85 | Time/Step: 0.0001


Training Episode:  52%|█████▏    | 26/50 [00:03<00:03,  6.62it/s]

[TRAIN][Step 50724] AvgLen: 53.55 | AvgRew: 70.26 | StdRew: 8.84 | Time/Step: 0.0001
[TRAIN][Step 52744] AvgLen: 48.10 | AvgRew: 68.83 | StdRew: 8.21 | Time/Step: 0.0001


Training Episode:  56%|█████▌    | 28/50 [00:04<00:03,  6.67it/s]

[TRAIN][Step 54775] AvgLen: 49.54 | AvgRew: 68.74 | StdRew: 8.81 | Time/Step: 0.0001
[TRAIN][Step 56783] AvgLen: 51.49 | AvgRew: 69.02 | StdRew: 8.39 | Time/Step: 0.0001


Training Episode:  60%|██████    | 30/50 [00:04<00:02,  6.73it/s]

[TRAIN][Step 58785] AvgLen: 48.83 | AvgRew: 69.06 | StdRew: 7.92 | Time/Step: 0.0001
[TRAIN][Step 60815] AvgLen: 52.05 | AvgRew: 67.66 | StdRew: 9.50 | Time/Step: 0.0001


Training Episode:  64%|██████▍   | 32/50 [00:04<00:02,  6.67it/s]

[TRAIN][Step 62856] AvgLen: 52.33 | AvgRew: 68.48 | StdRew: 7.99 | Time/Step: 0.0001
[TRAIN][Step 64860] AvgLen: 48.88 | AvgRew: 66.51 | StdRew: 9.24 | Time/Step: 0.0001


Training Episode:  68%|██████▊   | 34/50 [00:05<00:02,  6.51it/s]

[TRAIN][Step 66902] AvgLen: 47.49 | AvgRew: 69.14 | StdRew: 9.40 | Time/Step: 0.0001
[TRAIN][Step 68908] AvgLen: 51.44 | AvgRew: 68.91 | StdRew: 9.41 | Time/Step: 0.0001


Training Episode:  72%|███████▏  | 36/50 [00:05<00:02,  6.64it/s]

[TRAIN][Step 70943] AvgLen: 52.18 | AvgRew: 67.23 | StdRew: 8.35 | Time/Step: 0.0001
[TRAIN][Step 72945] AvgLen: 55.61 | AvgRew: 69.37 | StdRew: 8.20 | Time/Step: 0.0001


Training Episode:  76%|███████▌  | 38/50 [00:05<00:01,  6.50it/s]

[TRAIN][Step 74982] AvgLen: 49.68 | AvgRew: 69.88 | StdRew: 7.82 | Time/Step: 0.0001
[TRAIN][Step 77001] AvgLen: 50.48 | AvgRew: 72.14 | StdRew: 8.43 | Time/Step: 0.0001


Training Episode:  80%|████████  | 40/50 [00:06<00:01,  6.65it/s]

[TRAIN][Step 79013] AvgLen: 50.30 | AvgRew: 65.98 | StdRew: 6.17 | Time/Step: 0.0001
[TRAIN][Step 81019] AvgLen: 52.79 | AvgRew: 68.87 | StdRew: 7.88 | Time/Step: 0.0001


Training Episode:  84%|████████▍ | 42/50 [00:06<00:01,  6.46it/s]

[TRAIN][Step 83055] AvgLen: 48.48 | AvgRew: 68.66 | StdRew: 8.08 | Time/Step: 0.0001
[TRAIN][Step 85065] AvgLen: 50.25 | AvgRew: 67.71 | StdRew: 7.92 | Time/Step: 0.0001


Training Episode:  88%|████████▊ | 44/50 [00:06<00:00,  6.44it/s]

[TRAIN][Step 87123] AvgLen: 51.45 | AvgRew: 67.83 | StdRew: 7.30 | Time/Step: 0.0001
[TRAIN][Step 89171] AvgLen: 52.51 | AvgRew: 70.43 | StdRew: 7.53 | Time/Step: 0.0001


Training Episode:  92%|█████████▏| 46/50 [00:06<00:00,  6.56it/s]

[TRAIN][Step 91198] AvgLen: 48.26 | AvgRew: 69.02 | StdRew: 7.70 | Time/Step: 0.0001
[TRAIN][Step 93203] AvgLen: 48.90 | AvgRew: 69.66 | StdRew: 7.81 | Time/Step: 0.0001


Training Episode:  96%|█████████▌| 48/50 [00:07<00:00,  6.63it/s]

[TRAIN][Step 95244] AvgLen: 52.33 | AvgRew: 65.52 | StdRew: 8.37 | Time/Step: 0.0001
[TRAIN][Step 97268] AvgLen: 51.90 | AvgRew: 68.51 | StdRew: 7.17 | Time/Step: 0.0001


Training Episode: 100%|██████████| 50/50 [00:07<00:00,  6.56it/s]

[TRAIN][Step 99269] AvgLen: 52.66 | AvgRew: 69.00 | StdRew: 8.72 | Time/Step: 0.0001
[TRAIN][Step 101316] AvgLen: 52.49 | AvgRew: 70.18 | StdRew: 9.23 | Time/Step: 0.0001
Training plot path:  ./logs/UCB_bandit/train/plot_data.json



Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 273.99it/s]

[EVAL] CurrentAgent | Episode 1 | Reward: 73.49 | Length: 61
[EVAL] CurrentAgent | Episode 2 | Reward: 69.25 | Length: 46
[EVAL] CurrentAgent | Episode 3 | Reward: 68.00 | Length: 39
[EVAL] CurrentAgent | Episode 4 | Reward: 60.00 | Length: 41
[EVAL] CurrentAgent | Episode 5 | Reward: 64.00 | Length: 51
[EVAL][Step 0] AvgLen: 47.60 | AvgRew: 66.95 | StdRew: 4.61 | Time/Step: 0.0000
Plot path:  ./logs/UCB_bandit/train/eval_plot_data.json





In [7]:
# Log root for the contextual-bandit run.
tmp_base_dir = './logs/Contextual_bandit'
# NOTE(review): this import sits mid-notebook; consider moving it next to the
# other agent imports in the import cell at the top.
from contextual_bandit import ContextualBanditAgent

def create_contextual_bandit_agent(env, **kwargs):
    """Agent factory for the Runner: a ContextualBanditAgent sized to ``env``.

    The arm count is read from ``env.action_space.nvec[0]`` and epsilon is
    fixed at 0.1. Any extra keyword arguments are accepted and ignored.
    """
    # Use .nvec[0] for MultiDiscrete
    arm_count = env.action_space.nvec[0]
    return ContextualBanditAgent(epsilon=0.1, n_arms=arm_count)

# Drop train/eval logs left by a previous run, then make sure the root exists.
train_log_dir, eval_log_dir = (
    os.path.join(tmp_base_dir, phase) for phase in ('train', 'eval')
)

for log_dir in (train_log_dir, eval_log_dir):
    if not os.path.exists(log_dir):
        continue
    shutil.rmtree(log_dir)

Path(tmp_base_dir).mkdir(parents=True, exist_ok=True)

# Build the runner on a new environment created from the shared env_config.
runner_cb = Runner(
    env=interest_evolution.create_environment(env_config),
    create_agent_fn=create_contextual_bandit_agent,
    base_dir=tmp_base_dir,
)

# Train first, then evaluate.
runner_cb.run_training(num_iterations=50, max_training_steps=2000)
runner_cb.run_evaluation(max_eval_episodes=5)

Training Episode:   4%|▍         | 2/50 [00:00<00:07,  6.57it/s]

[TRAIN][Step 2033] AvgLen: 48.40 | AvgRew: 67.16 | StdRew: 8.50 | Time/Step: 0.0001
[TRAIN][Step 4044] AvgLen: 50.27 | AvgRew: 69.61 | StdRew: 8.69 | Time/Step: 0.0001


Training Episode:   8%|▊         | 4/50 [00:00<00:06,  6.77it/s]

[TRAIN][Step 6097] AvgLen: 50.07 | AvgRew: 66.97 | StdRew: 7.62 | Time/Step: 0.0001
[TRAIN][Step 8127] AvgLen: 49.51 | AvgRew: 68.59 | StdRew: 8.45 | Time/Step: 0.0001


Training Episode:  12%|█▏        | 6/50 [00:00<00:06,  6.92it/s]

[TRAIN][Step 10162] AvgLen: 53.55 | AvgRew: 69.94 | StdRew: 9.42 | Time/Step: 0.0001
[TRAIN][Step 12173] AvgLen: 51.56 | AvgRew: 67.64 | StdRew: 7.52 | Time/Step: 0.0001


Training Episode:  16%|█▌        | 8/50 [00:01<00:05,  7.02it/s]

[TRAIN][Step 14202] AvgLen: 54.84 | AvgRew: 66.60 | StdRew: 7.89 | Time/Step: 0.0001
[TRAIN][Step 16230] AvgLen: 52.00 | AvgRew: 66.48 | StdRew: 7.71 | Time/Step: 0.0001


Training Episode:  20%|██        | 10/50 [00:01<00:05,  7.02it/s]

[TRAIN][Step 18272] AvgLen: 52.36 | AvgRew: 67.95 | StdRew: 8.20 | Time/Step: 0.0001
[TRAIN][Step 20311] AvgLen: 55.11 | AvgRew: 66.07 | StdRew: 7.38 | Time/Step: 0.0001


Training Episode:  24%|██▍       | 12/50 [00:01<00:05,  6.69it/s]

[TRAIN][Step 22369] AvgLen: 50.20 | AvgRew: 67.69 | StdRew: 7.08 | Time/Step: 0.0001
[TRAIN][Step 24399] AvgLen: 50.75 | AvgRew: 67.76 | StdRew: 9.60 | Time/Step: 0.0001


Training Episode:  28%|██▊       | 14/50 [00:02<00:05,  6.81it/s]

[TRAIN][Step 26439] AvgLen: 49.76 | AvgRew: 67.17 | StdRew: 5.99 | Time/Step: 0.0001
[TRAIN][Step 28443] AvgLen: 51.38 | AvgRew: 69.52 | StdRew: 9.94 | Time/Step: 0.0001


Training Episode:  32%|███▏      | 16/50 [00:02<00:05,  6.50it/s]

[TRAIN][Step 30480] AvgLen: 50.92 | AvgRew: 70.67 | StdRew: 9.98 | Time/Step: 0.0001
[TRAIN][Step 32504] AvgLen: 50.60 | AvgRew: 69.71 | StdRew: 8.52 | Time/Step: 0.0001


Training Episode:  34%|███▍      | 17/50 [00:02<00:05,  6.15it/s]

[TRAIN][Step 34505] AvgLen: 51.31 | AvgRew: 67.71 | StdRew: 8.76 | Time/Step: 0.0001
[TRAIN][Step 36505] AvgLen: 51.28 | AvgRew: 65.55 | StdRew: 7.64 | Time/Step: 0.0001


Training Episode:  40%|████      | 20/50 [00:03<00:04,  6.33it/s]

[TRAIN][Step 38534] AvgLen: 50.73 | AvgRew: 68.85 | StdRew: 7.43 | Time/Step: 0.0001
[TRAIN][Step 40538] AvgLen: 47.71 | AvgRew: 69.52 | StdRew: 8.29 | Time/Step: 0.0001


Training Episode:  44%|████▍     | 22/50 [00:03<00:04,  6.66it/s]

[TRAIN][Step 42557] AvgLen: 51.77 | AvgRew: 69.00 | StdRew: 8.33 | Time/Step: 0.0001
[TRAIN][Step 44586] AvgLen: 53.39 | AvgRew: 69.20 | StdRew: 8.84 | Time/Step: 0.0001


Training Episode:  48%|████▊     | 24/50 [00:03<00:03,  6.59it/s]

[TRAIN][Step 46599] AvgLen: 52.97 | AvgRew: 67.49 | StdRew: 7.73 | Time/Step: 0.0001
[TRAIN][Step 48635] AvgLen: 56.56 | AvgRew: 66.23 | StdRew: 8.75 | Time/Step: 0.0001


Training Episode:  52%|█████▏    | 26/50 [00:03<00:03,  6.67it/s]

[TRAIN][Step 50655] AvgLen: 50.50 | AvgRew: 72.00 | StdRew: 9.23 | Time/Step: 0.0001
[TRAIN][Step 52701] AvgLen: 48.71 | AvgRew: 68.04 | StdRew: 7.56 | Time/Step: 0.0001


Training Episode:  56%|█████▌    | 28/50 [00:04<00:03,  6.66it/s]

[TRAIN][Step 54709] AvgLen: 52.84 | AvgRew: 65.65 | StdRew: 7.25 | Time/Step: 0.0001
[TRAIN][Step 56734] AvgLen: 51.92 | AvgRew: 69.59 | StdRew: 9.30 | Time/Step: 0.0001


Training Episode:  60%|██████    | 30/50 [00:04<00:02,  6.83it/s]

[TRAIN][Step 58735] AvgLen: 50.02 | AvgRew: 69.27 | StdRew: 7.56 | Time/Step: 0.0001
[TRAIN][Step 60771] AvgLen: 50.90 | AvgRew: 67.68 | StdRew: 9.39 | Time/Step: 0.0001


Training Episode:  64%|██████▍   | 32/50 [00:04<00:02,  6.78it/s]

[TRAIN][Step 62793] AvgLen: 51.85 | AvgRew: 69.66 | StdRew: 8.37 | Time/Step: 0.0001
[TRAIN][Step 64810] AvgLen: 50.42 | AvgRew: 67.50 | StdRew: 8.18 | Time/Step: 0.0001


Training Episode:  68%|██████▊   | 34/50 [00:05<00:02,  6.69it/s]

[TRAIN][Step 66829] AvgLen: 48.07 | AvgRew: 68.66 | StdRew: 7.70 | Time/Step: 0.0001
[TRAIN][Step 68853] AvgLen: 49.37 | AvgRew: 69.98 | StdRew: 9.09 | Time/Step: 0.0001


Training Episode:  72%|███████▏  | 36/50 [00:05<00:02,  6.25it/s]

[TRAIN][Step 70883] AvgLen: 52.05 | AvgRew: 65.90 | StdRew: 7.09 | Time/Step: 0.0001
[TRAIN][Step 72900] AvgLen: 50.42 | AvgRew: 70.04 | StdRew: 7.55 | Time/Step: 0.0001


Training Episode:  76%|███████▌  | 38/50 [00:05<00:01,  6.41it/s]

[TRAIN][Step 74953] AvgLen: 50.07 | AvgRew: 66.97 | StdRew: 6.44 | Time/Step: 0.0001
[TRAIN][Step 76965] AvgLen: 49.07 | AvgRew: 69.33 | StdRew: 8.31 | Time/Step: 0.0001


Training Episode:  80%|████████  | 40/50 [00:06<00:01,  6.67it/s]

[TRAIN][Step 79003] AvgLen: 48.52 | AvgRew: 68.87 | StdRew: 8.44 | Time/Step: 0.0001
[TRAIN][Step 81016] AvgLen: 50.33 | AvgRew: 67.18 | StdRew: 7.27 | Time/Step: 0.0001


Training Episode:  84%|████████▍ | 42/50 [00:06<00:01,  6.87it/s]

[TRAIN][Step 83041] AvgLen: 50.62 | AvgRew: 67.90 | StdRew: 10.50 | Time/Step: 0.0001
[TRAIN][Step 85081] AvgLen: 51.00 | AvgRew: 67.99 | StdRew: 8.42 | Time/Step: 0.0001


Training Episode:  88%|████████▊ | 44/50 [00:06<00:00,  6.99it/s]

[TRAIN][Step 87083] AvgLen: 51.33 | AvgRew: 67.97 | StdRew: 7.50 | Time/Step: 0.0001
[TRAIN][Step 89085] AvgLen: 51.33 | AvgRew: 73.27 | StdRew: 9.04 | Time/Step: 0.0001


Training Episode:  92%|█████████▏| 46/50 [00:06<00:00,  6.98it/s]

[TRAIN][Step 91132] AvgLen: 49.93 | AvgRew: 70.93 | StdRew: 7.63 | Time/Step: 0.0001
[TRAIN][Step 93181] AvgLen: 49.98 | AvgRew: 68.68 | StdRew: 9.54 | Time/Step: 0.0001


Training Episode:  96%|█████████▌| 48/50 [00:07<00:00,  6.92it/s]

[TRAIN][Step 95214] AvgLen: 52.13 | AvgRew: 67.19 | StdRew: 8.23 | Time/Step: 0.0001
[TRAIN][Step 97216] AvgLen: 50.05 | AvgRew: 68.93 | StdRew: 8.46 | Time/Step: 0.0001


Training Episode: 100%|██████████| 50/50 [00:07<00:00,  6.69it/s]


[TRAIN][Step 99240] AvgLen: 50.60 | AvgRew: 68.50 | StdRew: 9.23 | Time/Step: 0.0001
[TRAIN][Step 101274] AvgLen: 49.61 | AvgRew: 69.21 | StdRew: 7.71 | Time/Step: 0.0001
Training plot path:  ./logs/Contextual_bandit/train/plot_data.json


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 284.54it/s]


[EVAL] ckpt_0.pkl | Episode 1 | Reward: 69.64 | Length: 49
[EVAL] ckpt_0.pkl | Episode 2 | Reward: 60.19 | Length: 51
[EVAL] ckpt_0.pkl | Episode 3 | Reward: 64.00 | Length: 60
[EVAL] ckpt_0.pkl | Episode 4 | Reward: 65.97 | Length: 52
[EVAL] ckpt_0.pkl | Episode 5 | Reward: 82.11 | Length: 35
[EVAL][Step 2033] AvgLen: 49.40 | AvgRew: 68.38 | StdRew: 7.51 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 323.31it/s]


[EVAL] ckpt_1.pkl | Episode 1 | Reward: 76.00 | Length: 40
[EVAL] ckpt_1.pkl | Episode 2 | Reward: 60.00 | Length: 53
[EVAL] ckpt_1.pkl | Episode 3 | Reward: 64.00 | Length: 33
[EVAL] ckpt_1.pkl | Episode 4 | Reward: 64.00 | Length: 49
[EVAL] ckpt_1.pkl | Episode 5 | Reward: 76.00 | Length: 34
[EVAL][Step 4044] AvgLen: 41.80 | AvgRew: 68.00 | StdRew: 6.69 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_2.pkl | Episode 1 | Reward: 64.00 | Length: 47
[EVAL] ckpt_2.pkl | Episode 2 | Reward: 98.21 | Length: 46
[EVAL] ckpt_2.pkl | Episode 3 | Reward: 68.00 | Length: 40


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 287.83it/s]


[EVAL] ckpt_2.pkl | Episode 4 | Reward: 72.00 | Length: 60
[EVAL] ckpt_2.pkl | Episode 5 | Reward: 64.00 | Length: 40
[EVAL][Step 6097] AvgLen: 46.60 | AvgRew: 73.24 | StdRew: 12.83 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 303.28it/s]


[EVAL] ckpt_3.pkl | Episode 1 | Reward: 69.23 | Length: 37
[EVAL] ckpt_3.pkl | Episode 2 | Reward: 66.17 | Length: 36
[EVAL] ckpt_3.pkl | Episode 3 | Reward: 73.74 | Length: 47
[EVAL] ckpt_3.pkl | Episode 4 | Reward: 64.00 | Length: 48
[EVAL] ckpt_3.pkl | Episode 5 | Reward: 63.41 | Length: 50
[EVAL][Step 8127] AvgLen: 43.60 | AvgRew: 67.31 | StdRew: 3.81 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 291.14it/s]


[EVAL] ckpt_4.pkl | Episode 1 | Reward: 71.01 | Length: 61
[EVAL] ckpt_4.pkl | Episode 2 | Reward: 72.62 | Length: 38
[EVAL] ckpt_4.pkl | Episode 3 | Reward: 59.76 | Length: 54
[EVAL] ckpt_4.pkl | Episode 4 | Reward: 84.00 | Length: 37
[EVAL] ckpt_4.pkl | Episode 5 | Reward: 64.00 | Length: 35
[EVAL][Step 10162] AvgLen: 45.00 | AvgRew: 70.28 | StdRew: 8.30 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 264.34it/s]


[EVAL] ckpt_5.pkl | Episode 1 | Reward: 80.90 | Length: 39
[EVAL] ckpt_5.pkl | Episode 2 | Reward: 84.00 | Length: 39
[EVAL] ckpt_5.pkl | Episode 3 | Reward: 55.45 | Length: 78
[EVAL] ckpt_5.pkl | Episode 4 | Reward: 68.00 | Length: 42
[EVAL] ckpt_5.pkl | Episode 5 | Reward: 60.26 | Length: 54
[EVAL][Step 12173] AvgLen: 50.40 | AvgRew: 69.72 | StdRew: 11.18 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 233.80it/s]


[EVAL] ckpt_6.pkl | Episode 1 | Reward: 84.00 | Length: 49
[EVAL] ckpt_6.pkl | Episode 2 | Reward: 69.41 | Length: 58
[EVAL] ckpt_6.pkl | Episode 3 | Reward: 52.00 | Length: 112
[EVAL] ckpt_6.pkl | Episode 4 | Reward: 60.51 | Length: 39
[EVAL] ckpt_6.pkl | Episode 5 | Reward: 74.14 | Length: 43
[EVAL][Step 14202] AvgLen: 60.20 | AvgRew: 68.01 | StdRew: 11.03 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 306.33it/s]


[EVAL] ckpt_7.pkl | Episode 1 | Reward: 80.00 | Length: 42
[EVAL] ckpt_7.pkl | Episode 2 | Reward: 78.93 | Length: 50
[EVAL] ckpt_7.pkl | Episode 3 | Reward: 66.02 | Length: 45
[EVAL] ckpt_7.pkl | Episode 4 | Reward: 68.00 | Length: 40
[EVAL] ckpt_7.pkl | Episode 5 | Reward: 60.26 | Length: 36
[EVAL][Step 16230] AvgLen: 42.60 | AvgRew: 70.64 | StdRew: 7.65 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 286.60it/s]


[EVAL] ckpt_8.pkl | Episode 1 | Reward: 75.37 | Length: 53
[EVAL] ckpt_8.pkl | Episode 2 | Reward: 72.00 | Length: 55
[EVAL] ckpt_8.pkl | Episode 3 | Reward: 73.35 | Length: 46
[EVAL] ckpt_8.pkl | Episode 4 | Reward: 67.74 | Length: 44
[EVAL] ckpt_8.pkl | Episode 5 | Reward: 63.90 | Length: 34
[EVAL][Step 18272] AvgLen: 46.40 | AvgRew: 70.47 | StdRew: 4.13 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_9.pkl | Episode 1 | Reward: 68.00 | Length: 60
[EVAL] ckpt_9.pkl | Episode 2 | Reward: 56.00 | Length: 50


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 262.68it/s]


[EVAL] ckpt_9.pkl | Episode 3 | Reward: 60.41 | Length: 53
[EVAL] ckpt_9.pkl | Episode 4 | Reward: 74.38 | Length: 48
[EVAL] ckpt_9.pkl | Episode 5 | Reward: 68.00 | Length: 42
[EVAL][Step 20311] AvgLen: 50.60 | AvgRew: 65.36 | StdRew: 6.44 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_10.pkl | Episode 1 | Reward: 64.00 | Length: 44
[EVAL] ckpt_10.pkl | Episode 2 | Reward: 62.41 | Length: 47
[EVAL] ckpt_10.pkl | Episode 3 | Reward: 68.49 | Length: 73
[EVAL] ckpt_10.pkl | Episode 4 | Reward: 60.48 | Length: 49


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 259.95it/s]

[EVAL] ckpt_10.pkl | Episode 5 | Reward: 73.01 | Length: 39
[EVAL][Step 22369] AvgLen: 50.40 | AvgRew: 65.68 | StdRew: 4.52 | Time/Step: 0.0000



Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_11.pkl | Episode 1 | Reward: 72.29 | Length: 30
[EVAL] ckpt_11.pkl | Episode 2 | Reward: 62.73 | Length: 67


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 255.92it/s]


[EVAL] ckpt_11.pkl | Episode 3 | Reward: 56.95 | Length: 37
[EVAL] ckpt_11.pkl | Episode 4 | Reward: 59.73 | Length: 71
[EVAL] ckpt_11.pkl | Episode 5 | Reward: 80.00 | Length: 49
[EVAL][Step 24399] AvgLen: 50.80 | AvgRew: 66.34 | StdRew: 8.57 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_12.pkl | Episode 1 | Reward: 67.54 | Length: 54


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 273.69it/s]


[EVAL] ckpt_12.pkl | Episode 2 | Reward: 64.00 | Length: 59
[EVAL] ckpt_12.pkl | Episode 3 | Reward: 64.09 | Length: 45
[EVAL] ckpt_12.pkl | Episode 4 | Reward: 66.33 | Length: 40
[EVAL] ckpt_12.pkl | Episode 5 | Reward: 65.18 | Length: 40
[EVAL][Step 26439] AvgLen: 47.60 | AvgRew: 65.43 | StdRew: 1.35 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_13.pkl | Episode 1 | Reward: 76.00 | Length: 45
[EVAL] ckpt_13.pkl | Episode 2 | Reward: 64.00 | Length: 31


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 278.25it/s]


[EVAL] ckpt_13.pkl | Episode 3 | Reward: 80.00 | Length: 58
[EVAL] ckpt_13.pkl | Episode 4 | Reward: 72.00 | Length: 44
[EVAL] ckpt_13.pkl | Episode 5 | Reward: 76.00 | Length: 57
[EVAL][Step 28443] AvgLen: 47.00 | AvgRew: 73.60 | StdRew: 5.43 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 254.35it/s]

[EVAL] ckpt_14.pkl | Episode 1 | Reward: 73.94 | Length: 62
[EVAL] ckpt_14.pkl | Episode 2 | Reward: 77.42 | Length: 40
[EVAL] ckpt_14.pkl | Episode 3 | Reward: 67.17 | Length: 44
[EVAL] ckpt_14.pkl | Episode 4 | Reward: 76.08 | Length: 71
[EVAL] ckpt_14.pkl | Episode 5 | Reward: 64.00 | Length: 55
[EVAL][Step 30480] AvgLen: 54.40 | AvgRew: 71.72 | StdRew: 5.23 | Time/Step: 0.0000



Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_15.pkl | Episode 1 | Reward: 70.54 | Length: 80
[EVAL] ckpt_15.pkl | Episode 2 | Reward: 57.36 | Length: 53
[EVAL] ckpt_15.pkl | Episode 3 | Reward: 78.78 | Length: 48
[EVAL] ckpt_15.pkl | Episode 4 | Reward: 66.02 | Length: 60


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 233.64it/s]


[EVAL] ckpt_15.pkl | Episode 5 | Reward: 80.00 | Length: 47
[EVAL][Step 32504] AvgLen: 57.60 | AvgRew: 70.54 | StdRew: 8.38 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_16.pkl | Episode 1 | Reward: 56.00 | Length: 62
[EVAL] ckpt_16.pkl | Episode 2 | Reward: 85.14 | Length: 47
[EVAL] ckpt_16.pkl | Episode 3 | Reward: 52.00 | Length: 48


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 262.09it/s]


[EVAL] ckpt_16.pkl | Episode 4 | Reward: 66.05 | Length: 51
[EVAL] ckpt_16.pkl | Episode 5 | Reward: 68.00 | Length: 45
[EVAL][Step 34505] AvgLen: 50.60 | AvgRew: 65.44 | StdRew: 11.53 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_17.pkl | Episode 1 | Reward: 64.00 | Length: 47
[EVAL] ckpt_17.pkl | Episode 2 | Reward: 76.60 | Length: 45
[EVAL] ckpt_17.pkl | Episode 3 | Reward: 72.00 | Length: 38


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 292.84it/s]


[EVAL] ckpt_17.pkl | Episode 4 | Reward: 57.05 | Length: 59
[EVAL] ckpt_17.pkl | Episode 5 | Reward: 76.00 | Length: 35
[EVAL][Step 36505] AvgLen: 44.80 | AvgRew: 69.13 | StdRew: 7.53 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 287.83it/s]


[EVAL] ckpt_18.pkl | Episode 1 | Reward: 76.00 | Length: 39
[EVAL] ckpt_18.pkl | Episode 2 | Reward: 69.07 | Length: 66
[EVAL] ckpt_18.pkl | Episode 3 | Reward: 68.00 | Length: 44
[EVAL] ckpt_18.pkl | Episode 4 | Reward: 80.00 | Length: 52
[EVAL] ckpt_18.pkl | Episode 5 | Reward: 67.02 | Length: 31
[EVAL][Step 38534] AvgLen: 46.40 | AvgRew: 72.02 | StdRew: 5.09 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_19.pkl | Episode 1 | Reward: 72.00 | Length: 43
[EVAL] ckpt_19.pkl | Episode 2 | Reward: 65.39 | Length: 73


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 238.17it/s]


[EVAL] ckpt_19.pkl | Episode 3 | Reward: 76.00 | Length: 37
[EVAL] ckpt_19.pkl | Episode 4 | Reward: 71.43 | Length: 50
[EVAL] ckpt_19.pkl | Episode 5 | Reward: 60.60 | Length: 80
[EVAL][Step 40538] AvgLen: 56.60 | AvgRew: 69.08 | StdRew: 5.43 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_20.pkl | Episode 1 | Reward: 78.85 | Length: 46
[EVAL] ckpt_20.pkl | Episode 2 | Reward: 79.91 | Length: 51
[EVAL] ckpt_20.pkl | Episode 3 | Reward: 72.00 | Length: 48


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 255.72it/s]


[EVAL] ckpt_20.pkl | Episode 4 | Reward: 68.00 | Length: 61
[EVAL] ckpt_20.pkl | Episode 5 | Reward: 70.47 | Length: 54
[EVAL][Step 42557] AvgLen: 52.00 | AvgRew: 73.85 | StdRew: 4.71 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_21.pkl | Episode 1 | Reward: 52.56 | Length: 58
[EVAL] ckpt_21.pkl | Episode 2 | Reward: 62.90 | Length: 40
[EVAL] ckpt_21.pkl | Episode 3 | Reward: 64.38 | Length: 55
[EVAL] ckpt_21.pkl | Episode 4 | Reward: 68.44 | Length: 48
[EVAL] ckpt_21.pkl | Episode 5 | Reward: 66.78 | Length: 62


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 253.99it/s]


[EVAL][Step 44586] AvgLen: 52.60 | AvgRew: 63.01 | StdRew: 5.56 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_22.pkl | Episode 1 | Reward: 59.10 | Length: 58
[EVAL] ckpt_22.pkl | Episode 2 | Reward: 60.00 | Length: 50
[EVAL] ckpt_22.pkl | Episode 3 | Reward: 74.00 | Length: 41


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 285.06it/s]


[EVAL] ckpt_22.pkl | Episode 4 | Reward: 84.00 | Length: 44
[EVAL] ckpt_22.pkl | Episode 5 | Reward: 77.85 | Length: 36
[EVAL][Step 46599] AvgLen: 45.80 | AvgRew: 70.99 | StdRew: 9.87 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_23.pkl | Episode 1 | Reward: 67.25 | Length: 45
[EVAL] ckpt_23.pkl | Episode 2 | Reward: 67.05 | Length: 33
[EVAL] ckpt_23.pkl | Episode 3 | Reward: 72.00 | Length: 44
[EVAL] ckpt_23.pkl | Episode 4 | Reward: 60.00 | Length: 46


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 294.23it/s]


[EVAL] ckpt_23.pkl | Episode 5 | Reward: 60.65 | Length: 57
[EVAL][Step 48635] AvgLen: 45.00 | AvgRew: 65.39 | StdRew: 4.50 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 232.05it/s]


[EVAL] ckpt_24.pkl | Episode 1 | Reward: 64.00 | Length: 40
[EVAL] ckpt_24.pkl | Episode 2 | Reward: 80.00 | Length: 65
[EVAL] ckpt_24.pkl | Episode 3 | Reward: 68.00 | Length: 58
[EVAL] ckpt_24.pkl | Episode 4 | Reward: 64.00 | Length: 61
[EVAL] ckpt_24.pkl | Episode 5 | Reward: 58.47 | Length: 77
[EVAL][Step 50655] AvgLen: 60.20 | AvgRew: 66.89 | StdRew: 7.22 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_25.pkl | Episode 1 | Reward: 56.98 | Length: 50
[EVAL] ckpt_25.pkl | Episode 2 | Reward: 56.00 | Length: 61
[EVAL] ckpt_25.pkl | Episode 3 | Reward: 68.00 | Length: 32
[EVAL] ckpt_25.pkl | Episode 4 | Reward: 64.47 | Length: 41


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 269.73it/s]


[EVAL] ckpt_25.pkl | Episode 5 | Reward: 60.56 | Length: 58
[EVAL][Step 52701] AvgLen: 48.40 | AvgRew: 61.20 | StdRew: 4.52 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_26.pkl | Episode 1 | Reward: 72.60 | Length: 32


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 291.47it/s]

[EVAL] ckpt_26.pkl | Episode 2 | Reward: 80.71 | Length: 49
[EVAL] ckpt_26.pkl | Episode 3 | Reward: 77.74 | Length: 45
[EVAL] ckpt_26.pkl | Episode 4 | Reward: 64.56 | Length: 43
[EVAL] ckpt_26.pkl | Episode 5 | Reward: 76.70 | Length: 53





[EVAL][Step 54709] AvgLen: 44.40 | AvgRew: 74.46 | StdRew: 5.59 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_27.pkl | Episode 1 | Reward: 65.46 | Length: 58
[EVAL] ckpt_27.pkl | Episode 2 | Reward: 75.42 | Length: 43
[EVAL] ckpt_27.pkl | Episode 3 | Reward: 65.04 | Length: 64
[EVAL] ckpt_27.pkl | Episode 4 | Reward: 63.07 | Length: 46


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 261.33it/s]


[EVAL] ckpt_27.pkl | Episode 5 | Reward: 86.07 | Length: 33
[EVAL][Step 56734] AvgLen: 48.80 | AvgRew: 71.01 | StdRew: 8.67 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 252.50it/s]


[EVAL] ckpt_28.pkl | Episode 1 | Reward: 62.78 | Length: 53
[EVAL] ckpt_28.pkl | Episode 2 | Reward: 64.93 | Length: 45
[EVAL] ckpt_28.pkl | Episode 3 | Reward: 71.24 | Length: 56
[EVAL] ckpt_28.pkl | Episode 4 | Reward: 80.00 | Length: 52
[EVAL] ckpt_28.pkl | Episode 5 | Reward: 61.26 | Length: 66
[EVAL][Step 58735] AvgLen: 54.40 | AvgRew: 68.04 | StdRew: 6.88 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_29.pkl | Episode 1 | Reward: 73.32 | Length: 69
[EVAL] ckpt_29.pkl | Episode 2 | Reward: 67.08 | Length: 47


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 219.04it/s]


[EVAL] ckpt_29.pkl | Episode 3 | Reward: 56.00 | Length: 72
[EVAL] ckpt_29.pkl | Episode 4 | Reward: 74.13 | Length: 62
[EVAL] ckpt_29.pkl | Episode 5 | Reward: 65.09 | Length: 55
[EVAL][Step 60771] AvgLen: 61.00 | AvgRew: 67.12 | StdRew: 6.56 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_30.pkl | Episode 1 | Reward: 69.58 | Length: 37
[EVAL] ckpt_30.pkl | Episode 2 | Reward: 80.78 | Length: 59
[EVAL] ckpt_30.pkl | Episode 3 | Reward: 60.00 | Length: 43


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 279.89it/s]


[EVAL] ckpt_30.pkl | Episode 4 | Reward: 68.00 | Length: 43
[EVAL] ckpt_30.pkl | Episode 5 | Reward: 77.19 | Length: 53
[EVAL][Step 62793] AvgLen: 47.00 | AvgRew: 71.11 | StdRew: 7.29 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_31.pkl | Episode 1 | Reward: 79.49 | Length: 45
[EVAL] ckpt_31.pkl | Episode 2 | Reward: 67.63 | Length: 46
[EVAL] ckpt_31.pkl | Episode 3 | Reward: 83.14 | Length: 53
[EVAL] ckpt_31.pkl | Episode 4 | Reward: 74.15 | Length: 48


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 265.06it/s]


[EVAL] ckpt_31.pkl | Episode 5 | Reward: 66.97 | Length: 42
[EVAL][Step 64810] AvgLen: 46.80 | AvgRew: 74.28 | StdRew: 6.38 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_32.pkl | Episode 1 | Reward: 76.00 | Length: 45
[EVAL] ckpt_32.pkl | Episode 2 | Reward: 63.30 | Length: 60
[EVAL] ckpt_32.pkl | Episode 3 | Reward: 71.71 | Length: 51


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 269.19it/s]


[EVAL] ckpt_32.pkl | Episode 4 | Reward: 68.00 | Length: 39
[EVAL] ckpt_32.pkl | Episode 5 | Reward: 61.58 | Length: 42
[EVAL][Step 66829] AvgLen: 47.40 | AvgRew: 68.12 | StdRew: 5.31 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_33.pkl | Episode 1 | Reward: 52.00 | Length: 61
[EVAL] ckpt_33.pkl | Episode 2 | Reward: 61.02 | Length: 41
[EVAL] ckpt_33.pkl | Episode 3 | Reward: 64.00 | Length: 48
[EVAL] ckpt_33.pkl | Episode 4 | Reward: 72.91 | Length: 65


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 255.04it/s]


[EVAL] ckpt_33.pkl | Episode 5 | Reward: 71.60 | Length: 48
[EVAL][Step 68853] AvgLen: 52.60 | AvgRew: 64.31 | StdRew: 7.61 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 294.99it/s]


[EVAL] ckpt_34.pkl | Episode 1 | Reward: 68.00 | Length: 38
[EVAL] ckpt_34.pkl | Episode 2 | Reward: 60.72 | Length: 46
[EVAL] ckpt_34.pkl | Episode 3 | Reward: 72.54 | Length: 47
[EVAL] ckpt_34.pkl | Episode 4 | Reward: 76.00 | Length: 52
[EVAL] ckpt_34.pkl | Episode 5 | Reward: 60.49 | Length: 44
[EVAL][Step 70883] AvgLen: 45.40 | AvgRew: 67.55 | StdRew: 6.21 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_35.pkl | Episode 1 | Reward: 67.06 | Length: 37
[EVAL] ckpt_35.pkl | Episode 2 | Reward: 67.16 | Length: 69
[EVAL] ckpt_35.pkl | Episode 3 | Reward: 52.00 | Length: 40
[EVAL] ckpt_35.pkl | Episode 4 | Reward: 78.81 | Length: 39


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 262.94it/s]


[EVAL] ckpt_35.pkl | Episode 5 | Reward: 62.81 | Length: 68
[EVAL][Step 72900] AvgLen: 50.60 | AvgRew: 65.57 | StdRew: 8.62 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_36.pkl | Episode 1 | Reward: 62.71 | Length: 53
[EVAL] ckpt_36.pkl | Episode 2 | Reward: 60.48 | Length: 49
[EVAL] ckpt_36.pkl | Episode 3 | Reward: 57.82 | Length: 51


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 256.99it/s]


[EVAL] ckpt_36.pkl | Episode 4 | Reward: 68.80 | Length: 58
[EVAL] ckpt_36.pkl | Episode 5 | Reward: 60.00 | Length: 45
[EVAL][Step 74953] AvgLen: 51.20 | AvgRew: 61.96 | StdRew: 3.76 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_37.pkl | Episode 1 | Reward: 71.21 | Length: 51
[EVAL] ckpt_37.pkl | Episode 2 | Reward: 56.00 | Length: 49
[EVAL] ckpt_37.pkl | Episode 3 | Reward: 68.87 | Length: 58
[EVAL] ckpt_37.pkl | Episode 4 | Reward: 73.68 | Length: 51


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 268.58it/s]


[EVAL] ckpt_37.pkl | Episode 5 | Reward: 76.85 | Length: 42
[EVAL][Step 76965] AvgLen: 50.20 | AvgRew: 69.32 | StdRew: 7.17 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 278.31it/s]


[EVAL] ckpt_38.pkl | Episode 1 | Reward: 69.43 | Length: 43
[EVAL] ckpt_38.pkl | Episode 2 | Reward: 64.00 | Length: 58
[EVAL] ckpt_38.pkl | Episode 3 | Reward: 62.17 | Length: 64
[EVAL] ckpt_38.pkl | Episode 4 | Reward: 67.04 | Length: 40
[EVAL] ckpt_38.pkl | Episode 5 | Reward: 67.05 | Length: 37
[EVAL][Step 79003] AvgLen: 48.40 | AvgRew: 65.94 | StdRew: 2.55 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_39.pkl | Episode 1 | Reward: 78.39 | Length: 34
[EVAL] ckpt_39.pkl | Episode 2 | Reward: 59.25 | Length: 47
[EVAL] ckpt_39.pkl | Episode 3 | Reward: 68.00 | Length: 48


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 236.30it/s]


[EVAL] ckpt_39.pkl | Episode 4 | Reward: 64.00 | Length: 69
[EVAL] ckpt_39.pkl | Episode 5 | Reward: 69.95 | Length: 83
[EVAL][Step 81016] AvgLen: 56.20 | AvgRew: 67.92 | StdRew: 6.39 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_40.pkl | Episode 1 | Reward: 65.22 | Length: 69
[EVAL] ckpt_40.pkl | Episode 2 | Reward: 88.00 | Length: 37
[EVAL] ckpt_40.pkl | Episode 3 | Reward: 74.55 | Length: 57


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 260.27it/s]


[EVAL] ckpt_40.pkl | Episode 4 | Reward: 57.65 | Length: 49
[EVAL] ckpt_40.pkl | Episode 5 | Reward: 58.51 | Length: 41
[EVAL][Step 83041] AvgLen: 50.60 | AvgRew: 68.79 | StdRew: 11.36 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_41.pkl | Episode 1 | Reward: 71.39 | Length: 53
[EVAL] ckpt_41.pkl | Episode 2 | Reward: 73.37 | Length: 39
[EVAL] ckpt_41.pkl | Episode 3 | Reward: 60.26 | Length: 41
[EVAL] ckpt_41.pkl | Episode 4 | Reward: 72.00 | Length: 58


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 272.70it/s]


[EVAL] ckpt_41.pkl | Episode 5 | Reward: 77.95 | Length: 45
[EVAL][Step 85081] AvgLen: 47.20 | AvgRew: 70.99 | StdRew: 5.84 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_42.pkl | Episode 1 | Reward: 64.91 | Length: 52
[EVAL] ckpt_42.pkl | Episode 2 | Reward: 80.58 | Length: 60
[EVAL] ckpt_42.pkl | Episode 3 | Reward: 65.73 | Length: 42


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 241.70it/s]


[EVAL] ckpt_42.pkl | Episode 4 | Reward: 88.00 | Length: 68
[EVAL] ckpt_42.pkl | Episode 5 | Reward: 64.00 | Length: 52
[EVAL][Step 87083] AvgLen: 54.80 | AvgRew: 72.64 | StdRew: 9.81 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_43.pkl | Episode 1 | Reward: 64.55 | Length: 28
[EVAL] ckpt_43.pkl | Episode 2 | Reward: 82.38 | Length: 44
[EVAL] ckpt_43.pkl | Episode 3 | Reward: 69.73 | Length: 56
[EVAL] ckpt_43.pkl | Episode 4 | Reward: 76.00 | Length: 45
[EVAL] ckpt_43.pkl | Episode 5 | Reward: 72.00 | Length: 64


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 273.21it/s]


[EVAL][Step 89085] AvgLen: 47.40 | AvgRew: 72.93 | StdRew: 6.00 | Time/Step: 0.0000


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 252.40it/s]

[EVAL] ckpt_44.pkl | Episode 1 | Reward: 58.80 | Length: 52
[EVAL] ckpt_44.pkl | Episode 2 | Reward: 90.13 | Length: 60
[EVAL] ckpt_44.pkl | Episode 3 | Reward: 63.41 | Length: 64
[EVAL] ckpt_44.pkl | Episode 4 | Reward: 75.14 | Length: 35
[EVAL] ckpt_44.pkl | Episode 5 | Reward: 69.12 | Length: 61





[EVAL][Step 91132] AvgLen: 54.40 | AvgRew: 71.32 | StdRew: 10.89 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_45.pkl | Episode 1 | Reward: 78.42 | Length: 37
[EVAL] ckpt_45.pkl | Episode 2 | Reward: 76.00 | Length: 40
[EVAL] ckpt_45.pkl | Episode 3 | Reward: 73.76 | Length: 46


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 281.77it/s]

[EVAL] ckpt_45.pkl | Episode 4 | Reward: 64.00 | Length: 47
[EVAL] ckpt_45.pkl | Episode 5 | Reward: 60.00 | Length: 56
[EVAL][Step 93181] AvgLen: 45.20 | AvgRew: 70.44 | StdRew: 7.16 | Time/Step: 0.0000



Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_46.pkl | Episode 1 | Reward: 68.00 | Length: 45
[EVAL] ckpt_46.pkl | Episode 2 | Reward: 55.46 | Length: 46
[EVAL] ckpt_46.pkl | Episode 3 | Reward: 80.01 | Length: 64


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 265.28it/s]

[EVAL] ckpt_46.pkl | Episode 4 | Reward: 72.00 | Length: 35
[EVAL] ckpt_46.pkl | Episode 5 | Reward: 65.81 | Length: 58





[EVAL][Step 95214] AvgLen: 49.60 | AvgRew: 68.26 | StdRew: 8.02 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_47.pkl | Episode 1 | Reward: 67.19 | Length: 55
[EVAL] ckpt_47.pkl | Episode 2 | Reward: 74.45 | Length: 52
[EVAL] ckpt_47.pkl | Episode 3 | Reward: 56.52 | Length: 72
[EVAL] ckpt_47.pkl | Episode 4 | Reward: 59.56 | Length: 57


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 234.98it/s]


[EVAL] ckpt_47.pkl | Episode 5 | Reward: 57.17 | Length: 56
[EVAL][Step 97216] AvgLen: 58.40 | AvgRew: 62.98 | StdRew: 6.88 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_48.pkl | Episode 1 | Reward: 66.64 | Length: 67
[EVAL] ckpt_48.pkl | Episode 2 | Reward: 88.91 | Length: 63
[EVAL] ckpt_48.pkl | Episode 3 | Reward: 74.39 | Length: 39


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 236.11it/s]

[EVAL] ckpt_48.pkl | Episode 4 | Reward: 57.98 | Length: 66
[EVAL] ckpt_48.pkl | Episode 5 | Reward: 66.81 | Length: 47





[EVAL][Step 99240] AvgLen: 56.40 | AvgRew: 70.95 | StdRew: 10.38 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_49.pkl | Episode 1 | Reward: 76.00 | Length: 72
[EVAL] ckpt_49.pkl | Episode 2 | Reward: 68.00 | Length: 57
[EVAL] ckpt_49.pkl | Episode 3 | Reward: 72.00 | Length: 39
[EVAL] ckpt_49.pkl | Episode 4 | Reward: 92.00 | Length: 55


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 257.21it/s]

[EVAL] ckpt_49.pkl | Episode 5 | Reward: 62.17 | Length: 42
[EVAL][Step 101274] AvgLen: 53.00 | AvgRew: 74.03 | StdRew: 10.08 | Time/Step: 0.0000
Plot path:  ./logs/Contextual_bandit/train/eval_plot_data.json





In [14]:
from naive_dqn import NaiveDQNAgent
from gymnasium import spaces
# Root log directory for the naive-DQN experiment; the 'train' and 'eval'
# subdirectories used later in this cell are derived from it.
# This rebinds the notebook-wide `tmp_base_dir` global that earlier cells set.
tmp_base_dir = './logs/naive_dqn'

def create_naive_dqn_agent(env, **kwargs):
    """Build a ``NaiveDQNAgent`` sized to ``env``'s observation and action spaces.

    The observation size is derived from ``env.observation_space['doc']``:
    the first document's leading dimension multiplied by the number of
    documents (e.g. 7 * 10 = 70).

    Args:
        env: Environment exposing ``observation_space`` (whose ``'doc'`` entry
            must be a ``spaces.Dict``) and ``action_space``
            (``spaces.Discrete`` or ``spaces.MultiDiscrete``).
        **kwargs: Optional hyperparameters ``epsilon`` (default 0.1),
            ``gamma`` (default 0.99) and ``lr`` (default 1e-3). Any other
            keys are ignored.

    Returns:
        A ``NaiveDQNAgent`` constructed with the derived sizes.

    Raises:
        ValueError: If the doc observation space is not a non-empty
            ``spaces.Dict``, or if the action space type is unsupported.
    """
    doc_obs_space = env.observation_space['doc']

    if not isinstance(doc_obs_space, spaces.Dict):
        # Single formatted message: passing the space as a second positional
        # argument would make str(exc) render as a tuple.
        raise ValueError(f"Unsupported doc observation space: {doc_obs_space}")

    doc_spaces = list(doc_obs_space.spaces.values())
    if not doc_spaces:
        raise ValueError("Doc observation space contains no documents")

    # Every document is expected to share one shape, so the first document's
    # leading dimension stands in for all of them.
    per_doc_dim = int(doc_spaces[0].shape[0])
    num_docs = len(doc_spaces)
    obs_dim = per_doc_dim * num_docs  # e.g., 7 * 10 = 70

    if isinstance(env.action_space, spaces.Discrete):
        n_actions = int(env.action_space.n)
    elif isinstance(env.action_space, spaces.MultiDiscrete):
        # Only the first entry of nvec is used -- assumes every slate position
        # offers the same number of choices; TODO confirm against the env.
        # int() turns the NumPy scalar into a plain Python int.
        n_actions = int(env.action_space.nvec[0])
    else:
        raise ValueError("Unsupported action space type")

    print(f"obs_dim: {obs_dim}, n_actions: {n_actions}")
    return NaiveDQNAgent(
        obs_dim=obs_dim,
        n_actions=n_actions,
        epsilon=kwargs.get('epsilon', 0.1),
        gamma=kwargs.get('gamma', 0.99),
        lr=kwargs.get('lr', 1e-3)
    )

# Derive the per-phase log directories from the experiment root.
train_log_dir, eval_log_dir = (
    os.path.join(tmp_base_dir, phase) for phase in ('train', 'eval')
)

# Remove logs left over from a previous run so results are not mixed.
for log_dir in (train_log_dir, eval_log_dir):
    if not os.path.exists(log_dir):
        continue
    shutil.rmtree(log_dir)

# Make sure the experiment root exists before the runner is created.
os.makedirs(tmp_base_dir, exist_ok=True)

# Runner wired to the naive-DQN agent factory and a fresh environment.
runner_naive_dqn = Runner(
    env=interest_evolution.create_environment(env_config),
    create_agent_fn=create_naive_dqn_agent,
    base_dir=tmp_base_dir,
)

obs_dim: 70, n_actions: 10


In [22]:
from naive_dqn import NaiveDQNAgent
from gymnasium import spaces
# Root log directory for the naive-DQN experiment; the 'train' and 'eval'
# subdirectories used later in this cell are derived from it.
tmp_base_dir = './logs/naive_dqn'

def create_naive_dqn_agent(env, **kwargs):
    """Create a ``NaiveDQNAgent`` whose sizes are read from ``env``'s spaces.

    ``obs_dim`` is the first document's leading dimension times the number of
    documents found in ``env.observation_space['doc']`` (e.g. 7 * 10 = 70).
    ``n_actions`` comes from ``Discrete.n`` or the first entry of
    ``MultiDiscrete.nvec``.

    Args:
        env: Environment exposing ``observation_space`` (whose ``'doc'`` entry
            must be a ``spaces.Dict``) and ``action_space``
            (``spaces.Discrete`` or ``spaces.MultiDiscrete``).
        **kwargs: Optional hyperparameters ``epsilon`` (default 0.1),
            ``gamma`` (default 0.99) and ``lr`` (default 1e-3). Any other
            keys are ignored.

    Returns:
        A ``NaiveDQNAgent`` constructed with the derived sizes.

    Raises:
        ValueError: If the doc observation space is not a non-empty
            ``spaces.Dict``, or if the action space type is unsupported.
    """
    doc_obs_space = env.observation_space['doc']

    if not isinstance(doc_obs_space, spaces.Dict):
        # One formatted string keeps str(exc) readable; a second positional
        # argument would make the message render as a tuple.
        raise ValueError(f"Unsupported doc observation space: {doc_obs_space}")

    doc_spaces = list(doc_obs_space.spaces.values())
    if not doc_spaces:
        raise ValueError("Doc observation space contains no documents")

    # Every document is expected to share one shape, so the first document's
    # leading dimension stands in for all of them.
    per_doc_dim = int(doc_spaces[0].shape[0])
    num_docs = len(doc_spaces)
    obs_dim = per_doc_dim * num_docs  # e.g., 7 * 10 = 70

    if isinstance(env.action_space, spaces.Discrete):
        n_actions = int(env.action_space.n)
    elif isinstance(env.action_space, spaces.MultiDiscrete):
        # Only nvec[0] is used -- assumes all slate positions have the same
        # number of choices; TODO confirm. int() drops the NumPy scalar type.
        n_actions = int(env.action_space.nvec[0])
    else:
        raise ValueError("Unsupported action space type")

    print(f"obs_dim: {obs_dim}, n_actions: {n_actions}")
    return NaiveDQNAgent(
        obs_dim=obs_dim,
        n_actions=n_actions,
        epsilon=kwargs.get('epsilon', 0.1),
        gamma=kwargs.get('gamma', 0.99),
        lr=kwargs.get('lr', 1e-3)
    )

# Per-phase log directories live directly under the experiment root.
train_log_dir, eval_log_dir = (
    os.path.join(tmp_base_dir, phase) for phase in ('train', 'eval')
)

# Drop any logs from an earlier run before starting a new one.
for log_dir in (train_log_dir, eval_log_dir):
    if not os.path.exists(log_dir):
        continue
    shutil.rmtree(log_dir)

# Ensure the experiment root exists before handing it to the runner.
os.makedirs(tmp_base_dir, exist_ok=True)

# Runner wired to the naive-DQN agent factory and a fresh environment.
runner_naive_dqn = Runner(
    env=interest_evolution.create_environment(env_config),
    create_agent_fn=create_naive_dqn_agent,
    base_dir=tmp_base_dir,
)

obs_dim: 70, n_actions: 10


In [26]:
from naive_dqn import NaiveDQNAgent
from gymnasium import spaces
# Root log directory for the naive-DQN experiment; the 'train' and 'eval'
# subdirectories used later in this cell are derived from it.
tmp_base_dir = './logs/naive_dqn'

def create_naive_dqn_agent(env, **kwargs):
    """Agent factory: return a ``NaiveDQNAgent`` dimensioned from ``env``.

    ``obs_dim`` is the first document's leading dimension times the number of
    documents found in ``env.observation_space['doc']`` (e.g. 7 * 10 = 70).
    ``n_actions`` comes from ``Discrete.n`` or the first entry of
    ``MultiDiscrete.nvec``.

    Args:
        env: Environment exposing ``observation_space`` (whose ``'doc'`` entry
            must be a ``spaces.Dict``) and ``action_space``
            (``spaces.Discrete`` or ``spaces.MultiDiscrete``).
        **kwargs: Optional hyperparameters ``epsilon`` (default 0.1),
            ``gamma`` (default 0.99) and ``lr`` (default 1e-3). Any other
            keys are ignored.

    Returns:
        A ``NaiveDQNAgent`` constructed with the derived sizes.

    Raises:
        ValueError: If the doc observation space is not a non-empty
            ``spaces.Dict``, or if the action space type is unsupported.
    """
    doc_obs_space = env.observation_space['doc']

    if not isinstance(doc_obs_space, spaces.Dict):
        # Format the space into the message itself; extra positional args to
        # ValueError would make str(exc) render as a tuple.
        raise ValueError(f"Unsupported doc observation space: {doc_obs_space}")

    doc_spaces = list(doc_obs_space.spaces.values())
    if not doc_spaces:
        raise ValueError("Doc observation space contains no documents")

    # Every document is expected to share one shape, so the first document's
    # leading dimension stands in for all of them.
    per_doc_dim = int(doc_spaces[0].shape[0])
    num_docs = len(doc_spaces)
    obs_dim = per_doc_dim * num_docs  # e.g., 7 * 10 = 70

    if isinstance(env.action_space, spaces.Discrete):
        n_actions = int(env.action_space.n)
    elif isinstance(env.action_space, spaces.MultiDiscrete):
        # Only nvec[0] is used -- assumes all slate positions have the same
        # number of choices; TODO confirm. int() drops the NumPy scalar type.
        n_actions = int(env.action_space.nvec[0])
    else:
        raise ValueError("Unsupported action space type")

    print(f"obs_dim: {obs_dim}, n_actions: {n_actions}")
    return NaiveDQNAgent(
        obs_dim=obs_dim,
        n_actions=n_actions,
        epsilon=kwargs.get('epsilon', 0.1),
        gamma=kwargs.get('gamma', 0.99),
        lr=kwargs.get('lr', 1e-3)
    )

# Per-phase log directories live directly under the experiment root.
train_log_dir, eval_log_dir = (
    os.path.join(tmp_base_dir, phase) for phase in ('train', 'eval')
)

# Drop any logs from an earlier run before starting a new one.
for log_dir in (train_log_dir, eval_log_dir):
    if not os.path.exists(log_dir):
        continue
    shutil.rmtree(log_dir)

# Ensure the experiment root itself exists.
os.makedirs(tmp_base_dir, exist_ok=True)

In [27]:
# Build the runner for the naive-DQN experiment. It is given the log root,
# the agent factory defined above, and a freshly created interest-evolution
# environment built from the shared `env_config`.
# The captured output shows the agent factory's print line when this cell runs.
runner_naive_dqn = Runner(
    base_dir=tmp_base_dir,
    create_agent_fn=create_naive_dqn_agent,
    env=interest_evolution.create_environment(env_config),
)

obs_dim: 70, n_actions: 10


In [28]:
# Train for 50 iterations; the captured log shows one [TRAIN] summary per
# iteration, roughly every 2000 environment steps.
runner_naive_dqn.run_training(max_training_steps=2000, num_iterations=50)
# Then evaluate with 5 episodes per saved checkpoint (the log reports
# ckpt_N.pkl entries) -- exact checkpoint handling lives in Runner; verify there.
runner_naive_dqn.run_evaluation(max_eval_episodes=5)

  s, a, r, s_next, done = zip(*batch)
Training Episode:   2%|▏         | 1/50 [00:05<04:25,  5.41s/it]

[TRAIN][Step 2019] AvgLen: 49.24 | AvgRew: 67.77 | StdRew: 8.12 | Time/Step: 0.0027


Training Episode:   4%|▍         | 2/50 [00:08<03:00,  3.76s/it]

[TRAIN][Step 4072] AvgLen: 54.03 | AvgRew: 74.49 | StdRew: 10.06 | Time/Step: 0.0013


Training Episode:   6%|▌         | 3/50 [00:10<02:30,  3.19s/it]

[TRAIN][Step 6072] AvgLen: 55.56 | AvgRew: 72.63 | StdRew: 9.65 | Time/Step: 0.0013


Training Episode:   8%|▊         | 4/50 [00:13<02:15,  2.94s/it]

[TRAIN][Step 8094] AvgLen: 56.17 | AvgRew: 70.03 | StdRew: 10.26 | Time/Step: 0.0013


Training Episode:  10%|█         | 5/50 [00:15<02:06,  2.80s/it]

[TRAIN][Step 10100] AvgLen: 54.22 | AvgRew: 76.27 | StdRew: 15.49 | Time/Step: 0.0013


Training Episode:  12%|█▏        | 6/50 [00:18<02:00,  2.74s/it]

[TRAIN][Step 12138] AvgLen: 58.23 | AvgRew: 78.83 | StdRew: 13.15 | Time/Step: 0.0013


Training Episode:  14%|█▍        | 7/50 [00:20<01:56,  2.71s/it]

[TRAIN][Step 14194] AvgLen: 60.47 | AvgRew: 76.71 | StdRew: 14.78 | Time/Step: 0.0013


Training Episode:  16%|█▌        | 8/50 [00:23<01:52,  2.67s/it]

[TRAIN][Step 16209] AvgLen: 62.97 | AvgRew: 71.64 | StdRew: 11.42 | Time/Step: 0.0013


Training Episode:  18%|█▊        | 9/50 [00:26<01:48,  2.64s/it]

[TRAIN][Step 18220] AvgLen: 54.35 | AvgRew: 80.46 | StdRew: 14.41 | Time/Step: 0.0013


Training Episode:  20%|██        | 10/50 [00:28<01:45,  2.63s/it]

[TRAIN][Step 20250] AvgLen: 58.00 | AvgRew: 79.28 | StdRew: 13.80 | Time/Step: 0.0013


Training Episode:  22%|██▏       | 11/50 [00:31<01:42,  2.63s/it]

[TRAIN][Step 22274] AvgLen: 59.53 | AvgRew: 76.03 | StdRew: 13.72 | Time/Step: 0.0013


Training Episode:  24%|██▍       | 12/50 [00:33<01:39,  2.61s/it]

[TRAIN][Step 24306] AvgLen: 59.76 | AvgRew: 78.56 | StdRew: 15.65 | Time/Step: 0.0013


Training Episode:  26%|██▌       | 13/50 [00:36<01:36,  2.60s/it]

[TRAIN][Step 26308] AvgLen: 60.67 | AvgRew: 77.77 | StdRew: 13.74 | Time/Step: 0.0013


Training Episode:  28%|██▊       | 14/50 [00:39<01:33,  2.61s/it]

[TRAIN][Step 28325] AvgLen: 61.12 | AvgRew: 76.89 | StdRew: 12.09 | Time/Step: 0.0013


Training Episode:  30%|███       | 15/50 [00:41<01:31,  2.61s/it]

[TRAIN][Step 30333] AvgLen: 64.77 | AvgRew: 79.26 | StdRew: 18.81 | Time/Step: 0.0013


Training Episode:  32%|███▏      | 16/50 [00:44<01:28,  2.60s/it]

[TRAIN][Step 32359] AvgLen: 59.59 | AvgRew: 82.39 | StdRew: 15.58 | Time/Step: 0.0013


Training Episode:  34%|███▍      | 17/50 [00:46<01:25,  2.60s/it]

[TRAIN][Step 34382] AvgLen: 57.80 | AvgRew: 85.80 | StdRew: 13.73 | Time/Step: 0.0013


Training Episode:  36%|███▌      | 18/50 [00:49<01:23,  2.61s/it]

[TRAIN][Step 36404] AvgLen: 61.27 | AvgRew: 85.63 | StdRew: 17.09 | Time/Step: 0.0013


Training Episode:  38%|███▊      | 19/50 [00:52<01:20,  2.60s/it]

[TRAIN][Step 38417] AvgLen: 59.21 | AvgRew: 83.73 | StdRew: 18.62 | Time/Step: 0.0013


Training Episode:  40%|████      | 20/50 [00:54<01:17,  2.60s/it]

[TRAIN][Step 40451] AvgLen: 63.56 | AvgRew: 79.53 | StdRew: 16.84 | Time/Step: 0.0013


Training Episode:  42%|████▏     | 21/50 [00:57<01:15,  2.60s/it]

[TRAIN][Step 42468] AvgLen: 65.06 | AvgRew: 84.54 | StdRew: 20.59 | Time/Step: 0.0013


Training Episode:  44%|████▍     | 22/50 [00:59<01:12,  2.59s/it]

[TRAIN][Step 44474] AvgLen: 66.87 | AvgRew: 92.13 | StdRew: 20.53 | Time/Step: 0.0013


Training Episode:  46%|████▌     | 23/50 [01:02<01:09,  2.59s/it]

[TRAIN][Step 46482] AvgLen: 64.77 | AvgRew: 84.02 | StdRew: 21.73 | Time/Step: 0.0013


Training Episode:  48%|████▊     | 24/50 [01:05<01:07,  2.59s/it]

[TRAIN][Step 48508] AvgLen: 65.35 | AvgRew: 83.50 | StdRew: 18.28 | Time/Step: 0.0013


Training Episode:  50%|█████     | 25/50 [01:07<01:04,  2.58s/it]

[TRAIN][Step 50534] AvgLen: 69.86 | AvgRew: 77.23 | StdRew: 18.61 | Time/Step: 0.0013


Training Episode:  52%|█████▏    | 26/50 [01:10<01:02,  2.58s/it]

[TRAIN][Step 52547] AvgLen: 64.94 | AvgRew: 87.71 | StdRew: 18.90 | Time/Step: 0.0013


Training Episode:  54%|█████▍    | 27/50 [01:12<00:59,  2.59s/it]

[TRAIN][Step 54575] AvgLen: 65.42 | AvgRew: 85.14 | StdRew: 18.97 | Time/Step: 0.0013


Training Episode:  56%|█████▌    | 28/50 [01:15<00:57,  2.61s/it]

[TRAIN][Step 56641] AvgLen: 68.87 | AvgRew: 88.32 | StdRew: 20.68 | Time/Step: 0.0013


Training Episode:  58%|█████▊    | 29/50 [01:17<00:54,  2.60s/it]

[TRAIN][Step 58658] AvgLen: 65.06 | AvgRew: 90.43 | StdRew: 21.08 | Time/Step: 0.0013


Training Episode:  60%|██████    | 30/50 [01:20<00:52,  2.61s/it]

[TRAIN][Step 60732] AvgLen: 66.90 | AvgRew: 82.91 | StdRew: 18.18 | Time/Step: 0.0013


Training Episode:  62%|██████▏   | 31/50 [01:23<00:49,  2.61s/it]

[TRAIN][Step 62772] AvgLen: 63.75 | AvgRew: 87.35 | StdRew: 19.72 | Time/Step: 0.0013


Training Episode:  64%|██████▍   | 32/50 [01:25<00:46,  2.60s/it]

[TRAIN][Step 64778] AvgLen: 66.87 | AvgRew: 79.33 | StdRew: 17.96 | Time/Step: 0.0013


Training Episode:  66%|██████▌   | 33/50 [01:28<00:44,  2.61s/it]

[TRAIN][Step 66822] AvgLen: 63.88 | AvgRew: 86.49 | StdRew: 20.43 | Time/Step: 0.0013


Training Episode:  68%|██████▊   | 34/50 [01:31<00:41,  2.62s/it]

[TRAIN][Step 68843] AvgLen: 63.16 | AvgRew: 85.38 | StdRew: 19.01 | Time/Step: 0.0013


Training Episode:  70%|███████   | 35/50 [01:33<00:39,  2.62s/it]

[TRAIN][Step 70906] AvgLen: 66.55 | AvgRew: 86.53 | StdRew: 17.16 | Time/Step: 0.0013


Training Episode:  72%|███████▏  | 36/50 [01:36<00:36,  2.63s/it]

[TRAIN][Step 72949] AvgLen: 68.10 | AvgRew: 85.50 | StdRew: 21.90 | Time/Step: 0.0013


Training Episode:  74%|███████▍  | 37/50 [01:38<00:33,  2.61s/it]

[TRAIN][Step 74953] AvgLen: 66.80 | AvgRew: 92.40 | StdRew: 21.38 | Time/Step: 0.0013


Training Episode:  76%|███████▌  | 38/50 [01:41<00:31,  2.61s/it]

[TRAIN][Step 76969] AvgLen: 65.03 | AvgRew: 85.34 | StdRew: 19.89 | Time/Step: 0.0013


Training Episode:  78%|███████▊  | 39/50 [01:44<00:28,  2.60s/it]

[TRAIN][Step 78987] AvgLen: 65.10 | AvgRew: 82.47 | StdRew: 20.04 | Time/Step: 0.0013


Training Episode:  80%|████████  | 40/50 [01:46<00:26,  2.61s/it]

[TRAIN][Step 81031] AvgLen: 61.94 | AvgRew: 91.45 | StdRew: 21.58 | Time/Step: 0.0013


Training Episode:  82%|████████▏ | 41/50 [01:49<00:23,  2.62s/it]

[TRAIN][Step 83088] AvgLen: 66.35 | AvgRew: 84.89 | StdRew: 23.28 | Time/Step: 0.0013


Training Episode:  84%|████████▍ | 42/50 [01:51<00:20,  2.61s/it]

[TRAIN][Step 85099] AvgLen: 67.03 | AvgRew: 82.06 | StdRew: 21.84 | Time/Step: 0.0013


Training Episode:  86%|████████▌ | 43/50 [01:54<00:18,  2.59s/it]

[TRAIN][Step 87106] AvgLen: 69.21 | AvgRew: 93.86 | StdRew: 19.71 | Time/Step: 0.0013


Training Episode:  88%|████████▊ | 44/50 [01:57<00:15,  2.61s/it]

[TRAIN][Step 89179] AvgLen: 64.78 | AvgRew: 92.17 | StdRew: 18.90 | Time/Step: 0.0013


Training Episode:  90%|█████████ | 45/50 [01:59<00:13,  2.60s/it]

[TRAIN][Step 91199] AvgLen: 67.33 | AvgRew: 83.52 | StdRew: 21.19 | Time/Step: 0.0013


Training Episode:  92%|█████████▏| 46/50 [02:02<00:10,  2.60s/it]

[TRAIN][Step 93225] AvgLen: 65.35 | AvgRew: 86.56 | StdRew: 23.15 | Time/Step: 0.0013


Training Episode:  94%|█████████▍| 47/50 [02:05<00:07,  2.61s/it]

[TRAIN][Step 95284] AvgLen: 66.42 | AvgRew: 86.45 | StdRew: 19.89 | Time/Step: 0.0013


Training Episode:  96%|█████████▌| 48/50 [02:07<00:05,  2.62s/it]

[TRAIN][Step 97335] AvgLen: 64.09 | AvgRew: 82.49 | StdRew: 18.89 | Time/Step: 0.0013


Training Episode:  98%|█████████▊| 49/50 [02:10<00:02,  2.62s/it]

[TRAIN][Step 99389] AvgLen: 66.26 | AvgRew: 85.20 | StdRew: 19.58 | Time/Step: 0.0013


Training Episode: 100%|██████████| 50/50 [02:12<00:00,  2.66s/it]


[TRAIN][Step 101417] AvgLen: 67.60 | AvgRew: 86.60 | StdRew: 21.84 | Time/Step: 0.0013
Training plot path:  ./logs/naive_dqn/train/plot_data.json


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 13.51it/s]

[EVAL] ckpt_0.pkl | Episode 1 | Reward: 64.47 | Length: 77
[EVAL] ckpt_0.pkl | Episode 2 | Reward: 76.00 | Length: 44
[EVAL] ckpt_0.pkl | Episode 3 | Reward: 67.87 | Length: 66
[EVAL] ckpt_0.pkl | Episode 4 | Reward: 68.00 | Length: 44


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 13.90it/s]


[EVAL] ckpt_0.pkl | Episode 5 | Reward: 68.00 | Length: 47
[EVAL][Step 2019] AvgLen: 55.60 | AvgRew: 68.87 | StdRew: 3.81 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.79it/s]

[EVAL] ckpt_1.pkl | Episode 1 | Reward: 65.82 | Length: 65
[EVAL] ckpt_1.pkl | Episode 2 | Reward: 67.94 | Length: 47


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 14.60it/s]

[EVAL] ckpt_1.pkl | Episode 3 | Reward: 76.68 | Length: 57
[EVAL] ckpt_1.pkl | Episode 4 | Reward: 71.58 | Length: 44


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 13.35it/s]


[EVAL] ckpt_1.pkl | Episode 5 | Reward: 73.27 | Length: 76
[EVAL][Step 4072] AvgLen: 57.80 | AvgRew: 71.06 | StdRew: 3.84 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 14.32it/s]

[EVAL] ckpt_2.pkl | Episode 1 | Reward: 72.00 | Length: 55
[EVAL] ckpt_2.pkl | Episode 2 | Reward: 80.20 | Length: 53
[EVAL] ckpt_2.pkl | Episode 3 | Reward: 56.00 | Length: 42


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 14.32it/s]


[EVAL] ckpt_2.pkl | Episode 4 | Reward: 75.70 | Length: 67
[EVAL] ckpt_2.pkl | Episode 5 | Reward: 76.37 | Length: 53
[EVAL][Step 6072] AvgLen: 54.00 | AvgRew: 72.05 | StdRew: 8.44 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_3.pkl | Episode 1 | Reward: 59.09 | Length: 46


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.75it/s]

[EVAL] ckpt_3.pkl | Episode 2 | Reward: 73.90 | Length: 67
[EVAL] ckpt_3.pkl | Episode 3 | Reward: 64.68 | Length: 62


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.95it/s]

[EVAL] ckpt_3.pkl | Episode 4 | Reward: 56.00 | Length: 63


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 13.96it/s]


[EVAL] ckpt_3.pkl | Episode 5 | Reward: 66.36 | Length: 40
[EVAL][Step 8094] AvgLen: 55.60 | AvgRew: 64.00 | StdRew: 6.20 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_4.pkl | Episode 1 | Reward: 91.43 | Length: 75


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 10.79it/s]

[EVAL] ckpt_4.pkl | Episode 2 | Reward: 60.00 | Length: 69


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 14.70it/s]

[EVAL] ckpt_4.pkl | Episode 3 | Reward: 73.22 | Length: 39
[EVAL] ckpt_4.pkl | Episode 4 | Reward: 83.77 | Length: 39


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 14.97it/s]


[EVAL] ckpt_4.pkl | Episode 5 | Reward: 68.00 | Length: 36
[EVAL][Step 10100] AvgLen: 51.60 | AvgRew: 75.28 | StdRew: 11.17 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_5.pkl | Episode 1 | Reward: 87.12 | Length: 55


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.47it/s]

[EVAL] ckpt_5.pkl | Episode 2 | Reward: 60.00 | Length: 69
[EVAL] ckpt_5.pkl | Episode 3 | Reward: 80.00 | Length: 44


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 14.16it/s]


[EVAL] ckpt_5.pkl | Episode 4 | Reward: 70.06 | Length: 47
[EVAL] ckpt_5.pkl | Episode 5 | Reward: 66.91 | Length: 58
[EVAL][Step 12138] AvgLen: 54.60 | AvgRew: 72.82 | StdRew: 9.62 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_6.pkl | Episode 1 | Reward: 89.23 | Length: 56


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 14.13it/s]

[EVAL] ckpt_6.pkl | Episode 2 | Reward: 85.12 | Length: 53
[EVAL] ckpt_6.pkl | Episode 3 | Reward: 80.59 | Length: 47


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.58it/s]

[EVAL] ckpt_6.pkl | Episode 4 | Reward: 48.47 | Length: 104


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.59it/s]


[EVAL] ckpt_6.pkl | Episode 5 | Reward: 65.54 | Length: 47
[EVAL][Step 14194] AvgLen: 61.40 | AvgRew: 73.79 | StdRew: 14.98 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 15.54it/s]

[EVAL] ckpt_7.pkl | Episode 1 | Reward: 52.00 | Length: 61
[EVAL] ckpt_7.pkl | Episode 2 | Reward: 91.92 | Length: 38
[EVAL] ckpt_7.pkl | Episode 3 | Reward: 73.51 | Length: 40


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 16.80it/s]

[EVAL] ckpt_7.pkl | Episode 4 | Reward: 98.44 | Length: 46


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 16.47it/s]


[EVAL] ckpt_7.pkl | Episode 5 | Reward: 61.62 | Length: 48
[EVAL][Step 16209] AvgLen: 46.60 | AvgRew: 75.50 | StdRew: 17.58 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_8.pkl | Episode 1 | Reward: 80.00 | Length: 65


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 11.99it/s]

[EVAL] ckpt_8.pkl | Episode 2 | Reward: 76.00 | Length: 64
[EVAL] ckpt_8.pkl | Episode 3 | Reward: 61.54 | Length: 51


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.05it/s]

[EVAL] ckpt_8.pkl | Episode 4 | Reward: 96.62 | Length: 77


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.09it/s]

[EVAL] ckpt_8.pkl | Episode 5 | Reward: 64.29 | Length: 63
[EVAL][Step 18220] AvgLen: 64.00 | AvgRew: 75.69 | StdRew: 12.54 | Time/Step: 0.0000



Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_9.pkl | Episode 1 | Reward: 78.28 | Length: 37


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.79it/s]

[EVAL] ckpt_9.pkl | Episode 2 | Reward: 52.00 | Length: 75
[EVAL] ckpt_9.pkl | Episode 3 | Reward: 56.66 | Length: 68


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.06it/s]

[EVAL] ckpt_9.pkl | Episode 4 | Reward: 67.58 | Length: 72


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.43it/s]


[EVAL] ckpt_9.pkl | Episode 5 | Reward: 84.00 | Length: 59
[EVAL][Step 20250] AvgLen: 62.20 | AvgRew: 67.70 | StdRew: 12.21 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_10.pkl | Episode 1 | Reward: 76.00 | Length: 62


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.66it/s]

[EVAL] ckpt_10.pkl | Episode 2 | Reward: 60.80 | Length: 59
[EVAL] ckpt_10.pkl | Episode 3 | Reward: 66.58 | Length: 78


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.04it/s]

[EVAL] ckpt_10.pkl | Episode 4 | Reward: 84.00 | Length: 55


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.83it/s]


[EVAL] ckpt_10.pkl | Episode 5 | Reward: 77.71 | Length: 46
[EVAL][Step 22274] AvgLen: 60.00 | AvgRew: 73.02 | StdRew: 8.27 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_11.pkl | Episode 1 | Reward: 83.36 | Length: 48


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.02it/s]

[EVAL] ckpt_11.pkl | Episode 2 | Reward: 115.76 | Length: 80
[EVAL] ckpt_11.pkl | Episode 3 | Reward: 60.00 | Length: 45


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 13.32it/s]

[EVAL] ckpt_11.pkl | Episode 4 | Reward: 60.00 | Length: 63


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.51it/s]


[EVAL] ckpt_11.pkl | Episode 5 | Reward: 114.35 | Length: 73
[EVAL][Step 24306] AvgLen: 61.80 | AvgRew: 86.69 | StdRew: 24.68 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_12.pkl | Episode 1 | Reward: 114.66 | Length: 56


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.93it/s]

[EVAL] ckpt_12.pkl | Episode 2 | Reward: 98.13 | Length: 55


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 15.76it/s]

[EVAL] ckpt_12.pkl | Episode 3 | Reward: 86.60 | Length: 44
[EVAL] ckpt_12.pkl | Episode 4 | Reward: 84.00 | Length: 45


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 14.75it/s]

[EVAL] ckpt_12.pkl | Episode 5 | Reward: 59.82 | Length: 62
[EVAL][Step 26308] AvgLen: 52.40 | AvgRew: 88.64 | StdRew: 18.02 | Time/Step: 0.0000



Evaluation Episode:  20%|██        | 1/5 [00:00<00:00,  9.77it/s]

[EVAL] ckpt_13.pkl | Episode 1 | Reward: 116.00 | Length: 79


Evaluation Episode:  60%|██████    | 3/5 [00:00<00:00, 11.82it/s]

[EVAL] ckpt_13.pkl | Episode 2 | Reward: 69.53 | Length: 65
[EVAL] ckpt_13.pkl | Episode 3 | Reward: 95.44 | Length: 56
[EVAL] ckpt_13.pkl | Episode 4 | Reward: 67.39 | Length: 68


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.40it/s]


[EVAL] ckpt_13.pkl | Episode 5 | Reward: 88.99 | Length: 71
[EVAL][Step 28325] AvgLen: 67.80 | AvgRew: 87.47 | StdRew: 17.91 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_14.pkl | Episode 1 | Reward: 90.69 | Length: 56


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.85it/s]

[EVAL] ckpt_14.pkl | Episode 2 | Reward: 67.69 | Length: 65
[EVAL] ckpt_14.pkl | Episode 3 | Reward: 82.83 | Length: 52


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 13.01it/s]

[EVAL] ckpt_14.pkl | Episode 4 | Reward: 104.31 | Length: 66


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.18it/s]


[EVAL] ckpt_14.pkl | Episode 5 | Reward: 88.00 | Length: 80
[EVAL][Step 30333] AvgLen: 63.80 | AvgRew: 86.71 | StdRew: 11.87 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_15.pkl | Episode 1 | Reward: 84.00 | Length: 46


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 15.63it/s]

[EVAL] ckpt_15.pkl | Episode 2 | Reward: 64.00 | Length: 53
[EVAL] ckpt_15.pkl | Episode 3 | Reward: 72.42 | Length: 80


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.80it/s]

[EVAL] ckpt_15.pkl | Episode 4 | Reward: 91.47 | Length: 56


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.40it/s]


[EVAL] ckpt_15.pkl | Episode 5 | Reward: 60.09 | Length: 77
[EVAL][Step 32359] AvgLen: 62.40 | AvgRew: 74.40 | StdRew: 11.83 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 11.46it/s]

[EVAL] ckpt_16.pkl | Episode 1 | Reward: 107.22 | Length: 76
[EVAL] ckpt_16.pkl | Episode 2 | Reward: 84.00 | Length: 59
[EVAL] ckpt_16.pkl | Episode 3 | Reward: 78.76 | Length: 49


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.05it/s]

[EVAL] ckpt_16.pkl | Episode 4 | Reward: 114.97 | Length: 81
[EVAL] ckpt_16.pkl | Episode 5 | Reward: 105.65 | Length: 56
[EVAL][Step 34382] AvgLen: 64.20 | AvgRew: 98.12 | StdRew: 14.12 | Time/Step: 0.0000



Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_17.pkl | Episode 1 | Reward: 68.14 | Length: 48


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 14.35it/s]

[EVAL] ckpt_17.pkl | Episode 2 | Reward: 63.55 | Length: 60
[EVAL] ckpt_17.pkl | Episode 3 | Reward: 105.42 | Length: 83


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.39it/s]


[EVAL] ckpt_17.pkl | Episode 4 | Reward: 88.00 | Length: 92
[EVAL] ckpt_17.pkl | Episode 5 | Reward: 116.00 | Length: 58
[EVAL][Step 36404] AvgLen: 68.20 | AvgRew: 88.22 | StdRew: 20.39 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_18.pkl | Episode 1 | Reward: 89.27 | Length: 43


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.76it/s]

[EVAL] ckpt_18.pkl | Episode 2 | Reward: 112.00 | Length: 78
[EVAL] ckpt_18.pkl | Episode 3 | Reward: 93.31 | Length: 48


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 13.65it/s]

[EVAL] ckpt_18.pkl | Episode 4 | Reward: 68.66 | Length: 60


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.46it/s]


[EVAL] ckpt_18.pkl | Episode 5 | Reward: 79.84 | Length: 82
[EVAL][Step 38417] AvgLen: 62.20 | AvgRew: 88.62 | StdRew: 14.45 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_19.pkl | Episode 1 | Reward: 91.20 | Length: 59


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 14.07it/s]

[EVAL] ckpt_19.pkl | Episode 2 | Reward: 76.00 | Length: 51
[EVAL] ckpt_19.pkl | Episode 3 | Reward: 79.79 | Length: 82


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.83it/s]

[EVAL] ckpt_19.pkl | Episode 4 | Reward: 108.66 | Length: 46


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 13.24it/s]


[EVAL] ckpt_19.pkl | Episode 5 | Reward: 66.19 | Length: 54
[EVAL][Step 40451] AvgLen: 58.40 | AvgRew: 84.37 | StdRew: 14.55 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_20.pkl | Episode 1 | Reward: 78.79 | Length: 59


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 10.92it/s]

[EVAL] ckpt_20.pkl | Episode 2 | Reward: 64.00 | Length: 83
[EVAL] ckpt_20.pkl | Episode 3 | Reward: 80.00 | Length: 44


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.68it/s]

[EVAL] ckpt_20.pkl | Episode 4 | Reward: 102.97 | Length: 82


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.84it/s]


[EVAL] ckpt_20.pkl | Episode 5 | Reward: 61.69 | Length: 89
[EVAL][Step 42468] AvgLen: 71.40 | AvgRew: 77.49 | StdRew: 14.76 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_21.pkl | Episode 1 | Reward: 100.56 | Length: 54


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.84it/s]

[EVAL] ckpt_21.pkl | Episode 2 | Reward: 88.00 | Length: 57
[EVAL] ckpt_21.pkl | Episode 3 | Reward: 66.99 | Length: 76


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.18it/s]

[EVAL] ckpt_21.pkl | Episode 4 | Reward: 116.00 | Length: 61


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.65it/s]


[EVAL] ckpt_21.pkl | Episode 5 | Reward: 64.49 | Length: 56
[EVAL][Step 44474] AvgLen: 60.80 | AvgRew: 87.21 | StdRew: 19.66 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_22.pkl | Episode 1 | Reward: 64.00 | Length: 50


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.30it/s]

[EVAL] ckpt_22.pkl | Episode 2 | Reward: 74.32 | Length: 66
[EVAL] ckpt_22.pkl | Episode 3 | Reward: 92.00 | Length: 70


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.93it/s]

[EVAL] ckpt_22.pkl | Episode 4 | Reward: 127.06 | Length: 68


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.53it/s]


[EVAL] ckpt_22.pkl | Episode 5 | Reward: 67.48 | Length: 53
[EVAL][Step 46482] AvgLen: 61.40 | AvgRew: 84.97 | StdRew: 23.15 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_23.pkl | Episode 1 | Reward: 60.00 | Length: 60


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.52it/s]

[EVAL] ckpt_23.pkl | Episode 2 | Reward: 84.00 | Length: 54
[EVAL] ckpt_23.pkl | Episode 3 | Reward: 103.52 | Length: 57


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 13.13it/s]

[EVAL] ckpt_23.pkl | Episode 4 | Reward: 109.57 | Length: 62


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.85it/s]


[EVAL] ckpt_23.pkl | Episode 5 | Reward: 100.00 | Length: 66
[EVAL][Step 48508] AvgLen: 59.80 | AvgRew: 91.42 | StdRew: 17.84 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_24.pkl | Episode 1 | Reward: 88.00 | Length: 63


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 11.65it/s]

[EVAL] ckpt_24.pkl | Episode 2 | Reward: 96.00 | Length: 70
[EVAL] ckpt_24.pkl | Episode 3 | Reward: 72.23 | Length: 75


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.59it/s]

[EVAL] ckpt_24.pkl | Episode 4 | Reward: 96.00 | Length: 59


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.85it/s]


[EVAL] ckpt_24.pkl | Episode 5 | Reward: 87.90 | Length: 59
[EVAL][Step 50534] AvgLen: 65.20 | AvgRew: 88.03 | StdRew: 8.68 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_25.pkl | Episode 1 | Reward: 64.48 | Length: 49


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.59it/s]

[EVAL] ckpt_25.pkl | Episode 2 | Reward: 107.92 | Length: 74
[EVAL] ckpt_25.pkl | Episode 3 | Reward: 72.00 | Length: 46


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 14.40it/s]


[EVAL] ckpt_25.pkl | Episode 4 | Reward: 108.00 | Length: 56
[EVAL] ckpt_25.pkl | Episode 5 | Reward: 95.33 | Length: 44
[EVAL][Step 52547] AvgLen: 53.80 | AvgRew: 89.55 | StdRew: 18.15 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_26.pkl | Episode 1 | Reward: 97.54 | Length: 61


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.39it/s]

[EVAL] ckpt_26.pkl | Episode 2 | Reward: 100.59 | Length: 64


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.23it/s]

[EVAL] ckpt_26.pkl | Episode 3 | Reward: 89.73 | Length: 93
[EVAL] ckpt_26.pkl | Episode 4 | Reward: 94.09 | Length: 55


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.56it/s]


[EVAL] ckpt_26.pkl | Episode 5 | Reward: 93.45 | Length: 36
[EVAL][Step 54575] AvgLen: 61.80 | AvgRew: 95.08 | StdRew: 3.71 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 11.97it/s]

[EVAL] ckpt_27.pkl | Episode 1 | Reward: 77.95 | Length: 62
[EVAL] ckpt_27.pkl | Episode 2 | Reward: 112.38 | Length: 68
[EVAL] ckpt_27.pkl | Episode 3 | Reward: 72.00 | Length: 69


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.98it/s]


[EVAL] ckpt_27.pkl | Episode 4 | Reward: 100.91 | Length: 58
[EVAL] ckpt_27.pkl | Episode 5 | Reward: 84.00 | Length: 67
[EVAL][Step 56641] AvgLen: 64.80 | AvgRew: 89.45 | StdRew: 14.99 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_28.pkl | Episode 1 | Reward: 59.66 | Length: 66


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.31it/s]

[EVAL] ckpt_28.pkl | Episode 2 | Reward: 108.00 | Length: 60
[EVAL] ckpt_28.pkl | Episode 3 | Reward: 76.00 | Length: 62


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.68it/s]

[EVAL] ckpt_28.pkl | Episode 4 | Reward: 76.00 | Length: 58


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.64it/s]


[EVAL] ckpt_28.pkl | Episode 5 | Reward: 51.51 | Length: 88
[EVAL][Step 58658] AvgLen: 66.80 | AvgRew: 74.23 | StdRew: 19.37 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 10.83it/s]

[EVAL] ckpt_29.pkl | Episode 1 | Reward: 120.95 | Length: 72
[EVAL] ckpt_29.pkl | Episode 2 | Reward: 72.00 | Length: 71
[EVAL] ckpt_29.pkl | Episode 3 | Reward: 60.76 | Length: 74


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.40it/s]


[EVAL] ckpt_29.pkl | Episode 4 | Reward: 72.15 | Length: 51
[EVAL] ckpt_29.pkl | Episode 5 | Reward: 76.00 | Length: 44
[EVAL][Step 60732] AvgLen: 62.40 | AvgRew: 80.37 | StdRew: 20.92 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_30.pkl | Episode 1 | Reward: 106.90 | Length: 59


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.07it/s]

[EVAL] ckpt_30.pkl | Episode 2 | Reward: 100.00 | Length: 70
[EVAL] ckpt_30.pkl | Episode 3 | Reward: 122.38 | Length: 61


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.51it/s]

[EVAL] ckpt_30.pkl | Episode 4 | Reward: 89.46 | Length: 79


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.02it/s]


[EVAL] ckpt_30.pkl | Episode 5 | Reward: 72.00 | Length: 55
[EVAL][Step 62772] AvgLen: 64.80 | AvgRew: 98.15 | StdRew: 16.89 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_31.pkl | Episode 1 | Reward: 72.00 | Length: 65


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.13it/s]

[EVAL] ckpt_31.pkl | Episode 2 | Reward: 129.56 | Length: 63
[EVAL] ckpt_31.pkl | Episode 3 | Reward: 77.87 | Length: 79


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.31it/s]

[EVAL] ckpt_31.pkl | Episode 4 | Reward: 76.00 | Length: 65


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.47it/s]


[EVAL] ckpt_31.pkl | Episode 5 | Reward: 72.42 | Length: 39
[EVAL][Step 64778] AvgLen: 62.20 | AvgRew: 85.57 | StdRew: 22.11 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_32.pkl | Episode 1 | Reward: 137.42 | Length: 68


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 11.67it/s]

[EVAL] ckpt_32.pkl | Episode 2 | Reward: 106.38 | Length: 65
[EVAL] ckpt_32.pkl | Episode 3 | Reward: 83.16 | Length: 73


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.33it/s]

[EVAL] ckpt_32.pkl | Episode 4 | Reward: 74.22 | Length: 67


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.54it/s]


[EVAL] ckpt_32.pkl | Episode 5 | Reward: 104.00 | Length: 63
[EVAL][Step 66822] AvgLen: 67.20 | AvgRew: 101.04 | StdRew: 21.91 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_33.pkl | Episode 1 | Reward: 65.94 | Length: 69


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.54it/s]

[EVAL] ckpt_33.pkl | Episode 2 | Reward: 95.95 | Length: 46
[EVAL] ckpt_33.pkl | Episode 3 | Reward: 56.00 | Length: 93


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.77it/s]

[EVAL] ckpt_33.pkl | Episode 4 | Reward: 106.79 | Length: 89
[EVAL] ckpt_33.pkl | Episode 5 | Reward: 107.16 | Length: 65
[EVAL][Step 68843] AvgLen: 72.40 | AvgRew: 86.37 | StdRew: 21.36 | Time/Step: 0.0000



Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_34.pkl | Episode 1 | Reward: 72.00 | Length: 64


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.90it/s]

[EVAL] ckpt_34.pkl | Episode 2 | Reward: 85.72 | Length: 56
[EVAL] ckpt_34.pkl | Episode 3 | Reward: 101.14 | Length: 65


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.96it/s]

[EVAL] ckpt_34.pkl | Episode 4 | Reward: 100.00 | Length: 54


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.89it/s]


[EVAL] ckpt_34.pkl | Episode 5 | Reward: 88.38 | Length: 117
[EVAL][Step 70906] AvgLen: 71.20 | AvgRew: 89.45 | StdRew: 10.65 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 15.34it/s]

[EVAL] ckpt_35.pkl | Episode 1 | Reward: 110.98 | Length: 44
[EVAL] ckpt_35.pkl | Episode 2 | Reward: 109.14 | Length: 57
[EVAL] ckpt_35.pkl | Episode 3 | Reward: 112.39 | Length: 58


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 13.23it/s]


[EVAL] ckpt_35.pkl | Episode 4 | Reward: 120.08 | Length: 67
[EVAL] ckpt_35.pkl | Episode 5 | Reward: 95.11 | Length: 66
[EVAL][Step 72949] AvgLen: 58.40 | AvgRew: 109.54 | StdRew: 8.12 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_36.pkl | Episode 1 | Reward: 111.04 | Length: 69


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.13it/s]

[EVAL] ckpt_36.pkl | Episode 2 | Reward: 99.79 | Length: 59
[EVAL] ckpt_36.pkl | Episode 3 | Reward: 72.00 | Length: 67


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.67it/s]

[EVAL] ckpt_36.pkl | Episode 4 | Reward: 105.58 | Length: 70


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.07it/s]


[EVAL] ckpt_36.pkl | Episode 5 | Reward: 97.67 | Length: 85
[EVAL][Step 74953] AvgLen: 70.00 | AvgRew: 97.22 | StdRew: 13.44 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_37.pkl | Episode 1 | Reward: 104.00 | Length: 47


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 14.63it/s]

[EVAL] ckpt_37.pkl | Episode 2 | Reward: 115.20 | Length: 58
[EVAL] ckpt_37.pkl | Episode 3 | Reward: 109.16 | Length: 75


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.92it/s]

[EVAL] ckpt_37.pkl | Episode 4 | Reward: 60.00 | Length: 72


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.99it/s]


[EVAL] ckpt_37.pkl | Episode 5 | Reward: 69.29 | Length: 45
[EVAL][Step 76969] AvgLen: 59.40 | AvgRew: 91.53 | StdRew: 22.43 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_38.pkl | Episode 1 | Reward: 58.68 | Length: 65


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.40it/s]

[EVAL] ckpt_38.pkl | Episode 2 | Reward: 98.09 | Length: 60
[EVAL] ckpt_38.pkl | Episode 3 | Reward: 64.29 | Length: 67


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.16it/s]

[EVAL] ckpt_38.pkl | Episode 4 | Reward: 140.54 | Length: 62


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.07it/s]


[EVAL] ckpt_38.pkl | Episode 5 | Reward: 52.00 | Length: 67
[EVAL][Step 78987] AvgLen: 64.20 | AvgRew: 82.72 | StdRew: 32.99 | Time/Step: 0.0000


Evaluation Episode:  20%|██        | 1/5 [00:00<00:00,  8.73it/s]

[EVAL] ckpt_39.pkl | Episode 1 | Reward: 69.92 | Length: 89


Evaluation Episode:  60%|██████    | 3/5 [00:00<00:00, 12.53it/s]

[EVAL] ckpt_39.pkl | Episode 2 | Reward: 80.00 | Length: 47
[EVAL] ckpt_39.pkl | Episode 3 | Reward: 92.64 | Length: 57
[EVAL] ckpt_39.pkl | Episode 4 | Reward: 88.00 | Length: 60


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.37it/s]


[EVAL] ckpt_39.pkl | Episode 5 | Reward: 84.00 | Length: 59
[EVAL][Step 81031] AvgLen: 62.40 | AvgRew: 82.91 | StdRew: 7.73 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_40.pkl | Episode 1 | Reward: 122.45 | Length: 61


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 10.73it/s]

[EVAL] ckpt_40.pkl | Episode 2 | Reward: 80.96 | Length: 84
[EVAL] ckpt_40.pkl | Episode 3 | Reward: 68.21 | Length: 64


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.83it/s]

[EVAL] ckpt_40.pkl | Episode 4 | Reward: 68.00 | Length: 59


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.55it/s]


[EVAL] ckpt_40.pkl | Episode 5 | Reward: 96.39 | Length: 70
[EVAL][Step 83088] AvgLen: 67.60 | AvgRew: 87.20 | StdRew: 20.47 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_41.pkl | Episode 1 | Reward: 86.17 | Length: 43


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 10.44it/s]

[EVAL] ckpt_41.pkl | Episode 2 | Reward: 44.00 | Length: 106
[EVAL] ckpt_41.pkl | Episode 3 | Reward: 110.13 | Length: 47


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 13.18it/s]

[EVAL] ckpt_41.pkl | Episode 4 | Reward: 98.80 | Length: 48


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.20it/s]

[EVAL] ckpt_41.pkl | Episode 5 | Reward: 48.74 | Length: 102
[EVAL][Step 85099] AvgLen: 69.20 | AvgRew: 77.57 | StdRew: 26.62 | Time/Step: 0.0000



Evaluation Episode:  20%|██        | 1/5 [00:00<00:00,  9.39it/s]

[EVAL] ckpt_42.pkl | Episode 1 | Reward: 60.00 | Length: 83


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00,  9.59it/s]

[EVAL] ckpt_42.pkl | Episode 2 | Reward: 56.00 | Length: 80
[EVAL] ckpt_42.pkl | Episode 3 | Reward: 93.46 | Length: 64


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00,  9.27it/s]


[EVAL] ckpt_42.pkl | Episode 4 | Reward: 139.29 | Length: 131
[EVAL] ckpt_42.pkl | Episode 5 | Reward: 92.40 | Length: 62
[EVAL][Step 87106] AvgLen: 84.00 | AvgRew: 88.23 | StdRew: 29.96 | Time/Step: 0.0000


Evaluation Episode:  20%|██        | 1/5 [00:00<00:00,  9.19it/s]

[EVAL] ckpt_43.pkl | Episode 1 | Reward: 77.68 | Length: 85


Evaluation Episode:  60%|██████    | 3/5 [00:00<00:00, 10.13it/s]

[EVAL] ckpt_43.pkl | Episode 2 | Reward: 68.93 | Length: 60
[EVAL] ckpt_43.pkl | Episode 3 | Reward: 44.09 | Length: 88
[EVAL] ckpt_43.pkl | Episode 4 | Reward: 89.73 | Length: 63


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.46it/s]


[EVAL] ckpt_43.pkl | Episode 5 | Reward: 125.81 | Length: 75
[EVAL][Step 89179] AvgLen: 74.20 | AvgRew: 81.25 | StdRew: 26.85 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_44.pkl | Episode 1 | Reward: 68.00 | Length: 72


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 10.85it/s]

[EVAL] ckpt_44.pkl | Episode 2 | Reward: 87.67 | Length: 72
[EVAL] ckpt_44.pkl | Episode 3 | Reward: 100.24 | Length: 61


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.18it/s]

[EVAL] ckpt_44.pkl | Episode 4 | Reward: 122.38 | Length: 75


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.05it/s]


[EVAL] ckpt_44.pkl | Episode 5 | Reward: 68.00 | Length: 73
[EVAL][Step 91199] AvgLen: 70.60 | AvgRew: 89.26 | StdRew: 20.61 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_45.pkl | Episode 1 | Reward: 106.09 | Length: 74


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.41it/s]

[EVAL] ckpt_45.pkl | Episode 2 | Reward: 80.00 | Length: 52
[EVAL] ckpt_45.pkl | Episode 3 | Reward: 68.00 | Length: 72


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.41it/s]

[EVAL] ckpt_45.pkl | Episode 4 | Reward: 60.00 | Length: 73


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.91it/s]


[EVAL] ckpt_45.pkl | Episode 5 | Reward: 80.00 | Length: 57
[EVAL][Step 93225] AvgLen: 65.60 | AvgRew: 78.82 | StdRew: 15.61 | Time/Step: 0.0000


Evaluation Episode:  20%|██        | 1/5 [00:00<00:00,  9.30it/s]

[EVAL] ckpt_46.pkl | Episode 1 | Reward: 90.50 | Length: 84
[EVAL] ckpt_46.pkl | Episode 2 | Reward: 65.42 | Length: 44


Evaluation Episode:  60%|██████    | 3/5 [00:00<00:00, 11.90it/s]

[EVAL] ckpt_46.pkl | Episode 3 | Reward: 108.00 | Length: 74
[EVAL] ckpt_46.pkl | Episode 4 | Reward: 65.84 | Length: 56


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.03it/s]


[EVAL] ckpt_46.pkl | Episode 5 | Reward: 100.43 | Length: 66
[EVAL][Step 95284] AvgLen: 64.80 | AvgRew: 86.04 | StdRew: 17.56 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_47.pkl | Episode 1 | Reward: 68.16 | Length: 42


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 11.77it/s]

[EVAL] ckpt_47.pkl | Episode 2 | Reward: 60.15 | Length: 91


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 10.88it/s]

[EVAL] ckpt_47.pkl | Episode 3 | Reward: 48.00 | Length: 82
[EVAL] ckpt_47.pkl | Episode 4 | Reward: 63.68 | Length: 70


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.92it/s]


[EVAL] ckpt_47.pkl | Episode 5 | Reward: 128.55 | Length: 73
[EVAL][Step 97335] AvgLen: 71.60 | AvgRew: 73.71 | StdRew: 28.23 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.65it/s]

[EVAL] ckpt_48.pkl | Episode 1 | Reward: 62.52 | Length: 63
[EVAL] ckpt_48.pkl | Episode 2 | Reward: 72.00 | Length: 51
[EVAL] ckpt_48.pkl | Episode 3 | Reward: 109.10 | Length: 67


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.68it/s]


[EVAL] ckpt_48.pkl | Episode 4 | Reward: 120.46 | Length: 75
[EVAL] ckpt_48.pkl | Episode 5 | Reward: 109.12 | Length: 75
[EVAL][Step 99389] AvgLen: 66.20 | AvgRew: 94.64 | StdRew: 22.93 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_49.pkl | Episode 1 | Reward: 80.00 | Length: 63


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 11.42it/s]

[EVAL] ckpt_49.pkl | Episode 2 | Reward: 77.35 | Length: 73


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 10.94it/s]

[EVAL] ckpt_49.pkl | Episode 3 | Reward: 64.54 | Length: 85
[EVAL] ckpt_49.pkl | Episode 4 | Reward: 68.00 | Length: 61


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.31it/s]

[EVAL] ckpt_49.pkl | Episode 5 | Reward: 102.30 | Length: 94
[EVAL][Step 101417] AvgLen: 75.20 | AvgRew: 78.44 | StdRew: 13.23 | Time/Step: 0.0000
Plot path:  ./logs/naive_dqn/train/eval_plot_data.json





In [31]:
# Double DQN
from double_dqn import DoubleDQNAgent
# Base log directory for the Double DQN experiment; the 'train' and 'eval'
# log paths further down in this cell are derived from it, and it is handed
# to Runner as base_dir.
tmp_base_dir = './logs/double_dqn'

def create_double_dqn_agent(env, **kwargs):
    """Build a ``DoubleDQNAgent`` sized from the environment's spaces.

    The observation size is the feature length of the first entry of the
    ``'doc'`` observation space multiplied by the number of entries; the
    number of actions is read from the action space.

    Args:
        env: Environment exposing ``observation_space['doc']`` (must be a
            ``spaces.Dict``) and ``action_space`` (``spaces.Discrete`` or
            ``spaces.MultiDiscrete``).
        **kwargs: Optional overrides forwarded to ``DoubleDQNAgent``:
            ``epsilon`` (default 0.1), ``gamma`` (default 0.99), ``lr``
            (default 1e-3) and ``tau`` (default 0.005, soft update rate).
            Any other keys are ignored.

    Returns:
        The constructed ``DoubleDQNAgent``.

    Raises:
        ValueError: If the doc observation space is not a non-empty
            ``spaces.Dict`` or the action space type is unsupported.
    """
    doc_obs_space = env.observation_space['doc']
    if not isinstance(doc_obs_space, spaces.Dict):
        # Single formatted message: passing the space as a second positional
        # argument would make the error text render as a tuple.
        raise ValueError(f"Unsupported doc observation space: {doc_obs_space}")

    doc_spaces = list(doc_obs_space.spaces.values())
    if not doc_spaces:
        raise ValueError("Doc observation space contains no documents")

    # NOTE(review): assumes every document shares the first document's
    # feature length — confirm against the environment.
    per_doc_dim = doc_spaces[0].shape[0]
    num_docs = len(doc_spaces)
    obs_dim = per_doc_dim * num_docs

    # Normalise to a plain Python int so no array scalar type leaks into the agent.
    if isinstance(env.action_space, spaces.Discrete):
        n_actions = int(env.action_space.n)
    elif isinstance(env.action_space, spaces.MultiDiscrete):
        # NOTE(review): only the first slate position's size is used; this
        # presumes all positions choose among the same number of candidates.
        n_actions = int(env.action_space.nvec[0])
    else:
        raise ValueError("Unsupported action space type")

    print(f"[DoubleDQN] obs_dim: {obs_dim}, n_actions: {n_actions}")
    return DoubleDQNAgent(
        obs_dim=obs_dim,
        n_actions=n_actions,
        epsilon=kwargs.get('epsilon', 0.1),
        gamma=kwargs.get('gamma', 0.99),
        lr=kwargs.get('lr', 1e-3),
        tau=kwargs.get('tau', 0.005)  # soft update rate
    )
# Derive the train/eval log locations from the experiment's base directory.
train_log_dir, eval_log_dir = (
    os.path.join(tmp_base_dir, split_name) for split_name in ('train', 'eval')
)

# Wipe whatever a previous run left in those locations so logs do not mix.
for log_dir in (train_log_dir, eval_log_dir):
    if not os.path.exists(log_dir):
        continue
    shutil.rmtree(log_dir)

# Ensure the base directory itself exists before the runner is constructed.
Path(tmp_base_dir).mkdir(parents=True, exist_ok=True)

# Wire the Double DQN agent factory to a freshly created environment.
runner_double_dqn = Runner(
    env=interest_evolution.create_environment(env_config),
    create_agent_fn=create_double_dqn_agent,
    base_dir=tmp_base_dir,
)

[DoubleDQN] obs_dim: 70, n_actions: 10


In [32]:
# Train the Double DQN agent first.
# NOTE(review): the logged output shows roughly 2000 environment steps per
# iteration over 50 iterations — confirm the exact semantics against Runner.
runner_double_dqn.run_training(
    max_training_steps=2000,
    num_iterations=50,
)

# Then evaluate; the log below reports 5 episodes for each ckpt_*.pkl file.
runner_double_dqn.run_evaluation(
    max_eval_episodes=5,
)

Training Episode:   2%|▏         | 1/50 [00:02<02:21,  2.88s/it]

[TRAIN][Step 2040] AvgLen: 46.36 | AvgRew: 67.99 | StdRew: 6.52 | Time/Step: 0.0014


Training Episode:   4%|▍         | 2/50 [00:05<02:18,  2.89s/it]

[TRAIN][Step 4072] AvgLen: 50.80 | AvgRew: 68.95 | StdRew: 7.43 | Time/Step: 0.0014


Training Episode:   6%|▌         | 3/50 [00:08<02:16,  2.90s/it]

[TRAIN][Step 6099] AvgLen: 49.44 | AvgRew: 68.08 | StdRew: 6.50 | Time/Step: 0.0014


Training Episode:   8%|▊         | 4/50 [00:11<02:14,  2.92s/it]

[TRAIN][Step 8157] AvgLen: 51.45 | AvgRew: 69.73 | StdRew: 8.86 | Time/Step: 0.0014


Training Episode:  10%|█         | 5/50 [00:14<02:11,  2.91s/it]

[TRAIN][Step 10187] AvgLen: 52.05 | AvgRew: 71.75 | StdRew: 10.21 | Time/Step: 0.0014


Training Episode:  12%|█▏        | 6/50 [00:17<02:08,  2.93s/it]

[TRAIN][Step 12241] AvgLen: 52.67 | AvgRew: 70.48 | StdRew: 8.76 | Time/Step: 0.0014


Training Episode:  14%|█▍        | 7/50 [00:20<02:05,  2.92s/it]

[TRAIN][Step 14257] AvgLen: 59.29 | AvgRew: 73.43 | StdRew: 11.42 | Time/Step: 0.0014


Training Episode:  16%|█▌        | 8/50 [00:23<02:03,  2.93s/it]

[TRAIN][Step 16298] AvgLen: 56.69 | AvgRew: 73.13 | StdRew: 12.94 | Time/Step: 0.0014


Training Episode:  18%|█▊        | 9/50 [00:26<01:59,  2.93s/it]

[TRAIN][Step 18306] AvgLen: 57.37 | AvgRew: 74.05 | StdRew: 11.22 | Time/Step: 0.0014


Training Episode:  20%|██        | 10/50 [00:29<01:56,  2.91s/it]

[TRAIN][Step 20308] AvgLen: 57.20 | AvgRew: 75.72 | StdRew: 11.67 | Time/Step: 0.0014


Training Episode:  22%|██▏       | 11/50 [00:32<01:54,  2.92s/it]

[TRAIN][Step 22352] AvgLen: 56.78 | AvgRew: 78.77 | StdRew: 14.93 | Time/Step: 0.0014


Training Episode:  24%|██▍       | 12/50 [00:35<01:51,  2.93s/it]

[TRAIN][Step 24396] AvgLen: 61.94 | AvgRew: 79.34 | StdRew: 14.21 | Time/Step: 0.0014


Training Episode:  26%|██▌       | 13/50 [00:38<01:48,  2.94s/it]

[TRAIN][Step 26432] AvgLen: 59.88 | AvgRew: 82.60 | StdRew: 17.19 | Time/Step: 0.0014


Training Episode:  28%|██▊       | 14/50 [00:40<01:46,  2.94s/it]

[TRAIN][Step 28459] AvgLen: 61.42 | AvgRew: 78.10 | StdRew: 15.23 | Time/Step: 0.0014


Training Episode:  30%|███       | 15/50 [00:43<01:42,  2.94s/it]

[TRAIN][Step 30477] AvgLen: 59.35 | AvgRew: 79.54 | StdRew: 16.16 | Time/Step: 0.0014


Training Episode:  32%|███▏      | 16/50 [00:46<01:40,  2.95s/it]

[TRAIN][Step 32550] AvgLen: 60.97 | AvgRew: 80.01 | StdRew: 13.74 | Time/Step: 0.0014


Training Episode:  34%|███▍      | 17/50 [00:49<01:37,  2.95s/it]

[TRAIN][Step 34583] AvgLen: 58.09 | AvgRew: 81.87 | StdRew: 17.00 | Time/Step: 0.0014


Training Episode:  36%|███▌      | 18/50 [00:52<01:34,  2.94s/it]

[TRAIN][Step 36598] AvgLen: 57.57 | AvgRew: 83.12 | StdRew: 15.51 | Time/Step: 0.0014


Training Episode:  38%|███▊      | 19/50 [00:55<01:31,  2.94s/it]

[TRAIN][Step 38637] AvgLen: 59.97 | AvgRew: 74.04 | StdRew: 14.88 | Time/Step: 0.0014


Training Episode:  40%|████      | 20/50 [00:58<01:28,  2.94s/it]

[TRAIN][Step 40669] AvgLen: 63.50 | AvgRew: 79.02 | StdRew: 15.69 | Time/Step: 0.0014


Training Episode:  42%|████▏     | 21/50 [01:01<01:25,  2.95s/it]

[TRAIN][Step 42714] AvgLen: 61.97 | AvgRew: 82.89 | StdRew: 15.59 | Time/Step: 0.0014


Training Episode:  44%|████▍     | 22/50 [01:04<01:22,  2.96s/it]

[TRAIN][Step 44772] AvgLen: 60.53 | AvgRew: 80.86 | StdRew: 16.32 | Time/Step: 0.0014


Training Episode:  46%|████▌     | 23/50 [01:07<01:19,  2.95s/it]

[TRAIN][Step 46786] AvgLen: 59.24 | AvgRew: 79.20 | StdRew: 13.06 | Time/Step: 0.0014


Training Episode:  48%|████▊     | 24/50 [01:10<01:16,  2.95s/it]

[TRAIN][Step 48824] AvgLen: 63.69 | AvgRew: 82.44 | StdRew: 18.22 | Time/Step: 0.0014


Training Episode:  50%|█████     | 25/50 [01:13<01:13,  2.93s/it]

[TRAIN][Step 50831] AvgLen: 62.72 | AvgRew: 85.47 | StdRew: 18.59 | Time/Step: 0.0014


Training Episode:  52%|█████▏    | 26/50 [01:16<01:10,  2.95s/it]

[TRAIN][Step 52914] AvgLen: 67.19 | AvgRew: 81.01 | StdRew: 16.12 | Time/Step: 0.0014


Training Episode:  54%|█████▍    | 27/50 [01:19<01:07,  2.94s/it]

[TRAIN][Step 54950] AvgLen: 65.68 | AvgRew: 84.01 | StdRew: 18.40 | Time/Step: 0.0014


Training Episode:  56%|█████▌    | 28/50 [01:22<01:04,  2.94s/it]

[TRAIN][Step 56991] AvgLen: 63.78 | AvgRew: 79.79 | StdRew: 16.72 | Time/Step: 0.0014


Training Episode:  58%|█████▊    | 29/50 [01:25<01:01,  2.93s/it]

[TRAIN][Step 59015] AvgLen: 59.53 | AvgRew: 78.56 | StdRew: 16.64 | Time/Step: 0.0014


Training Episode:  60%|██████    | 30/50 [01:27<00:58,  2.91s/it]

[TRAIN][Step 61021] AvgLen: 60.79 | AvgRew: 77.12 | StdRew: 13.07 | Time/Step: 0.0014


Training Episode:  62%|██████▏   | 31/50 [01:30<00:55,  2.91s/it]

[TRAIN][Step 63032] AvgLen: 60.94 | AvgRew: 79.48 | StdRew: 16.58 | Time/Step: 0.0014


Training Episode:  64%|██████▍   | 32/50 [01:33<00:52,  2.90s/it]

[TRAIN][Step 65059] AvgLen: 59.62 | AvgRew: 80.35 | StdRew: 17.16 | Time/Step: 0.0014


Training Episode:  66%|██████▌   | 33/50 [01:36<00:49,  2.90s/it]

[TRAIN][Step 67073] AvgLen: 53.00 | AvgRew: 76.74 | StdRew: 11.43 | Time/Step: 0.0014


Training Episode:  68%|██████▊   | 34/50 [01:39<00:46,  2.91s/it]

[TRAIN][Step 69117] AvgLen: 60.12 | AvgRew: 80.99 | StdRew: 15.98 | Time/Step: 0.0014


Training Episode:  70%|███████   | 35/50 [01:42<00:43,  2.89s/it]

[TRAIN][Step 71123] AvgLen: 59.00 | AvgRew: 75.83 | StdRew: 12.64 | Time/Step: 0.0014


Training Episode:  72%|███████▏  | 36/50 [01:45<00:40,  2.89s/it]

[TRAIN][Step 73139] AvgLen: 61.09 | AvgRew: 80.72 | StdRew: 15.71 | Time/Step: 0.0014


Training Episode:  74%|███████▍  | 37/50 [01:48<00:37,  2.88s/it]

[TRAIN][Step 75142] AvgLen: 55.64 | AvgRew: 80.73 | StdRew: 13.12 | Time/Step: 0.0014


Training Episode:  76%|███████▌  | 38/50 [01:51<00:34,  2.90s/it]

[TRAIN][Step 77200] AvgLen: 58.80 | AvgRew: 75.55 | StdRew: 14.06 | Time/Step: 0.0014


Training Episode:  78%|███████▊  | 39/50 [01:54<00:31,  2.90s/it]

[TRAIN][Step 79244] AvgLen: 61.94 | AvgRew: 79.90 | StdRew: 18.46 | Time/Step: 0.0014


Training Episode:  80%|████████  | 40/50 [01:56<00:29,  2.90s/it]

[TRAIN][Step 81275] AvgLen: 59.74 | AvgRew: 82.84 | StdRew: 13.18 | Time/Step: 0.0014


Training Episode:  82%|████████▏ | 41/50 [01:59<00:26,  2.91s/it]

[TRAIN][Step 83318] AvgLen: 60.09 | AvgRew: 83.52 | StdRew: 15.20 | Time/Step: 0.0014


Training Episode:  84%|████████▍ | 42/50 [02:02<00:23,  2.90s/it]

[TRAIN][Step 85326] AvgLen: 55.78 | AvgRew: 78.88 | StdRew: 12.03 | Time/Step: 0.0014


Training Episode:  86%|████████▌ | 43/50 [02:05<00:20,  2.90s/it]

[TRAIN][Step 87355] AvgLen: 61.48 | AvgRew: 80.00 | StdRew: 15.47 | Time/Step: 0.0014


Training Episode:  88%|████████▊ | 44/50 [02:08<00:17,  2.90s/it]

[TRAIN][Step 89355] AvgLen: 62.50 | AvgRew: 79.94 | StdRew: 15.04 | Time/Step: 0.0014


Training Episode:  90%|█████████ | 45/50 [02:11<00:14,  2.90s/it]

[TRAIN][Step 91394] AvgLen: 61.79 | AvgRew: 77.41 | StdRew: 14.81 | Time/Step: 0.0014


Training Episode:  92%|█████████▏| 46/50 [02:14<00:11,  2.90s/it]

[TRAIN][Step 93443] AvgLen: 60.26 | AvgRew: 79.51 | StdRew: 13.80 | Time/Step: 0.0014


Training Episode:  94%|█████████▍| 47/50 [02:17<00:08,  2.91s/it]

[TRAIN][Step 95487] AvgLen: 58.40 | AvgRew: 76.26 | StdRew: 13.22 | Time/Step: 0.0014


Training Episode:  96%|█████████▌| 48/50 [02:20<00:05,  2.89s/it]

[TRAIN][Step 97490] AvgLen: 58.91 | AvgRew: 76.04 | StdRew: 15.87 | Time/Step: 0.0014


Training Episode:  98%|█████████▊| 49/50 [02:23<00:02,  2.90s/it]

[TRAIN][Step 99548] AvgLen: 58.80 | AvgRew: 79.75 | StdRew: 16.08 | Time/Step: 0.0014


Training Episode: 100%|██████████| 50/50 [02:25<00:00,  2.92s/it]


[TRAIN][Step 101568] AvgLen: 59.41 | AvgRew: 81.50 | StdRew: 12.55 | Time/Step: 0.0014
Training plot path:  ./logs/double_dqn/train/plot_data.json


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 15.33it/s]

[EVAL] ckpt_0.pkl | Episode 1 | Reward: 68.04 | Length: 44
[EVAL] ckpt_0.pkl | Episode 2 | Reward: 57.97 | Length: 47


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 15.21it/s]

[EVAL] ckpt_0.pkl | Episode 3 | Reward: 80.00 | Length: 52
[EVAL] ckpt_0.pkl | Episode 4 | Reward: 68.00 | Length: 40


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 15.54it/s]


[EVAL] ckpt_0.pkl | Episode 5 | Reward: 64.00 | Length: 41
[EVAL][Step 2040] AvgLen: 44.80 | AvgRew: 67.60 | StdRew: 7.21 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_1.pkl | Episode 1 | Reward: 82.61 | Length: 46


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 15.58it/s]

[EVAL] ckpt_1.pkl | Episode 2 | Reward: 74.72 | Length: 43


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 15.23it/s]

[EVAL] ckpt_1.pkl | Episode 3 | Reward: 64.00 | Length: 53
[EVAL] ckpt_1.pkl | Episode 4 | Reward: 65.24 | Length: 40


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 15.37it/s]


[EVAL] ckpt_1.pkl | Episode 5 | Reward: 84.00 | Length: 44
[EVAL][Step 4072] AvgLen: 45.20 | AvgRew: 74.11 | StdRew: 8.38 | Time/Step: 0.0000


Evaluation Episode:  20%|██        | 1/5 [00:00<00:00,  9.84it/s]

[EVAL] ckpt_2.pkl | Episode 1 | Reward: 76.00 | Length: 71
[EVAL] ckpt_2.pkl | Episode 2 | Reward: 66.49 | Length: 40


Evaluation Episode:  60%|██████    | 3/5 [00:00<00:00, 11.88it/s]

[EVAL] ckpt_2.pkl | Episode 3 | Reward: 70.48 | Length: 69


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.71it/s]


[EVAL] ckpt_2.pkl | Episode 4 | Reward: 62.34 | Length: 45
[EVAL] ckpt_2.pkl | Episode 5 | Reward: 80.00 | Length: 49
[EVAL][Step 6099] AvgLen: 54.80 | AvgRew: 71.06 | StdRew: 6.35 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 17.37it/s]

[EVAL] ckpt_3.pkl | Episode 1 | Reward: 73.82 | Length: 46
[EVAL] ckpt_3.pkl | Episode 2 | Reward: 64.00 | Length: 34


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 15.54it/s]

[EVAL] ckpt_3.pkl | Episode 3 | Reward: 68.00 | Length: 41
[EVAL] ckpt_3.pkl | Episode 4 | Reward: 76.01 | Length: 55


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 15.29it/s]


[EVAL] ckpt_3.pkl | Episode 5 | Reward: 67.68 | Length: 51
[EVAL][Step 8157] AvgLen: 45.40 | AvgRew: 69.90 | StdRew: 4.38 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.72it/s]

[EVAL] ckpt_4.pkl | Episode 1 | Reward: 92.00 | Length: 47
[EVAL] ckpt_4.pkl | Episode 2 | Reward: 72.00 | Length: 54
[EVAL] ckpt_4.pkl | Episode 3 | Reward: 76.00 | Length: 52


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 14.06it/s]


[EVAL] ckpt_4.pkl | Episode 4 | Reward: 68.00 | Length: 57
[EVAL] ckpt_4.pkl | Episode 5 | Reward: 72.42 | Length: 37
[EVAL][Step 10187] AvgLen: 49.40 | AvgRew: 76.08 | StdRew: 8.35 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_5.pkl | Episode 1 | Reward: 80.00 | Length: 63


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 10.97it/s]

[EVAL] ckpt_5.pkl | Episode 2 | Reward: 64.00 | Length: 64
[EVAL] ckpt_5.pkl | Episode 3 | Reward: 84.58 | Length: 49


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.77it/s]

[EVAL] ckpt_5.pkl | Episode 4 | Reward: 79.89 | Length: 47


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.06it/s]


[EVAL] ckpt_5.pkl | Episode 5 | Reward: 58.37 | Length: 65
[EVAL][Step 12241] AvgLen: 57.60 | AvgRew: 73.37 | StdRew: 10.24 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_6.pkl | Episode 1 | Reward: 59.33 | Length: 62


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.12it/s]

[EVAL] ckpt_6.pkl | Episode 2 | Reward: 61.90 | Length: 53
[EVAL] ckpt_6.pkl | Episode 3 | Reward: 64.00 | Length: 43


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 13.12it/s]

[EVAL] ckpt_6.pkl | Episode 4 | Reward: 64.00 | Length: 57


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 13.44it/s]


[EVAL] ckpt_6.pkl | Episode 5 | Reward: 64.32 | Length: 44
[EVAL][Step 14257] AvgLen: 51.80 | AvgRew: 62.71 | StdRew: 1.90 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_7.pkl | Episode 1 | Reward: 71.81 | Length: 41


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.13it/s]

[EVAL] ckpt_7.pkl | Episode 2 | Reward: 68.00 | Length: 65
[EVAL] ckpt_7.pkl | Episode 3 | Reward: 74.03 | Length: 45


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 13.79it/s]

[EVAL] ckpt_7.pkl | Episode 4 | Reward: 88.62 | Length: 52


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.62it/s]


[EVAL] ckpt_7.pkl | Episode 5 | Reward: 88.00 | Length: 72
[EVAL][Step 16298] AvgLen: 55.00 | AvgRew: 78.09 | StdRew: 8.56 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_8.pkl | Episode 1 | Reward: 74.50 | Length: 48


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.60it/s]

[EVAL] ckpt_8.pkl | Episode 2 | Reward: 65.55 | Length: 53
[EVAL] ckpt_8.pkl | Episode 3 | Reward: 91.18 | Length: 50


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 13.20it/s]

[EVAL] ckpt_8.pkl | Episode 4 | Reward: 85.57 | Length: 57


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 13.74it/s]


[EVAL] ckpt_8.pkl | Episode 5 | Reward: 80.00 | Length: 43
[EVAL][Step 18306] AvgLen: 50.20 | AvgRew: 79.36 | StdRew: 8.87 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_9.pkl | Episode 1 | Reward: 72.00 | Length: 35


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 11.19it/s]

[EVAL] ckpt_9.pkl | Episode 2 | Reward: 80.00 | Length: 87


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.64it/s]

[EVAL] ckpt_9.pkl | Episode 3 | Reward: 70.89 | Length: 69
[EVAL] ckpt_9.pkl | Episode 4 | Reward: 77.38 | Length: 47


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.83it/s]


[EVAL] ckpt_9.pkl | Episode 5 | Reward: 67.50 | Length: 53
[EVAL][Step 20308] AvgLen: 58.20 | AvgRew: 73.55 | StdRew: 4.53 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 10.51it/s]

[EVAL] ckpt_10.pkl | Episode 1 | Reward: 93.67 | Length: 55
[EVAL] ckpt_10.pkl | Episode 2 | Reward: 68.00 | Length: 77
[EVAL] ckpt_10.pkl | Episode 3 | Reward: 80.81 | Length: 47


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.06it/s]

[EVAL] ckpt_10.pkl | Episode 4 | Reward: 64.00 | Length: 74


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.71it/s]


[EVAL] ckpt_10.pkl | Episode 5 | Reward: 101.70 | Length: 71
[EVAL][Step 22352] AvgLen: 64.80 | AvgRew: 81.63 | StdRew: 14.46 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_11.pkl | Episode 1 | Reward: 94.83 | Length: 42


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 16.26it/s]

[EVAL] ckpt_11.pkl | Episode 2 | Reward: 67.02 | Length: 43
[EVAL] ckpt_11.pkl | Episode 3 | Reward: 68.00 | Length: 77


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.23it/s]


[EVAL] ckpt_11.pkl | Episode 4 | Reward: 84.32 | Length: 76
[EVAL] ckpt_11.pkl | Episode 5 | Reward: 100.30 | Length: 46
[EVAL][Step 24396] AvgLen: 56.80 | AvgRew: 82.89 | StdRew: 13.57 | Time/Step: 0.0000


Evaluation Episode:  60%|██████    | 3/5 [00:00<00:00,  9.80it/s]

[EVAL] ckpt_12.pkl | Episode 1 | Reward: 99.68 | Length: 94
[EVAL] ckpt_12.pkl | Episode 2 | Reward: 77.38 | Length: 63
[EVAL] ckpt_12.pkl | Episode 3 | Reward: 123.88 | Length: 64


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.02it/s]


[EVAL] ckpt_12.pkl | Episode 4 | Reward: 91.53 | Length: 42
[EVAL] ckpt_12.pkl | Episode 5 | Reward: 106.77 | Length: 53
[EVAL][Step 26432] AvgLen: 63.20 | AvgRew: 99.85 | StdRew: 15.49 | Time/Step: 0.0000


Evaluation Episode:  60%|██████    | 3/5 [00:00<00:00, 12.60it/s]

[EVAL] ckpt_13.pkl | Episode 1 | Reward: 82.74 | Length: 80
[EVAL] ckpt_13.pkl | Episode 2 | Reward: 72.00 | Length: 55
[EVAL] ckpt_13.pkl | Episode 3 | Reward: 81.65 | Length: 37


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.58it/s]


[EVAL] ckpt_13.pkl | Episode 4 | Reward: 106.08 | Length: 74
[EVAL] ckpt_13.pkl | Episode 5 | Reward: 100.00 | Length: 52
[EVAL][Step 28459] AvgLen: 59.60 | AvgRew: 88.49 | StdRew: 12.60 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_14.pkl | Episode 1 | Reward: 87.20 | Length: 51


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.78it/s]

[EVAL] ckpt_14.pkl | Episode 2 | Reward: 95.79 | Length: 57
[EVAL] ckpt_14.pkl | Episode 3 | Reward: 84.39 | Length: 41


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 14.04it/s]

[EVAL] ckpt_14.pkl | Episode 4 | Reward: 85.18 | Length: 51


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 13.08it/s]


[EVAL] ckpt_14.pkl | Episode 5 | Reward: 63.07 | Length: 65
[EVAL][Step 30477] AvgLen: 53.00 | AvgRew: 83.13 | StdRew: 10.82 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_15.pkl | Episode 1 | Reward: 66.97 | Length: 54


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 10.69it/s]

[EVAL] ckpt_15.pkl | Episode 2 | Reward: 52.16 | Length: 76
[EVAL] ckpt_15.pkl | Episode 3 | Reward: 89.71 | Length: 63


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 10.26it/s]

[EVAL] ckpt_15.pkl | Episode 4 | Reward: 76.56 | Length: 76


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.96it/s]


[EVAL] ckpt_15.pkl | Episode 5 | Reward: 91.66 | Length: 47
[EVAL][Step 32550] AvgLen: 63.20 | AvgRew: 75.41 | StdRew: 14.71 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_16.pkl | Episode 1 | Reward: 55.86 | Length: 62


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 11.70it/s]

[EVAL] ckpt_16.pkl | Episode 2 | Reward: 88.75 | Length: 56


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.04it/s]

[EVAL] ckpt_16.pkl | Episode 3 | Reward: 84.00 | Length: 84
[EVAL] ckpt_16.pkl | Episode 4 | Reward: 82.91 | Length: 47


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.66it/s]


[EVAL] ckpt_16.pkl | Episode 5 | Reward: 72.15 | Length: 48
[EVAL][Step 34583] AvgLen: 59.40 | AvgRew: 76.74 | StdRew: 11.76 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 14.62it/s]

[EVAL] ckpt_17.pkl | Episode 1 | Reward: 71.24 | Length: 50
[EVAL] ckpt_17.pkl | Episode 2 | Reward: 98.10 | Length: 45
[EVAL] ckpt_17.pkl | Episode 3 | Reward: 72.00 | Length: 73


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.70it/s]


[EVAL] ckpt_17.pkl | Episode 4 | Reward: 63.21 | Length: 77
[EVAL] ckpt_17.pkl | Episode 5 | Reward: 64.00 | Length: 52
[EVAL][Step 36598] AvgLen: 59.40 | AvgRew: 73.71 | StdRew: 12.72 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_18.pkl | Episode 1 | Reward: 94.51 | Length: 66


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.52it/s]

[EVAL] ckpt_18.pkl | Episode 2 | Reward: 71.17 | Length: 35
[EVAL] ckpt_18.pkl | Episode 3 | Reward: 54.55 | Length: 86


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.03it/s]

[EVAL] ckpt_18.pkl | Episode 4 | Reward: 86.67 | Length: 38


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 13.16it/s]


[EVAL] ckpt_18.pkl | Episode 5 | Reward: 88.00 | Length: 36
[EVAL][Step 38637] AvgLen: 52.20 | AvgRew: 78.98 | StdRew: 14.42 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_19.pkl | Episode 1 | Reward: 77.07 | Length: 67


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00,  9.05it/s]

[EVAL] ckpt_19.pkl | Episode 2 | Reward: 64.00 | Length: 86


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 10.90it/s]

[EVAL] ckpt_19.pkl | Episode 3 | Reward: 68.00 | Length: 62
[EVAL] ckpt_19.pkl | Episode 4 | Reward: 69.01 | Length: 47


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00,  9.99it/s]


[EVAL] ckpt_19.pkl | Episode 5 | Reward: 75.91 | Length: 85
[EVAL][Step 40669] AvgLen: 69.40 | AvgRew: 70.80 | StdRew: 4.96 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 11.81it/s]

[EVAL] ckpt_20.pkl | Episode 1 | Reward: 84.00 | Length: 55
[EVAL] ckpt_20.pkl | Episode 2 | Reward: 77.49 | Length: 62
[EVAL] ckpt_20.pkl | Episode 3 | Reward: 72.00 | Length: 77


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.54it/s]


[EVAL] ckpt_20.pkl | Episode 4 | Reward: 60.00 | Length: 58
[EVAL] ckpt_20.pkl | Episode 5 | Reward: 110.30 | Length: 48
[EVAL][Step 42714] AvgLen: 60.00 | AvgRew: 80.76 | StdRew: 16.74 | Time/Step: 0.0000


Evaluation Episode:  20%|██        | 1/5 [00:00<00:00,  8.69it/s]

[EVAL] ckpt_21.pkl | Episode 1 | Reward: 60.00 | Length: 80


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00,  9.38it/s]

[EVAL] ckpt_21.pkl | Episode 2 | Reward: 72.00 | Length: 70
[EVAL] ckpt_21.pkl | Episode 3 | Reward: 77.43 | Length: 55


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.08it/s]

[EVAL] ckpt_21.pkl | Episode 4 | Reward: 96.00 | Length: 57


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.22it/s]


[EVAL] ckpt_21.pkl | Episode 5 | Reward: 88.00 | Length: 78
[EVAL][Step 44772] AvgLen: 68.00 | AvgRew: 78.69 | StdRew: 12.51 | Time/Step: 0.0000


Evaluation Episode:  20%|██        | 1/5 [00:00<00:00,  9.28it/s]

[EVAL] ckpt_22.pkl | Episode 1 | Reward: 56.00 | Length: 75


Evaluation Episode:  60%|██████    | 3/5 [00:00<00:00, 11.21it/s]

[EVAL] ckpt_22.pkl | Episode 2 | Reward: 87.60 | Length: 67
[EVAL] ckpt_22.pkl | Episode 3 | Reward: 67.73 | Length: 48
[EVAL] ckpt_22.pkl | Episode 4 | Reward: 84.33 | Length: 54


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.30it/s]


[EVAL] ckpt_22.pkl | Episode 5 | Reward: 64.00 | Length: 63
[EVAL][Step 46786] AvgLen: 61.40 | AvgRew: 71.93 | StdRew: 12.11 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_23.pkl | Episode 1 | Reward: 84.00 | Length: 48


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.53it/s]

[EVAL] ckpt_23.pkl | Episode 2 | Reward: 88.27 | Length: 54
[EVAL] ckpt_23.pkl | Episode 3 | Reward: 68.00 | Length: 43


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 13.48it/s]

[EVAL] ckpt_23.pkl | Episode 4 | Reward: 88.59 | Length: 60


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 13.24it/s]


[EVAL] ckpt_23.pkl | Episode 5 | Reward: 60.00 | Length: 56
[EVAL][Step 48824] AvgLen: 52.20 | AvgRew: 77.77 | StdRew: 11.64 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_24.pkl | Episode 1 | Reward: 101.03 | Length: 57


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 10.07it/s]

[EVAL] ckpt_24.pkl | Episode 2 | Reward: 57.19 | Length: 81
[EVAL] ckpt_24.pkl | Episode 3 | Reward: 64.00 | Length: 88


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00,  8.76it/s]

[EVAL] ckpt_24.pkl | Episode 4 | Reward: 52.00 | Length: 86


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00,  9.34it/s]


[EVAL] ckpt_24.pkl | Episode 5 | Reward: 64.00 | Length: 61
[EVAL][Step 50831] AvgLen: 74.60 | AvgRew: 67.64 | StdRew: 17.29 | Time/Step: 0.0000


Evaluation Episode:  20%|██        | 1/5 [00:00<00:00,  9.99it/s]

[EVAL] ckpt_25.pkl | Episode 1 | Reward: 119.01 | Length: 69
[EVAL] ckpt_25.pkl | Episode 2 | Reward: 67.19 | Length: 55


Evaluation Episode:  60%|██████    | 3/5 [00:00<00:00, 12.60it/s]

[EVAL] ckpt_25.pkl | Episode 3 | Reward: 67.17 | Length: 45
[EVAL] ckpt_25.pkl | Episode 4 | Reward: 84.07 | Length: 47


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.14it/s]


[EVAL] ckpt_25.pkl | Episode 5 | Reward: 92.42 | Length: 69
[EVAL][Step 52914] AvgLen: 57.00 | AvgRew: 85.97 | StdRew: 19.20 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_26.pkl | Episode 1 | Reward: 72.13 | Length: 49


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.15it/s]

[EVAL] ckpt_26.pkl | Episode 2 | Reward: 72.64 | Length: 65
[EVAL] ckpt_26.pkl | Episode 3 | Reward: 75.84 | Length: 56


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.71it/s]

[EVAL] ckpt_26.pkl | Episode 4 | Reward: 84.00 | Length: 65


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.95it/s]


[EVAL] ckpt_26.pkl | Episode 5 | Reward: 65.27 | Length: 81
[EVAL][Step 54950] AvgLen: 63.20 | AvgRew: 73.97 | StdRew: 6.08 | Time/Step: 0.0000


Evaluation Episode:  20%|██        | 1/5 [00:00<00:00,  9.73it/s]

[EVAL] ckpt_27.pkl | Episode 1 | Reward: 81.28 | Length: 71
[EVAL] ckpt_27.pkl | Episode 2 | Reward: 87.19 | Length: 60


Evaluation Episode:  60%|██████    | 3/5 [00:00<00:00, 11.79it/s]

[EVAL] ckpt_27.pkl | Episode 3 | Reward: 66.45 | Length: 49


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.78it/s]


[EVAL] ckpt_27.pkl | Episode 4 | Reward: 80.00 | Length: 47
[EVAL] ckpt_27.pkl | Episode 5 | Reward: 72.63 | Length: 67
[EVAL][Step 56991] AvgLen: 58.80 | AvgRew: 77.51 | StdRew: 7.21 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_28.pkl | Episode 1 | Reward: 74.17 | Length: 43


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 14.34it/s]

[EVAL] ckpt_28.pkl | Episode 2 | Reward: 104.00 | Length: 51
[EVAL] ckpt_28.pkl | Episode 3 | Reward: 64.00 | Length: 56


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.61it/s]

[EVAL] ckpt_28.pkl | Episode 4 | Reward: 85.04 | Length: 64


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.88it/s]


[EVAL] ckpt_28.pkl | Episode 5 | Reward: 84.00 | Length: 76
[EVAL][Step 59015] AvgLen: 58.00 | AvgRew: 82.24 | StdRew: 13.28 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_29.pkl | Episode 1 | Reward: 78.05 | Length: 52


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.39it/s]

[EVAL] ckpt_29.pkl | Episode 2 | Reward: 71.87 | Length: 59
[EVAL] ckpt_29.pkl | Episode 3 | Reward: 48.66 | Length: 70


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.27it/s]

[EVAL] ckpt_29.pkl | Episode 4 | Reward: 72.00 | Length: 61


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.52it/s]


[EVAL] ckpt_29.pkl | Episode 5 | Reward: 123.63 | Length: 58
[EVAL][Step 61021] AvgLen: 60.00 | AvgRew: 78.84 | StdRew: 24.55 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_30.pkl | Episode 1 | Reward: 72.00 | Length: 60


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 10.32it/s]

[EVAL] ckpt_30.pkl | Episode 2 | Reward: 96.00 | Length: 74
[EVAL] ckpt_30.pkl | Episode 3 | Reward: 72.00 | Length: 48


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.69it/s]

[EVAL] ckpt_30.pkl | Episode 4 | Reward: 88.00 | Length: 59


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.17it/s]


[EVAL] ckpt_30.pkl | Episode 5 | Reward: 76.85 | Length: 68
[EVAL][Step 63032] AvgLen: 61.80 | AvgRew: 80.97 | StdRew: 9.52 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_31.pkl | Episode 1 | Reward: 76.00 | Length: 57


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 11.72it/s]

[EVAL] ckpt_31.pkl | Episode 2 | Reward: 63.03 | Length: 61
[EVAL] ckpt_31.pkl | Episode 3 | Reward: 60.05 | Length: 53


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00,  9.93it/s]

[EVAL] ckpt_31.pkl | Episode 4 | Reward: 97.76 | Length: 102


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.63it/s]


[EVAL] ckpt_31.pkl | Episode 5 | Reward: 88.00 | Length: 53
[EVAL][Step 65059] AvgLen: 65.20 | AvgRew: 76.97 | StdRew: 14.39 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_32.pkl | Episode 1 | Reward: 64.11 | Length: 54


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.14it/s]

[EVAL] ckpt_32.pkl | Episode 2 | Reward: 80.00 | Length: 51


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 15.44it/s]

[EVAL] ckpt_32.pkl | Episode 3 | Reward: 66.32 | Length: 38
[EVAL] ckpt_32.pkl | Episode 4 | Reward: 100.00 | Length: 40


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 14.16it/s]

[EVAL] ckpt_32.pkl | Episode 5 | Reward: 100.00 | Length: 60





[EVAL][Step 67073] AvgLen: 48.60 | AvgRew: 82.09 | StdRew: 15.61 | Time/Step: 0.0000


Evaluation Episode:  20%|██        | 1/5 [00:00<00:00,  7.57it/s]

[EVAL] ckpt_33.pkl | Episode 1 | Reward: 64.00 | Length: 92


Evaluation Episode:  60%|██████    | 3/5 [00:00<00:00, 10.90it/s]

[EVAL] ckpt_33.pkl | Episode 2 | Reward: 90.21 | Length: 49
[EVAL] ckpt_33.pkl | Episode 3 | Reward: 56.00 | Length: 59
[EVAL] ckpt_33.pkl | Episode 4 | Reward: 73.40 | Length: 38


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.08it/s]


[EVAL] ckpt_33.pkl | Episode 5 | Reward: 88.63 | Length: 49
[EVAL][Step 69117] AvgLen: 57.40 | AvgRew: 74.45 | StdRew: 13.42 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_34.pkl | Episode 1 | Reward: 83.81 | Length: 64


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00,  9.16it/s]

[EVAL] ckpt_34.pkl | Episode 2 | Reward: 82.20 | Length: 88


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 10.84it/s]

[EVAL] ckpt_34.pkl | Episode 3 | Reward: 80.00 | Length: 53
[EVAL] ckpt_34.pkl | Episode 4 | Reward: 68.00 | Length: 59


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.52it/s]


[EVAL] ckpt_34.pkl | Episode 5 | Reward: 60.00 | Length: 67
[EVAL][Step 71123] AvgLen: 66.20 | AvgRew: 74.80 | StdRew: 9.26 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.92it/s]

[EVAL] ckpt_35.pkl | Episode 1 | Reward: 72.00 | Length: 54
[EVAL] ckpt_35.pkl | Episode 2 | Reward: 64.00 | Length: 53
[EVAL] ckpt_35.pkl | Episode 3 | Reward: 64.00 | Length: 71


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.51it/s]


[EVAL] ckpt_35.pkl | Episode 4 | Reward: 100.77 | Length: 59
[EVAL] ckpt_35.pkl | Episode 5 | Reward: 68.97 | Length: 64
[EVAL][Step 73139] AvgLen: 60.20 | AvgRew: 73.95 | StdRew: 13.75 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_36.pkl | Episode 1 | Reward: 74.60 | Length: 54


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.79it/s]

[EVAL] ckpt_36.pkl | Episode 2 | Reward: 76.00 | Length: 54
[EVAL] ckpt_36.pkl | Episode 3 | Reward: 67.69 | Length: 62


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 11.60it/s]

[EVAL] ckpt_36.pkl | Episode 4 | Reward: 89.96 | Length: 65


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.16it/s]


[EVAL] ckpt_36.pkl | Episode 5 | Reward: 73.40 | Length: 49
[EVAL][Step 75142] AvgLen: 56.80 | AvgRew: 76.33 | StdRew: 7.38 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 17.12it/s]

[EVAL] ckpt_37.pkl | Episode 1 | Reward: 84.00 | Length: 43
[EVAL] ckpt_37.pkl | Episode 2 | Reward: 89.08 | Length: 37
[EVAL] ckpt_37.pkl | Episode 3 | Reward: 68.00 | Length: 59


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 14.23it/s]


[EVAL] ckpt_37.pkl | Episode 4 | Reward: 64.00 | Length: 47
[EVAL] ckpt_37.pkl | Episode 5 | Reward: 96.00 | Length: 56
[EVAL][Step 77200] AvgLen: 48.40 | AvgRew: 80.22 | StdRew: 12.28 | Time/Step: 0.0000


Evaluation Episode:  60%|██████    | 3/5 [00:00<00:00, 11.38it/s]

[EVAL] ckpt_38.pkl | Episode 1 | Reward: 61.34 | Length: 74
[EVAL] ckpt_38.pkl | Episode 2 | Reward: 88.00 | Length: 49
[EVAL] ckpt_38.pkl | Episode 3 | Reward: 56.79 | Length: 64


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.08it/s]


[EVAL] ckpt_38.pkl | Episode 4 | Reward: 76.00 | Length: 44
[EVAL] ckpt_38.pkl | Episode 5 | Reward: 56.00 | Length: 82
[EVAL][Step 79244] AvgLen: 62.60 | AvgRew: 67.63 | StdRew: 12.47 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.10it/s]

[EVAL] ckpt_39.pkl | Episode 1 | Reward: 97.66 | Length: 50
[EVAL] ckpt_39.pkl | Episode 2 | Reward: 72.00 | Length: 55


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.63it/s]


[EVAL] ckpt_39.pkl | Episode 3 | Reward: 56.00 | Length: 94
[EVAL] ckpt_39.pkl | Episode 4 | Reward: 64.00 | Length: 50
[EVAL] ckpt_39.pkl | Episode 5 | Reward: 82.67 | Length: 48
[EVAL][Step 81275] AvgLen: 59.40 | AvgRew: 74.47 | StdRew: 14.57 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00,  9.75it/s]

[EVAL] ckpt_40.pkl | Episode 1 | Reward: 100.16 | Length: 65
[EVAL] ckpt_40.pkl | Episode 2 | Reward: 108.00 | Length: 77


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.10it/s]


[EVAL] ckpt_40.pkl | Episode 3 | Reward: 54.53 | Length: 95
[EVAL] ckpt_40.pkl | Episode 4 | Reward: 57.60 | Length: 52
[EVAL] ckpt_40.pkl | Episode 5 | Reward: 92.00 | Length: 54
[EVAL][Step 83318] AvgLen: 68.60 | AvgRew: 82.46 | StdRew: 22.15 | Time/Step: 0.0000


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 12.65it/s]

[EVAL] ckpt_41.pkl | Episode 1 | Reward: 87.31 | Length: 50
[EVAL] ckpt_41.pkl | Episode 2 | Reward: 89.18 | Length: 59
[EVAL] ckpt_41.pkl | Episode 3 | Reward: 95.90 | Length: 55


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.29it/s]


[EVAL] ckpt_41.pkl | Episode 4 | Reward: 60.33 | Length: 95
[EVAL] ckpt_41.pkl | Episode 5 | Reward: 92.00 | Length: 48
[EVAL][Step 85326] AvgLen: 61.40 | AvgRew: 84.94 | StdRew: 12.64 | Time/Step: 0.0000


Evaluation Episode:  60%|██████    | 3/5 [00:00<00:00, 10.60it/s]

[EVAL] ckpt_42.pkl | Episode 1 | Reward: 84.00 | Length: 104
[EVAL] ckpt_42.pkl | Episode 2 | Reward: 89.79 | Length: 47
[EVAL] ckpt_42.pkl | Episode 3 | Reward: 91.84 | Length: 58


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 10.37it/s]


[EVAL] ckpt_42.pkl | Episode 4 | Reward: 64.20 | Length: 63
[EVAL] ckpt_42.pkl | Episode 5 | Reward: 111.64 | Length: 64
[EVAL][Step 87355] AvgLen: 67.20 | AvgRew: 88.29 | StdRew: 15.22 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_43.pkl | Episode 1 | Reward: 61.10 | Length: 35


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 14.12it/s]

[EVAL] ckpt_43.pkl | Episode 2 | Reward: 68.00 | Length: 63
[EVAL] ckpt_43.pkl | Episode 3 | Reward: 60.85 | Length: 76


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.09it/s]


[EVAL] ckpt_43.pkl | Episode 4 | Reward: 114.21 | Length: 82
[EVAL] ckpt_43.pkl | Episode 5 | Reward: 70.06 | Length: 57
[EVAL][Step 89355] AvgLen: 62.60 | AvgRew: 74.84 | StdRew: 20.02 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_44.pkl | Episode 1 | Reward: 64.00 | Length: 38


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 16.77it/s]

[EVAL] ckpt_44.pkl | Episode 2 | Reward: 81.17 | Length: 44
[EVAL] ckpt_44.pkl | Episode 3 | Reward: 112.00 | Length: 65


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 13.56it/s]

[EVAL] ckpt_44.pkl | Episode 4 | Reward: 76.00 | Length: 51


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 13.59it/s]


[EVAL] ckpt_44.pkl | Episode 5 | Reward: 92.28 | Length: 56
[EVAL][Step 91394] AvgLen: 50.80 | AvgRew: 85.09 | StdRew: 16.24 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_45.pkl | Episode 1 | Reward: 89.57 | Length: 59


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 11.88it/s]

[EVAL] ckpt_45.pkl | Episode 2 | Reward: 68.00 | Length: 57
[EVAL] ckpt_45.pkl | Episode 3 | Reward: 65.76 | Length: 57


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.60it/s]

[EVAL] ckpt_45.pkl | Episode 4 | Reward: 85.05 | Length: 48


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 11.72it/s]


[EVAL] ckpt_45.pkl | Episode 5 | Reward: 93.76 | Length: 73
[EVAL][Step 93443] AvgLen: 58.80 | AvgRew: 80.43 | StdRew: 11.42 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_46.pkl | Episode 1 | Reward: 91.54 | Length: 61


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 10.88it/s]

[EVAL] ckpt_46.pkl | Episode 2 | Reward: 82.57 | Length: 66
[EVAL] ckpt_46.pkl | Episode 3 | Reward: 72.00 | Length: 61


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.30it/s]

[EVAL] ckpt_46.pkl | Episode 4 | Reward: 88.00 | Length: 41


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.36it/s]


[EVAL] ckpt_46.pkl | Episode 5 | Reward: 105.67 | Length: 50
[EVAL][Step 95487] AvgLen: 55.80 | AvgRew: 87.96 | StdRew: 11.05 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_47.pkl | Episode 1 | Reward: 64.00 | Length: 56


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 14.06it/s]

[EVAL] ckpt_47.pkl | Episode 2 | Reward: 64.00 | Length: 42


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 14.96it/s]

[EVAL] ckpt_47.pkl | Episode 3 | Reward: 68.00 | Length: 49
[EVAL] ckpt_47.pkl | Episode 4 | Reward: 83.70 | Length: 39


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 15.10it/s]


[EVAL] ckpt_47.pkl | Episode 5 | Reward: 91.87 | Length: 42
[EVAL][Step 97490] AvgLen: 45.60 | AvgRew: 74.31 | StdRew: 11.39 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_48.pkl | Episode 1 | Reward: 109.53 | Length: 49


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 13.52it/s]

[EVAL] ckpt_48.pkl | Episode 2 | Reward: 71.15 | Length: 53
[EVAL] ckpt_48.pkl | Episode 3 | Reward: 65.36 | Length: 60


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 12.16it/s]

[EVAL] ckpt_48.pkl | Episode 4 | Reward: 82.82 | Length: 62


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 12.98it/s]


[EVAL] ckpt_48.pkl | Episode 5 | Reward: 80.00 | Length: 42
[EVAL][Step 99548] AvgLen: 53.20 | AvgRew: 81.77 | StdRew: 15.21 | Time/Step: 0.0000


Evaluation Episode:   0%|          | 0/5 [00:00<?, ?it/s]

[EVAL] ckpt_49.pkl | Episode 1 | Reward: 94.43 | Length: 52


Evaluation Episode:  40%|████      | 2/5 [00:00<00:00, 14.58it/s]

[EVAL] ckpt_49.pkl | Episode 2 | Reward: 104.00 | Length: 42
[EVAL] ckpt_49.pkl | Episode 3 | Reward: 76.00 | Length: 39


Evaluation Episode:  80%|████████  | 4/5 [00:00<00:00, 13.46it/s]

[EVAL] ckpt_49.pkl | Episode 4 | Reward: 88.00 | Length: 69


Evaluation Episode: 100%|██████████| 5/5 [00:00<00:00, 13.60it/s]

[EVAL] ckpt_49.pkl | Episode 5 | Reward: 70.14 | Length: 51
[EVAL][Step 101568] AvgLen: 50.60 | AvgRew: 86.51 | StdRew: 12.24 | Time/Step: 0.0000
Plot path:  ./logs/double_dqn/train/eval_plot_data.json



