In [None]:
import os
from datetime import datetime

from envs.arpod_HCW import HCWSE2Env
from stable_baselines3 import TD3
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.env_util import is_wrapped

run_specs="20kg_256x1024x64"
run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
out_dir = os.path.join("runs", f"td3_arpod_{run_id}_{run_specs}")
os.makedirs(out_dir, exist_ok=True)
best_dir = os.path.join(out_dir, "best_models")
logs_dir = os.path.join(out_dir, "logs")
os.makedirs(best_dir, exist_ok=True)
os.makedirs(logs_dir, exist_ok=True)


n_envs = 12
seed = 42
spawn_radius=(2.5, 5.0)
env = DummyVecEnv([lambda: Monitor(HCWSE2Env(seed=seed,spawn_radius=spawn_radius), logs_dir)] * n_envs)
# env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)0)

policy_kwargs = dict(
    net_arch=dict(pi=[256, 1024,64], qf=[256, 1024,64])
)
model = TD3(
    policy="MlpPolicy",
    env=env,
    learning_rate=3e-4,
    buffer_size=int(1e6),
    learning_starts=20_000,
    batch_size=256,
    tau=0.005,
    gamma=0.999,
    train_freq=(64,"step"),
    gradient_steps=128,
    policy_kwargs=policy_kwargs,
    verbose=1,
    tensorboard_log=logs_dir,
    seed=seed
)
model.save(os.path.join(best_dir, "model_0M"))

# model_path = os.path.join(best_dir, "model_0M.zip")

In [2]:

eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)

model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback])
model.save(os.path.join(out_dir, "model_1M"))



Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.11e+03 |
|    ep_rew_mean     | -648     |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 10       |
|    fps             | 7707     |
|    time_elapsed    | 3        |
|    total_timesteps | 23184    |
| train/             |          |
|    actor_loss      | 3.26     |
|    critic_loss     | 1.92     |
|    learning_rate   | 0.0003   |
|    n_updates       | 512      |
---------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.35e+03  |
|    ep_rew_mean     | -1.22e+03 |
|    success_rate    | 0         |
| time/              |           |
|    episodes        | 20        |
|    fps             | 3405      |
|    time_elapsed    | 10        |
|    total_timesteps | 36000     |
| train/             |           |
|

In [3]:

# > 1M steps, increase spawn radius
spawn_radius=(2.5, 10.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
    print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)
model.save(os.path.join(out_dir, "model_2M"))




Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.37e+03 |
|    ep_rew_mean     | -571     |
|    success_rate    | 0.02     |
| time/              |          |
|    episodes        | 380      |
|    fps             | 12705    |
|    time_elapsed    | 0        |
|    total_timesteps | 1000992  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_

In [4]:

# > 2M steps, increase spawn radius
spawn_radius=(2.5, 20.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)
model.save(os.path.join(out_dir, "model_3M"))




Increasing spawn radius to  (2.5, 20.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 279      |
|    ep_rew_mean     | 56.2     |
|    success_rate    | 0.34     |
| time/              |          |
|    episodes        | 2310     |
|    fps             | 1932     |
|    time_elapsed    | 2        |
|    total_timesteps | 2005860  |
| train/             |          |
|    actor_loss      | 85.6     |
|    critic_loss     | 46.8     |
|    learning_rate   | 0.0003   |
|    n_updates       | 330880   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 278      |
|    ep_rew_mean     | 65.6     |
|    success_rate    | 0.38     |
| time/              |          |
|    episodes        | 2320     |
|    fps             | 1748     |
|    time_elapsed    | 4        |
|    total_timesteps | 2009280  |
| trai

In [5]:

# > 3M steps, increase spawn radius
spawn_radius=(2.5, 40.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_4M"))



Increasing spawn radius to  (2.5, 40.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 651      |
|    ep_rew_mean     | 129      |
|    success_rate    | 0.73     |
| time/              |          |
|    episodes        | 4700     |
|    fps             | 1707     |
|    time_elapsed    | 0        |
|    total_timesteps | 3002928  |
| train/             |          |
|    actor_loss      | -90.7    |
|    critic_loss     | 18.7     |
|    learning_rate   | 0.0003   |
|    n_updates       | 497152   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 722      |
|    ep_rew_mean     | 142      |
|    success_rate    | 0.76     |
| time/              |          |
|    episodes        | 4710     |
|    fps             | 1930     |
|    time_elapsed    | 2        |
|    total_timesteps | 3006708  |
| trai

In [6]:
# > 4M steps, increase spawn radius
spawn_radius=(2.5, 60.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_5M"))



Increasing spawn radius to  (2.5, 60.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 305      |
|    ep_rew_mean     | 208      |
|    success_rate    | 0.98     |
| time/              |          |
|    episodes        | 6890     |
|    fps             | 1871     |
|    time_elapsed    | 2        |
|    total_timesteps | 4007316  |
| train/             |          |
|    actor_loss      | -117     |
|    critic_loss     | 63.7     |
|    learning_rate   | 0.0003   |
|    n_updates       | 664448   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 339      |
|    ep_rew_mean     | 210      |
|    success_rate    | 0.98     |
| time/              |          |
|    episodes        | 6900     |
|    fps             | 1640     |
|    time_elapsed    | 5        |
|    total_timesteps | 4012092  |
| trai

In [7]:

# > 5M steps, increase spawn radius
spawn_radius=(2.5, 60.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_6M"))


Increasing spawn radius to  (2.5, 60.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 375      |
|    ep_rew_mean     | 223      |
|    success_rate    | 1        |
| time/              |          |
|    episodes        | 8320     |
|    fps             | 1805     |
|    time_elapsed    | 0        |
|    total_timesteps | 5005104  |
| train/             |          |
|    actor_loss      | -96.4    |
|    critic_loss     | 8.4      |
|    learning_rate   | 0.0003   |
|    n_updates       | 830848   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 390      |
|    ep_rew_mean     | 219      |
|    success_rate    | 0.99     |
| time/              |          |
|    episodes        | 8330     |
|    fps             | 1928     |
|    time_elapsed    | 2        |
|    total_timesteps | 5008752  |
| trai

In [8]:

# > 6M steps, increase spawn radius
spawn_radius=(2.5, 80.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_7M"))


Increasing spawn radius to  (2.5, 80.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 430      |
|    ep_rew_mean     | 225      |
|    success_rate    | 0.99     |
| time/              |          |
|    episodes        | 10260    |
|    fps             | 1815     |
|    time_elapsed    | 2        |
|    total_timesteps | 6008976  |
| train/             |          |
|    actor_loss      | -104     |
|    critic_loss     | 247      |
|    learning_rate   | 0.0003   |
|    n_updates       | 998144   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 408      |
|    ep_rew_mean     | 225      |
|    success_rate    | 1        |
| time/              |          |
|    episodes        | 10270    |
|    fps             | 1781     |
|    time_elapsed    | 6        |
|    total_timesteps | 6015792  |
| trai

In [9]:

# > 7M steps, increase spawn radius
spawn_radius=(2.5, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_8M"))


Increasing spawn radius to  (2.5, 100.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.12e+03 |
|    ep_rew_mean     | 121      |
|    success_rate    | 0.14     |
| time/              |          |
|    episodes        | 10850    |
|    fps             | 1795     |
|    time_elapsed    | 1        |
|    total_timesteps | 7008108  |
| train/             |          |
|    actor_loss      | -71.7    |
|    critic_loss     | 186      |
|    learning_rate   | 0.0003   |
|    n_updates       | 1164672  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.23e+03 |
|    ep_rew_mean     | 131      |
|    success_rate    | 0.14     |
| time/              |          |
|    episodes        | 10860    |
|    fps             | 1763     |
|    time_elapsed    | 20       |
|    total_timesteps | 7040316  |
| tra

In [10]:
# > 8M steps, stabilize spawn radius
spawn_radius=(2.5, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_9M"))


Increasing spawn radius to  (2.5, 100.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.44e+03 |
|    ep_rew_mean     | 173      |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 11240    |
|    fps             | 1777     |
|    time_elapsed    | 3        |
|    total_timesteps | 8011056  |
| train/             |          |
|    actor_loss      | -84.3    |
|    critic_loss     | 88.7     |
|    learning_rate   | 0.0003   |
|    n_updates       | 1331840  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.37e+03 |
|    ep_rew_mean     | 95.5     |
|    success_rate    | 0.01     |
| time/              |          |
|    episodes        | 11250    |
|    fps             | 1804     |
|    time_elapsed    | 14       |
|    total_timesteps | 8031432  |
| tra

In [11]:
# > 9M steps, stabilize spawn radius
spawn_radius=(2.5, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_10M"))

Increasing spawn radius to  (2.5, 100.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.55e+03 |
|    ep_rew_mean     | 82.8     |
|    success_rate    | 0.06     |
| time/              |          |
|    episodes        | 11650    |
|    fps             | 1781     |
|    time_elapsed    | 8        |
|    total_timesteps | 9021048  |
| train/             |          |
|    actor_loss      | 547      |
|    critic_loss     | 295      |
|    learning_rate   | 0.0003   |
|    n_updates       | 1500160  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.6e+03  |
|    ep_rew_mean     | 99.8     |
|    success_rate    | 0.08     |
| time/              |          |
|    episodes        | 11660    |
|    fps             | 1777     |
|    time_elapsed    | 23       |
|    total_timesteps | 9048336  |
| tra

In [12]:
# > 10M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_11M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.65e+03 |
|    ep_rew_mean     | 105      |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 12040    |
|    fps             | 1736     |
|    time_elapsed    | 18       |
|    total_timesteps | 10039548 |
| train/             |          |
|    actor_loss      | 1.15e+03 |
|    critic_loss     | 662      |
|    learning_rate   | 0.0003   |
|    n_updates       | 1669888  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.65e+03 |
|    ep_rew_mean     | 102      |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 12050    |
|    fps             | 1676     |
|    time_elapsed    | 38       |
|    total_timesteps | 10070940 |
| trai

In [13]:
# > 11M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_12M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.38e+03 |
|    ep_rew_mean     | 88.1     |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 12430    |
|    fps             | 14803    |
|    time_elapsed    | 0        |
|    total_timesteps | 11008236 |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.4e+03  |
|    ep_rew_mean     | 143      |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 12440    |
|    fps             | 1748     |
|    time_elapsed    | 17       |
|    total_timesteps | 11038428 |
| train/             |          |
|    actor_loss      | 4.58e+03 |
|    critic_loss     | 1.68e+04 |
|    learning_rate   | 0.0003   |
|    n_updates       | 1836288  |
------

In [14]:
# > 12M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_13M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.14e+03 |
|    ep_rew_mean     | 160      |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 12880    |
|    fps             | 1688     |
|    time_elapsed    | 5        |
|    total_timesteps | 12017088 |
| train/             |          |
|    actor_loss      | 1.03e+04 |
|    critic_loss     | 4.36e+04 |
|    learning_rate   | 0.0003   |
|    n_updates       | 1999488  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.02e+03 |
|    ep_rew_mean     | 95.2     |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 12890    |
|    fps             | 1698     |
|    time_elapsed    | 12       |
|    total_timesteps | 12029664 |
| trai

In [15]:
# > 13M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_14M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.81e+03 |
|    ep_rew_mean     | 57       |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 13460    |
|    fps             | 1617     |
|    time_elapsed    | 5        |
|    total_timesteps | 13017648 |
| train/             |          |
|    actor_loss      | 1.63e+04 |
|    critic_loss     | 1.41e+05 |
|    learning_rate   | 0.0003   |
|    n_updates       | 2166272  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.81e+03 |
|    ep_rew_mean     | -63.7    |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 13470    |
|    fps             | 1669     |
|    time_elapsed    | 17       |
|    total_timesteps | 13038864 |
| trai

In [16]:
# > 14M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_15M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/td3_arpod_20250912_121710_20kg_256x1024x64/logs/TD3_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.06e+03 |
|    ep_rew_mean     | 48.2     |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 14210    |
|    fps             | 1937     |
|    time_elapsed    | 2        |
|    total_timesteps | 14015052 |
| train/             |          |
|    actor_loss      | 2.25e+04 |
|    critic_loss     | 1.53e+05 |
|    learning_rate   | 0.0003   |
|    n_updates       | 2332416  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.06e+03 |
|    ep_rew_mean     | 43.6     |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 14220    |
|    fps             | 1805     |
|    time_elapsed    | 7        |
|    total_timesteps | 14023476 |
| trai

In [None]:
# > 15M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_16M"))

In [None]:
# > 16M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_17M"))

In [None]:
# > 17M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_18M"))

In [None]:
# > 18M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_19M"))

In [None]:
# > 19M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_20M"))

In [None]:
# > 20M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)

for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(5e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_25M"))