In [2]:
import os
from datetime import datetime

from envs.arpod_HCW import HCWSE2Env
from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.env_util import is_wrapped

run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
out_dir = os.path.join("runs", f"sac_arpod_{run_id}")
os.makedirs(out_dir, exist_ok=True)
best_dir = os.path.join(out_dir, "best_models")
logs_dir = os.path.join(out_dir, "logs")
os.makedirs(best_dir, exist_ok=True)
os.makedirs(logs_dir, exist_ok=True)


n_envs = 12
seed = 42
spawn_radius=(2.5, 5.0)
env = DummyVecEnv([lambda: Monitor(HCWSE2Env(seed=seed,spawn_radius=spawn_radius), logs_dir)] * n_envs)
# env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)0)

policy_kwargs = dict(
    net_arch=dict(pi=[256, 256,64], qf=[256, 256,64])
)
model = SAC(
    policy="MlpPolicy",
    env=env,
    learning_rate=3e-4,
    buffer_size=int(1e6),
    learning_starts=20_000,
    batch_size=256,
    tau=0.005,
    gamma=0.999,
    train_freq=(64,"step"),
    gradient_steps=128,
    ent_coef="auto_0.1",
    target_entropy="auto",
    policy_kwargs=policy_kwargs,
    verbose=1,
    tensorboard_log=logs_dir,
    seed=seed
)
model.save(os.path.join(best_dir, "model_0M"))

# model_path = os.path.join(best_dir, "model_0M.zip")

Using cuda device


In [3]:

eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)

model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback])
model.save(os.path.join(out_dir, "model_1M"))



Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.02e+03 |
|    ep_rew_mean     | -722     |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 10       |
|    fps             | 4350     |
|    time_elapsed    | 5        |
|    total_timesteps | 24516    |
| train/             |          |
|    actor_loss      | 3.81     |
|    critic_loss     | 0.364    |
|    ent_coef        | 0.0841   |
|    ent_coef_loss   | -8.2     |
|    learning_rate   | 0.0003   |
|    n_updates       | 640      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.52e+03 |
|    ep_rew_mean     | -827     |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 20       |
|    fps             | 1549     |
|    time_elapsed    | 29       |
|    total_timesteps | 45828 

In [4]:

# > 1M steps, increase spawn radius
spawn_radius=(2.5, 10.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
    print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)
model.save(os.path.join(out_dir, "model_2M"))




Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Increasing spawn radius to  (2.5, 10.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 113      |
|    ep_rew_mean     | 66.1     |
|    success_rate    | 1        |
| time/              |          |
|    episodes        | 4070     |
|    fps             | 13013    |
|    time_elapsed    | 0        |
|    total_timesteps | 1001220  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 11

In [5]:

# > 2M steps, increase spawn radius
spawn_radius=(2.5, 20.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)
model.save(os.path.join(out_dir, "model_3M"))




Increasing spawn radius to  (2.5, 20.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 177      |
|    ep_rew_mean     | 68.3     |
|    success_rate    | 1        |
| time/              |          |
|    episodes        | 9080     |
|    fps             | 13884    |
|    time_elapsed    | 0        |
|    total_timesteps | 2001972  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 171      |
|    ep_rew_mean     | 67.1     |
|    success_rate    | 0.99     |
| time/              |          |
|    episodes        | 9090     |
|    fps             | 1038     |
|    time_elapsed    | 2        |
|    total_timesteps | 2003904  |
| train/             |          |
|    actor_loss      | -43.2    |
|    critic_loss     | 1.3      |
|    ent_coef        | 0.0261   |
|    ent_coef_loss   | -0.0497  |
|    learning_rate   | 

In [6]:

# > 3M steps, increase spawn radius
spawn_radius=(2.5, 40.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_4M"))



Increasing spawn radius to  (2.5, 40.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 301      |
|    ep_rew_mean     | 80.6     |
|    success_rate    | 0.99     |
| time/              |          |
|    episodes        | 12610    |
|    fps             | 1154     |
|    time_elapsed    | 0        |
|    total_timesteps | 3003024  |
| train/             |          |
|    actor_loss      | -33.6    |
|    critic_loss     | 1.86     |
|    ent_coef        | 0.0246   |
|    ent_coef_loss   | 0.0896   |
|    learning_rate   | 0.0003   |
|    n_updates       | 497152   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 294      |
|    ep_rew_mean     | 80.2     |
|    success_rate    | 0.99     |
| time/              |          |
|    episodes        | 12620    |
|    fps             | 1034     |
|    time_elapsed    | 

In [7]:
# > 4M steps, increase spawn radius
spawn_radius=(2.5, 60.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_5M"))



Increasing spawn radius to  (2.5, 60.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 362      |
|    ep_rew_mean     | 74.2     |
|    success_rate    | 0.85     |
| time/              |          |
|    episodes        | 14970    |
|    fps             | 1010     |
|    time_elapsed    | 1        |
|    total_timesteps | 4004484  |
| train/             |          |
|    actor_loss      | 102      |
|    critic_loss     | 13.2     |
|    ent_coef        | 0.0589   |
|    ent_coef_loss   | -0.0649  |
|    learning_rate   | 0.0003   |
|    n_updates       | 664064   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 375      |
|    ep_rew_mean     | 77.9     |
|    success_rate    | 0.87     |
| time/              |          |
|    episodes        | 14980    |
|    fps             | 957      |
|    time_elapsed    | 

In [8]:

# > 5M steps, increase spawn radius
spawn_radius=(2.5, 60.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_6M"))


Increasing spawn radius to  (2.5, 60.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 353      |
|    ep_rew_mean     | 84.2     |
|    success_rate    | 0.91     |
| time/              |          |
|    episodes        | 17350    |
|    fps             | 1175     |
|    time_elapsed    | 1        |
|    total_timesteps | 5005368  |
| train/             |          |
|    actor_loss      | -22.2    |
|    critic_loss     | 4.25     |
|    ent_coef        | 0.0388   |
|    ent_coef_loss   | -0.102   |
|    learning_rate   | 0.0003   |
|    n_updates       | 830848   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 354      |
|    ep_rew_mean     | 84.3     |
|    success_rate    | 0.91     |
| time/              |          |
|    episodes        | 17360    |
|    fps             | 1053     |
|    time_elapsed    | 

In [9]:

# > 6M steps, increase spawn radius
spawn_radius=(2.5, 80.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_7M"))


Increasing spawn radius to  (2.5, 80.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 340      |
|    ep_rew_mean     | 92.6     |
|    success_rate    | 0.99     |
| time/              |          |
|    episodes        | 20220    |
|    fps             | 963      |
|    time_elapsed    | 3        |
|    total_timesteps | 6007452  |
| train/             |          |
|    actor_loss      | -37.9    |
|    critic_loss     | 4.04     |
|    ent_coef        | 0.0231   |
|    ent_coef_loss   | 0.0806   |
|    learning_rate   | 0.0003   |
|    n_updates       | 997888   |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 344      |
|    ep_rew_mean     | 93.7     |
|    success_rate    | 0.99     |
| time/              |          |
|    episodes        | 20230    |
|    fps             | 927      |
|    time_elapsed    | 

In [10]:

# > 7M steps, increase spawn radius
spawn_radius=(2.5, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_8M"))


Increasing spawn radius to  (2.5, 100.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 386      |
|    ep_rew_mean     | 106      |
|    success_rate    | 1        |
| time/              |          |
|    episodes        | 22620    |
|    fps             | 1236     |
|    time_elapsed    | 1        |
|    total_timesteps | 7006944  |
| train/             |          |
|    actor_loss      | -40.9    |
|    critic_loss     | 2.1      |
|    ent_coef        | 0.0212   |
|    ent_coef_loss   | 0.369    |
|    learning_rate   | 0.0003   |
|    n_updates       | 1164416  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 379      |
|    ep_rew_mean     | 105      |
|    success_rate    | 1        |
| time/              |          |
|    episodes        | 22630    |
|    fps             | 948      |
|    time_elapsed    |

In [11]:
# > 8M steps, stabilize spawn radius
spawn_radius=(2.5, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_9M"))


Increasing spawn radius to  (2.5, 100.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 523      |
|    ep_rew_mean     | 83.7     |
|    success_rate    | 0.94     |
| time/              |          |
|    episodes        | 24760    |
|    fps             | 1089     |
|    time_elapsed    | 3        |
|    total_timesteps | 8009352  |
| train/             |          |
|    actor_loss      | -29.2    |
|    critic_loss     | 5.31     |
|    ent_coef        | 0.0157   |
|    ent_coef_loss   | -0.371   |
|    learning_rate   | 0.0003   |
|    n_updates       | 1331456  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 528      |
|    ep_rew_mean     | 85.6     |
|    success_rate    | 0.94     |
| time/              |          |
|    episodes        | 24770    |
|    fps             | 926      |
|    time_elapsed    |

In [None]:
# > 9M steps, stabilize spawn radius
spawn_radius=(2.5, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_10M"))

Increasing spawn radius to  (2.5, 100.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 492      |
|    ep_rew_mean     | 66.3     |
|    success_rate    | 0.47     |
| time/              |          |
|    episodes        | 26690    |
|    fps             | 941      |
|    time_elapsed    | 0        |
|    total_timesteps | 9007140  |
| train/             |          |
|    actor_loss      | 90.2     |
|    critic_loss     | 62.3     |
|    ent_coef        | 0.0877   |
|    ent_coef_loss   | -0.0389  |
|    learning_rate   | 0.0003   |
|    n_updates       | 1497856  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 501      |
|    ep_rew_mean     | 68.3     |
|    success_rate    | 0.48     |
| time/              |          |
|    episodes        | 26700    |
|    fps             | 976      |
|    time_elapsed    |

In [13]:
# > 10M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_11M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 611      |
|    ep_rew_mean     | 64       |
|    success_rate    | 0.37     |
| time/              |          |
|    episodes        | 28730    |
|    fps             | 894      |
|    time_elapsed    | 3        |
|    total_timesteps | 10010304 |
| train/             |          |
|    actor_loss      | 82.3     |
|    critic_loss     | 25.9     |
|    ent_coef        | 0.0692   |
|    ent_coef_loss   | 0.0473   |
|    learning_rate   | 0.0003   |
|    n_updates       | 1665024  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 611      |
|    ep_rew_mean     | 64       |
|    success_rate    | 0.37     |
| time/              |          |
|    episodes        | 28740    |
|    fps             | 904      |
|    time_elapsed    | 

In [14]:
# > 11M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_12M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 399      |
|    ep_rew_mean     | 59.2     |
|    success_rate    | 0.42     |
| time/              |          |
|    episodes        | 30690    |
|    fps             | 1142     |
|    time_elapsed    | 0        |
|    total_timesteps | 11008788 |
| train/             |          |
|    actor_loss      | 40.4     |
|    critic_loss     | 10.9     |
|    ent_coef        | 0.0403   |
|    ent_coef_loss   | -0.177   |
|    learning_rate   | 0.0003   |
|    n_updates       | 1831424  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 395      |
|    ep_rew_mean     | 58.4     |
|    success_rate    | 0.41     |
| time/              |          |
|    episodes        | 30700    |
|    fps             | 904      |
|    time_elapsed    | 

In [15]:
# > 12M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_13M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 486      |
|    ep_rew_mean     | 37.5     |
|    success_rate    | 0.97     |
| time/              |          |
|    episodes        | 33180    |
|    fps             | 1067     |
|    time_elapsed    | 1        |
|    total_timesteps | 12010320 |
| train/             |          |
|    actor_loss      | 3.83     |
|    critic_loss     | 9.25     |
|    ent_coef        | 0.0374   |
|    ent_coef_loss   | 0.214    |
|    learning_rate   | 0.0003   |
|    n_updates       | 1998336  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 470      |
|    ep_rew_mean     | 34.9     |
|    success_rate    | 0.96     |
| time/              |          |
|    episodes        | 33190    |
|    fps             | 917      |
|    time_elapsed    | 

In [16]:
# > 13M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_14M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 351      |
|    ep_rew_mean     | 46.8     |
|    success_rate    | 0.4      |
| time/              |          |
|    episodes        | 35700    |
|    fps             | 11408    |
|    time_elapsed    | 0        |
|    total_timesteps | 13009620 |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 345      |
|    ep_rew_mean     | 45.8     |
|    success_rate    | 0.4      |
| time/              |          |
|    episodes        | 35710    |
|    fps             | 1115     |
|    time_elapsed    | 2        |
|    total_timesteps | 13012152 |
| train/             |          |
|    actor_loss      | 430      |
|    critic_loss     | 261      |
|    ent_coef        | 0.181    |
|    ent_coef_loss   | -0.0168  |
|    learning_rate   | 

In [17]:
# > 14M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_15M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 336      |
|    ep_rew_mean     | 61.3     |
|    success_rate    | 0.5      |
| time/              |          |
|    episodes        | 38770    |
|    fps             | 1532     |
|    time_elapsed    | 0        |
|    total_timesteps | 14011152 |
| train/             |          |
|    actor_loss      | 40.6     |
|    critic_loss     | 58.7     |
|    ent_coef        | 0.0788   |
|    ent_coef_loss   | 0.0803   |
|    learning_rate   | 0.0003   |
|    n_updates       | 2331776  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 331      |
|    ep_rew_mean     | 61.4     |
|    success_rate    | 0.51     |
| time/              |          |
|    episodes        | 38780    |
|    fps             | 1052     |
|    time_elapsed    | 

In [18]:
# > 15M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_16M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 328      |
|    ep_rew_mean     | 108      |
|    success_rate    | 1        |
| time/              |          |
|    episodes        | 41570    |
|    fps             | 1044     |
|    time_elapsed    | 3        |
|    total_timesteps | 15014304 |
| train/             |          |
|    actor_loss      | 4.69     |
|    critic_loss     | 13.9     |
|    ent_coef        | 0.0683   |
|    ent_coef_loss   | -0.0533  |
|    learning_rate   | 0.0003   |
|    n_updates       | 2498944  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 324      |
|    ep_rew_mean     | 107      |
|    success_rate    | 1        |
| time/              |          |
|    episodes        | 41580    |
|    fps             | 909      |
|    time_elapsed    | 

In [19]:
# > 16M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_17M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 458      |
|    ep_rew_mean     | 2.09     |
|    success_rate    | 0.58     |
| time/              |          |
|    episodes        | 44210    |
|    fps             | 1274     |
|    time_elapsed    | 1        |
|    total_timesteps | 16013328 |
| train/             |          |
|    actor_loss      | 6.87     |
|    critic_loss     | 60.3     |
|    ent_coef        | 0.0561   |
|    ent_coef_loss   | 0.00652  |
|    learning_rate   | 0.0003   |
|    n_updates       | 2665472  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 467      |
|    ep_rew_mean     | 4.6      |
|    success_rate    | 0.61     |
| time/              |          |
|    episodes        | 44220    |
|    fps             | 1051     |
|    time_elapsed    | 

In [20]:
# > 17M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_18M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 357      |
|    ep_rew_mean     | 85.2     |
|    success_rate    | 0.81     |
| time/              |          |
|    episodes        | 46820    |
|    fps             | 12717    |
|    time_elapsed    | 0        |
|    total_timesteps | 17012256 |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 358      |
|    ep_rew_mean     | 87.8     |
|    success_rate    | 0.83     |
| time/              |          |
|    episodes        | 46830    |
|    fps             | 1114     |
|    time_elapsed    | 3        |
|    total_timesteps | 17015592 |
| train/             |          |
|    actor_loss      | 87.9     |
|    critic_loss     | 155      |
|    ent_coef        | 0.0736   |
|    ent_coef_loss   | -0.0272  |
|    learning_rate   | 

In [21]:
# > 18M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_19M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 475      |
|    ep_rew_mean     | 22       |
|    success_rate    | 0.09     |
| time/              |          |
|    episodes        | 49320    |
|    fps             | 1040     |
|    time_elapsed    | 2        |
|    total_timesteps | 18015216 |
| train/             |          |
|    actor_loss      | 1.12e+03 |
|    critic_loss     | 3.24e+03 |
|    ent_coef        | 0.482    |
|    ent_coef_loss   | 0.0149   |
|    learning_rate   | 0.0003   |
|    n_updates       | 2999168  |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 492      |
|    ep_rew_mean     | 24.6     |
|    success_rate    | 0.11     |
| time/              |          |
|    episodes        | 49330    |
|    fps             | 1029     |
|    time_elapsed    | 

In [22]:
# > 19M steps, stabilize spawn radius
spawn_radius=(10, 100.0)
eval_env = Monitor(HCWSE2Env(seed=seed,render_mode="rgb_array",render_folder=logs_dir,spawn_radius=spawn_radius), logs_dir)
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=best_dir,
    log_path=logs_dir,
    eval_freq=20_000,
    n_eval_episodes=3,
    deterministic=True,
    render=True,
    verbose=1,
)
for e in env.envs:
    e.unwrapped.spawn_radius=spawn_radius
print("Increasing spawn radius to ", env.envs[0].unwrapped.spawn_radius)    
model.learn(total_timesteps=int(1e6), log_interval=10, callback=[eval_callback],reset_num_timesteps=False)   
model.save(os.path.join(out_dir, "model_20M"))

Increasing spawn radius to  (10, 100.0)
Logging to runs/sac_arpod_20250910_021828/logs/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 304      |
|    ep_rew_mean     | 6.59     |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 52370    |
|    fps             | 12492    |
|    time_elapsed    | 0        |
|    total_timesteps | 19013640 |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 300      |
|    ep_rew_mean     | 5.46     |
|    success_rate    | 0        |
| time/              |          |
|    episodes        | 52380    |
|    fps             | 1322     |
|    time_elapsed    | 1        |
|    total_timesteps | 19015500 |
| train/             |          |
|    actor_loss      | 233      |
|    critic_loss     | 887      |
|    ent_coef        | 0.248    |
|    ent_coef_loss   | -0.299   |
|    learning_rate   | 