From 9c5cb3b7c6e2a4ce994e995495fd08c303d77b7f Mon Sep 17 00:00:00 2001
From: ShaoshuSu
Date: Sat, 19 Jul 2025 23:03:59 -0400
Subject: [PATCH 1/4] Display run name during training in OnPolicyRunner

---
 rsl_rl/runners/on_policy_runner.py | 124 ++++++++++++++++++++++++++++-
 1 file changed, 122 insertions(+), 2 deletions(-)

diff --git a/rsl_rl/runners/on_policy_runner.py b/rsl_rl/runners/on_policy_runner.py
index 2d9bac6c..45c5d049 100644
--- a/rsl_rl/runners/on_policy_runner.py
+++ b/rsl_rl/runners/on_policy_runner.py
@@ -128,8 +128,128 @@ def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = Fals
                 self.save(os.path.join(self.logger.log_dir, f"model_{it}.pt"))  # type: ignore
 
         # Save the final model after training
-        if self.logger.log_dir is not None and not self.logger.disable_logs:
-            self.save(os.path.join(self.logger.log_dir, f"model_{self.current_learning_iteration}.pt"))
+        if self.log_dir is not None and not self.disable_logs:
+            self.save(os.path.join(self.log_dir, f"model_{self.current_learning_iteration}.pt"))
+
+    def log(self, locs: dict, width: int = 80, pad: int = 35) -> None:
+        # Compute the collection size
+        collection_size = self.num_steps_per_env * self.env.num_envs * self.gpu_world_size
+        # Update total time-steps and time
+        self.tot_timesteps += collection_size
+        self.tot_time += locs["collection_time"] + locs["learn_time"]
+        iteration_time = locs["collection_time"] + locs["learn_time"]
+
+        # Log episode information
+        ep_string = ""
+        if locs["ep_infos"]:
+            for key in locs["ep_infos"][0]:
+                infotensor = torch.tensor([], device=self.device)
+                for ep_info in locs["ep_infos"]:
+                    # Handle scalar and zero dimensional tensor infos
+                    if key not in ep_info:
+                        continue
+                    if not isinstance(ep_info[key], torch.Tensor):
+                        ep_info[key] = torch.Tensor([ep_info[key]])
+                    if len(ep_info[key].shape) == 0:
+                        ep_info[key] = ep_info[key].unsqueeze(0)
+                    infotensor = torch.cat((infotensor, ep_info[key].to(self.device)))
+                value = torch.mean(infotensor)
+                # Log to logger and terminal
+                if "/" in key:
+                    self.writer.add_scalar(key, value, locs["it"])
+                    ep_string += f"""{f"{key}:":>{pad}} {value:.4f}\n"""
+                else:
+                    self.writer.add_scalar("Episode/" + key, value, locs["it"])
+                    ep_string += f"""{f"Mean episode {key}:":>{pad}} {value:.4f}\n"""
+
+        mean_std = self.alg.policy.action_std.mean()
+        fps = int(collection_size / (locs["collection_time"] + locs["learn_time"]))
+
+        # Log losses
+        for key, value in locs["loss_dict"].items():
+            self.writer.add_scalar(f"Loss/{key}", value, locs["it"])
+        self.writer.add_scalar("Loss/learning_rate", self.alg.learning_rate, locs["it"])
+
+        # Log noise std
+        self.writer.add_scalar("Policy/mean_noise_std", mean_std.item(), locs["it"])
+
+        # Log performance
+        self.writer.add_scalar("Perf/total_fps", fps, locs["it"])
+        self.writer.add_scalar("Perf/collection time", locs["collection_time"], locs["it"])
+        self.writer.add_scalar("Perf/learning_time", locs["learn_time"], locs["it"])
+
+        # Log training
+        if len(locs["rewbuffer"]) > 0:
+            # Separate logging for intrinsic and extrinsic rewards
+            if hasattr(self.alg, "rnd") and self.alg.rnd:
+                self.writer.add_scalar("Rnd/mean_extrinsic_reward", statistics.mean(locs["erewbuffer"]), locs["it"])
+                self.writer.add_scalar("Rnd/mean_intrinsic_reward", statistics.mean(locs["irewbuffer"]), locs["it"])
+                self.writer.add_scalar("Rnd/weight", self.alg.rnd.weight, locs["it"])
+            # Everything else
+            self.writer.add_scalar("Train/mean_reward", statistics.mean(locs["rewbuffer"]), locs["it"])
+            self.writer.add_scalar("Train/mean_episode_length", statistics.mean(locs["lenbuffer"]), locs["it"])
+            if self.logger_type != "wandb":  # wandb does not support non-integer x-axis logging
+                self.writer.add_scalar("Train/mean_reward/time", statistics.mean(locs["rewbuffer"]), self.tot_time)
+                self.writer.add_scalar(
+                    "Train/mean_episode_length/time", statistics.mean(locs["lenbuffer"]), self.tot_time
+                )
+
+        str = f" \033[1m Learning iteration {locs['it']}/{locs['tot_iter']} \033[0m "
+
+        run_name = self.cfg.get("run_name")
+        run_name_string = f"""{"Run name:":>{pad}} {run_name}\n""" if run_name else ""
+
+        if len(locs["rewbuffer"]) > 0:
+            log_string = (
+                f"""{"#" * width}\n"""
+                f"""{str.center(width, " ")}\n\n"""
+                f"""{run_name_string}"""
+                f"""{"Computation:":>{pad}} {fps:.0f} steps/s (collection: {locs["collection_time"]:.3f}s, learning {
+                    locs["learn_time"]:.3f}s)\n"""
+                f"""{"Mean action noise std:":>{pad}} {mean_std.item():.2f}\n"""
+            )
+            # Print losses
+            for key, value in locs["loss_dict"].items():
+                log_string += f"""{f"Mean {key} loss:":>{pad}} {value:.4f}\n"""
+            # Print rewards
+            if hasattr(self.alg, "rnd") and self.alg.rnd:
+                log_string += (
+                    f"""{"Mean extrinsic reward:":>{pad}} {statistics.mean(locs["erewbuffer"]):.2f}\n"""
+                    f"""{"Mean intrinsic reward:":>{pad}} {statistics.mean(locs["irewbuffer"]):.2f}\n"""
+                )
+            log_string += f"""{"Mean reward:":>{pad}} {statistics.mean(locs["rewbuffer"]):.2f}\n"""
+            # Print episode information
+            log_string += f"""{"Mean episode length:":>{pad}} {statistics.mean(locs["lenbuffer"]):.2f}\n"""
+        else:
+            log_string = (
+                f"""{"#" * width}\n"""
+                f"""{str.center(width, " ")}\n\n"""
+                f"""{run_name_string}"""
+                f"""{"Computation:":>{pad}} {fps:.0f} steps/s (collection: {locs["collection_time"]:.3f}s, learning {
+                    locs["learn_time"]:.3f}s)\n"""
+                f"""{"Mean action noise std:":>{pad}} {mean_std.item():.2f}\n"""
+            )
+            for key, value in locs["loss_dict"].items():
+                log_string += f"""{f"{key}:":>{pad}} {value:.4f}\n"""
+
+        log_string += ep_string
+        log_string += (
+            f"""{"-" * width}\n"""
+            f"""{"Total timesteps:":>{pad}} {self.tot_timesteps}\n"""
+            f"""{"Iteration time:":>{pad}} {iteration_time:.2f}s\n"""
+            f"""{"Time elapsed:":>{pad}} {time.strftime("%H:%M:%S", time.gmtime(self.tot_time))}\n"""
+            f"""{"ETA:":>{pad}} {
+                time.strftime(
+                    "%H:%M:%S",
+                    time.gmtime(
+                        self.tot_time
+                        / (locs["it"] - locs["start_iter"] + 1)
+                        * (locs["start_iter"] + locs["num_learning_iterations"] - locs["it"])
+                    ),
+                )
+            }\n"""
+        )
+        print(log_string)
 
     def save(self, path: str, infos: dict | None = None) -> None:
         # Save model

From 337fd71c17ce56618c89c63af217d41c2bd6b173 Mon Sep 17 00:00:00 2001
From: ShaoshuSu
Date: Sat, 19 Jul 2025 23:23:09 -0400
Subject: [PATCH 2/4] Add name to CONTRIBUTORS.md

---
 CONTRIBUTORS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 00692ba7..bafc69ed 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -37,5 +37,6 @@ Please keep the lists sorted alphabetically.
 * Matthijs van der Boon
 * Özhan Özen
 * Pascal Roth
+* Shaoshu Su
 * Zhang Chong
 * Ziqi Fan

From cd8eec4015dced39c1619130d157e301aa9fd2d0 Mon Sep 17 00:00:00 2001
From: ClemensSchwarke
Date: Fri, 28 Nov 2025 15:31:05 +0100
Subject: [PATCH 3/4] Revert "Display run name during training in OnPolicyRunner"

This reverts commit 9c5cb3b7c6e2a4ce994e995495fd08c303d77b7f.
---
 rsl_rl/runners/on_policy_runner.py | 124 +----------------------------
 1 file changed, 2 insertions(+), 122 deletions(-)

diff --git a/rsl_rl/runners/on_policy_runner.py b/rsl_rl/runners/on_policy_runner.py
index 45c5d049..2d9bac6c 100644
--- a/rsl_rl/runners/on_policy_runner.py
+++ b/rsl_rl/runners/on_policy_runner.py
@@ -128,128 +128,8 @@ def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = Fals
                 self.save(os.path.join(self.logger.log_dir, f"model_{it}.pt"))  # type: ignore
 
         # Save the final model after training
-        if self.log_dir is not None and not self.disable_logs:
-            self.save(os.path.join(self.log_dir, f"model_{self.current_learning_iteration}.pt"))
-
-    def log(self, locs: dict, width: int = 80, pad: int = 35) -> None:
-        # Compute the collection size
-        collection_size = self.num_steps_per_env * self.env.num_envs * self.gpu_world_size
-        # Update total time-steps and time
-        self.tot_timesteps += collection_size
-        self.tot_time += locs["collection_time"] + locs["learn_time"]
-        iteration_time = locs["collection_time"] + locs["learn_time"]
-
-        # Log episode information
-        ep_string = ""
-        if locs["ep_infos"]:
-            for key in locs["ep_infos"][0]:
-                infotensor = torch.tensor([], device=self.device)
-                for ep_info in locs["ep_infos"]:
-                    # Handle scalar and zero dimensional tensor infos
-                    if key not in ep_info:
-                        continue
-                    if not isinstance(ep_info[key], torch.Tensor):
-                        ep_info[key] = torch.Tensor([ep_info[key]])
-                    if len(ep_info[key].shape) == 0:
-                        ep_info[key] = ep_info[key].unsqueeze(0)
-                    infotensor = torch.cat((infotensor, ep_info[key].to(self.device)))
-                value = torch.mean(infotensor)
-                # Log to logger and terminal
-                if "/" in key:
-                    self.writer.add_scalar(key, value, locs["it"])
-                    ep_string += f"""{f"{key}:":>{pad}} {value:.4f}\n"""
-                else:
-                    self.writer.add_scalar("Episode/" + key, value, locs["it"])
-                    ep_string += f"""{f"Mean episode {key}:":>{pad}} {value:.4f}\n"""
-
-        mean_std = self.alg.policy.action_std.mean()
-        fps = int(collection_size / (locs["collection_time"] + locs["learn_time"]))
-
-        # Log losses
-        for key, value in locs["loss_dict"].items():
-            self.writer.add_scalar(f"Loss/{key}", value, locs["it"])
-        self.writer.add_scalar("Loss/learning_rate", self.alg.learning_rate, locs["it"])
-
-        # Log noise std
-        self.writer.add_scalar("Policy/mean_noise_std", mean_std.item(), locs["it"])
-
-        # Log performance
-        self.writer.add_scalar("Perf/total_fps", fps, locs["it"])
-        self.writer.add_scalar("Perf/collection time", locs["collection_time"], locs["it"])
-        self.writer.add_scalar("Perf/learning_time", locs["learn_time"], locs["it"])
-
-        # Log training
-        if len(locs["rewbuffer"]) > 0:
-            # Separate logging for intrinsic and extrinsic rewards
-            if hasattr(self.alg, "rnd") and self.alg.rnd:
-                self.writer.add_scalar("Rnd/mean_extrinsic_reward", statistics.mean(locs["erewbuffer"]), locs["it"])
-                self.writer.add_scalar("Rnd/mean_intrinsic_reward", statistics.mean(locs["irewbuffer"]), locs["it"])
-                self.writer.add_scalar("Rnd/weight", self.alg.rnd.weight, locs["it"])
-            # Everything else
-            self.writer.add_scalar("Train/mean_reward", statistics.mean(locs["rewbuffer"]), locs["it"])
-            self.writer.add_scalar("Train/mean_episode_length", statistics.mean(locs["lenbuffer"]), locs["it"])
-            if self.logger_type != "wandb":  # wandb does not support non-integer x-axis logging
-                self.writer.add_scalar("Train/mean_reward/time", statistics.mean(locs["rewbuffer"]), self.tot_time)
-                self.writer.add_scalar(
-                    "Train/mean_episode_length/time", statistics.mean(locs["lenbuffer"]), self.tot_time
-                )
-
-        str = f" \033[1m Learning iteration {locs['it']}/{locs['tot_iter']} \033[0m "
-
-        run_name = self.cfg.get("run_name")
-        run_name_string = f"""{"Run name:":>{pad}} {run_name}\n""" if run_name else ""
-
-        if len(locs["rewbuffer"]) > 0:
-            log_string = (
-                f"""{"#" * width}\n"""
-                f"""{str.center(width, " ")}\n\n"""
-                f"""{run_name_string}"""
-                f"""{"Computation:":>{pad}} {fps:.0f} steps/s (collection: {locs["collection_time"]:.3f}s, learning {
-                    locs["learn_time"]:.3f}s)\n"""
-                f"""{"Mean action noise std:":>{pad}} {mean_std.item():.2f}\n"""
-            )
-            # Print losses
-            for key, value in locs["loss_dict"].items():
-                log_string += f"""{f"Mean {key} loss:":>{pad}} {value:.4f}\n"""
-            # Print rewards
-            if hasattr(self.alg, "rnd") and self.alg.rnd:
-                log_string += (
-                    f"""{"Mean extrinsic reward:":>{pad}} {statistics.mean(locs["erewbuffer"]):.2f}\n"""
-                    f"""{"Mean intrinsic reward:":>{pad}} {statistics.mean(locs["irewbuffer"]):.2f}\n"""
-                )
-            log_string += f"""{"Mean reward:":>{pad}} {statistics.mean(locs["rewbuffer"]):.2f}\n"""
-            # Print episode information
-            log_string += f"""{"Mean episode length:":>{pad}} {statistics.mean(locs["lenbuffer"]):.2f}\n"""
-        else:
-            log_string = (
-                f"""{"#" * width}\n"""
-                f"""{str.center(width, " ")}\n\n"""
-                f"""{run_name_string}"""
-                f"""{"Computation:":>{pad}} {fps:.0f} steps/s (collection: {locs["collection_time"]:.3f}s, learning {
-                    locs["learn_time"]:.3f}s)\n"""
-                f"""{"Mean action noise std:":>{pad}} {mean_std.item():.2f}\n"""
-            )
-            for key, value in locs["loss_dict"].items():
-                log_string += f"""{f"{key}:":>{pad}} {value:.4f}\n"""
-
-        log_string += ep_string
-        log_string += (
-            f"""{"-" * width}\n"""
-            f"""{"Total timesteps:":>{pad}} {self.tot_timesteps}\n"""
-            f"""{"Iteration time:":>{pad}} {iteration_time:.2f}s\n"""
-            f"""{"Time elapsed:":>{pad}} {time.strftime("%H:%M:%S", time.gmtime(self.tot_time))}\n"""
-            f"""{"ETA:":>{pad}} {
-                time.strftime(
-                    "%H:%M:%S",
-                    time.gmtime(
-                        self.tot_time
-                        / (locs["it"] - locs["start_iter"] + 1)
-                        * (locs["start_iter"] + locs["num_learning_iterations"] - locs["it"])
-                    ),
-                )
-            }\n"""
-        )
-        print(log_string)
+        if self.logger.log_dir is not None and not self.logger.disable_logs:
+            self.save(os.path.join(self.logger.log_dir, f"model_{self.current_learning_iteration}.pt"))
 
     def save(self, path: str, infos: dict | None = None) -> None:
         # Save model

From 2a6755c212666f441828cc9aa0bb0d0db9f9ae1a Mon Sep 17 00:00:00 2001
From: ClemensSchwarke
Date: Fri, 28 Nov 2025 15:40:29 +0100
Subject: [PATCH 4/4] add run name to print

---
 rsl_rl/utils/logger.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/rsl_rl/utils/logger.py b/rsl_rl/utils/logger.py
index c53e44c6..2dc29231 100644
--- a/rsl_rl/utils/logger.py
+++ b/rsl_rl/utils/logger.py
@@ -182,6 +182,10 @@ def log(
         log_string = f"""{"#" * width}\n"""
         log_string += f"""\033[1m{f" Learning iteration {it}/{total_it} ".center(width)}\033[0m \n\n"""
 
+        # Print run name if provided
+        run_name = self.cfg.get("run_name")
+        log_string += f"""{"Run name:":>{pad}} {run_name}\n""" if run_name else ""
+
         # Print performance
         log_string += (
             f"""{"Total steps:":>{pad}} {self.tot_timesteps} \n"""
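
Taken together, PATCH 3/4 and PATCH 4/4 move the run-name printout out of OnPolicyRunner and into the Logger, which reads it via self.cfg.get("run_name"). Below is a minimal usage sketch, assuming the logger's cfg is the runner's training-configuration dictionary; the value "flat_terrain_baseline", the log_dir path, and the commented-out runner calls are illustrative assumptions, not taken from these patches:

    # Hypothetical training config; only "run_name" is what PATCH 4/4 reads.
    # When the key is set, the logger prints "Run name: <value>" in each
    # per-iteration terminal header; when it is absent, the line is skipped.
    train_cfg = {
        "run_name": "flat_terrain_baseline",  # hypothetical run name
        # ... algorithm, policy, and runner settings as usual ...
    }

    # Assumed call pattern (not defined in these patches):
    # runner = OnPolicyRunner(env, train_cfg, log_dir="logs/flat_terrain", device="cuda:0")
    # runner.learn(num_learning_iterations=1500)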