diff --git a/torch/distributed/fsdp/_debug_utils.py b/torch/distributed/fsdp/_debug_utils.py
index 4ed76476e56b6..a41a817724e57 100644
--- a/torch/distributed/fsdp/_debug_utils.py
+++ b/torch/distributed/fsdp/_debug_utils.py
@@ -57,7 +57,7 @@ def dump_and_reset(cls, msg: str) -> None:
         # This cannot be combined with DETAIL distributed log
         # as the profiling will be very incorrect.
         if dist.get_rank() == 0 and dist.get_debug_level() == dist.DebugLevel.INFO:
-            logger.warning("%s %s", msg, cls.results)
+            logger.info("%s %s", msg, cls.results)
         cls.reset()
diff --git a/torch/distributed/fsdp/_optim_utils.py b/torch/distributed/fsdp/_optim_utils.py
index 163cde70b3f97..b066f930ebaf5 100644
--- a/torch/distributed/fsdp/_optim_utils.py
+++ b/torch/distributed/fsdp/_optim_utils.py
@@ -1511,7 +1511,7 @@ def _allgather_orig_param_states(
     """
     fsdp_state = fsdp_param_info.state
     if fsdp_state.rank == 0 and dist.get_debug_level() == dist.DebugLevel.DETAIL:
-        logger.warning(
+        logger.info(
             "Memory Summary before calling to _allgather_orig_param_states %s",
             fsdp_state._device_handle.memory_summary(),
         )
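
For context, a minimal sketch (not part of the PR) of the gating pattern these hunks touch: diagnostics are emitted only on rank 0 and only when the `torch.distributed` debug level is raised, and after this change they are logged at INFO severity rather than WARNING. The function name `log_profiling_summary` and the `results` argument below are illustrative assumptions, not code from the repository.

```python
import logging

import torch.distributed as dist

logger = logging.getLogger(__name__)


def log_profiling_summary(msg: str, results: dict) -> None:
    # Only rank 0 emits the summary, and only when the distributed debug
    # level is INFO (e.g. set via TORCH_DISTRIBUTED_DEBUG=INFO), so normal
    # runs are not flooded with diagnostics.
    if dist.get_rank() == 0 and dist.get_debug_level() == dist.DebugLevel.INFO:
        # INFO is the appropriate severity: this is diagnostic output,
        # not a report of a problem, which is what the PR changes.
        logger.info("%s %s", msg, results)
```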