Skip to content

Commit

Permalink
Update on "[PG NCCL] Add TDD, NCCL_DEBUG log"
Browse files Browse the repository at this point in the history
Prints the TORCH_DISTRIBUTED_DEBUG and NCCL_DEBUG env var settings during setup for easier debugging.

Differential Revision: [D44430875](https://our.internmc.facebook.com/intern/diff/D44430875/)

[ghstack-poisoned]
  • Loading branch information
rohan-varma committed Apr 6, 2023
2 parents 3a66807 + 5715338 commit f022842
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
9 changes: 5 additions & 4 deletions torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -659,8 +659,10 @@ ProcessGroupNCCL::ProcessGroupNCCL(
#endif

init();
char * torch_distributed_debug = parseEnvVarString("TORCH_DISTRIBUTED_DEBUG", "OFF");
char * nccl_debug = parseEnvVarString("NCCL_DEBUG", "OFF");
const std::string OFF = "OFF";
const char* torch_distributed_debug =
parseEnvVarString("TORCH_DISTRIBUTED_DEBUG", OFF.c_str());
const char* nccl_debug = parseEnvVarString("NCCL_DEBUG", OFF.c_str());
LOG(INFO) << "[Rank " << rank_
<< "] ProcessGroupNCCL initialized with following options:"
<< "\nNCCL_ASYNC_ERROR_HANDLING: " << asyncErrorHandling_
Expand All @@ -671,8 +673,7 @@ ProcessGroupNCCL::ProcessGroupNCCL(
<< options_->is_high_priority_stream
<< "\n TORCH_DISTRIBUTED_DEBUG: "
<< std::string(torch_distributed_debug)
<< "\n NCCL_DEBUG: "
<< std::string(nccl_debug);
<< "\n NCCL_DEBUG: " << std::string(nccl_debug);

RECORD_PARAM_COMMS(
0, // seq
Expand Down
4 changes: 2 additions & 2 deletions torch/csrc/distributed/c10d/Utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ inline int parseEnvVarInt(const char* envVarName) {
return C10D_ENV_NOT_SET;
}

inline char* parseEnvVarString(const char* envVarName, char* default_val) {
char* val = std::getenv(envVarName);
inline const char* parseEnvVarString(const char* envVarName, const char* default_val) {
const char* val = std::getenv(envVarName);
if (val == nullptr) {
val = default_val;
}
Expand Down

0 comments on commit f022842

Please sign in to comment.