Skip to content

Commit

Permalink
Merge pull request #29231 from dubey/cherrypicks_YA6FR
Browse files Browse the repository at this point in the history
1.14-rc1 cherry-pick request: NCCL broadcast bug
  • Loading branch information
bananabowl committed May 31, 2019
2 parents d841fd5 + 85a9202 commit a8c0738
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
2 changes: 1 addition & 1 deletion tensorflow/core/kernels/nccl_ops.cc
Expand Up @@ -248,7 +248,7 @@ class NcclBroadcastRecvKernel : public NcclAsyncOpBase {
compute_stream->parent(), compute_stream, gpu_info->event_mgr,
gpu_info->gpu_id, /*input=*/nullptr, output, /*global_rank=*/-1,
std::move(actual_done));
- NcclManager::instance()->AddBroadcastSend(
+ NcclManager::instance()->AddBroadcastRecv(
std::move(participant), {GetCollectiveKey(c),
/*num_local_devices=*/num_devices(),
/*num_global_devices=*/num_devices(),
Expand Down
7 changes: 6 additions & 1 deletion tensorflow/core/nccl/nccl_manager.cc
Expand Up @@ -410,7 +410,12 @@ void NcclManager::AddParticipant(std::unique_ptr<Participant> participant,
CollectiveType collective_type,
ncclRedOp_t reduction_op) {
Collective* to_run = nullptr;
- const DataType data_type = participant->input->dtype();
+ DataType data_type;
+ if (participant->input != nullptr) {
+ data_type = participant->input->dtype();
+ } else {
+ data_type = participant->output->dtype();
+ }
{
mutex_lock l(mu_);
auto collective_it = collectives_.find(context.collective_key);
Expand Down

0 comments on commit a8c0738

Please sign in to comment.