Skip to content

Commit

Permalink
Add C10_CUDA_KERNEL_LAUNCH_CHECK
Browse files Browse the repository at this point in the history
  • Loading branch information
cyyever committed Apr 28, 2024
1 parent b34eac4 commit 4694f99
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions torch/csrc/distributed/c10d/quantization/quantization_gpu.cu
@@ -95,11 +95,12 @@ at::Tensor _float_to_bfloat16_cuda(const at::Tensor& input) {
 nrows,
 ncols,
 #if HAS_NCCL_BF16_DATATYPE
-reinterpret_cast<uint16_t*>(output.mutable_data_ptr<at::BFloat16>()));
+reinterpret_cast<uint16_t*>(output.mutable_data_ptr<at::BFloat16>())
 #else
-reinterpret_cast<uint16_t*>(output.mutable_data_ptr<at::Half>()));
+reinterpret_cast<uint16_t*>(output.mutable_data_ptr<at::Half>())
 #endif
-//C10_CUDA_KERNEL_LAUNCH_CHECK();
+);
+C10_CUDA_KERNEL_LAUNCH_CHECK();
 return output;
 }
Expand Down

0 comments on commit 4694f99

Please sign in to comment.