Skip to content

Commit

Permalink
[CUDA] Fixed order of words in the names of shfl builtins.
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D38147

llvm-svn: 313899
  • Loading branch information
Artem-B committed Sep 21, 2017
1 parent 42960b4 commit b542f1f
Showing 1 changed file with 3 additions and 4 deletions.
7 changes: 3 additions & 4 deletions clang/lib/Headers/__clang_cuda_intrinsics.h
Expand Up @@ -148,13 +148,12 @@ __MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32,
__nvvm_shfl_sync_idx_f32, 0x1f);
// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=
// maxLane.
-__MAKE_SYNC_SHUFFLES(__shfl_sync_up, __nvvm_shfl_sync_up_i32,
+__MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32,
__nvvm_shfl_sync_up_f32, 0);
-__MAKE_SYNC_SHUFFLES(__shfl_sync_down, __nvvm_shfl_sync_down_i32,
+__MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32,
__nvvm_shfl_sync_down_f32, 0x1f);
-__MAKE_SYNC_SHUFFLES(__shfl_sync_xor, __nvvm_shfl_sync_bfly_i32,
+__MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32,
__nvvm_shfl_sync_bfly_f32, 0x1f);

#pragma pop_macro("__MAKE_SYNC_SHUFFLES")

inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {
Expand Down

0 comments on commit b542f1f

Please sign in to comment.