release/20.x: cuda clang: Fix argument order for __reduce_max_sync (#132881) #134295

llvmbot · 2025-04-03T19:24:40Z

Backport 2d1517d

Requested by: @Artem-B

llvmbot · 2025-04-03T19:24:47Z

@durga4github What do you think about merging this PR to the release branch?

llvmbot · 2025-04-03T19:25:17Z

@llvm/pr-subscribers-clang

Author: None (llvmbot)

Changes

Backport 2d1517d

Requested by: @Artem-B

Full diff: https://github.com/llvm/llvm-project/pull/134295.diff

1 Files Affected:

(modified) clang/lib/Headers/__clang_cuda_intrinsics.h (+8-8)

diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index a04e8b6de44d0..8b230af6f6647 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -515,32 +515,32 @@ __device__ inline cuuint32_t __nvvm_get_smem_pointer(void *__ptr) {
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800
 __device__ inline unsigned __reduce_add_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_add(__mask, __value);
+  return __nvvm_redux_sync_add(__value, __mask);
 }
 __device__ inline unsigned __reduce_min_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_umin(__mask, __value);
+  return __nvvm_redux_sync_umin(__value, __mask);
 }
 __device__ inline unsigned __reduce_max_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_umax(__mask, __value);
+  return __nvvm_redux_sync_umax(__value, __mask);
 }
 __device__ inline int __reduce_min_sync(unsigned __mask, int __value) {
-  return __nvvm_redux_sync_min(__mask, __value);
+  return __nvvm_redux_sync_min(__value, __mask);
 }
 __device__ inline int __reduce_max_sync(unsigned __mask, int __value) {
-  return __nvvm_redux_sync_max(__mask, __value);
+  return __nvvm_redux_sync_max(__value, __mask);
 }
 __device__ inline unsigned __reduce_or_sync(unsigned __mask, unsigned __value) {
-  return __nvvm_redux_sync_or(__mask, __value);
+  return __nvvm_redux_sync_or(__value, __mask);
 }
 __device__ inline unsigned __reduce_and_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_and(__mask, __value);
+  return __nvvm_redux_sync_and(__value, __mask);
 }
 __device__ inline unsigned __reduce_xor_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_xor(__mask, __value);
+  return __nvvm_redux_sync_xor(__value, __mask);
 }
 
 __device__ inline void __nv_memcpy_async_shared_global_4(void *__dst,

llvmbot · 2025-04-03T19:25:17Z

@llvm/pr-subscribers-backend-x86

Author: None (llvmbot)

Changes

Backport 2d1517d

Requested by: @Artem-B

Full diff: https://github.com/llvm/llvm-project/pull/134295.diff

1 Files Affected:

(modified) clang/lib/Headers/__clang_cuda_intrinsics.h (+8-8)

diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index a04e8b6de44d0..8b230af6f6647 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -515,32 +515,32 @@ __device__ inline cuuint32_t __nvvm_get_smem_pointer(void *__ptr) {
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800
 __device__ inline unsigned __reduce_add_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_add(__mask, __value);
+  return __nvvm_redux_sync_add(__value, __mask);
 }
 __device__ inline unsigned __reduce_min_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_umin(__mask, __value);
+  return __nvvm_redux_sync_umin(__value, __mask);
 }
 __device__ inline unsigned __reduce_max_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_umax(__mask, __value);
+  return __nvvm_redux_sync_umax(__value, __mask);
 }
 __device__ inline int __reduce_min_sync(unsigned __mask, int __value) {
-  return __nvvm_redux_sync_min(__mask, __value);
+  return __nvvm_redux_sync_min(__value, __mask);
 }
 __device__ inline int __reduce_max_sync(unsigned __mask, int __value) {
-  return __nvvm_redux_sync_max(__mask, __value);
+  return __nvvm_redux_sync_max(__value, __mask);
 }
 __device__ inline unsigned __reduce_or_sync(unsigned __mask, unsigned __value) {
-  return __nvvm_redux_sync_or(__mask, __value);
+  return __nvvm_redux_sync_or(__value, __mask);
 }
 __device__ inline unsigned __reduce_and_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_and(__mask, __value);
+  return __nvvm_redux_sync_and(__value, __mask);
 }
 __device__ inline unsigned __reduce_xor_sync(unsigned __mask,
                                              unsigned __value) {
-  return __nvvm_redux_sync_xor(__mask, __value);
+  return __nvvm_redux_sync_xor(__value, __mask);
 }
 
 __device__ inline void __nv_memcpy_async_shared_global_4(void *__dst,

github-actions · 2025-04-03T19:26:23Z

⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo.
Please turn off the "Keep my email addresses private" setting in your account.
See LLVM Discourse for more information.

Artem-B · 2025-04-03T21:33:48Z

I was the reviewer of the original PR.
The change is a low-risk bug fix impacting newer GPUs and should be backported into 20.x.

durga4github · 2025-04-04T08:27:31Z

> @durga4github What do you think about merging this PR to the release branch?

As Artem mentioned, it is a low-risk fix impacting the lowering to the redux_sync family of intrinsics only. So, it is good to merge it to the release branch.

Fixes: llvm#131415 --------- Signed-off-by: Austin Schuh <austin.linux@gmail.com> (cherry picked from commit 2d1517d)

github-actions · 2025-04-11T17:59:42Z

@Artem-B (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR.

llvmbot added this to the LLVM 20.X Release milestone Apr 3, 2025

github-project-automation bot added this to LLVM Release Status Apr 3, 2025

github-project-automation bot moved this to Needs Triage in LLVM Release Status Apr 3, 2025

llvmbot requested a review from durga4github April 3, 2025 19:24

llvmbot mentioned this pull request Apr 3, 2025

[cuclang 20.1.0] Using __reduce_max_sync in a cuda kernel fails with an illegal instruction was encountered #131415

Closed

llvmbot added the labels clang (Clang issues not falling into any other category), backend:X86, and clang:headers (Headers provided by Clang, e.g. for intrinsics) Apr 3, 2025

durga4github approved these changes Apr 4, 2025

View reviewed changes

github-project-automation bot moved this from Needs Triage to Needs Merge in LLVM Release Status Apr 4, 2025

cuda clang: Fix argument order for __reduce_max_sync (llvm#132881)

41aefdb

Fixes: llvm#131415 --------- Signed-off-by: Austin Schuh <austin.linux@gmail.com> (cherry picked from commit 2d1517d)

llvmbot force-pushed the issue131415 branch from b55d128 to 41aefdb Compare April 11, 2025 17:58

tstellar merged commit 41aefdb into llvm:release/20.x Apr 11, 2025
7 of 10 checks passed

github-project-automation bot moved this from Needs Merge to Done in LLVM Release Status Apr 11, 2025

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

release/20.x: cuda clang: Fix argument order for __reduce_max_sync (#132881) #134295

release/20.x: cuda clang: Fix argument order for __reduce_max_sync (#132881) #134295

Uh oh!

llvmbot commented Apr 3, 2025

Uh oh!

llvmbot commented Apr 3, 2025

Uh oh!

llvmbot commented Apr 3, 2025

Uh oh!

llvmbot commented Apr 3, 2025

Uh oh!

github-actions bot commented Apr 3, 2025

Uh oh!

Artem-B commented Apr 3, 2025

Uh oh!

durga4github commented Apr 4, 2025

Uh oh!

Uh oh!

github-actions bot commented Apr 11, 2025

Uh oh!

Uh oh!

release/20.x: cuda clang: Fix argument order for __reduce_max_sync (#132881) #134295

release/20.x: cuda clang: Fix argument order for __reduce_max_sync (#132881) #134295

Uh oh!

Conversation

llvmbot commented Apr 3, 2025

Uh oh!

llvmbot commented Apr 3, 2025

Uh oh!

llvmbot commented Apr 3, 2025

Uh oh!

llvmbot commented Apr 3, 2025

Uh oh!

github-actions bot commented Apr 3, 2025

Uh oh!

Artem-B commented Apr 3, 2025

Uh oh!

durga4github commented Apr 4, 2025

Uh oh!

Uh oh!

github-actions bot commented Apr 11, 2025

Uh oh!

Uh oh!