-
Notifications
You must be signed in to change notification settings - Fork 15.1k
release/20.x: cuda clang: Fix argument order for __reduce_max_sync (#132881) #134295
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@durga4github What do you think about merging this PR to the release branch? |
@llvm/pr-subscribers-clang Author: None (llvmbot) Changes: Backport 2d1517d. Requested by: @Artem-B. Full diff: https://github.com/llvm/llvm-project/pull/134295.diff 1 file affected:
diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index a04e8b6de44d0..8b230af6f6647 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -515,32 +515,32 @@ __device__ inline cuuint32_t __nvvm_get_smem_pointer(void *__ptr) {
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800
__device__ inline unsigned __reduce_add_sync(unsigned __mask,
unsigned __value) {
- return __nvvm_redux_sync_add(__mask, __value);
+ return __nvvm_redux_sync_add(__value, __mask);
}
__device__ inline unsigned __reduce_min_sync(unsigned __mask,
unsigned __value) {
- return __nvvm_redux_sync_umin(__mask, __value);
+ return __nvvm_redux_sync_umin(__value, __mask);
}
__device__ inline unsigned __reduce_max_sync(unsigned __mask,
unsigned __value) {
- return __nvvm_redux_sync_umax(__mask, __value);
+ return __nvvm_redux_sync_umax(__value, __mask);
}
__device__ inline int __reduce_min_sync(unsigned __mask, int __value) {
- return __nvvm_redux_sync_min(__mask, __value);
+ return __nvvm_redux_sync_min(__value, __mask);
}
__device__ inline int __reduce_max_sync(unsigned __mask, int __value) {
- return __nvvm_redux_sync_max(__mask, __value);
+ return __nvvm_redux_sync_max(__value, __mask);
}
__device__ inline unsigned __reduce_or_sync(unsigned __mask, unsigned __value) {
- return __nvvm_redux_sync_or(__mask, __value);
+ return __nvvm_redux_sync_or(__value, __mask);
}
__device__ inline unsigned __reduce_and_sync(unsigned __mask,
unsigned __value) {
- return __nvvm_redux_sync_and(__mask, __value);
+ return __nvvm_redux_sync_and(__value, __mask);
}
__device__ inline unsigned __reduce_xor_sync(unsigned __mask,
unsigned __value) {
- return __nvvm_redux_sync_xor(__mask, __value);
+ return __nvvm_redux_sync_xor(__value, __mask);
}
__device__ inline void __nv_memcpy_async_shared_global_4(void *__dst,
|
@llvm/pr-subscribers-backend-x86 Author: None (llvmbot) Changes: Backport 2d1517d. Requested by: @Artem-B. Full diff: https://github.com/llvm/llvm-project/pull/134295.diff 1 file affected:
diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index a04e8b6de44d0..8b230af6f6647 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -515,32 +515,32 @@ __device__ inline cuuint32_t __nvvm_get_smem_pointer(void *__ptr) {
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800
__device__ inline unsigned __reduce_add_sync(unsigned __mask,
unsigned __value) {
- return __nvvm_redux_sync_add(__mask, __value);
+ return __nvvm_redux_sync_add(__value, __mask);
}
__device__ inline unsigned __reduce_min_sync(unsigned __mask,
unsigned __value) {
- return __nvvm_redux_sync_umin(__mask, __value);
+ return __nvvm_redux_sync_umin(__value, __mask);
}
__device__ inline unsigned __reduce_max_sync(unsigned __mask,
unsigned __value) {
- return __nvvm_redux_sync_umax(__mask, __value);
+ return __nvvm_redux_sync_umax(__value, __mask);
}
__device__ inline int __reduce_min_sync(unsigned __mask, int __value) {
- return __nvvm_redux_sync_min(__mask, __value);
+ return __nvvm_redux_sync_min(__value, __mask);
}
__device__ inline int __reduce_max_sync(unsigned __mask, int __value) {
- return __nvvm_redux_sync_max(__mask, __value);
+ return __nvvm_redux_sync_max(__value, __mask);
}
__device__ inline unsigned __reduce_or_sync(unsigned __mask, unsigned __value) {
- return __nvvm_redux_sync_or(__mask, __value);
+ return __nvvm_redux_sync_or(__value, __mask);
}
__device__ inline unsigned __reduce_and_sync(unsigned __mask,
unsigned __value) {
- return __nvvm_redux_sync_and(__mask, __value);
+ return __nvvm_redux_sync_and(__value, __mask);
}
__device__ inline unsigned __reduce_xor_sync(unsigned __mask,
unsigned __value) {
- return __nvvm_redux_sync_xor(__mask, __value);
+ return __nvvm_redux_sync_xor(__value, __mask);
}
__device__ inline void __nv_memcpy_async_shared_global_4(void *__dst,
|
|
I was the reviewer of the original PR. |
As Artem mentioned, it is a low-risk fix impacting the lowering to the redux_sync family of intrinsics only. So, it is good to merge it to the release branch. |
Fixes: llvm#131415 --------- Signed-off-by: Austin Schuh <austin.linux@gmail.com> (cherry picked from commit 2d1517d)
@Artem-B (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backport 2d1517d
Requested by: @Artem-B