Skip to content

Commit

Permalink
[libomptarget] Refactor syncwarp macro to inline function
Browse files Browse the repository at this point in the history
Summary:
[libomptarget] Refactor syncwarp macro to inline function
See also the abandoned D66846, which was split into this diff and others.

Reviewers: jdoerfert, ABataev, grokos, ronlieb, gregrodgers

Subscribers: openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D66857

llvm-svn: 370149
  • Loading branch information
JonChesterfield committed Aug 28, 2019
1 parent e73e301 commit be3d487
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 8 deletions.
3 changes: 0 additions & 3 deletions openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
Expand Up @@ -52,11 +52,8 @@
#error CUDA_VERSION macro is undefined, something wrong with cuda.
#elif CUDA_VERSION >= 9000
#define __ACTIVEMASK() __activemask()
#define __SYNCWARP(Mask) __syncwarp(Mask)
#else
#define __ACTIVEMASK() __ballot(1)
// In Cuda < 9.0 no need to sync threads in warps.
#define __SYNCWARP(Mask)
#endif // CUDA_VERSION

// Emits the PTX instruction `bar.sync` with `n` as its single register operand.
// The asm is volatile and carries a "memory" clobber, so the compiler will not
// drop it or move memory accesses across it.
// Note: the expansion already ends in ';'.
#define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory");
Expand Down
10 changes: 6 additions & 4 deletions openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
Expand Up @@ -14,6 +14,8 @@
// Execution Parameters
////////////////////////////////////////////////////////////////////////////////

#include "target_impl.h"

INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
execution_param = EMode;
execution_param |= RMode;
Expand Down Expand Up @@ -203,7 +205,7 @@ INLINE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); }

INLINE void IncParallelLevel(bool ActiveParallel) {
unsigned Active = __ACTIVEMASK();
__SYNCWARP(Active);
__kmpc_impl_syncwarp(Active);
unsigned LaneMaskLt;
asm("mov.u32 %0, %%lanemask_lt;" : "=r"(LaneMaskLt));
unsigned Rank = __popc(Active & LaneMaskLt);
Expand All @@ -212,12 +214,12 @@ INLINE void IncParallelLevel(bool ActiveParallel) {
(1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
__threadfence();
}
__SYNCWARP(Active);
__kmpc_impl_syncwarp(Active);
}

INLINE void DecParallelLevel(bool ActiveParallel) {
unsigned Active = __ACTIVEMASK();
__SYNCWARP(Active);
__kmpc_impl_syncwarp(Active);
unsigned LaneMaskLt;
asm("mov.u32 %0, %%lanemask_lt;" : "=r"(LaneMaskLt));
unsigned Rank = __popc(Active & LaneMaskLt);
Expand All @@ -226,7 +228,7 @@ INLINE void DecParallelLevel(bool ActiveParallel) {
(1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
__threadfence();
}
__SYNCWARP(Active);
__kmpc_impl_syncwarp(Active);
}

////////////////////////////////////////////////////////////////////////////////
Expand Down
8 changes: 7 additions & 1 deletion openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
Expand Up @@ -63,6 +63,12 @@ INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t Mask,
#endif // CUDA_VERSION
}

INLINE void __kmpc_impl_syncwarp(int32_t Mask) { __SYNCWARP(Mask); }
// Warp-level synchronization wrapper.
// When CUDA_VERSION >= 9000 this forwards Mask to __syncwarp(); otherwise the
// body is empty and the call is a no-op.
INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) {
#if CUDA_VERSION >= 9000
__syncwarp(Mask);
#else
// In CUDA < 9.0 there is no need to sync threads in warps.
#endif // CUDA_VERSION
}

#endif

0 comments on commit be3d487

Please sign in to comment.