diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h index 4e8232974e705..9b7b8e3fd95dd 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h @@ -124,8 +124,6 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() { return __ballot64(1); } -EXTERN bool __kmpc_impl_is_first_active_thread(); - EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var, int32_t SrcLane); @@ -157,6 +155,8 @@ INLINE int GetBlockIdInKernel() { return __builtin_amdgcn_workgroup_id_x(); } INLINE int GetNumberOfBlocksInKernel() { return __ockl_get_num_groups(0); } INLINE int GetNumberOfThreadsInBlock() { return __ockl_get_local_size(0); } +EXTERN bool __kmpc_impl_is_first_active_thread(); + // Locks EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock); EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock); diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h index 8461a93913a38..6d58528826838 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -133,15 +133,6 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() { #endif } -// Return true if this is the first active thread in the warp. -INLINE bool __kmpc_impl_is_first_active_thread() { - unsigned long long Mask = __kmpc_impl_activemask(); - unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE); - unsigned long long Sh = Mask << ShNum; - // Truncate Sh to the 32 lower bits - return (unsigned)Sh == 0; -} - // In Cuda 9.0, the *_sync() version takes an extra argument 'mask'. INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var, @@ -197,6 +188,15 @@ INLINE int GetBlockIdInKernel() { return blockIdx.x; } INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; } INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; } +// Return true if this is the first active thread in the warp. +INLINE bool __kmpc_impl_is_first_active_thread() { + unsigned long long Mask = __kmpc_impl_activemask(); + unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE); + unsigned long long Sh = Mask << ShNum; + // Truncate Sh to the 32 lower bits + return (unsigned)Sh == 0; +} + // Locks EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock); EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock);