10 changes: 5 additions & 5 deletions clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ int bar(int n){
// CHECK: [[ELT_CAST:%.+]] = bitcast double* [[ELT]] to i64*
// CHECK: [[REMOTE_ELT_CAST:%.+]] = bitcast double* [[REMOTE_ELT]] to i64*
// CHECK: [[ELT_VAL:%.+]] = load i64, i64* [[ELT_CAST]], align
// CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
// CHECK: [[WS32:%.+]] = call i32 @__kmpc_get_warp_size()
// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16
// CHECK: [[REMOTE_ELT_VAL64:%.+]] = call i64 @__kmpc_shuffle_int64(i64 [[ELT_VAL]], i16 [[LANEOFFSET]], i16 [[WS]])
//
Expand Down Expand Up @@ -320,7 +320,7 @@ int bar(int n){
// CHECK: [[ELT_VAL:%.+]] = load i8, i8* [[ELT_VOID]], align
//
// CHECK: [[ELT_CAST:%.+]] = sext i8 [[ELT_VAL]] to i32
// CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
// CHECK: [[WS32:%.+]] = call i32 @__kmpc_get_warp_size()
// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16
// CHECK: [[REMOTE_ELT1_VAL32:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_CAST]], i16 [[LANEOFFSET]], i16 [[WS]])
// CHECK: [[REMOTE_ELT1_VAL:%.+]] = trunc i32 [[REMOTE_ELT1_VAL32]] to i8
Expand All @@ -336,7 +336,7 @@ int bar(int n){
// CHECK: [[ELT_CAST:%.+]] = bitcast float* [[ELT]] to i32*
// CHECK: [[REMOTE_ELT2_CAST:%.+]] = bitcast float* [[REMOTE_ELT2]] to i32*
// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT_CAST]], align
// CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
// CHECK: [[WS32:%.+]] = call i32 @__kmpc_get_warp_size()
// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16
// CHECK: [[REMOTE_ELT2_VAL32:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_VAL]], i16 [[LANEOFFSET]], i16 [[WS]])
//
Expand Down Expand Up @@ -617,7 +617,7 @@ int bar(int n){
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align
//
// CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
// CHECK: [[WS32:%.+]] = call i32 @__kmpc_get_warp_size()
// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16
// CHECK: [[REMOTE_ELT1_VAL:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_VAL]], i16 [[LANEOFFSET]], i16 [[WS]])
//
Expand All @@ -632,7 +632,7 @@ int bar(int n){
// CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align
//
// CHECK: [[ELT_CAST:%.+]] = sext i16 [[ELT_VAL]] to i32
// CHECK: [[WS32:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
// CHECK: [[WS32:%.+]] = call i32 @__kmpc_get_warp_size()
// CHECK: [[WS:%.+]] = trunc i32 [[WS32]] to i16
// CHECK: [[REMOTE_ELT2_VAL32:%.+]] = call i32 @__kmpc_shuffle_int32(i32 [[ELT_CAST]], i16 [[LANEOFFSET]], i16 [[WS]])
// CHECK: [[REMOTE_ELT2_VAL:%.+]] = trunc i32 [[REMOTE_ELT2_VAL32]] to i16
Expand Down

Large diffs are not rendered by default.

2,592 changes: 1,296 additions & 1,296 deletions clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,8 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32)
// Runtime function table entries, one __OMP_RTL invocation per function.
// NOTE(review): the arguments appear to be (name, boolean flag, return type,
// parameter types...) - confirm against the __OMP_RTL macro definition, which
// is not visible in this hunk.
__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
__OMP_RTL(__kmpc_syncwarp, false, Void, Int64)

// Warp-size query: listed with an Int32 result and no further arguments.
__OMP_RTL(__kmpc_get_warp_size, false, Int32, )

__OMP_RTL(__kmpc_is_generic_main_thread_id, false, Int8, Int32)

// NOTE(review): `__last` looks like an end-of-list sentinel - confirm.
__OMP_RTL(__last, false, Void, )
Expand Down
5 changes: 5 additions & 0 deletions openmp/libomptarget/DeviceRTL/src/Mapping.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,5 +277,10 @@ __attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
FunctionTracingRAII();
return impl::getNumHardwareThreadsInBlock();
}

/// Runtime entry point that reports the warp size by forwarding the value of
/// impl::getWarpSize(). The function is marked noinline.
///
/// \returns the result of impl::getWarpSize(), as a uint32_t.
__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
// Same tracing call that __kmpc_get_hardware_num_threads_in_block makes.
FunctionTracingRAII();
return impl::getWarpSize();
}
}
#pragma omp end declare target
1 change: 1 addition & 0 deletions openmp/libomptarget/DeviceRTL/src/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ namespace _OMP {
/// Calls each of the listed __kmpc_* entry points once, passing placeholder
/// arguments (nullptr, 0) to the two barrier functions and ignoring every
/// return value.
/// NOTE(review): given the name and the `used`/`optnone` attributes, this
/// presumably exists only so these runtime functions stay referenced and are
/// not discarded - confirm; nothing visible here calls keepAlive itself.
__attribute__((used, weak, optnone)) void keepAlive() {
__kmpc_get_hardware_thread_id_in_block();
__kmpc_get_hardware_num_threads_in_block();
__kmpc_get_warp_size();
__kmpc_barrier_simple_spmd(nullptr, 0);
__kmpc_barrier_simple_generic(nullptr, 0);
}
Expand Down
5 changes: 4 additions & 1 deletion openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,11 @@ EXTERN int __kmpc_get_hardware_num_threads_in_block() {
__builtin_amdgcn_workgroup_size_x());
}

// Warp-size query for this target: returns WARPSIZE unchanged.
EXTERN unsigned __kmpc_get_warp_size() {
return WARPSIZE;
}

// Warp index of the calling thread: its hardware thread id within the block
// divided by WARPSIZE.
EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; }
// Returns WARPSIZE unchanged.
EXTERN unsigned GetWarpSize() { return WARPSIZE; }
// Returns the result of chaining the two AMDGCN mbcnt builtins with an
// all-ones mask: mbcnt_lo starts from 0 and its result seeds mbcnt_hi.
// NOTE(review): per the AMDGPU mbcnt semantics this should be the calling
// lane's index within its wavefront - confirm against the builtin docs.
EXTERN unsigned GetLaneId() {
return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size);
///{
// C-linkage declarations of helpers used by the inline shuffle wrappers in
// this header. Definitions are not in this header section.
extern "C" {
unsigned GetLaneId();
unsigned GetWarpSize();
unsigned __kmpc_get_warp_size();
// Splits a 64-bit value into two 32-bit halves written through `lo`/`hi`.
// NOTE(review): inferred from the signature only - confirm at the definition.
void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi);
// Counterpart of __kmpc_impl_unpack: builds a 64-bit value from two 32-bit
// halves. NOTE(review): inferred from the signature only - confirm.
uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi);
}
Expand All @@ -60,7 +60,7 @@ int32_t __kmpc_impl_shfl_down_sync(uint64_t Mask, int32_t Var, uint32_t Delta,

inline int32_t __kmpc_impl_shfl_sync(uint64_t Mask, int32_t Var,
int32_t SrcLane) {
int Width = GetWarpSize();
int Width = __kmpc_get_warp_size();
int Self = GetLaneId();
int Index = SrcLane + (Self & ~(Width - 1));
return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
Expand Down Expand Up @@ -90,7 +90,7 @@ inline int32_t __kmpc_impl_shfl_sync(uint64_t Mask, int32_t Var,

// Shuffle-down wrapper: forwards Mask, Var and Delta to
// __nvvm_shfl_sync_down_i32 together with a control word T built from Width.
// T is ((warp size - Width) << 8) | 0x1f.
// NOTE(review): `T` is declared twice in a row below, once using
// GetWarpSize() and once using __kmpc_get_warp_size(). This looks like the
// before/after pair of a diff; only one declaration can remain for the
// function to compile - confirm which one is intended.
inline int32_t __kmpc_impl_shfl_down_sync(uint64_t Mask, int32_t Var,
uint32_t Delta, int32_t Width) {
int32_t T = ((GetWarpSize() - Width) << 8) | 0x1f;
int32_t T = ((__kmpc_get_warp_size() - Width) << 8) | 0x1f;
return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
}

Expand Down
2 changes: 1 addition & 1 deletion openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ EXTERN int __kmpc_get_hardware_num_blocks() {
// Returns the value of the NVVM builtin that reads the PTX ntid.x special
// register (x-dimension thread count of the block, per the builtin's name).
EXTERN int __kmpc_get_hardware_num_threads_in_block() {
return __nvvm_read_ptx_sreg_ntid_x();
}
// Warp-size query for this target: returns WARPSIZE unchanged.
EXTERN unsigned __kmpc_get_warp_size() { return WARPSIZE; }
// Warp index of the calling thread: its hardware thread id within the block
// divided by WARPSIZE.
EXTERN unsigned GetWarpId() {
return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE;
}
// Returns WARPSIZE unchanged.
EXTERN unsigned GetWarpSize() { return WARPSIZE; }
// Lane index of the calling thread: its hardware thread id within the block
// masked with (WARPSIZE - 1).
// NOTE(review): the mask equals "thread id modulo WARPSIZE" only if WARPSIZE
// is a power of two - confirm at the WARPSIZE definition.
EXTERN unsigned GetLaneId() {
return __kmpc_get_hardware_thread_id_in_block() & (WARPSIZE - 1);
}
Expand Down