diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp index ccffdf43549fe1..33d4ab838af128 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp @@ -49,13 +49,12 @@ llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) { llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; llvm::Module *M = &CGF.CGM.getModule(); - const char *LocSize = "__ockl_get_local_size"; + const char *LocSize = "__kmpc_amdgcn_gpu_num_threads"; llvm::Function *F = M->getFunction(LocSize); if (!F) { F = llvm::Function::Create( - llvm::FunctionType::get(CGF.Int64Ty, {CGF.Int32Ty}, false), + llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false), llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule()); } - return Bld.CreateTrunc( - Bld.CreateCall(F, {Bld.getInt32(0)}, "nvptx_num_threads"), CGF.Int32Ty); + return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); } diff --git a/clang/test/OpenMP/amdgcn_target_codegen.cpp b/clang/test/OpenMP/amdgcn_target_codegen.cpp index 85ef69942a50d1..416ed06083b078 100644 --- a/clang/test/OpenMP/amdgcn_target_codegen.cpp +++ b/clang/test/OpenMP/amdgcn_target_codegen.cpp @@ -13,9 +13,8 @@ int test_amdgcn_target_tid_threads() { int arr[N]; -// CHECK: [[NUM_THREADS:%.+]] = call i64 @__ockl_get_local_size(i32 0) -// CHECK-NEXT: [[VAR:%.+]] = trunc i64 [[NUM_THREADS]] to i32 -// CHECK-NEXT: sub nuw i32 [[VAR]], 64 +// CHECK: [[NUM_THREADS:%.+]] = call i32 @__kmpc_amdgcn_gpu_num_threads() +// CHECK: sub nuw i32 [[NUM_THREADS]], 64 // CHECK: call i32 @llvm.amdgcn.workitem.id.x() #pragma omp target for (int i = 0; i < N; i++) { @@ -30,9 +29,8 @@ int test_amdgcn_target_tid_threads_simd() { int arr[N]; -// CHECK: [[NUM_THREADS:%.+]] = call i64 @__ockl_get_local_size(i32 0) -// CHECK-NEXT: [[VAR:%.+]] = trunc i64 [[NUM_THREADS]] to i32 -// CHECK-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[VAR]], i16 0) +// CHECK: [[NUM_THREADS:%.+]] = call i32 @__kmpc_amdgcn_gpu_num_threads() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[NUM_THREADS]], i16 0) #pragma omp target simd for (int i = 0; i < N; i++) { arr[i] = 1; diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h index f7c75c09362a24..80409d611f6f3e 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h @@ -15,4 +15,6 @@ typedef uint64_t __kmpc_impl_lanemask_t; typedef uint32_t omp_lock_t; /* arbitrary type of the right length */ +EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads(); + #endif diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip index 9fbdc67b56ab61..3e70beb85d5b2e 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip @@ -144,6 +144,10 @@ DEVICE unsigned GetLaneId() { return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); } +EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() { + return GetNumberOfThreadsInBlock(); +} + // Stub implementations -DEVICE void *__kmpc_impl_malloc(size_t ) { return nullptr } +DEVICE void *__kmpc_impl_malloc(size_t) { return nullptr; } DEVICE void __kmpc_impl_free(void *) {}