From 67ae789e8f20acfb2f36b270d475177c72d65688 Mon Sep 17 00:00:00 2001 From: "Sarnie, Nick" Date: Wed, 8 Oct 2025 07:52:02 -0700 Subject: [PATCH 1/2] [OMPIRBuilder] Fix reduction codegen for SPIR-V Signed-off-by: Sarnie, Nick --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 7 +++++- clang/test/OpenMP/spirv_reduction.cpp | 22 +++++++++++++++++++ .../include/llvm/Frontend/OpenMP/OMPKinds.def | 3 ++- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 11 ++++++---- 4 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 clang/test/OpenMP/spirv_reduction.cpp diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 4272d8b1a1f51..3613b6a143d42 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -869,6 +869,8 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) CGM.getLangOpts().OpenMPOffloadMandatory, /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false, hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false); + Config.setDefaultTargetAS( + CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default)); OMPBuilder.setConfig(Config); if (!CGM.getLangOpts().OpenMPIsTargetDevice) @@ -1243,7 +1245,10 @@ void CGOpenMPRuntimeGPU::emitParallelCall( llvm::Value *ID = llvm::ConstantPointerNull::get(CGM.Int8PtrTy); if (WFn) ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy); - llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, CGM.Int8PtrTy); + llvm::Type *FnPtrTy = llvm::PointerType::get( + CGF.getLLVMContext(), CGM.getDataLayout().getProgramAddressSpace()); + + llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, FnPtrTy); // Create a private scope that will globalize the arguments // passed from the outside of the target region. diff --git a/clang/test/OpenMP/spirv_reduction.cpp b/clang/test/OpenMP/spirv_reduction.cpp new file mode 100644 index 0000000000000..e0e7549de716c --- /dev/null +++ b/clang/test/OpenMP/spirv_reduction.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s + +// expected-no-diagnostics + +// CHECK: call spir_func addrspace(9) void @__kmpc_parallel_51(ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}} to ptr addrspace(4)), +// CHECK-SAME: i32 %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(4) {{.*}}, ptr addrspace(4) %{{.*}}, i64 {{.*}}) + +// CHECK: call addrspace(9) i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}} to ptr addrspace(4)), +// CHECK-SAME: ptr addrspace(4) %{{.*}}, i32 1024, i64 4, ptr addrspace(4) %{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}) + +int main() { + int matrix_sum = 0; + #pragma omp target teams distribute parallel for \ + reduction(+:matrix_sum) \ + map(tofrom:matrix_sum) + for (int i = 0; i < 100; i++) { + + } + + return 0; +} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 01ca8da759ef7..1694a33510d79 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -42,6 +42,7 @@ __OMP_TYPE(Double) OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx)) OMP_TYPE(Int63, Type::getIntNTy(Ctx, 63)) +OMP_TYPE(FuncPtrTy, PointerType::get(Ctx, M.getDataLayout().getProgramAddressSpace())) __OMP_PTR_TYPE(VoidPtr) __OMP_PTR_TYPE(VoidPtrPtr) @@ -471,7 +472,7 @@ __OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentPtr, KernelLaunchEn __OMP_RTL(__kmpc_target_deinit, false, Void,) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, - VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) + FuncPtrTy, VoidPtr, VoidPtrPtr, SizeTy) __OMP_RTL(__kmpc_for_static_loop_4, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8) __OMP_RTL(__kmpc_for_static_loop_4u, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32, Int8) __OMP_RTL(__kmpc_for_static_loop_8, false, Void, IdentPtr, VoidPtr, VoidPtr, Int64, Int64, Int64, Int8) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 5980ee35a5cd2..286ed039b1214 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3623,7 +3623,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU( // 1. Build a list of reduction variables. // void *RedList[] = {[0], ..., [-1]}; auto Size = ReductionInfos.size(); - Type *PtrTy = PointerType::getUnqual(Ctx); + Type *PtrTy = PointerType::get(Ctx, Config.getDefaultTargetAS()); + Type *FuncPtrTy = + Builder.getPtrTy(M.getDataLayout().getProgramAddressSpace()); Type *RedArrayTy = ArrayType::get(PtrTy, Size); CodeGenIP = Builder.saveIP(); Builder.restoreIP(AllocaIP); @@ -3667,9 +3669,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU( Builder.getInt64(MaxDataSize * ReductionInfos.size()); if (!IsTeamsReduction) { Value *SarFuncCast = - Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, PtrTy); + Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, FuncPtrTy); Value *WcFuncCast = - Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, PtrTy); + Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy); Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast, WcFuncCast}; Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr( @@ -10072,13 +10074,14 @@ void OpenMPIRBuilder::initializeTypes(Module &M) { LLVMContext &Ctx = M.getContext(); StructType *T; unsigned DefaultTargetAS = Config.getDefaultTargetAS(); + unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace(); #define OMP_TYPE(VarName, InitValue) VarName = InitValue; #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ VarName##Ty = ArrayType::get(ElemTy, ArraySize); \ VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS); #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \ - VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS); + VarName##Ptr = PointerType::get(Ctx, ProgramAS); #define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \ T = StructType::getTypeByName(Ctx, StructName); \ if (!T) \ From a8b2dd2469de9b9649b4b8615d9995b6231f6852 Mon Sep 17 00:00:00 2001 From: "Sarnie, Nick" Date: Wed, 8 Oct 2025 14:48:30 -0700 Subject: [PATCH 2/2] use existing test Signed-off-by: Sarnie, Nick --- clang/test/OpenMP/reduction_complex.c | 17 ++++++++++++++--- clang/test/OpenMP/spirv_reduction.cpp | 22 ---------------------- 2 files changed, 14 insertions(+), 25 deletions(-) delete mode 100644 clang/test/OpenMP/spirv_reduction.cpp diff --git a/clang/test/OpenMP/reduction_complex.c b/clang/test/OpenMP/reduction_complex.c index e00caa8f90fdf..b79903ff4d37b 100644 --- a/clang/test/OpenMP/reduction_complex.c +++ b/clang/test/OpenMP/reduction_complex.c @@ -10,6 +10,17 @@ // RUN: -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc \ // RUN: -o - | FileCheck %s --check-prefix CHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \ +// RUN: -triple powerpc64le-unknown-unknown \ +// RUN: -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o \ +// RUN: %t-ppc-host-spv.bc + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \ +// RUN: -triple spirv64-intel -DCUA \ +// RUN: -fopenmp-targets=spirv64-intel -emit-llvm %s \ +// RUN: -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host-spv.bc \ +// RUN: -o - | FileCheck %s --check-prefix CHECK + // expected-no-diagnostics int foo() { int i; @@ -46,9 +57,9 @@ int foo() { // CHECK-NEXT: %[[VAL_244:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_232]], i64 0, i64 0 // CHECK-NEXT: %[[VAL_245:.*]] = getelementptr { float, float }, ptr %[[VAL_243]], i64 1 // CHECK-NEXT: %[[VAL_246:.*]] = load i64, ptr %[[VAL_243]], align 8 -// CHECK-NEXT: %[[VAL_247:.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-NEXT: %[[VAL_247:.*]] = call{{.*}}i32 @__kmpc_get_warp_size() // CHECK-NEXT: %[[VAL_248:.*]] = trunc i32 %[[VAL_247]] to i16 -// CHECK-NEXT: %[[VAL_249:.*]] = call i64 @__kmpc_shuffle_int64(i64 %[[VAL_246]], i16 %[[VAL_240]], i16 %[[VAL_248]]) +// CHECK-NEXT: %[[VAL_249:.*]] = call{{.*}}i64 @__kmpc_shuffle_int64(i64 %[[VAL_246]], i16 %[[VAL_240]], i16 %[[VAL_248]]) // CHECK-NEXT: store i64 %[[VAL_249]], ptr %[[VAL_233]], align 8 // CHECK-NEXT: %[[VAL_250:.*]] = getelementptr i64, ptr %[[VAL_243]], i64 1 // CHECK-NEXT: %[[VAL_251:.*]] = getelementptr i64, ptr %[[VAL_233]], i64 1 @@ -67,7 +78,7 @@ int foo() { // CHECK-NEXT: %[[VAL_263:.*]] = or i1 %[[VAL_262]], %[[VAL_261]] // CHECK-NEXT: br i1 %[[VAL_263]], label %[[VAL_264:.*]], label %[[VAL_265:.*]] // CHECK: then: ; preds = %[[VAL_266:.*]] -// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l{{[0-9]+}}_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr %[[VAL_238]], ptr %[[VAL_232]]) #2 +// CHECK-NEXT: call{{.*}}void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l{{[0-9]+}}_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr %[[VAL_238]], ptr %[[VAL_232]]) #2 // CHECK-NEXT: br label %[[VAL_267:.*]] // CHECK: else: ; preds = %[[VAL_266]] // CHECK-NEXT: br label %[[VAL_267]] diff --git a/clang/test/OpenMP/spirv_reduction.cpp b/clang/test/OpenMP/spirv_reduction.cpp deleted file mode 100644 index e0e7549de716c..0000000000000 --- a/clang/test/OpenMP/spirv_reduction.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s - -// expected-no-diagnostics - -// CHECK: call spir_func addrspace(9) void @__kmpc_parallel_51(ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}} to ptr addrspace(4)), -// CHECK-SAME: i32 %{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(4) {{.*}}, ptr addrspace(4) %{{.*}}, i64 {{.*}}) - -// CHECK: call addrspace(9) i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}} to ptr addrspace(4)), -// CHECK-SAME: ptr addrspace(4) %{{.*}}, i32 1024, i64 4, ptr addrspace(4) %{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}, ptr addrspace(9) @{{.*}}) - -int main() { - int matrix_sum = 0; - #pragma omp target teams distribute parallel for \ - reduction(+:matrix_sum) \ - map(tofrom:matrix_sum) - for (int i = 0; i < 100; i++) { - - } - - return 0; -}