diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h index b61224ff4f1b3..5c59c99675699 100644 --- a/flang/include/flang/Tools/CrossToolHelpers.h +++ b/flang/include/flang/Tools/CrossToolHelpers.h @@ -109,17 +109,6 @@ void setOffloadModuleInterfaceAttributes( } } -// Shares assinging of the OpenMP OffloadModuleInterface and its TargetCPU -// attribute accross Flang tools (bbc/flang) -void setOffloadModuleInterfaceTargetAttribute(mlir::ModuleOp &module, - llvm::StringRef targetCPU, llvm::StringRef targetFeatures) { - // Should be registered by the OpenMPDialect - if (auto offloadMod = llvm::dyn_cast( - module.getOperation())) { - offloadMod.setTarget(targetCPU, targetFeatures); - } -} - void setOpenMPVersionAttribute(mlir::ModuleOp &module, int64_t version) { module.getOperation()->setAttr( mlir::StringAttr::get(module.getContext(), llvm::Twine{"omp.version"}), diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 4738f7ba57042..09e861db3ec6d 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -301,9 +301,6 @@ bool CodeGenAction::beginSourceFileAction() { Fortran::common::LanguageFeature::OpenMP)) { setOffloadModuleInterfaceAttributes(*mlirModule, ci.getInvocation().getLangOpts()); - setOffloadModuleInterfaceTargetAttribute( - *mlirModule, targetMachine.getTargetCPU(), - targetMachine.getTargetFeatureString()); setOpenMPVersionAttribute(*mlirModule, ci.getInvocation().getLangOpts().OpenMPVersion); } diff --git a/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90 b/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90 index 179b71b3f0cfa..5154782e1ae17 100644 --- a/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90 +++ b/flang/test/Lower/OpenMP/FIR/target_cpu_features.f90 @@ -1,22 +1,19 @@ !REQUIRES: amdgpu-registered-target, nvptx-registered-target -!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s -!RUN: %flang_fc1 -emit-hlfir -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=NVPTX %s - +!RUN: %flang_fc1 -emit-fir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=AMDGCN %s +!RUN: %flang_fc1 -emit-fir -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=NVPTX %s !=============================================================================== ! Target_Enter Simple !=============================================================================== -!CHECK: omp.target = #omp.target -!NVPTX: omp.target = #omp.target -!CHECK-LABEL: func.func @_QPomp_target_simple() -subroutine omp_target_simple - ! Directive needed to prevent subroutine from being filtered out when - ! compiling for the device. - !$omp declare target -end subroutine omp_target_simple +!AMDGCN: module attributes { +!AMDGCN-SAME: fir.target_cpu = "gfx908" +!AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts", +!AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts", +!AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp", +!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts", +!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]> +!NVPTX: module attributes { +!NVPTX-SAME: fir.target_cpu = "sm_80" +!NVPTX-SAME: fir.target_features = #llvm.target_features<["+ptx61", "+sm_80"]> diff --git a/flang/test/Lower/OpenMP/target_cpu_features.f90 b/flang/test/Lower/OpenMP/target_cpu_features.f90 index ea1e5e38fca88..ea8efcf5d256b 100644 --- a/flang/test/Lower/OpenMP/target_cpu_features.f90 +++ b/flang/test/Lower/OpenMP/target_cpu_features.f90 @@ -1,21 +1,19 @@ !REQUIRES: amdgpu-registered-target, nvptx-registered-target -!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s +!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=AMDGCN %s !RUN: %flang_fc1 -emit-hlfir -triple nvptx64-nvidia-cuda -target-cpu sm_80 -fopenmp -fopenmp-is-target-device %s -o - | FileCheck --check-prefix=NVPTX %s !=============================================================================== ! Target_Enter Simple !=============================================================================== -!CHECK: omp.target = #omp.target -!NVPTX: omp.target = #omp.target -!CHECK-LABEL: func.func @_QPomp_target_simple() -subroutine omp_target_simple - ! Directive needed to prevent subroutine from being filtered out when - ! compiling for the device. - !$omp declare target -end subroutine omp_target_simple +!AMDGCN: module attributes { +!AMDGCN-SAME: fir.target_cpu = "gfx908" +!AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts", +!AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts", +!AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp", +!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mai-insts", +!AMDGCN-SAME: "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]> +!NVPTX: module attributes { +!NVPTX-SAME: fir.target_cpu = "sm_80" +!NVPTX-SAME: fir.target_features = #llvm.target_features<["+ptx61", "+sm_80"]> diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 96c15e775a302..451828ec4ba77 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -72,15 +72,6 @@ def FlagsAttr : OpenMP_Attr<"Flags", "flags"> { let assemblyFormat = "`<` struct(params) `>`"; } -def TargetAttr : OpenMP_Attr<"Target", "target"> { - let parameters = (ins - StringRefParameter<>:$target_cpu, - StringRefParameter<>:$target_features - ); - - let assemblyFormat = "`<` struct(params) `>`"; -} - class OpenMP_Op traits = []> : Op; diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td index 18c5335c63fb9..198a9a2357f2f 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td @@ -205,34 +205,6 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> { assumeTeamsOversubscription, assumeThreadsOversubscription, assumeNoThreadState, assumeNoNestedParallelism, noGPULib, openmpDeviceVersion)); }]>, - InterfaceMethod< - /*description=*/[{ - Get the Target attribute on the current module if it exists - and return the attribute, if it doesn't exist it returns a nullptr. - }], - /*retTy=*/"mlir::omp::TargetAttr", - /*methodName=*/"getTarget", - (ins), [{}], [{ - if (Attribute flags = $_op->getAttr("omp.target")) - return ::llvm::dyn_cast_or_null(flags); - return nullptr; - }]>, - InterfaceMethod< - /*description=*/[{ - Set the attribute target on the current module with the - specified string arguments - name of cpu and corresponding features. - }], - /*retTy=*/"void", - /*methodName=*/"setTarget", - (ins "llvm::StringRef":$targetCPU, - "llvm::StringRef":$targetFeatures), [{}], [{ - if (targetCPU.empty()) - return; - $_op->setAttr(("omp." + mlir::omp::TargetAttr::getMnemonic()).str(), - mlir::omp::TargetAttr::get($_op->getContext(), - targetCPU.str(), - targetFeatures.str())); - }]>, InterfaceMethod< /*description=*/[{ Set a StringAttr on the current module containing the host IR file path. This diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 23e101f1e4527..17ce14fe642be 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2336,6 +2336,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, if (!targetOpSupported(opInst)) return failure(); + auto parentFn = opInst.getParentOfType(); auto targetOp = cast(opInst); auto &targetRegion = targetOp.getRegion(); DataLayout dl = DataLayout(opInst.getParentOfType()); @@ -2345,6 +2346,22 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) -> InsertPointTy { + // Forward target-cpu and target-features function attributes from the + // original function to the new outlined function. + llvm::Function *llvmParentFn = + moduleTranslation.lookupFunction(parentFn.getName()); + llvm::Function *llvmOutlinedFn = codeGenIP.getBlock()->getParent(); + assert(llvmParentFn && llvmOutlinedFn && + "Both parent and outlined functions must exist at this point"); + + if (auto attr = llvmParentFn->getFnAttribute("target-cpu"); + attr.isStringAttribute()) + llvmOutlinedFn->addFnAttr(attr); + + if (auto attr = llvmParentFn->getFnAttribute("target-features"); + attr.isStringAttribute()) + llvmOutlinedFn->addFnAttr(attr); + builder.restoreIP(codeGenIP); unsigned argIndex = 0; for (auto &mapOp : mapOperands) { @@ -2363,7 +2380,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, }; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - StringRef parentName = opInst.getParentOfType().getName(); + StringRef parentName = parentFn.getName(); llvm::TargetRegionEntryInfo entryInfo; diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir index 1fccb441f4d59..6c148944e034c 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir @@ -3,7 +3,7 @@ // The aim of the test is to check the LLVM IR codegen for the device // for omp target parallel construct -module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true, omp.target = #omp.target} { +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { llvm.func @_QQmain_omp_outline_1(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget} { %0 = omp.map_info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"} omp.target map_entries(%0 -> %arg2 : !llvm.ptr) { diff --git a/mlir/test/Target/LLVMIR/omptarget-target-cpu-features.mlir b/mlir/test/Target/LLVMIR/omptarget-target-cpu-features.mlir new file mode 100644 index 0000000000000..fddb799142820 --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-target-cpu-features.mlir @@ -0,0 +1,23 @@ +// Test that the target_features and target_cpu llvm.func attributes are +// forwarded to outlined target region functions. + +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +module attributes {omp.is_target_device = false} { + llvm.func @omp_target_region() attributes { + target_cpu = "x86-64", + target_features = #llvm.target_features<["+mmx", "+sse"]> + } { + omp.target { + omp.terminator + } + llvm.return + } +} + +// CHECK: define void @omp_target_region() #[[ATTRS:.*]] { +// CHECK: define internal void @__omp_offloading_{{.*}}_omp_target_region_{{.*}}() #[[ATTRS]] { + +// CHECK: attributes #[[ATTRS]] = { +// CHECK-SAME: "target-cpu"="x86-64" +// CHECK-SAME: "target-features"="+mmx,+sse"