diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 8eb8a13686dd3..02b333e9ccd56 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -679,6 +679,15 @@ void OpenMPIRBuilder::finalize(Function *Fn) { Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); + // Forward OuterFn's target-cpu/target-features attrs to OutlinedFn, if set. + auto TargetCpuAttr = OuterFn->getFnAttribute("target-cpu"); + if (TargetCpuAttr.isStringAttribute()) + OutlinedFn->addFnAttr(TargetCpuAttr); + + auto TargetFeaturesAttr = OuterFn->getFnAttribute("target-features"); + if (TargetFeaturesAttr.isStringAttribute()) + OutlinedFn->addFnAttr(TargetFeaturesAttr); + LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n"); LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); assert(OutlinedFn->getReturnType()->isVoidTy() && diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir index 43d0934d3a931..8ab50f05f0716 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir @@ -4,7 +4,10 @@ // for nested omp do loop inside omp target region module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { - llvm.func @target_parallel_wsloop(%arg0: !llvm.ptr ){ + llvm.func @target_parallel_wsloop(%arg0: !llvm.ptr) attributes { + target_cpu = "gfx90a", + target_features = #llvm.target_features<["+gfx9-insts", "+wavefrontsize64"]> + } { omp.parallel { %loop_ub = llvm.mlir.constant(9 : 
i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 @@ -29,8 +32,16 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo // CHECK: define internal void @[[PARALLEL_FUNC]] // CHECK-SAME: (ptr noalias noundef %[[TID_ADDR:.*]], ptr noalias noundef %[[ZERO_ADDR:.*]], // CHECK-SAME: ptr %[[ARG_PTR:.*]]) +// CHECK-SAME: #[[ATTRS1:[0-9]+]] // CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB]] to ptr), // CHECK-SAME: ptr @[[LOOP_BODY_FUNC:.*]], ptr %[[LOO_BODY_FUNC_ARG:.*]], i32 10, // CHECK-SAME: i32 %[[THREAD_NUM:.*]], i32 0) -// CHECK: define internal void @[[LOOP_BODY_FUNC]](i32 %[[CNT:.*]], ptr %[[LOOP_BODY_ARG_PTR:.*]]) { +// CHECK: define internal void @[[LOOP_BODY_FUNC]](i32 %[[CNT:.*]], ptr %[[LOOP_BODY_ARG_PTR:.*]]) #[[ATTRS2:[0-9]+]] { + +// CHECK: attributes #[[ATTRS2]] = { +// CHECK-SAME: "target-cpu"="gfx90a" +// CHECK-SAME: "target-features"="+gfx9-insts,+wavefrontsize64" +// CHECK: attributes #[[ATTRS1]] = { +// CHECK-SAME: "target-cpu"="gfx90a" +// CHECK-SAME: "target-features"="+gfx9-insts,+wavefrontsize64" diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 29baa84e7e19d..036367b262f07 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -179,6 +179,27 @@ llvm.func @test_omp_parallel_if_1(%arg0: i32) -> () { // ----- +// CHECK-LABEL: define void @test_omp_parallel_attrs() +llvm.func @test_omp_parallel_attrs() -> () attributes { + target_cpu = "x86-64", + target_features = #llvm.target_features<["+mmx", "+sse"]> +} { + // CHECK: call void{{.*}}@__kmpc_fork_call{{.*}}@[[OMP_OUTLINED_FN:.*]]) + omp.parallel { + omp.barrier + omp.terminator + } + + llvm.return +} + +// CHECK: define {{.*}} @[[OMP_OUTLINED_FN]]{{.*}} #[[ATTRS:[0-9]+]] +// CHECK: attributes #[[ATTRS]] = { +// CHECK-SAME: "target-cpu"="x86-64" +// CHECK-SAME: "target-features"="+mmx,+sse" + +// ----- + // CHECK-LABEL: 
define void @test_omp_parallel_3() llvm.func @test_omp_parallel_3() -> () { // CHECK: [[OMP_THREAD_3_1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @{{[0-9]+}}) @@ -2238,6 +2259,28 @@ llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // ----- +// CHECK-LABEL: define void @omp_task_attrs() +llvm.func @omp_task_attrs() -> () attributes { + target_cpu = "x86-64", + target_features = #llvm.target_features<["+mmx", "+sse"]> +} { + // CHECK: %[[task_data:.*]] = call {{.*}}@__kmpc_omp_task_alloc{{.*}}@[[outlined_fn:.*]]) + // CHECK: call {{.*}}@__kmpc_omp_task( + // CHECK-SAME: ptr %[[task_data]] + omp.task { + omp.terminator + } + + llvm.return +} + +// CHECK: define {{.*}} @[[outlined_fn]]{{.*}} #[[attrs:[0-9]+]] +// CHECK: attributes #[[attrs]] = { +// CHECK-SAME: "target-cpu"="x86-64" +// CHECK-SAME: "target-features"="+mmx,+sse" + +// ----- + // CHECK-LABEL: define void @omp_task_with_deps // CHECK-SAME: (ptr %[[zaddr:.+]]) // CHECK: %[[dep_arr_addr:.+]] = alloca [1 x %struct.kmp_dep_info], align 8 diff --git a/mlir/test/Target/LLVMIR/openmp-teams.mlir b/mlir/test/Target/LLVMIR/openmp-teams.mlir index a7e579d9db492..4690b51122beb 100644 --- a/mlir/test/Target/LLVMIR/openmp-teams.mlir +++ b/mlir/test/Target/LLVMIR/openmp-teams.mlir @@ -19,6 +19,31 @@ llvm.func @omp_teams_simple() { // ----- +llvm.func @foo() + +// CHECK-LABEL: @omp_teams_func_attrs +// CHECK: call void {{.*}} @__kmpc_fork_teams(ptr @{{.+}}, i32 0, ptr @[[OUTLINED_FN:.+]]) +llvm.func @omp_teams_func_attrs() attributes { + target_cpu = "x86-64", + target_features = #llvm.target_features<["+mmx", "+sse"]> +} { + omp.teams { + llvm.call @foo() : () -> () + omp.terminator + } + llvm.return +} + +// CHECK: define internal void @[[OUTLINED_FN]](ptr {{.+}}, ptr {{.+}}) +// CHECK-SAME: #[[ATTR:[0-9]+]] +// CHECK: call void @foo() + +// CHECK: attributes #[[ATTR]] = { +// CHECK-SAME: "target-cpu"="x86-64" +// CHECK-SAME: "target-features"="+mmx,+sse" + +// ----- + llvm.func @foo(i32) -> () // 
CHECK-LABEL: @omp_teams_shared_simple