Skip to content

Commit

Permalink
[OpenMP][FIX] Enforce a function boundary for a new data environment
Browse files Browse the repository at this point in the history
Whenever we enter a new OpenMP data environment we want to enter a
function to simplify reasoning. Later we probably want to remove the
entire specialization wrt. the if clause and pass the result to the
runtime, for now this should fix PR48686.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D94315
  • Loading branch information
jdoerfert committed Jan 26, 2021
1 parent f19849a commit bd75628
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 15 deletions.
8 changes: 8 additions & 0 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Expand Up @@ -2098,6 +2098,14 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

// Ensure we do not inline the function. This is trivially true for the ones
// passed to __kmpc_fork_call but the ones calles in serialized regions
// could be inlined. This is not a perfect but it is closer to the invariant
// we want, namely, every data environment starts with a new function.
// TODO: We should pass the if condition to the runtime function and do the
// handling there. Much cleaner code.
OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

// __kmpc_end_serialized_parallel(&Loc, GTid);
Expand Down
23 changes: 8 additions & 15 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Expand Up @@ -1576,11 +1576,6 @@ llvm::Function *CGOpenMPRuntimeGPU::emitParallelOutlinedFunction(
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
if (CGM.getLangOpts().Optimize) {
OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
}
IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
IsInTTDRegion = PrevIsInTTDRegion;
if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD &&
Expand Down Expand Up @@ -1698,11 +1693,6 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
CodeGen.setAction(Action);
llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
if (CGM.getLangOpts().Optimize) {
OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
}

return OutlinedFun;
}
Expand Down Expand Up @@ -2102,6 +2092,14 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
// Force inline this outlined function at its call site.
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);

// Ensure we do not inline the function. This is trivially true for the ones
// passed to __kmpc_fork_call but the ones calles in serialized regions
// could be inlined. This is not a perfect but it is closer to the invariant
// we want, namely, every data environment starts with a new function.
// TODO: We should pass the if condition to the runtime function and do the
// handling there. Much cleaner code.
cast<llvm::Function>(OutlinedFn)->addFnAttr(llvm::Attribute::NoInline);

Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
/*Name=*/".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
Expand Down Expand Up @@ -3134,11 +3132,6 @@ static llvm::Function *emitShuffleAndReduceFunction(
"_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
Fn->setDoesNotRecurse();
if (CGM.getLangOpts().Optimize) {
Fn->removeFnAttr(llvm::Attribute::NoInline);
Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
}

CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
Expand Down
13 changes: 13 additions & 0 deletions clang/test/OpenMP/parallel_if_codegen.cpp
Expand Up @@ -7,6 +7,7 @@
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}

// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple %itanium_abi_triple -emit-llvm %s -disable-O0-optnone -o - | FileCheck %s --check-prefix=WOPT
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
Expand Down Expand Up @@ -96,14 +97,20 @@ int main() {
return tmain(Arg);
}

// WOPT: noinline
// WOPT-NOT: optnone
// CHECK: define internal {{.*}}void [[CAP_FN4]]
// CHECK: call {{.*}}void @{{.+}}fn4
// CHECK: ret void

// WOPT: noinline
// WOPT-NOT: optnone
// CHECK: define internal {{.*}}void [[CAP_FN5]]
// CHECK: call {{.*}}void @{{.+}}fn5
// CHECK: ret void

// WOPT: noinline
// WOPT-NOT: optnone
// CHECK: define internal {{.*}}void [[CAP_FN6]]
// CHECK: call {{.*}}void @{{.+}}fn6
// CHECK: ret void
Expand All @@ -129,14 +136,20 @@ int main() {
// CHECK: br label %[[OMP_END]]
// CHECK: [[OMP_END]]

// WOPT: noinline
// WOPT-NOT: optnone
// CHECK: define internal {{.*}}void [[CAP_FN1]]
// CHECK: call {{.*}}void @{{.+}}fn1
// CHECK: ret void

// WOPT: noinline
// WOPT-NOT: optnone
// CHECK: define internal {{.*}}void [[CAP_FN2]]
// CHECK: call {{.*}}void @{{.+}}fn2
// CHECK: ret void

// WOPT: noinline
// WOPT-NOT: optnone
// CHECK: define internal {{.*}}void [[CAP_FN3]]
// CHECK: call {{.*}}void @{{.+}}fn3
// CHECK: ret void
Expand Down

0 comments on commit bd75628

Please sign in to comment.