Skip to content

Commit

Permalink
[OPENMP][NVPTX]Mark more functions as always_inline for better
Browse files Browse the repository at this point in the history
performance.

Internally generated functions must be marked as always_inlines in most
cases. Patch marks some extra reduction function + outlined parallel
functions as always_inline for better performance, but only if the
optimization is requested.

llvm-svn: 361269
  • Loading branch information
alexey-bataev committed May 21, 2019
1 parent 78c3f58 commit 8c5555c
Show file tree
Hide file tree
Showing 23 changed files with 305 additions and 203 deletions.
16 changes: 10 additions & 6 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Expand Up @@ -1274,9 +1274,11 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
Name, &CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
Fn->removeFnAttr(llvm::Attribute::NoInline);
Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
if (CGM.getLangOpts().Optimize) {
Fn->removeFnAttr(llvm::Attribute::NoInline);
Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
}
CodeGenFunction CGF(CGM);
// Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
// Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
Expand Down Expand Up @@ -4671,9 +4673,11 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
&CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
TaskPrivatesMapFnInfo);
TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
if (CGM.getLangOpts().Optimize) {
TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
}
CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
TaskPrivatesMapFnInfo, Args, Loc, Loc);
Expand Down
19 changes: 16 additions & 3 deletions clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
Expand Up @@ -1929,6 +1929,11 @@ llvm::Function *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
if (CGM.getLangOpts().Optimize) {
OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
}
IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
IsInTTDRegion = PrevIsInTTDRegion;
if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
Expand Down Expand Up @@ -2045,9 +2050,11 @@ llvm::Function *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
CodeGen.setAction(Action);
llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
if (CGM.getLangOpts().Optimize) {
OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
}

return OutlinedFun;
}
Expand Down Expand Up @@ -3422,6 +3429,12 @@ static llvm::Function *emitShuffleAndReduceFunction(
"_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
Fn->setDoesNotRecurse();
if (CGM.getLangOpts().Optimize) {
Fn->removeFnAttr(llvm::Attribute::NoInline);
Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
}

CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

Expand Down
2 changes: 1 addition & 1 deletion clang/test/OpenMP/declare_target_codegen_globalization.cpp
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s
// expected-no-diagnostics

int foo(int &a) { return a; }
Expand Down

0 comments on commit 8c5555c

Please sign in to comment.