Skip to content

Commit

Permalink
[NewPM][Inliner] Make inlined calls to functions in same SCC as calle…
Browse files Browse the repository at this point in the history
…e exponentially expensive

Introduce a new attribute "function-inline-cost-multiplier" which
multiplies the inline cost of a call site (or all calls to a callee) by
the multiplier.

When processing the list of calls created by inlining, check each call
to see if the new call's callee is in the same SCC as the original
callee. If so, set the "function-inline-cost-multiplier" attribute of
the new call site to double the original call site's attribute value.
This does not happen when the original call site is intra-SCC.

This is an alternative to D120584, which marks the call sites as
noinline.

Hopefully fixes PR45253.

Reviewed By: davidxl

Differential Revision: https://reviews.llvm.org/D121084
  • Loading branch information
aeubanks committed Mar 8, 2022
1 parent b204ce0 commit 53e5e58
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 8 deletions.
5 changes: 5 additions & 0 deletions llvm/include/llvm/Analysis/InlineCost.h
Expand Up @@ -55,6 +55,9 @@ const unsigned TotalAllocaSizeRecursiveCaller = 1024;
/// Do not inline dynamic allocas that have been constant propagated to be
/// static allocas above this amount in bytes.
const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;

/// Name of the string attribute whose integer value is multiplied onto the
/// computed inline cost of a call site. The inliner also sets this attribute
/// on call sites it creates while inlining.
const char FunctionInlineCostMultiplierAttributeName[] =
"function-inline-cost-multiplier";
} // namespace InlineConstants

// The cost-benefit pair computed by cost-benefit analysis.
Expand Down Expand Up @@ -220,6 +223,8 @@ struct InlineParams {
Optional<bool> AllowRecursiveCall = false;
};

/// Look up the attribute named \p AttrKind for the call \p CB and parse its
/// string value as a base-10 integer. Returns None when the value cannot be
/// parsed as an integer.
Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind);

/// Generate the parameters to tune the inline cost analysis based only on the
/// commandline options.
InlineParams getInlineParams();
Expand Down
13 changes: 11 additions & 2 deletions llvm/lib/Analysis/InlineCost.cpp
Expand Up @@ -133,8 +133,6 @@ static cl::opt<bool> DisableGEPConstOperand(
cl::desc("Disables evaluation of GetElementPtr with constant operands"));

namespace {
class InlineCostCallAnalyzer;

/// This function behaves more like CallBase::hasFnAttr: when it looks for the
/// requested attribute, it checks both the call instruction and the called
/// function (if it's available and operand bundles don't prohibit that).
Expand All @@ -151,14 +149,20 @@ Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {

return {};
}
} // namespace

namespace llvm {
/// Fetch the attribute \p AttrKind for \p CB through getFnAttr and interpret
/// its string value as a decimal integer.
///
/// \returns the parsed integer, or None if the string value could not be
/// parsed as a base-10 integer.
Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
  Attribute FoundAttr = getFnAttr(CB, AttrKind);
  StringRef ValueText = FoundAttr.getValueAsString();

  int Parsed;
  // getAsInteger signals a parse failure by returning true.
  const bool ParseFailed = ValueText.getAsInteger(10, Parsed);
  if (ParseFailed)
    return None;

  return Parsed;
}
} // namespace llvm

namespace {
class InlineCostCallAnalyzer;

// This struct is used to store information about inline cost of a
// particular instruction
Expand Down Expand Up @@ -904,6 +908,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
Cost = *AttrCost;

if (Optional<int> AttrCostMult = getStringFnAttrAsInt(
CandidateCall,
InlineConstants::FunctionInlineCostMultiplierAttributeName))
Cost *= *AttrCostMult;

if (Optional<int> AttrThreshold =
getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
Threshold = *AttrThreshold;
Expand Down
45 changes: 41 additions & 4 deletions llvm/lib/Transforms/IPO/Inliner.cpp
Expand Up @@ -23,6 +23,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
Expand Down Expand Up @@ -93,6 +94,18 @@ static cl::opt<bool>
DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
cl::init(false), cl::Hidden);

/// Hidden command-line option (default 2) giving the factor by which the
/// inliner scales the "function-inline-cost-multiplier" attribute it places on
/// newly exposed call sites; the new attribute value is the original call
/// site's multiplier times this option. See the description string below for
/// when it applies.
static cl::opt<int> IntraSCCCostMultiplier(
"intra-scc-cost-multiplier", cl::init(2), cl::Hidden,
cl::desc(
"Cost multiplier to multiply onto inlined call sites where the "
"new call was previously an intra-SCC call (not relevant when the "
"original call was already intra-SCC). This can accumulate over "
"multiple inlinings (e.g. if a call site already had a cost "
"multiplier and one of its inlined calls was also subject to "
"this, the inlined call would have the original multiplier "
"multiplied by intra-scc-cost-multiplier). This is to prevent tons of "
"inlining through a child SCC which can cause terrible compile times"));

/// A flag for test, so we can print the content of the advisor when running it
/// as part of the default (e.g. -O3) pipeline.
static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing",
Expand Down Expand Up @@ -877,8 +890,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// trigger infinite inlining, much like is prevented within the inliner
// itself by the InlineHistory above, but spread across CGSCC iterations
// and thus hidden from the full inline history.
if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
UR.InlinedInternalEdges.count({&N, C})) {
LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee));
if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {
LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
"previously split out of this SCC by inlining: "
<< F.getName() << " -> " << Callee.getName() << "\n");
Expand All @@ -898,6 +911,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
}

int CBCostMult =
getStringFnAttrAsInt(
*CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
.getValueOr(1);

// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
InlineFunctionInfo IFI(
Expand Down Expand Up @@ -936,9 +954,28 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (tryPromoteCall(*ICB))
NewCallee = ICB->getCalledFunction();
}
if (NewCallee)
if (!NewCallee->isDeclaration())
if (NewCallee) {
if (!NewCallee->isDeclaration()) {
Calls->push({ICB, NewHistoryID});
// Continually inlining through an SCC can result in huge compile
// times and bloated code since we arbitrarily stop at some point
// when the inliner decides it's not profitable to inline anymore.
// We attempt to mitigate this by making these calls exponentially
// more expensive.
// This doesn't apply to calls in the same SCC since if we do
// inline through the SCC the function will end up being
// self-recursive which the inliner bails out on, and inlining
// within an SCC is necessary for performance.
if (CalleeSCC != C &&
CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) {
Attribute NewCBCostMult = Attribute::get(
M.getContext(),
InlineConstants::FunctionInlineCostMultiplierAttributeName,
itostr(CBCostMult * IntraSCCCostMultiplier));
ICB->addFnAttr(NewCBCostMult);
}
}
}
}
}

Expand Down
8 changes: 6 additions & 2 deletions llvm/test/Transforms/Inline/inline-cost-attributes.ll
Expand Up @@ -11,8 +11,9 @@ entry:

define void @fn2() "function-inline-threshold"="41" {
; INLINER-LABEL: Inlining calls in: fn2
; INLINER-NEXT: Function size: 6
; INLINER-NEXT: Function size: 7
; INLINER-NEXT: NOT Inlining (cost=321, threshold=123), Call: call void @fn1()
; INLINER-NEXT: NOT Inlining (cost=963, threshold=123), Call: call void @fn1()
; INLINER-NEXT: NOT Inlining (cost=321, threshold=321), Call: call void @fn1()
; INLINER-NEXT: NOT Inlining (cost=197, threshold=123), Call: call void @fn1()
; INLINER-NEXT: Inlining (cost=197, threshold=321), Call: call void @fn1()
Expand All @@ -23,6 +24,8 @@ define void @fn2() "function-inline-threshold"="41" {
; COST-NEXT: call void @extern()
; COST-NEXT: cost delta = 132, threshold delta = 193
; COST-NEXT: call void @fn1()
; COST-NEXT: cost delta = 132, threshold delta = 193
; COST-NEXT: call void @fn1()
; COST-NEXT: cost delta = 0
; COST-NEXT: call void @fn1()
; COST-NEXT: cost delta = 271, threshold delta = 17
Expand All @@ -33,6 +36,7 @@ define void @fn2() "function-inline-threshold"="41" {
entry:
call void @extern()
call void @fn1() "call-inline-cost"="132" "call-threshold-bonus"="193"
call void @fn1() "call-inline-cost"="132" "call-threshold-bonus"="193" "function-inline-cost-multiplier"="3"
call void @fn1() "call-inline-cost"="0" "function-inline-threshold"="321"
call void @fn1() "call-threshold-bonus"="17" "function-inline-cost"="197"
call void @fn1() "call-inline-cost"="473" "function-inline-cost"="197" "function-inline-threshold"="321"
Expand All @@ -44,7 +48,7 @@ define void @fn3() {
; INLINER-NEXT: Function size: 3
; INLINER-NEXT: Inlining (cost=386, threshold=849), Call: call void @fn1()
; INLINER-NEXT: Size after inlining: 2
; INLINER-NEXT: NOT Inlining (cost=403, threshold=41), Call: call void @fn2()
; INLINER-NEXT: NOT Inlining (cost=535, threshold=41), Call: call void @fn2()

entry:
call void @fn1() "function-inline-cost"="386" "function-inline-threshold"="849"
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/Transforms/Inline/mut-rec-scc-2.ll
@@ -0,0 +1,19 @@
; RUN: opt -S -passes='inline' < %s | FileCheck %s

; Make sure we don't add the "function-inline-cost-multiplier" attribute to calls within the same SCC as the original function.
; CHECK-NOT: function-inline-cost-multiplier

define void @samescc1() {
call void @samescc2()
ret void
}

define void @samescc2() {
call void @samescc3()
ret void
}

define void @samescc3() {
call void @samescc1()
ret void
}
75 changes: 75 additions & 0 deletions llvm/test/Transforms/Inline/mut-rec-scc.ll
@@ -0,0 +1,75 @@
; RUN: opt -S -passes='cgscc(inline,instcombine)' < %s | FileCheck %s
; RUN: opt -S -intra-scc-cost-multiplier=3 -passes='cgscc(inline,instcombine)' < %s | FileCheck %s --check-prefix=THREE

; We use call to a dummy function to avoid inlining test1 into test2 or vice
; versa, such that we aren't left with a trivial cycle, as trivial cycles are
; special-cased to never be inlined.
; However, InstCombine will eliminate these calls after inlining, and thus
; make the functions eligible for inlining in their callers.
declare void @dummy() readnone nounwind willreturn

define void @test1() {
; CHECK-LABEL: define void @test1(
; CHECK-NEXT: call void @test2()
; CHECK-NEXT: call void @test2()
; CHECK-NEXT: ret void
;
call void @test2()
call void @test2()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
ret void
}

define void @test2() {
; CHECK-LABEL: define void @test2(
; CHECK-NEXT: call void @test1()
; CHECK-NEXT: call void @test1()
; CHECK-NEXT: ret void
;
call void @test1()
call void @test1()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
ret void
}

; The inlined call sites should have the "function-inline-cost-multiplier" call site attribute.
; This test is a bit fragile: the exact number of inlinings that happen depends on thresholds.
define void @test3() {
; CHECK-LABEL: define void @test3(
; CHECK-NEXT: call void @test2() #[[COSTMULT:[0-9]+]]
; CHECK-NEXT: call void @test2() #[[COSTMULT]]
; CHECK-NEXT: call void @test2() #[[COSTMULT]]
; CHECK-NEXT: call void @test2() #[[COSTMULT]]
; CHECK-NEXT: call void @test2() #[[COSTMULT]]
; CHECK-NEXT: call void @test2() #[[COSTMULT]]
; CHECK-NEXT: call void @test2() #[[COSTMULT]]
; CHECK-NEXT: call void @test2() #[[COSTMULT]]
; CHECK-NEXT: ret void
;
call void @test2()
call void @test2()
ret void
}

; CHECK: [[COSTMULT]] = { "function-inline-cost-multiplier"="4" }
; THREE: "function-inline-cost-multiplier"="9"

0 comments on commit 53e5e58

Please sign in to comment.