Skip to content

Commit

Permalink
[NewPM/Inliner] Reduce threshold for cold callsites in the non-PGO case
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D34312

llvm-svn: 306484
  • Loading branch information
Easwaran Raman committed Jun 27, 2017
1 parent 0a5b03b commit c5fa635
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 44 deletions.
31 changes: 30 additions & 1 deletion llvm/lib/Analysis/InlineCost.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ static cl::opt<int>
cl::ZeroOrMore,
cl::desc("Threshold for hot callsites "));

// Hidden tuning knob used by CallAnalyzer::isColdCallSite when no profile
// summary is available: a callsite is considered cold when the frequency of
// its block is strictly less than this percentage of the caller's entry block
// frequency. The value is used as the numerator of a BranchProbability with
// denominator 100; the default is 2 (i.e. 2%).
static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
Expand Down Expand Up @@ -172,6 +178,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Return true if size growth is allowed when inlining the callee at CS.
bool allowSizeGrowth(CallSite CS);

/// Return true if \p CS is a cold callsite.
bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);

// Custom analysis routines.
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);

Expand Down Expand Up @@ -631,6 +640,26 @@ bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
return true;
}

/// Decide whether the callsite \p CS should be treated as cold.
///
/// When a global profile summary exists, the answer is delegated entirely to
/// the profile summary info. Otherwise the decision is made locally from
/// \p CallerBFI by comparing the callsite's block frequency against a fraction
/// (ColdCallSiteRelFreq percent) of the caller's entry block frequency.
/// Returns false when neither source of information is available.
bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
  // A global profile summary, when present, is the sole authority.
  if (PSI->hasProfileSummary())
    return PSI->isColdCallSite(CS, CallerBFI);

  // Without block frequencies for the caller there is nothing to compare.
  if (CallerBFI == nullptr)
    return false;

  // No profile summary: judge coldness relative to the caller's entry block.
  // The scaled entry frequency is recomputed on every query; caching it would
  // add complexity that is only justified if this shows up in profiles.
  const BranchProbability ColdFraction(ColdCallSiteRelFreq, 100);
  auto *SiteBlock = CS.getInstruction()->getParent();
  auto *EntryBlock = &(CS.getCaller()->getEntryBlock());
  auto SiteFreq = CallerBFI->getBlockFreq(SiteBlock);
  auto EntryFreq = CallerBFI->getBlockFreq(EntryBlock);
  auto ColdCutoff = EntryFreq * ColdFraction;
  return SiteFreq < ColdCutoff;
}

void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// If no size growth is allowed for this inlining, set Threshold to 0.
if (!allowSizeGrowth(CS)) {
Expand Down Expand Up @@ -676,7 +705,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
if (PSI->isHotCallSite(CS, CallerBFI)) {
DEBUG(dbgs() << "Hot callsite.\n");
Threshold = Params.HotCallSiteThreshold.getValue();
} else if (PSI->isColdCallSite(CS, CallerBFI)) {
} else if (isColdCallSite(CS, CallerBFI)) {
DEBUG(dbgs() << "Cold callsite.\n");
Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
}
Expand Down
54 changes: 54 additions & 0 deletions llvm/test/Transforms/Inline/inline-cold-callsite-pgo.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s

; This tests that a cold callsite gets the inline-cold-callsite-threshold
; and does not get inlined. Another callsite to an identical callee that
; is not cold gets inlined because cost is below the inline-threshold.

; Small callee used by both callsites in @caller: adds 3 to %x in three steps,
; calls the external function @extern once, and returns the sum. It carries
; the function entry count metadata !21 (entry count 200).
define i32 @callee1(i32 %x) !prof !21 {
%x1 = add i32 %x, 1
%x2 = add i32 %x1, 1
%x3 = add i32 %x2, 1
call void @extern()
ret i32 %x3
}

; Caller with two callsites to @callee1, one on each side of a branch whose
; weights (!0) are 200 for cond_true and 1 for cond_false. The FileCheck
; directives below expect the cond_true call to disappear (its return value
; becomes the inlined %x3.i) and the cond_false call to remain in place.
define i32 @caller(i32 %n) !prof !22 {
; CHECK-LABEL: @caller(
%cond = icmp sle i32 %n, 100
br i1 %cond, label %cond_true, label %cond_false, !prof !0

; Heavily weighted side of the branch: the call is expected to be inlined.
cond_true:
; CHECK-LABEL: cond_true:
; CHECK-NOT: call i32 @callee1
; CHECK: ret i32 %x3.i
%i = call i32 @callee1(i32 %n)
ret i32 %i
; Lightly weighted side of the branch: the call is expected to be kept.
cond_false:
; CHECK-LABEL: cond_false:
; CHECK: call i32 @callee1
; CHECK: ret i32 %j
%j = call i32 @callee1(i32 %n)
ret i32 %j
}
declare void @extern()

!0 = !{!"branch_weights", i32 200, i32 1}

!llvm.module.flags = !{!1}
!21 = !{!"function_entry_count", i64 200}
!22 = !{!"function_entry_count", i64 200}

!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 1000}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 1000, i32 1}
!13 = !{i32 999000, i64 1000, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
79 changes: 36 additions & 43 deletions llvm/test/Transforms/Inline/inline-cold-callsite.ll
Original file line number Diff line number Diff line change
@@ -1,54 +1,47 @@

; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s

; This tests that a cold callsite gets the inline-cold-callsite-threshold
; and does not get inlined. Another callsite to an identical callee that
; is not cold gets inlined because cost is below the inline-threshold.

define i32 @callee1(i32 %x) !prof !21 {
%x1 = add i32 %x, 1
%x2 = add i32 %x1, 1
%x3 = add i32 %x2, 1
define void @callee() {
call void @extern()
call void @extern()
ret i32 %x3
ret void
}

define i32 @caller(i32 %n) !prof !22 {
; CHECK-LABEL: @caller(
%cond = icmp sle i32 %n, 100
br i1 %cond, label %cond_true, label %cond_false, !prof !0

cond_true:
; CHECK-LABEL: cond_true:
; CHECK-NOT: call i32 @callee1
; CHECK: ret i32 %x3.i
%i = call i32 @callee1(i32 %n)
ret i32 %i
cond_false:
; CHECK-LABEL: cond_false:
; CHECK: call i32 @callee1
; CHECK: ret i32 %j
%j = call i32 @callee1(i32 %n)
ret i32 %j
}
declare void @extern()
declare i1 @ext(i32)

; CHECK-LABEL: caller
; Caller without any function entry count metadata. It loops from 0 up to %n;
; every iteration calls @ext and then @callee in for.body, and additionally
; calls @callee in the block named cold when @ext returns true. The branch
; into cold carries the weights in !0 (presumably the 1 : 2000 entry in this
; file, i.e. cold is rarely taken -- confirm against the final test file).
; The FileCheck directives expect the for.body call to @callee to be replaced
; by its body (a call to @extern) and the call in cold to be kept.
define i32 @caller(i32 %n) {
entry:
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.cond.cleanup

; Loop exit: always returns 0.
for.cond.cleanup:
ret i32 0

; Loop body: %i.05 is the induction variable, starting at 0 from entry.
for.body:
%i.05 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
; CHECK: %call = tail call
%call = tail call zeroext i1 @ext(i32 %i.05)
; CHECK-NOT: call void @callee
; CHECK-NEXT: call void @extern
call void @callee()
br i1 %call, label %cold, label %for.inc, !prof !0

; Rarely taken block: this callsite is expected to stay un-inlined.
cold:
; CHECK: call void @callee
call void @callee()
br label %for.inc

; Loop latch: increment the counter and exit once it equals %n.
for.inc:
%inc = add nuw nsw i32 %i.05, 1
%exitcond = icmp eq i32 %inc, %n
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}


!0 = !{!"branch_weights", i32 200, i32 1}

!llvm.module.flags = !{!1}
!21 = !{!"function_entry_count", i64 200}
!22 = !{!"function_entry_count", i64 200}

!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 1000}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 1000, i32 1}
!13 = !{i32 999000, i64 1000, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
!0 = !{!"branch_weights", i32 1, i32 2000}

0 comments on commit c5fa635

Please sign in to comment.