Skip to content

Commit

Permalink
[SampleFDO] handle ProfileSampleAccurate when initializing function e…
Browse files Browse the repository at this point in the history
…ntry count

ProfileSampleAccurate is used to indicate that the profile exactly matches the
code to be optimized.

Previously, ProfileSampleAccurate was handled in ProfileSummaryInfo::isColdCallSite
and ProfileSummaryInfo::isColdBlock. A better solution is to initialize the function
entry count to 0 when ProfileSampleAccurate is true, so we don't have to handle
ProfileSampleAccurate in multiple places.

Differential Revision: https://reviews.llvm.org/D55660

llvm-svn: 349088
  • Loading branch information
wmi-11 committed Dec 13, 2018
1 parent 7b05666 commit 66c6c5a
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 98 deletions.
20 changes: 2 additions & 18 deletions llvm/lib/Analysis/ProfileSummaryInfo.cpp
Expand Up @@ -39,11 +39,6 @@ static cl::opt<int> ProfileSummaryCutoffCold(
cl::desc("A count is cold if it is below the minimum count"
" to reach this percentile of total counts."));

static cl::opt<bool> ProfileSampleAccurate(
"profile-sample-accurate", cl::Hidden, cl::init(false),
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
"callsite as cold. Otherwise, treat un-sampled callsites as if "
"we have no profile."));
static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
"profile-summary-huge-working-set-size-threshold", cl::Hidden,
cl::init(15000), cl::ZeroOrMore,
Expand Down Expand Up @@ -261,14 +256,7 @@ bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BF
bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB,
BlockFrequencyInfo *BFI) {
auto Count = BFI->getBlockProfileCount(BB);
if (Count)
return isColdCount(*Count);
if (!hasSampleProfile())
return false;

const Function *F = BB->getParent();
return ProfileSampleAccurate ||
(F && F->hasFnAttribute("profile-sample-accurate"));
return Count && isColdCount(*Count);
}

bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS,
Expand All @@ -285,11 +273,7 @@ bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,

// In SamplePGO, if the caller has been sampled, and there is no profile
// annotated on the callsite, we consider the callsite as cold.
// If there is no profile for the caller, and we know the profile is
// accurate, we consider the callsite as cold.
return (hasSampleProfile() &&
(CS.getCaller()->hasProfileData() || ProfileSampleAccurate ||
CS.getCaller()->hasFnAttribute("profile-sample-accurate")));
return hasSampleProfile() && CS.getCaller()->hasProfileData();
}

INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
Expand Down
22 changes: 18 additions & 4 deletions llvm/lib/Transforms/IPO/SampleProfile.cpp
Expand Up @@ -123,6 +123,12 @@ static cl::opt<bool> NoWarnSampleUnused(
cl::desc("Use this option to turn off/on warnings about function with "
"samples but without debug information to use those samples. "));

static cl::opt<bool> ProfileSampleAccurate(
"profile-sample-accurate", cl::Hidden, cl::init(false),
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
"callsite and function as having 0 samples. Otherwise, treat "
"un-sampled callsites and functions conservatively as unknown. "));

namespace {

using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
Expand Down Expand Up @@ -1604,10 +1610,18 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
}

bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
// Initialize the entry count to -1, which will be treated conservatively
// by getEntryCount as the same as unknown (None). If we have samples this
// will be overwritten in emitAnnotations.
F.setEntryCount(ProfileCount(-1, Function::PCT_Real));
// By default the entry count is initialized to -1, which will be treated
// conservatively by getEntryCount as the same as unknown (None). This is
// to avoid newly added code being treated as cold. If we have samples,
// this will be overwritten in emitAnnotations.
// If ProfileSampleAccurate is true or F has the profile-sample-accurate
// attribute, initialize the entry count to 0 so that unsampled callsites
// or functions will be treated as cold.
uint64_t initialEntryCount =
(ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
? 0
: -1;
F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
auto &FAM =
Expand Down
29 changes: 0 additions & 29 deletions llvm/test/Transforms/CodeGenPrepare/section-samplepgo.ll
@@ -1,19 +1,16 @@
; RUN: opt < %s -codegenprepare -S | FileCheck %s
; RUN: opt < %s -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE

target triple = "x86_64-pc-linux-gnu"

; This tests that hot/cold functions get correct section prefix assigned

; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
; ACCURATE: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
; The entry is hot
define void @hot_func() !prof !15 {
ret void
}

; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
; ACCURATE: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
; The sum of the 2 callsites is hot
define void @hot_call_func() !prof !16 {
call void @hot_func(), !prof !17
Expand All @@ -22,7 +19,6 @@ define void @hot_call_func() !prof !16 {
}

; CHECK-NOT: normal_func{{.*}}!section_prefix
; ACCURATE-NOT: normal_func{{.*}}!section_prefix
; The sum of all callsites is neither hot nor cold
define void @normal_func() !prof !16 {
call void @hot_func(), !prof !17
Expand All @@ -32,36 +28,12 @@ define void @normal_func() !prof !16 {
}

; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
; ACCURATE: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
; The entry and the callsite are both cold
define void @cold_func() !prof !16 {
call void @hot_func(), !prof !18
ret void
}


; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix
; The function not appearing in profile is neither hot nor cold
;
; ACCURATE: foo_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
; The function not appearing in profile is cold when -profile-sample-accurate
; is on
define void @foo_not_in_profile() !prof !19 {
call void @hot_func()
ret void
}

; CHECK: bar_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
; ACCURATE: bar_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
; The function not appearing in profile is cold when the func has
; profile-sample-accurate attribute
define void @bar_not_in_profile() #0 !prof !19 {
call void @hot_func()
ret void
}

attributes #0 = { "profile-sample-accurate" }

; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}
; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
!llvm.module.flags = !{!1}
Expand All @@ -83,4 +55,3 @@ attributes #0 = { "profile-sample-accurate" }
!16 = !{!"function_entry_count", i64 1}
!17 = !{!"branch_weights", i32 80}
!18 = !{!"branch_weights", i32 1}
!19 = !{!"function_entry_count", i64 -1}
47 changes: 0 additions & 47 deletions llvm/test/Transforms/Inline/inline-cold-callsite-samplepgo.ll

This file was deleted.

@@ -0,0 +1,31 @@
; For SamplePGO, if -profile-sample-accurate is specified, cold callsite
; heuristics should be honored if the caller has no profile.

; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -profile-sample-accurate -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s --check-prefix ACCURATE

declare void @extern()
define void @callee() {
call void @extern()
ret void
}

define void @caller(i32 %y1) {
; CHECK-LABEL: @caller
; CHECK-NOT: call void @callee
; ACCURATE-LABEL: @caller
; ACCURATE: call void @callee
call void @callee()
ret void
}

define void @caller_accurate(i32 %y1) #0 {
; CHECK-LABEL: @caller_accurate
; CHECK: call void @callee
; ACCURATE-LABEL: @caller_accurate
; ACCURATE: call void @callee
call void @callee()
ret void
}

attributes #0 = { "profile-sample-accurate" }
52 changes: 52 additions & 0 deletions llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
@@ -0,0 +1,52 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -S | FileCheck %s
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE

target triple = "x86_64-pc-linux-gnu"

; The test checks that a function without a profile gets the unlikely section
; prefix if -profile-sample-accurate is specified or the function has the
; profile-sample-accurate attribute.

declare void @hot_func()

; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix
; CHECK: foo_not_in_profile{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]]
; ACCURATE: foo_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
; The function not appearing in profile is cold when -profile-sample-accurate
; is on.
define void @foo_not_in_profile() {
call void @hot_func()
ret void
}

; CHECK: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
; ACCURATE: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
; The function not appearing in profile is cold when the func has
; profile-sample-accurate attribute.
define void @bar_not_in_profile() #0 {
call void @hot_func()
ret void
}

attributes #0 = { "profile-sample-accurate" }

; CHECK: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1}
; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
; ACCURATE: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"SampleProfile"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 1000}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}

0 comments on commit 66c6c5a

Please sign in to comment.