-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CSSPGO] Fix the issue of missing callee profile matches #85715
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Lei Wang (wlei-llvm) ChangesTwo fixes related to the callee/inlinee profile:
More context: before we run matchings for all imported functions even checksums are matched, however, after we fix 1), we got a regression, it's likely due to the matching is not no-op for checksum matched function, so we want to make it consistent to only run matching for checksum mismatched (imported)functions. Since the metadata(pseudo_probe_desc) are dropped for imported function, we leverage the function attribute mechanism and add a new function attribute( Full diff: https://github.com/llvm/llvm-project/pull/85715.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index ffc26f1a0a972d..877ec56cb26fe8 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -438,6 +438,7 @@ class SampleProfileMatcher {
Module &M;
SampleProfileReader &Reader;
const PseudoProbeManager *ProbeManager;
+ const ThinOrFullLTOPhase LTOPhase;
SampleProfileMap FlattenedProfiles;
// For each function, the matcher generates a map, of which each entry is a
// mapping from the source location of current build to the source location in
@@ -489,8 +490,9 @@ class SampleProfileMatcher {
public:
SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
- const PseudoProbeManager *ProbeManager)
- : M(M), Reader(Reader), ProbeManager(ProbeManager){};
+ const PseudoProbeManager *ProbeManager,
+ ThinOrFullLTOPhase LTOPhase)
+ : M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase){};
void runOnModule();
void clearMatchingData() {
// Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
@@ -506,7 +508,7 @@ class SampleProfileMatcher {
return &It->second;
return nullptr;
}
- void runOnFunction(const Function &F);
+ void runOnFunction(Function &F);
void findIRAnchors(const Function &F,
std::map<LineLocation, StringRef> &IRAnchors);
void findProfileAnchors(
@@ -2177,8 +2179,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
if (ReportProfileStaleness || PersistProfileStaleness ||
SalvageStaleProfile) {
- MatchingManager =
- std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
+ MatchingManager = std::make_unique<SampleProfileMatcher>(
+ M, *Reader, ProbeManager.get(), LTOPhase);
}
return true;
@@ -2383,7 +2385,7 @@ void SampleProfileMatcher::runStaleProfileMatching(
}
}
-void SampleProfileMatcher::runOnFunction(const Function &F) {
+void SampleProfileMatcher::runOnFunction(Function &F) {
// We need to use flattened function samples for matching.
// Unlike IR, which includes all callsites from the source code, the callsites
// in profile only show up when they are hit by samples, i,e. the profile
@@ -2410,17 +2412,26 @@ void SampleProfileMatcher::runOnFunction(const Function &F) {
// Run profile matching for checksum mismatched profile, currently only
// support for pseudo-probe.
- if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
- !ProbeManager->profileIsValid(F, *FSFlattened)) {
- // The matching result will be saved to IRToProfileLocationMap, create a new
- // map for each function.
- auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
- runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
- IRToProfileLocationMap);
- // Find and update callsite match states after matching.
- if (ReportProfileStaleness || PersistProfileStaleness)
- recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
- &IRToProfileLocationMap);
+ if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased) {
+ const auto *Desc = ProbeManager->getDesc(F);
+ // For imported functions, the checksum metadata(pseudo_probe_desc) are
+ // dropped, so we leverage function attribute(profile-checksum-mismatch) to
+ // transfer the info: add the attribute during pre-link phase and check it
+ // during post-link phase.
+ if ((Desc && ProbeManager->profileIsHashMismatched(*Desc, *FSFlattened)) ||
+ F.hasFnAttribute("profile-checksum-mismatch")) {
+ if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
+ F.addFnAttr("profile-checksum-mismatch");
+ // The matching result will be saved to IRToProfileLocationMap, create a
+ // new map for each function.
+ auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
+ runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
+ IRToProfileLocationMap);
+ // Find and update callsite match states after matching.
+ if (ReportProfileStaleness || PersistProfileStaleness)
+ recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
+ &IRToProfileLocationMap);
+ }
}
}
@@ -2689,8 +2700,9 @@ void SampleProfileMatcher::distributeIRToProfileLocationMap(
FS.setIRToProfileLocationMap(&(ProfileMappings->second));
}
- for (auto &Inlinees : FS.getCallsiteSamples()) {
- for (auto FS : Inlinees.second) {
+ for (auto &Callees :
+ const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
+ for (auto &FS : Callees.second) {
distributeIRToProfileLocationMap(FS.second);
}
}
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-callee-profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-callee-profile-mismatch.prof
new file mode 100644
index 00000000000000..ff8456486ca610
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-callee-profile-mismatch.prof
@@ -0,0 +1,13 @@
+main:252:0
+ 1: 0
+ 2: 50
+ 5: 50
+ 7: bar:102
+ 1: 51
+ 2: baz:51
+ 1: 51
+ !CFGChecksum: 4294967295
+ !Attributes: 3
+ !CFGChecksum: 281479271677951
+ !Attributes: 2
+ !CFGChecksum: 281582081721716
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-profile-checksum-mismatch-attr.ll b/llvm/test/Transforms/SampleProfile/csspgo-profile-checksum-mismatch-attr.ll
new file mode 100644
index 00000000000000..df56b55dcdf3c0
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/csspgo-profile-checksum-mismatch-attr.ll
@@ -0,0 +1,67 @@
+; REQUIRES: x86_64-linux
+; REQUIRES: asserts
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/pseudo-probe-callee-profile-mismatch.prof -pass-remarks=inline -S -o %t 2>&1 | FileCheck %s --check-prefix=INLINE
+; RUN: FileCheck %s < %t
+; RUN: FileCheck %s < %t --check-prefix=MERGE
+
+
+; Make sure bar is inlined into main for attr merging verification.
+; INLINE: 'bar' inlined into 'main'
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @baz() #0 {
+entry:
+ ret i32 0
+}
+
+define i32 @bar() #0 !dbg !11 {
+; CHECK: define {{.*}} @bar() {{.*}} #[[#BAR_ATTR:]] !
+entry:
+ %call = call i32 @baz()
+ ret i32 0
+}
+
+define i32 @main() #0 {
+; MERGE: define {{.*}} @main() {{.*}} #[[#MAIN_ATTR:]] !
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.cond, %entry
+ %call = call i32 @bar(), !dbg !14
+ br label %for.cond
+}
+
+; CHECK: attributes #[[#BAR_ATTR]] = {{{.*}} "profile-checksum-mismatch" {{.*}}}
+
+; Verify the attribute is not merged into the caller.
+; MERGE-NOT: attributes #[[#MAIN_ATTR]] = {{{.*}} "profile-checksum-mismatch" {{.*}}}
+
+attributes #0 = { "use-sample-profile" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7}
+!llvm.pseudo_probe_desc = !{!8, !9, !10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "test.c", directory: "/home", checksumkind: CSK_MD5, checksum: "0df0c950a93a603a7d13f0a9d4623642")
+!2 = !{!3}
+!3 = !DIGlobalVariableExpression(var: !4, expr: !DIExpression())
+!4 = distinct !DIGlobalVariable(name: "x", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true)
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{i64 7546896869197086323, i64 4294967295, !"baz"}
+!9 = !{i64 -2012135647395072713, i64 281530612780802, !"bar"}
+!10 = !{i64 -2624081020897602054, i64 281582081721716, !"main"}
+!11 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 5, type: !12, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13)
+!12 = distinct !DISubroutineType(types: !13)
+!13 = !{}
+!14 = !DILocation(line: 15, column: 10, scope: !15)
+!15 = !DILexicalBlockFile(scope: !16, file: !1, discriminator: 186646591)
+!16 = distinct !DILexicalBlock(scope: !17, file: !1, line: 14, column: 40)
+!17 = distinct !DILexicalBlock(scope: !18, file: !1, line: 14, column: 3)
+!18 = distinct !DILexicalBlock(scope: !19, file: !1, line: 14, column: 3)
+!19 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 12, type: !20, scopeLine: 13, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13)
+!20 = !DISubroutineType(types: !13)
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll
new file mode 100644
index 00000000000000..19bc26f131d7b3
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll
@@ -0,0 +1,59 @@
+; REQUIRES: x86_64-linux
+; REQUIRES: asserts
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-callee-profile-mismatch.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-impl -pass-remarks=inline 2>&1 | FileCheck %s
+
+
+; CHECK: Run stale profile matching for bar
+; CHECK: Callsite with callee:baz is matched from 4 to 2
+; CHECK: 'baz' inlined into 'main' to match profiling context with (cost=always): preinliner at callsite bar:3:8.4 @ main:3:10.7
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+ %1 = call i32 @bar(), !dbg !13
+ ret i32 0
+}
+
+define available_externally i32 @bar() #1 !dbg !21 {
+ %1 = call i32 @baz(), !dbg !23
+ ret i32 0
+}
+
+define available_externally i32 @baz() #0 !dbg !25 {
+ ret i32 0
+}
+
+attributes #0 = { "use-sample-profile" }
+attributes #1 = { "profile-checksum-mismatch" "use-sample-profile" }
+
+!llvm.dbg.cu = !{!0, !7, !9}
+!llvm.module.flags = !{!11}
+!llvm.pseudo_probe_desc = !{!12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "test.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "7220f1a2d70ff869f1a6ab7958e3c393")
+!2 = !{!3}
+!3 = !DIGlobalVariableExpression(var: !4, expr: !DIExpression())
+!4 = distinct !DIGlobalVariable(name: "x", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true)
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = distinct !DICompileUnit(language: DW_LANG_C11, file: !8, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!8 = !DIFile(filename: "test1.v1.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "76696bd6bfe16a9f227fe03cfdb6a82c")
+!9 = distinct !DICompileUnit(language: DW_LANG_C11, file: !10, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!10 = !DIFile(filename: "test2.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "553093afc026f9c73562eb3b0c5b7532")
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = !{i64 -2624081020897602054, i64 281582081721716, !"main"}
+!13 = !DILocation(line: 8, column: 10, scope: !14)
+!14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 186646591)
+!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 7, column: 40)
+!16 = distinct !DILexicalBlock(scope: !17, file: !1, line: 7, column: 3)
+!17 = distinct !DILexicalBlock(scope: !18, file: !1, line: 7, column: 3)
+!18 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !19, scopeLine: 6, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !20)
+!19 = distinct !DISubroutineType(types: !20)
+!20 = !{}
+!21 = distinct !DISubprogram(name: "bar", scope: !8, file: !8, line: 3, type: !22, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !7, retainedNodes: !20)
+!22 = !DISubroutineType(types: !20)
+!23 = !DILocation(line: 6, column: 8, scope: !24)
+!24 = !DILexicalBlockFile(scope: !21, file: !8, discriminator: 186646567)
+!25 = distinct !DISubprogram(name: "baz", scope: !10, file: !10, line: 1, type: !22, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !9, retainedNodes: !20)
|
The patch is good in general, but I think this problem is still worth fixing separately? |
// transfer the info: add the attribute during pre-link phase and check it | ||
// during post-link phase. | ||
if ((Desc && ProbeManager->profileIsHashMismatched(*Desc, *FSFlattened)) || | ||
F.hasFnAttribute("profile-checksum-mismatch")) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should the attribute check also be hidden behind profileIsValid
?
We call profileIsValid
from emitAnnotations
too for statistics.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should the attribute check also be hidden behind
profileIsValid
?We call
profileIsValid
fromemitAnnotations
too for statistics.
Good catch! Done in the new commit:
- rewrite
profileIsValid
to include the attribute check. - fix the wrong statistics(NumMatchedProfile, NumMismatchedProfile)
Thanks!
Yeah, I have some pending patches to mitigate this issue(use a diff alg), TBH, I'm not sure if that would completely solve this, there are some challenges, such as lack of the anchor in the profile, misleading indirect call, incorrect profile merging.. Or I'm thinking maybe we can add a defensive check before the matching: if all/most of(tuned by a flag) the profile anchors are matched to the IR, even the checksum is mismatched, we still don't run the stale matching. we can easily get those info from the Any thoughts? |
// The desc for import function is unavailable. Check the function attribute | ||
// for mismatch. | ||
if (F.hasFnAttribute("profile-checksum-mismatch")) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we check attribute only when descriptor isn't available? that's what the comment says..
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And when both desc and attribute are available, maybe assert they are consistent?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, done.
I meant the problem that applying stale profile matching to a perfectly matched profile is not a no-op. And we should improve the algorithm to try to make it a no-op, hoping that it will help improve things overall even for mismatched cases.. |
I see, makes sense to me, will try to improve this. |
assert((Desc && | ||
ProbeManager->profileIsHashMismatched(*Desc, *FSFlattened)) && | ||
"The profile-checksum-mismatch attribute is set but there is no " | ||
"pseudo-probe descriptor or the checksum is matched"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This assertion is a bit confusing -- it's asserting on something we just checked above from profileIsValid
?
Originally what I meant was that if both attribute and desc exists, we should make sure they agree. That can only happen in post-link.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed, thanks! Note that it's needed to fix an old test(missing the attr).
return false; | ||
} | ||
return true; | ||
assert(!(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink && Desc && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: i found it easier to parse/read if we change it to:
skip for pre-link or if we don't have desc, otherwise for post-link with desc, we expect attribute to match desc
assert(LTOPhase != ThinOrFullLTOPhase::ThinLTOPostLink || !Desc || profileIsHashMismatched(*Desc, Samples) == F.hasFnAttribute("profile-checksum-mismatch") && "...")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: i found it easier to parse/read if we change it to:
skip for pre-link or if we don't have desc, otherwise for post-link with desc, we expect attribute to match desc
assert(LTOPhase != ThinOrFullLTOPhase::ThinLTOPostLink || !Desc || profileIsHashMismatched(*Desc, Samples) == F.hasFnAttribute("profile-checksum-mismatch") && "...")
Good point, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
Two fixes related to the callee/inlinee profile:
More context: before we run matchings for all imported functions even checksums are matched, however, after we fix 1), we got a regression, it's likely due to the matching is not no-op for checksum matched function, so we want to make it consistent to only run matching for checksum mismatched (imported)functions. Since the metadata(pseudo_probe_desc) are dropped for imported function, we leverage the function attribute mechanism and add a new function attribute(
profile-checksum-mismatch
) to transfer the info from pre-link to post-link.