diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h index 66814d3952730..bd7496a799c57 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -86,9 +86,12 @@ template <> struct IRTraits { // SampleProfileProber. class PseudoProbeManager { DenseMap GUIDToProbeDescMap; + const ThinOrFullLTOPhase LTOPhase; public: - PseudoProbeManager(const Module &M) { + PseudoProbeManager(const Module &M, + ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None) + : LTOPhase(LTOPhase) { if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) { for (const auto *Operand : FuncInfo->operands()) { @@ -126,17 +129,15 @@ class PseudoProbeManager { bool profileIsValid(const Function &F, const FunctionSamples &Samples) const { const auto *Desc = getDesc(F); - if (!Desc) { - LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function " - << F.getName() << "\n"); - return false; - } - if (Desc->getFunctionHash() != Samples.getFunctionHash()) { - LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName() - << "\n"); - return false; - } - return true; + assert((LTOPhase != ThinOrFullLTOPhase::ThinLTOPostLink || !Desc || + profileIsHashMismatched(*Desc, Samples) == + F.hasFnAttribute("profile-checksum-mismatch")) && + "In post-link, profile checksum matching state doesn't match " + "function 'profile-checksum-mismatch' attribute."); + // The desc for import function is unavailable. Check the function attribute + // for mismatch. + return (!Desc && !F.hasFnAttribute("profile-checksum-mismatch")) || + (Desc && !profileIsHashMismatched(*Desc, Samples)); } }; diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 2cbef8a7ae611..7545a92c114ef 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -453,6 +453,7 @@ class SampleProfileMatcher { Module &M; SampleProfileReader &Reader; const PseudoProbeManager *ProbeManager; + const ThinOrFullLTOPhase LTOPhase; SampleProfileMap FlattenedProfiles; // For each function, the matcher generates a map, of which each entry is a // mapping from the source location of current build to the source location in @@ -504,8 +505,9 @@ class SampleProfileMatcher { public: SampleProfileMatcher(Module &M, SampleProfileReader &Reader, - const PseudoProbeManager *ProbeManager) - : M(M), Reader(Reader), ProbeManager(ProbeManager){}; + const PseudoProbeManager *ProbeManager, + ThinOrFullLTOPhase LTOPhase) + : M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase){}; void runOnModule(); void clearMatchingData() { // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which @@ -521,7 +523,7 @@ class SampleProfileMatcher { return &It->second; return nullptr; } - void runOnFunction(const Function &F); + void runOnFunction(Function &F); void findIRAnchors(const Function &F, std::map &IRAnchors); void findProfileAnchors( @@ -1911,15 +1913,22 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { bool Changed = false; if (FunctionSamples::ProfileIsProbeBased) { - if (!ProbeManager->profileIsValid(F, *Samples)) { + LLVM_DEBUG({ + if (!ProbeManager->getDesc(F)) + dbgs() << "Probe descriptor missing for Function " << F.getName() + << "\n"; + }); + + if (ProbeManager->profileIsValid(F, *Samples)) { + ++NumMatchedProfile; + } else { + ++NumMismatchedProfile; LLVM_DEBUG( dbgs() << "Profile is invalid due to CFG mismatch for Function " << F.getName() << "\n"); - ++NumMismatchedProfile; if (!SalvageStaleProfile) return false; } - ++NumMatchedProfile; } else { if (getFunctionLoc(F) == 0) return false; @@ -2185,7 +2194,7 @@ bool SampleProfileLoader::doInitialization(Module &M, // Load pseudo probe descriptors for probe-based function samples. if (Reader->profileIsProbeBased()) { - ProbeManager = std::make_unique(M); + ProbeManager = std::make_unique(M, LTOPhase); if (!ProbeManager->moduleIsProbed(M)) { const char *Msg = "Pseudo-probe-based profile requires SampleProfileProbePass"; @@ -2197,8 +2206,8 @@ bool SampleProfileLoader::doInitialization(Module &M, if (ReportProfileStaleness || PersistProfileStaleness || SalvageStaleProfile) { - MatchingManager = - std::make_unique(M, *Reader, ProbeManager.get()); + MatchingManager = std::make_unique( + M, *Reader, ProbeManager.get(), LTOPhase); } return true; @@ -2452,7 +2461,7 @@ void SampleProfileMatcher::runStaleProfileMatching( } } -void SampleProfileMatcher::runOnFunction(const Function &F) { +void SampleProfileMatcher::runOnFunction(Function &F) { // We need to use flattened function samples for matching. // Unlike IR, which includes all callsites from the source code, the callsites // in profile only show up when they are hit by samples, i,e. the profile @@ -2481,8 +2490,16 @@ void SampleProfileMatcher::runOnFunction(const Function &F) { // support for pseudo-probe. if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased && !ProbeManager->profileIsValid(F, *FSFlattened)) { - // The matching result will be saved to IRToProfileLocationMap, create a new - // map for each function. + // For imported functions, the checksum metadata(pseudo_probe_desc) are + // dropped, so we leverage function attribute(profile-checksum-mismatch) to + // transfer the info: add the attribute during pre-link phase and check it + // during post-link phase(see "profileIsValid"). + if (FunctionSamples::ProfileIsProbeBased && + LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) + F.addFnAttr("profile-checksum-mismatch"); + + // The matching result will be saved to IRToProfileLocationMap, create a + // new map for each function. auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap); @@ -2758,8 +2775,9 @@ void SampleProfileMatcher::distributeIRToProfileLocationMap( FS.setIRToProfileLocationMap(&(ProfileMappings->second)); } - for (auto &Inlinees : FS.getCallsiteSamples()) { - for (auto FS : Inlinees.second) { + for (auto &Callees : + const_cast(FS.getCallsiteSamples())) { + for (auto &FS : Callees.second) { distributeIRToProfileLocationMap(FS.second); } } diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-callee-profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-callee-profile-mismatch.prof new file mode 100644 index 0000000000000..76a8fc9d19a85 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-callee-profile-mismatch.prof @@ -0,0 +1,16 @@ +main:252:0 + 1: 0 + 2: 50 + 5: 50 + 7: bar:102 + 1: 51 + 2: baz:51 + 1: 51 + !CFGChecksum: 4294967295 + !Attributes: 3 + !CFGChecksum: 281479271677951 + !Attributes: 2 + !CFGChecksum: 281582081721716 +bar:1:1 + 1: 1 + !CFGChecksum: 281479271677951 diff --git a/llvm/test/Transforms/SampleProfile/csspgo-profile-checksum-mismatch-attr.ll b/llvm/test/Transforms/SampleProfile/csspgo-profile-checksum-mismatch-attr.ll new file mode 100644 index 0000000000000..df56b55dcdf3c --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/csspgo-profile-checksum-mismatch-attr.ll @@ -0,0 +1,67 @@ +; REQUIRES: x86_64-linux +; REQUIRES: asserts +; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/pseudo-probe-callee-profile-mismatch.prof -pass-remarks=inline -S -o %t 2>&1 | FileCheck %s --check-prefix=INLINE +; RUN: FileCheck %s < %t +; RUN: FileCheck %s < %t --check-prefix=MERGE + + +; Make sure bar is inlined into main for attr merging verification. +; INLINE: 'bar' inlined into 'main' + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @baz() #0 { +entry: + ret i32 0 +} + +define i32 @bar() #0 !dbg !11 { +; CHECK: define {{.*}} @bar() {{.*}} #[[#BAR_ATTR:]] ! +entry: + %call = call i32 @baz() + ret i32 0 +} + +define i32 @main() #0 { +; MERGE: define {{.*}} @main() {{.*}} #[[#MAIN_ATTR:]] ! +entry: + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %call = call i32 @bar(), !dbg !14 + br label %for.cond +} + +; CHECK: attributes #[[#BAR_ATTR]] = {{{.*}} "profile-checksum-mismatch" {{.*}}} + +; Verify the attribute is not merged into the caller. +; MERGE-NOT: attributes #[[#MAIN_ATTR]] = {{{.*}} "profile-checksum-mismatch" {{.*}}} + +attributes #0 = { "use-sample-profile" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7} +!llvm.pseudo_probe_desc = !{!8, !9, !10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/home", checksumkind: CSK_MD5, checksum: "0df0c950a93a603a7d13f0a9d4623642") +!2 = !{!3} +!3 = !DIGlobalVariableExpression(var: !4, expr: !DIExpression()) +!4 = distinct !DIGlobalVariable(name: "x", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true) +!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 2, !"Debug Info Version", i32 3} +!8 = !{i64 7546896869197086323, i64 4294967295, !"baz"} +!9 = !{i64 -2012135647395072713, i64 281530612780802, !"bar"} +!10 = !{i64 -2624081020897602054, i64 281582081721716, !"main"} +!11 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 5, type: !12, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13) +!12 = distinct !DISubroutineType(types: !13) +!13 = !{} +!14 = !DILocation(line: 15, column: 10, scope: !15) +!15 = !DILexicalBlockFile(scope: !16, file: !1, discriminator: 186646591) +!16 = distinct !DILexicalBlock(scope: !17, file: !1, line: 14, column: 40) +!17 = distinct !DILexicalBlock(scope: !18, file: !1, line: 14, column: 3) +!18 = distinct !DILexicalBlock(scope: !19, file: !1, line: 14, column: 3) +!19 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 12, type: !20, scopeLine: 13, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13) +!20 = !DISubroutineType(types: !13) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll new file mode 100644 index 0000000000000..e00b737cae4e8 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll @@ -0,0 +1,63 @@ +; REQUIRES: x86_64-linux +; REQUIRES: asserts +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-callee-profile-mismatch.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-impl -pass-remarks=inline 2>&1 | FileCheck %s + + +; CHECK: Run stale profile matching for bar +; CHECK: Callsite with callee:baz is matched from 4 to 2 +; CHECK: 'baz' inlined into 'main' to match profiling context with (cost=always): preinliner at callsite bar:3:8.4 @ main:3:10.7 + +; CHECK: Probe descriptor missing for Function bar +; CHECK: Profile is invalid due to CFG mismatch for Function bar + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { + %1 = call i32 @bar(), !dbg !13 + ret i32 0 +} + +define available_externally i32 @bar() #1 !dbg !21 { + %1 = call i32 @baz(), !dbg !23 + ret i32 0 +} + +define available_externally i32 @baz() #0 !dbg !25 { + ret i32 0 +} + +attributes #0 = { "use-sample-profile" } +attributes #1 = { "profile-checksum-mismatch" "use-sample-profile" } + +!llvm.dbg.cu = !{!0, !7, !9} +!llvm.module.flags = !{!11} +!llvm.pseudo_probe_desc = !{!12} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "7220f1a2d70ff869f1a6ab7958e3c393") +!2 = !{!3} +!3 = !DIGlobalVariableExpression(var: !4, expr: !DIExpression()) +!4 = distinct !DIGlobalVariable(name: "x", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true) +!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = distinct !DICompileUnit(language: DW_LANG_C11, file: !8, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!8 = !DIFile(filename: "test1.v1.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "76696bd6bfe16a9f227fe03cfdb6a82c") +!9 = distinct !DICompileUnit(language: DW_LANG_C11, file: !10, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!10 = !DIFile(filename: "test2.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "553093afc026f9c73562eb3b0c5b7532") +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{i64 -2624081020897602054, i64 281582081721716, !"main"} +!13 = !DILocation(line: 8, column: 10, scope: !14) +!14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 186646591) +!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 7, column: 40) +!16 = distinct !DILexicalBlock(scope: !17, file: !1, line: 7, column: 3) +!17 = distinct !DILexicalBlock(scope: !18, file: !1, line: 7, column: 3) +!18 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !19, scopeLine: 6, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !20) +!19 = distinct !DISubroutineType(types: !20) +!20 = !{} +!21 = distinct !DISubprogram(name: "bar", scope: !8, file: !8, line: 3, type: !22, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !7, retainedNodes: !20) +!22 = !DISubroutineType(types: !20) +!23 = !DILocation(line: 6, column: 8, scope: !24) +!24 = !DILexicalBlockFile(scope: !21, file: !8, discriminator: 186646567) +!25 = distinct !DISubprogram(name: "baz", scope: !10, file: !10, line: 1, type: !22, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !9, retainedNodes: !20) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll index 55225b415d4ab..270beee4ebc2b 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll @@ -106,7 +106,7 @@ define available_externally dso_local i32 @bar(i32 noundef %0) local_unnamed_add ret i32 %2, !dbg !132 } -attributes #0 = { nounwind uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #0 = { nounwind uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" "profile-checksum-mismatch"} attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #3 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll index 89477ea5fecf1..29877fb22a2c2 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll @@ -48,6 +48,8 @@ ; } ; } +; Verify not running profile matching for checksum matched function. +; CHECK-NOT: Run stale profile matching for bar ; CHECK: Run stale profile matching for main