diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp index 25953f43e1aa9..46c64d7876ad3 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp @@ -373,38 +373,57 @@ static void addVPMetadata(Module &M, Instruction &I, if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty()) return; + // Prepare the vector of value data, initializing from any existing + // value-profile metadata present on the instruction so that we merge the + // new CalleeGuids into the existing entries. + SmallVector VDs; + uint64_t TotalCount = 0; + if (I.getMetadata(LLVMContext::MD_prof)) { - uint64_t Unused; - // TODO: When merging is implemented, increase this to a typical ICP value - // (e.g., 3-6) For now, we only need to check if existing data exists, so 1 - // is sufficient - auto ExistingVD = getValueProfDataFromInst(I, IPVK_IndirectCallTarget, - /*MaxNumValueData=*/1, Unused); - // We don't know how to merge value profile data yet. - if (!ExistingVD.empty()) { - return; - } + // Read all existing entries so we can merge them. Use a large + // MaxNumValueData to retrieve all existing entries. + VDs = getValueProfDataFromInst(I, IPVK_IndirectCallTarget, + /*MaxNumValueData=*/UINT32_MAX, TotalCount); } - SmallVector VDs; - uint64_t TotalCount = 0; + // Save the original size for use later in detecting whether any were added. + const size_t OriginalSize = VDs.size(); + + // Initialize the set of existing guids with the original list. + DenseSet ExistingValues( + llvm::from_range, + llvm::map_range( + VDs, [](const InstrProfValueData &Entry) { return Entry.Value; })); - for (const GlobalValue::GUID CalleeGUID : CalleeGuids) { - InstrProfValueData VD; - VD.Value = CalleeGUID; + // Merge CalleeGuids into list of existing VDs, by appending any that are not + // already included. + VDs.reserve(OriginalSize + CalleeGuids.size()); + for (auto G : CalleeGuids) { + if (!ExistingValues.insert(G).second) + continue; + InstrProfValueData NewEntry; + NewEntry.Value = G; // For MemProf, we don't have actual call counts, so we assign // a weight of 1 to each potential target. // TODO: Consider making this weight configurable or increasing it to // improve effectiveness for ICP. - VD.Count = 1; - VDs.push_back(VD); - TotalCount += VD.Count; + NewEntry.Count = 1; + TotalCount += NewEntry.Count; + VDs.push_back(NewEntry); } - if (!VDs.empty()) { - annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget, - VDs.size()); - } + // Update the VP metadata if we added any new callee GUIDs to the list. + assert(VDs.size() >= OriginalSize); + if (VDs.size() == OriginalSize) + return; + + // First clear the existing !prof. + I.setMetadata(LLVMContext::MD_prof, nullptr); + + // No need to sort the updated VDs as all appended entries have the same count + // of 1, which is no larger than any existing entries. The incoming list of + // CalleeGuids should already be deterministic for a given profile. + annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget, VDs.size()); } static void diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test index ac7dc77d85b3f..15a315543056a 100644 --- a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test +++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test @@ -62,10 +62,13 @@ HeapProfileRecords: CallSites: - Frames: - { Function: _Z3foov, LineOffset: 3, Column: 5, IsInlineFrame: false } - CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01] + CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01, 0x7f8d88fcc70a347b] - Frames: - { Function: _Z3foov, LineOffset: 5, Column: 5, IsInlineFrame: false } CalleeGuids: [0x555556789abcdef0, 0x666656789abcdef1] + - Frames: + - { Function: _Z3foov, LineOffset: 6, Column: 5, IsInlineFrame: false } + CalleeGuids: [0xf129122801e64264, 0x7f8d88fcc70a347b] ... ;--- fdo_conflict.ll @@ -73,21 +76,33 @@ define dso_local void @_Z3foov() !dbg !14 { entry: %fp = alloca ptr, align 8 %0 = load ptr, ptr %fp, align 8 - ; This call already has FDO value profile metadata - should NOT be modified - ; CHECK-FDO: call void %0(), {{.*}} !prof !6 + ; This call already has FDO value profile metadata - new GUIDs should be appended. + ; The profile contains 0x7f8d88fcc70a347b (9191153033785521275) which already + ; exists in VP metadata and should not be re-added or affect count. + ; CHECK-FDO: call void %0(), {{.*}} !prof ![[VP1:[0-9]+]] call void %0(), !dbg !15, !prof !16 %1 = load ptr, ptr %fp, align 8 ; This call does NOT have existing metadata - should get MemProf annotation - ; CHECK-FDO: call void %1(), {{.*}} !prof !9 + ; CHECK-FDO: call void %1(), {{.*}} !prof ![[VP2:[0-9]+]] call void %1(), !dbg !17 + + %2 = load ptr, ptr %fp, align 8 + ; This call already has FDO value profile metadata, and the profile contains + ; the same 2 callee guids that are already included in the VP metadata: + ; 0x7f8d88fcc70a347b (9191153033785521275) and 0xf129122801e64264 which is + ; 17377440600225628772 aka -1069303473483922844. The metadata should not be affected. + ; CHECK-FDO: call void %2(), {{.*}} !prof ![[VP3:[0-9]+]] + call void %2(), !dbg !18, !prof !19 ret void } !16 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20} +!19 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20} -; CHECK-FDO: !6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20} -; CHECK-FDO: !9 = !{!"VP", i32 0, i64 2, i64 6148915942236413680, i64 1, i64 7378680115485269745, i64 1} +; CHECK-FDO: ![[VP1]] = !{!"VP", i32 0, i64 102, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1} +; CHECK-FDO: ![[VP2]] = !{!"VP", i32 0, i64 2, i64 6148915942236413680, i64 1, i64 7378680115485269745, i64 1} +; CHECK-FDO: ![[VP3]] = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20} !llvm.module.flags = !{!12, !13} @@ -98,3 +113,4 @@ entry: !14 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !11, file: !11, line: 1, unit: !10) !15 = !DILocation(line: 4, column: 5, scope: !14) !17 = !DILocation(line: 6, column: 5, scope: !14) +!18 = !DILocation(line: 7, column: 5, scope: !14)