diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp index 31e69784262da..011aeca9ef0f2 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp @@ -528,35 +528,50 @@ static void handleCallSite( Module &M, std::set> &MatchedCallSites, OptimizationRemarkEmitter &ORE) { auto &Ctx = M.getContext(); + // Set of Callee GUIDs to attach to indirect calls. We accumulate all of them + // to support cases where the instuction's inlined frames match multiple call + // site entries, which can happen if the profile was collected from a binary + // where this instruction was eventually inlined into multiple callers. + SetVector CalleeGuids; + bool CallsiteMDAdded = false; for (const auto &CallSiteEntry : CallSiteEntries) { // If we found and thus matched all frames on the call, create and // attach call stack metadata. if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames, InlinedCallStack)) { NumOfMemProfMatchedCallSites++; - addCallsiteMetadata(I, InlinedCallStack, Ctx); - - // Try to attach indirect call metadata if possible. - if (!CalledFunction) - addVPMetadata(M, I, CallSiteEntry.CalleeGuids); - // Only need to find one with a matching call stack and add a single // callsite metadata. - - // Accumulate call site matching information upon request. - if (ClPrintMemProfMatchInfo) { - std::vector CallStack; - append_range(CallStack, InlinedCallStack); - MatchedCallSites.insert(std::move(CallStack)); + if (!CallsiteMDAdded) { + addCallsiteMetadata(I, InlinedCallStack, Ctx); + + // Accumulate call site matching information upon request. + if (ClPrintMemProfMatchInfo) { + std::vector CallStack; + append_range(CallStack, InlinedCallStack); + MatchedCallSites.insert(std::move(CallStack)); + } + ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I) + << ore::NV("CallSite", &I) << " in function " + << ore::NV("Caller", I.getFunction()) + << " matched callsite with frame count " + << ore::NV("Frames", InlinedCallStack.size())); + + // If this is a direct call, we're done. + if (CalledFunction) + break; + CallsiteMDAdded = true; } - ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I) - << ore::NV("CallSite", &I) << " in function " - << ore::NV("Caller", I.getFunction()) - << " matched callsite with frame count " - << ore::NV("Frames", InlinedCallStack.size())); - break; + + assert(!CalledFunction && "Didn't expect direct call"); + + // Collect Callee GUIDs from all matching CallSiteEntries. + CalleeGuids.insert(CallSiteEntry.CalleeGuids.begin(), + CallSiteEntry.CalleeGuids.end()); } } + // Try to attach indirect call metadata if possible. + addVPMetadata(M, I, CalleeGuids.getArrayRef()); } static void readMemprof(Module &M, Function &F, diff --git a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test index ad83da285694a..54f3ed2d9e65e 100644 --- a/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test +++ b/llvm/test/Transforms/PGOProfile/memprof_annotate_indirect_call.test @@ -18,6 +18,18 @@ HeapProfileRecords: - Frames: - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false } CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01] + # The next 2 sets of frames simulates the case where this function was + # eventually inlined into multiple callers. We would have propagated the + # resulting frames and callee guids here for matching with they not yet + # inlined bar. We should aggregate all callee guids into the metadata. + - Frames: + - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: true } + - { Function: _Z3foov, LineOffset: 1, Column: 6, IsInlineFrame: false } + CalleeGuids: [0x1234, 0x2345] + - Frames: + - { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: true } + - { Function: _Z3foov, LineOffset: 10, Column: 7, IsInlineFrame: false } + CalleeGuids: [0x3456, 0x4567] ... ;--- basic.ll @@ -31,7 +43,7 @@ entry: ret void } -; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1} +; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 6, i64 13398, i64 1, i64 17767, i64 1, i64 4660, i64 1, i64 9029, i64 1, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1} !llvm.module.flags = !{!2, !3}