diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index d35ae4730a9f3..0f4bc649df720 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -107,6 +107,10 @@ STATISTIC(MismatchedCloneAssignments,
 STATISTIC(TotalMergeInvokes, "Number of merge invocations for nodes");
 STATISTIC(TotalMergeIters, "Number of merge iterations for nodes");
 STATISTIC(MaxMergeIters, "Max merge iterations for nodes");
+STATISTIC(NumImportantContextIds, "Number of important context ids");
+STATISTIC(NumFixupEdgeIdsInserted, "Number of fixup edge ids inserted");
+STATISTIC(NumFixupEdgesAdded, "Number of fixup edges added");
+STATISTIC(NumFixedContexts, "Number of contexts with fixed edges");
 
 static cl::opt<std::string> DotFilePathPrefix(
     "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
@@ -223,9 +227,18 @@ static cl::opt<bool> MemProfRequireDefinitionForPromotion(
 extern cl::opt<bool> MemProfReportHintedSizes;
 extern cl::opt<unsigned> MinClonedColdBytePercent;
 
+cl::opt<unsigned> MemProfTopNImportant(
+    "memprof-top-n-important", cl::init(10), cl::Hidden,
+    cl::desc("Number of largest cold contexts to consider important"));
+
+cl::opt<bool> MemProfFixupImportant(
+    "memprof-fixup-important", cl::init(true), cl::Hidden,
+    cl::desc("Enables edge fixup for important contexts"));
+
 } // namespace llvm
 
 namespace {
+
 /// CRTP base for graphs built from either IR or ThinLTO summary index.
 ///
 /// The graph represents the call contexts in all memprof metadata on allocation
@@ -581,17 +594,26 @@ class CallsiteContextGraph {
 
   /// Adds nodes for the given MIB stack ids.
   template <class NodeT, class IteratorT>
-  void addStackNodesForMIB(ContextNode *AllocNode,
-                           CallStack<NodeT, IteratorT> &StackContext,
-                           CallStack<NodeT, IteratorT> &CallsiteContext,
-                           AllocationType AllocType,
-                           ArrayRef<ContextTotalSize> ContextSizeInfo);
+  void addStackNodesForMIB(
+      ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
+      CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType,
+      ArrayRef<ContextTotalSize> ContextSizeInfo,
+      std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold);
 
   /// Matches all callsite metadata (or summary) to the nodes created for
   /// allocation memprof MIB metadata, synthesizing new nodes to reflect any
   /// inlining performed on those callsite instructions.
   void updateStackNodes();
 
+  /// Optionally fix up edges for the N largest cold contexts to better enable
+  /// cloning. This is particularly helpful if the context includes recursion
+  /// as well as inlining, resulting in a single stack node for multiple stack
+  /// ids in the context. With recursion it is especially difficult to get the
+  /// edge updates correct, as in the general case we have lost the original
+  /// stack id ordering for the context. We do this more expensive fixup only
+  /// for the largest contexts, controlled by MemProfTopNImportant and
+  /// MemProfFixupImportant.
+  void fixupImportantContexts();
+
   /// Update graph to conservatively handle any callsite stack nodes that target
   /// multiple different callee target functions.
   void handleCallsitesWithMultipleTargets();
@@ -658,7 +680,8 @@ class CallsiteContextGraph {
   void assignStackNodesPostOrder(
       ContextNode *Node, DenseSet<const ContextNode *> &Visited,
       DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls,
-      DenseMap<CallInfo, CallInfo> &CallToMatchingCall);
+      DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
+      const DenseSet<uint32_t> &ImportantContextIds);
 
   /// Duplicates the given set of context ids, updating the provided
   /// map from each original id with the newly generated context ids,
@@ -859,6 +882,50 @@ class CallsiteContextGraph {
   /// nodes.
   DenseMap<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
 
+  /// Saves information for the contexts identified as important (the largest
+  /// cold contexts up to MemProfTopNImportant).
+  struct ImportantContextInfo {
+    // The original list of leaf-first stack ids corresponding to this
+    // context.
+    std::vector<uint64_t> StackIds;
+    // Max length of stack ids corresponding to a single stack ContextNode for
+    // this context (i.e. the max length of a key in StackIdsToNode below).
+    unsigned MaxLength = 0;
+    // Mapping of slices of the stack ids to the corresponding ContextNode
+    // (there can be multiple stack ids due to inlining). Populated when
+    // updating stack nodes while matching them to the IR or summary.
+    std::map<std::vector<uint64_t>, ContextNode *> StackIdsToNode;
+  };
+
+  // Map of important full context ids to information about each.
+  DenseMap<uint32_t, ImportantContextInfo> ImportantContextIdInfo;
+
+  // For each important context id found in Node (if any), records the list of
+  // stack ids that corresponded to the given callsite Node. There can be more
+  // than one in the case of inlining.
+  void recordStackNode(std::vector<uint64_t> &StackIds, ContextNode *Node,
+                       // We pass in the Node's context ids to avoid the
+                       // overhead of computing them, as the caller already
+                       // has them in some cases.
+                       const DenseSet<uint32_t> &NodeContextIds,
+                       const DenseSet<uint32_t> &ImportantContextIds) {
+    if (!MemProfTopNImportant) {
+      assert(ImportantContextIds.empty());
+      return;
+    }
+    DenseSet<uint32_t> Ids =
+        set_intersection(NodeContextIds, ImportantContextIds);
+    if (Ids.empty())
+      return;
+    auto Size = StackIds.size();
+    for (auto Id : Ids) {
+      auto &Entry = ImportantContextIdInfo[Id];
+      Entry.StackIdsToNode[StackIds] = Node;
+      // Keep track of the max to simplify later analysis.
+      if (Size > Entry.MaxLength)
+        Entry.MaxLength = Size;
+    }
+  }
+
   /// Maps to track the calls to their corresponding nodes in the graph.
   MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;
   MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;
@@ -1353,7 +1420,8 @@ template <class NodeT, class IteratorT>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
     ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
     CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType,
-    ArrayRef<ContextTotalSize> ContextSizeInfo) {
+    ArrayRef<ContextTotalSize> ContextSizeInfo,
+    std::map<uint64_t, uint32_t> &TotalSizeToContextIdTopNCold) {
   // Treating the hot alloc type as NotCold before the disambiguation for "hot"
   // is done.
   if (AllocType == AllocationType::Hot)
@@ -1361,8 +1429,33 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
 
   ContextIdToAllocationType[++LastContextId] = AllocType;
 
+  bool IsImportant = false;
   if (!ContextSizeInfo.empty()) {
     auto &Entry = ContextIdToContextSizeInfos[LastContextId];
+    // If this is a cold allocation, and we are collecting the largest cold
+    // contexts (MemProfTopNImportant is non-zero), see if this is a
+    // candidate.
+    if (AllocType == AllocationType::Cold && MemProfTopNImportant > 0) {
+      uint64_t TotalCold = 0;
+      for (auto &CSI : ContextSizeInfo)
+        TotalCold += CSI.TotalSize;
+      // Record this context if we haven't yet found the top N largest, or if
+      // it is larger than the smallest already recorded.
+      if (TotalSizeToContextIdTopNCold.size() < MemProfTopNImportant ||
+          // Since TotalSizeToContextIdTopNCold is a std::map, it is implicitly
+          // sorted in ascending order of its key, which is the size.
+          TotalCold > TotalSizeToContextIdTopNCold.begin()->first) {
+        if (TotalSizeToContextIdTopNCold.size() == MemProfTopNImportant) {
+          // Remove the old smallest entry and its associated info.
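+          // Evicting here keeps ImportantContextIdInfo in sync with the
+          // top-N map: a context id keeps its info entry only while it is
+          // still one of the N largest cold contexts seen so far.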
+          auto IdToRemove = TotalSizeToContextIdTopNCold.begin()->second;
+          TotalSizeToContextIdTopNCold.erase(
+              TotalSizeToContextIdTopNCold.begin());
+          assert(ImportantContextIdInfo.count(IdToRemove));
+          ImportantContextIdInfo.erase(IdToRemove);
+        }
+        TotalSizeToContextIdTopNCold[TotalCold] = LastContextId;
+        IsImportant = true;
+      }
+    }
     Entry.insert(Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
   }
 
@@ -1381,6 +1474,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
   for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext);
        ContextIter != StackContext.end(); ++ContextIter) {
     auto StackId = getStackId(*ContextIter);
+    if (IsImportant)
+      ImportantContextIdInfo[LastContextId].StackIds.push_back(StackId);
     ContextNode *StackNode = getNodeForStackId(StackId);
     if (!StackNode) {
       StackNode = createNewNode(/*IsAllocation=*/false);
@@ -1600,11 +1695,12 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
 
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
-    assignStackNodesPostOrder(
-        ContextNode *Node, DenseSet<const ContextNode *> &Visited,
-        DenseMap<uint64_t, std::vector<CallContextInfo>>
-            &StackIdToMatchingCalls,
-        DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
+    assignStackNodesPostOrder(ContextNode *Node,
+                              DenseSet<const ContextNode *> &Visited,
+                              DenseMap<uint64_t, std::vector<CallContextInfo>>
+                                  &StackIdToMatchingCalls,
+                              DenseMap<CallInfo, CallInfo> &CallToMatchingCall,
+                              const DenseSet<uint32_t> &ImportantContextIds) {
   auto Inserted = Visited.insert(Node);
   if (!Inserted.second)
     return;
@@ -1620,7 +1716,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
       continue;
     }
     assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
-                              CallToMatchingCall);
+                              CallToMatchingCall, ImportantContextIds);
   }
 
   // If this node's stack id is in the map, update the graph to contain new
@@ -1648,6 +1744,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
       Node->setCall(Call);
       NonAllocationCallToContextNodeMap[Call] = Node;
       NodeToCallingFunc[Node] = Func;
+      recordStackNode(Ids, Node, Node->getContextIds(), ImportantContextIds);
       return;
     }
   }
@@ -1786,6 +1883,9 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
                         : CurNode->computeAllocType();
     PrevNode = CurNode;
   }
+
+  recordStackNode(Ids, NewNode, SavedContextIds, ImportantContextIds);
+
   if (VerifyNodes) {
     checkNode<DerivedCCG, FuncTy, CallTy>(NewNode, /*CheckEdges=*/true);
     for (auto Id : Ids) {
@@ -1798,6 +1898,122 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
   }
 }
 
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy,
+                          CallTy>::fixupImportantContexts() {
+  if (ImportantContextIdInfo.empty())
+    return;
+
+  // Update statistics, as we are done building this map at this point.
+  NumImportantContextIds = ImportantContextIdInfo.size();
+
+  if (!MemProfFixupImportant)
+    return;
+
+  if (ExportToDot)
+    exportToDot("beforestackfixup");
+
+  // For each context we identified as important, walk through the saved
+  // context stack ids in order from the leaf upwards, and make sure all edges
+  // are correct. These can be difficult to get right when updating the graph
+  // while mapping nodes onto the summary or IR, especially when there is
+  // recursion. In particular, when we have created new nodes to reflect
+  // inlining, it is sometimes impossible to know exactly how to update the
+  // edges in the face of recursion, as we have lost the original ordering of
+  // the stack ids in the contexts.
+  // TODO: Consider only doing this if we detect the context has recursive
+  // cycles.
+  //
+  // For example, assume we have a context with stack ids like {A B A C A D E}
+  // and let's say A was inlined into B, C, and D. The original graph will have
+  // multiple recursive cycles through A. When we match the original context
+  // nodes onto the IR or summary, we will merge {A B} into one context node,
+  // {A C} into another, and {A D} into another.
+  // Looking at the stack sequence above, we should end up with a non-cyclic
+  // set of edges like: {AB} <- {AC} <- {AD} <- E. However, because we have
+  // normally lost the original ordering, we won't get the edges correct
+  // initially (it's impossible without the original ordering). Here we do the
+  // fixup (adding and removing edges where necessary) for this context. In
+  // this case the ImportantContextInfo struct should have MaxLength = 2, and
+  // map entries for {A B}, {A C}, {A D}, and {E}.
+  for (auto &[CurContextId, Info] : ImportantContextIdInfo) {
+    if (Info.StackIdsToNode.empty())
+      continue;
+    bool Changed = false;
+    ContextNode *PrevNode = nullptr;
+    ContextNode *CurNode = nullptr;
+    DenseSet<ContextEdge *> VisitedEdges;
+    ArrayRef<uint64_t> AllStackIds(Info.StackIds);
+    // Try to identify which callsite ContextNode maps to which slice of the
+    // context's ordered stack ids.
+    for (unsigned I = 0; I < AllStackIds.size(); I++, PrevNode = CurNode) {
+      // We will do this greedily, trying up to MaxLength stack ids in a row,
+      // to see if we recorded a context node for that sequence.
+      auto Len = Info.MaxLength;
+      auto LenToEnd = AllStackIds.size() - I;
+      if (Len > LenToEnd)
+        Len = LenToEnd;
+      CurNode = nullptr;
+      // Try to find a recorded context node, starting with the longest length
+      // recorded and working down to a single stack id.
+      for (; Len > 0; Len--) {
+        // Get the slice of the original stack id sequence to check.
+        auto CheckStackIds = AllStackIds.slice(I, Len);
+        auto EntryIt = Info.StackIdsToNode.find(CheckStackIds.vec());
+        if (EntryIt == Info.StackIdsToNode.end())
+          continue;
+        CurNode = EntryIt->second;
+        // Skip forward so we don't try to look for the ones we just matched.
+        // We increment by Len - 1, because the outer for loop will
+        // increment I.
+        I += Len - 1;
+        break;
+      }
+      // Give up if we couldn't find a node. Since we need to clone from the
+      // leaf allocation upwards, there is no sense in doing any more fixup
+      // further up the context if we couldn't match part of the original
+      // stack context onto a callsite node.
+      if (!CurNode)
+        break;
+      // No edges to fix up until we have a pair of nodes that should be
+      // adjacent in the graph.
+      if (!PrevNode)
+        continue;
+      // See if we already have a call edge from CurNode to PrevNode.
+      auto *CurEdge = PrevNode->findEdgeFromCaller(CurNode);
+      if (CurEdge) {
+        // We already have an edge. Make sure it contains this context id.
+        if (CurEdge->getContextIds().insert(CurContextId).second) {
+          NumFixupEdgeIdsInserted++;
+          Changed = true;
+        }
+      } else {
+        // No edge exists; add one.
+        NumFixupEdgesAdded++;
+        DenseSet<uint32_t> ContextIds({CurContextId});
+        auto AllocType = computeAllocType(ContextIds);
+        auto NewEdge = std::make_shared<ContextEdge>(
+            PrevNode, CurNode, AllocType, std::move(ContextIds));
+        PrevNode->CallerEdges.push_back(NewEdge);
+        CurNode->CalleeEdges.push_back(NewEdge);
+        // Save the new edge for the handling below.
+        CurEdge = NewEdge.get();
+        Changed = true;
+      }
+      VisitedEdges.insert(CurEdge);
+      // Now remove this context id from any other caller edges calling
+      // PrevNode.
+      for (auto &Edge : PrevNode->CallerEdges) {
+        // Skip the edge updated/created above, as well as edges we have
+        // already visited (due to recursion).
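+        // E.g. in the {A B A C A D E} example above, when PrevNode is {AB}
+        // and CurNode is {AC}, a stale caller edge into {AB} left over from
+        // the original recursive cycles (say from {AD} or E) may still carry
+        // this context id; it is removed here.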
+        if (Edge.get() != CurEdge && !VisitedEdges.contains(Edge.get()))
+          Edge->getContextIds().erase(CurContextId);
+      }
+    }
+    if (Changed)
+      NumFixedContexts++;
+  }
+}
+
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
   // Map of stack id to all calls with that as the last (outermost caller)
@@ -2043,9 +2259,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
   // nodes representing any inlining at interior callsites. Note we move the
   // associated context ids over to the new nodes.
   DenseSet<const ContextNode *> Visited;
+  DenseSet<uint32_t> ImportantContextIds(llvm::from_range,
+                                         ImportantContextIdInfo.keys());
   for (auto &Entry : AllocationCallToContextNodeMap)
     assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
-                              CallToMatchingCall);
+                              CallToMatchingCall, ImportantContextIds);
+
+  fixupImportantContexts();
+
   if (VerifyCCG)
     check();
 }
@@ -2155,6 +2376,10 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
     Module &M,
     llvm::function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
    : Mod(M), OREGetter(OREGetter) {
+  // Map for keeping track of the largest cold contexts up to the number given
+  // by MemProfTopNImportant. Must be a std::map (not DenseMap) because the
+  // keys must be sorted.
+  std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
   for (auto &F : M) {
     std::vector<CallInfo> CallsWithMetadata;
     for (auto &BB : F) {
@@ -2191,7 +2416,8 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
           CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
           addStackNodesForMIB<MDNode, MDNode::op_iterator>(
               AllocNode, StackContext, CallsiteContext,
-              getMIBAllocType(MIBMD), ContextSizeInfo);
+              getMIBAllocType(MIBMD), ContextSizeInfo,
+              TotalSizeToContextIdTopNCold);
         }
         // If exporting the graph to dot and an allocation id of interest was
         // specified, record all the context ids for this allocation node.
@@ -2241,6 +2467,10 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
     llvm::function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
         isPrevailing)
     : Index(Index), isPrevailing(isPrevailing) {
+  // Map for keeping track of the largest cold contexts up to the number given
+  // by MemProfTopNImportant. Must be a std::map (not DenseMap) because the
+  // keys must be sorted.
+  std::map<uint64_t, uint32_t> TotalSizeToContextIdTopNCold;
   for (auto &I : Index) {
     auto VI = Index.getValueInfo(I);
     for (auto &S : VI.getSummaryList()) {
@@ -2288,7 +2518,7 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
           }
           addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
               AllocNode, StackContext, EmptyContext, MIB.AllocType,
-              ContextSizeInfo);
+              ContextSizeInfo, TotalSizeToContextIdTopNCold);
           I++;
         }
         // If exporting the graph to dot and an allocation id of interest was
diff --git a/llvm/test/ThinLTO/X86/memprof-fixup.ll b/llvm/test/ThinLTO/X86/memprof-fixup.ll
new file mode 100644
index 0000000000000..afed80fc562c1
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-fixup.ll
@@ -0,0 +1,129 @@
+;; Test fixup of largest cold contexts.
+
+;; This case has multiple recursive cycles in the cold context, which can be
+;; made non-recursive with the inlining in the code.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+;; Need context sizes in summary, so enable reporting.
+; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
+
+;; First try disabling detection of the largest cold contexts.
+;; We will not get any cloning.
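+;; (The cold context in !9 below passes through stack id 234 three times, and
+;; the callsite id pairs in !17, !22 and !20 reflect the frame with id 234
+;; having been inlined into its callers, mirroring the {A B A C A D E} example
+;; described in the pass. Without fixup, the recursive cycles through the node
+;; for id 234 prevent the cold context from being disambiguated.)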
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:  -supports-hot-cold-new \
+; RUN:  -memprof-top-n-important=0 \
+; RUN:  -r=%t.o,E,plx \
+; RUN:  -r=%t.o,DB,plx \
+; RUN:  -r=%t.o,CB,plx \
+; RUN:  -r=%t.o,A,plx \
+; RUN:  -r=%t.o,main,plx \
+; RUN:  -r=%t.o,_Znam, \
+; RUN:  -memprof-verify-ccg -memprof-verify-nodes -stats \
+; RUN:  -pass-remarks=memprof-context-disambiguation \
+; RUN:  -o %t.out 2>&1 | FileCheck %s --implicit-check-not="created clone" \
+; RUN:  --implicit-check-not="Number of cold static allocations" \
+; RUN:  --implicit-check-not="Number of function clones" \
+; RUN:  --implicit-check-not="Number of important context ids" \
+; RUN:  --implicit-check-not="Number of fixup"
+
+;; Allow default detection of the largest cold contexts, but disable fixup.
+;; We should find 1 important context, but still not get cloning.
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:  -supports-hot-cold-new \
+; RUN:  -memprof-fixup-important=false \
+; RUN:  -r=%t.o,E,plx \
+; RUN:  -r=%t.o,DB,plx \
+; RUN:  -r=%t.o,CB,plx \
+; RUN:  -r=%t.o,A,plx \
+; RUN:  -r=%t.o,main,plx \
+; RUN:  -r=%t.o,_Znam, \
+; RUN:  -memprof-verify-ccg -memprof-verify-nodes -stats \
+; RUN:  -pass-remarks=memprof-context-disambiguation \
+; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=TOPN1-NOFIXUP \
+; RUN:  --implicit-check-not="created clone" \
+; RUN:  --implicit-check-not="Number of cold static allocations" \
+; RUN:  --implicit-check-not="Number of function clones" \
+; RUN:  --implicit-check-not="Number of fixup"
+
+; TOPN1-NOFIXUP: 1 memprof-context-disambiguation - Number of important context ids
+
+;; Allow default detection of the largest cold contexts; fixup is enabled by default.
+;; This case should get fixup and cloning.
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:  -supports-hot-cold-new \
+; RUN:  -r=%t.o,E,plx \
+; RUN:  -r=%t.o,DB,plx \
+; RUN:  -r=%t.o,CB,plx \
+; RUN:  -r=%t.o,A,plx \
+; RUN:  -r=%t.o,main,plx \
+; RUN:  -r=%t.o,_Znam, \
+; RUN:  -memprof-verify-ccg -memprof-verify-nodes -stats \
+; RUN:  -pass-remarks=memprof-context-disambiguation \
+; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=TOPN1
+
+; TOPN1: created clone E.memprof.1
+; TOPN1: call in clone E marked with memprof allocation attribute notcold
+; TOPN1: call in clone E.memprof.1 marked with memprof allocation attribute cold
+; TOPN1: created clone DB.memprof.1
+; TOPN1: call in clone DB.memprof.1 assigned to call function clone E.memprof.1
+; TOPN1: created clone CB.memprof.1
+; TOPN1: call in clone CB.memprof.1 assigned to call function clone DB.memprof.1
+; TOPN1: created clone A.memprof.1
+; TOPN1: call in clone A.memprof.1 assigned to call function clone CB.memprof.1
+; TOPN1: call in clone main assigned to call function clone A.memprof.1
+
+; TOPN1: 1 memprof-context-disambiguation - Number of contexts with fixed edges
+; TOPN1: 2 memprof-context-disambiguation - Number of fixup edges added
+; TOPN1: 1 memprof-context-disambiguation - Number of important context ids
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @E() {
+entry:
+  %call = tail call ptr @_Znam(i64 10), !memprof !7, !callsite !14
+  ret void
+}
+
+define void @DB() {
+entry:
+  tail call void @E(), !callsite !17
+  ret void
+}
+
+define void @CB() {
+entry:
+  tail call void @DB(), !callsite !22
+  ret void
+}
+
+define void @A() {
+entry:
+  tail call void @CB(), !callsite !20
+  ret void
+}
+
+define i32 @main() {
+entry:
+  tail call void @A(), !callsite !25
+  tail call void @A(), !callsite !27
+  ret i32 0
+}
+
+declare ptr @_Znam(i64)
+
+!7 = !{!8, !10}
+!8 = !{!9, !"cold", !2}
+!9 = !{i64 123, i64 234, i64 345, i64 234, i64 456, i64 234, i64 567, i64 678}
+!2 = !{i64 12345, i64 200}
+!10 = !{!11, !"notcold", !3}
+!3 = !{i64 23456, i64 200}
+!11 = !{i64 123, i64 234, i64 345, i64 234, i64 456, i64 234, i64 567, i64 789}
+!14 = !{i64 123}
+!17 = !{i64 234, i64 345}
+!22 = !{i64 234, i64 456}
+!20 = !{i64 234, i64 567}
+!25 = !{i64 678}
+!27 = !{i64 789}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/fixup.ll b/llvm/test/Transforms/MemProfContextDisambiguation/fixup.ll
new file mode 100644
index 0000000000000..a08f89b5bbe97
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/fixup.ll
@@ -0,0 +1,105 @@
+;; Test fixup of largest cold contexts.
+
+;; This case has multiple recursive cycles in the cold context, which can be
+;; made non-recursive with the inlining in the code.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+;; First try disabling detection of the largest cold contexts.
+;; We will not get any cloning.
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:  -memprof-top-n-important=0 \
+; RUN:  -memprof-verify-ccg -memprof-verify-nodes -stats \
+; RUN:  -pass-remarks=memprof-context-disambiguation \
+; RUN:  %s -S 2>&1 | FileCheck %s --implicit-check-not="created clone" \
+; RUN:  --implicit-check-not="Number of cold static allocations" \
+; RUN:  --implicit-check-not="Number of function clones" \
+; RUN:  --implicit-check-not="Number of important context ids" \
+; RUN:  --implicit-check-not="Number of fixup"
+
+;; Allow default detection of the largest cold contexts, but disable fixup.
+;; We should find 1 important context, but still not get cloning.
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:  -memprof-fixup-important=false \
+; RUN:  -memprof-verify-ccg -memprof-verify-nodes -stats \
+; RUN:  -pass-remarks=memprof-context-disambiguation \
+; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=TOPN1-NOFIXUP \
+; RUN:  --implicit-check-not="created clone" \
+; RUN:  --implicit-check-not="Number of cold static allocations" \
+; RUN:  --implicit-check-not="Number of function clones" \
+; RUN:  --implicit-check-not="Number of fixup"
+
+; TOPN1-NOFIXUP: 1 memprof-context-disambiguation - Number of important context ids
+
+;; Allow default detection of the largest cold contexts; fixup is enabled by default.
+;; This case should get fixup and cloning.
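+;; (The cold context !9 and notcold context !11 diverge only at the outermost
+;; frame, the two calls from main, so once fixup untangles the recursion the
+;; whole chain E <- DB <- CB <- A can be cloned for the cold path.)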
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:  -memprof-verify-ccg -memprof-verify-nodes -stats \
+; RUN:  -pass-remarks=memprof-context-disambiguation \
+; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=TOPN1
+
+; TOPN1: created clone E.memprof.1
+; TOPN1: created clone DB.memprof.1
+; TOPN1: created clone CB.memprof.1
+; TOPN1: created clone A.memprof.1
+; TOPN1: call in clone main assigned to call function clone A.memprof.1
+; TOPN1: call in clone A.memprof.1 assigned to call function clone CB.memprof.1
+; TOPN1: call in clone CB.memprof.1 assigned to call function clone DB.memprof.1
+; TOPN1: call in clone DB.memprof.1 assigned to call function clone E.memprof.1
+; TOPN1: call in clone E.memprof.1 marked with memprof allocation attribute cold
+; TOPN1: call in clone E marked with memprof allocation attribute notcold
+
+; TOPN1: 1 memprof-context-disambiguation - Number of contexts with fixed edges
+; TOPN1: 2 memprof-context-disambiguation - Number of fixup edges added
+; TOPN1: 1 memprof-context-disambiguation - Number of important context ids
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @E() {
+entry:
+  %call = tail call ptr @_Znam(i64 10), !memprof !7, !callsite !14
+  ret void
+}
+
+define void @DB() {
+entry:
+  tail call void @E(), !callsite !17
+  ret void
+}
+
+define void @CB() {
+entry:
+  tail call void @DB(), !callsite !22
+  ret void
+}
+
+define void @A() {
+entry:
+  tail call void @CB(), !callsite !20
+  ret void
+}
+
+define i32 @main() {
+entry:
+  tail call void @A(), !callsite !25
+  tail call void @A(), !callsite !27
+  ret i32 0
+}
+
+declare ptr @_Znam(i64)
+
+!7 = !{!8, !10}
+!8 = !{!9, !"cold", !2}
+!9 = !{i64 123, i64 234, i64 345, i64 234, i64 456, i64 234, i64 567, i64 678}
+!2 = !{i64 12345, i64 200}
+!10 = !{!11, !"notcold", !3}
+!3 = !{i64 23456, i64 200}
+!11 = !{i64 123, i64 234, i64 345, i64 234, i64 456, i64 234, i64 567, i64 789}
+!14 = !{i64 123}
+!17 = !{i64 234, i64 345}
+!22 = !{i64 234, i64 456}
+!20 = !{i64 234, i64 567}
+!25 = !{i64 678}
+!27 = !{i64 789}