Skip to content

Commit

Permalink
[CSSPGO][llvm-profgen] Reimplement SampleContextTracker using context…
Browse files Browse the repository at this point in the history
… trie

This is the follow-up patch to https://reviews.llvm.org/D125246 for the `SampleContextTracker` part. Previously, the promotion and merging of contexts was based on the `SampleContext` (an array of frames), which costs a lot of memory. This patch detaches the tracker from the array ref and makes it use the context trie itself instead. This can save a lot of memory and benefits both the compiler's CS inliner and llvm-profgen's pre-inliner.

One structure that needs special treatment is `FuncToCtxtProfiles`, which is used to get all the FunctionSamples of one function in order to do the merging and promoting. Previously, it searched each function's context and traversed the trie to get the node for that context. Now that the context is no longer stored inside the profile, we directly use an auxiliary map, `ProfileToNodeMap`, from profile to trie node; it is initialized with the FunctionSamples-to-TrieNode relations and is kept up to date while promoting and merging nodes.

Moreover, I was expecting the results before and after to remain the same, but I found that the order of `FuncToCtxtProfiles` matters and affects the results. This can happen in the recursive context case, but the difference should be small. Now that we don't have the context, I just use a vector to keep the order; the result is still deterministic.

Measured on one huge (12GB) profile from one of our internal services: the profile similarity between before and after is 99.999%, the running time is improved by 3X (debug mode), and the memory is reduced from 170GB to 90GB.

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D127031
  • Loading branch information
wlei-llvm committed Jun 28, 2022
1 parent aa58b7b commit 7e86b13
Show file tree
Hide file tree
Showing 9 changed files with 204 additions and 165 deletions.
10 changes: 0 additions & 10 deletions llvm/include/llvm/ProfileData/SampleProf.h
Expand Up @@ -553,16 +553,6 @@ class SampleContext {
}
}

// Promote this context by discarding its outermost `ContextFramesToRemove`
// frames. Because the context is stored as an array of frames, promotion is
// just a slice: the leading `ContextFramesToRemove` elements are dropped from
// `FullContext` and the remaining frames are kept.
void promoteOnPath(uint32_t ContextFramesToRemove) {
  // The request must not exceed the number of frames currently held.
  assert(FullContext.size() >= ContextFramesToRemove &&
         "Cannot remove more than the whole context");
  FullContext = FullContext.drop_front(ContextFramesToRemove);
}

// Decode context string for a frame to get function name and location.
// `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
static void decodeContextString(StringRef ContextStr, StringRef &FName,
Expand Down
94 changes: 72 additions & 22 deletions llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
Expand Up @@ -19,6 +19,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ProfileData/SampleProf.h"
#include <cstddef>
#include <map>
#include <queue>
#include <vector>

namespace llvm {
Expand All @@ -44,11 +45,6 @@ class ContextTrieNode {
ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite,
StringRef ChildName,
bool AllowCreate = true);

ContextTrieNode &moveToChildContext(const LineLocation &CallSite,
ContextTrieNode &&NodeToMove,
uint32_t ContextFramesToRemove,
bool DeleteNode = true);
void removeChildContext(const LineLocation &CallSite, StringRef ChildName);
std::map<uint64_t, ContextTrieNode> &getAllChildContext();
StringRef getFuncName() const;
Expand All @@ -59,6 +55,7 @@ class ContextTrieNode {
LineLocation getCallSiteLoc() const;
ContextTrieNode *getParentContext() const;
void setParentContext(ContextTrieNode *Parent);
void setCallSiteLoc(const LineLocation &Loc);
void dumpNode();
void dumpTree();

Expand Down Expand Up @@ -91,23 +88,13 @@ class ContextTrieNode {
// calling context and the context is identified by path from root to the node.
class SampleContextTracker {
public:
// Strict ordering over function profiles: profiles with more total samples
// come first; profiles with the same total sample count are ordered by their
// contexts (ascending, via operator< on the context).
struct ProfileComparer {
  bool operator()(FunctionSamples *A, FunctionSamples *B) const {
    const auto SamplesA = A->getTotalSamples();
    const auto SamplesB = B->getTotalSamples();
    // Different sample counts decide the order on their own.
    if (SamplesA != SamplesB)
      return SamplesA > SamplesB;
    // Tie on sample count: fall back to comparing the contexts.
    return A->getContext() < B->getContext();
  }
};

// Keep profiles of a function sorted so that they will be processed/promoted
// deterministically.
using ContextSamplesTy = std::set<FunctionSamples *, ProfileComparer>;
using ContextSamplesTy = std::vector<FunctionSamples *>;

SampleContextTracker() = default;
SampleContextTracker(SampleProfileMap &Profiles,
const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap);
// Populate the FuncToCtxtProfiles map after the trie is built.
void populateFuncToCtxtMap();
// Query context profile for a specific callee with given name at a given
// call-site. The full context is identified by location of call instruction.
FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst,
Expand Down Expand Up @@ -145,6 +132,61 @@ class SampleContextTracker {

// Create a merged context-less profile map.
void createContextLessProfileMap(SampleProfileMap &ContextLessProfiles);
// Look up the context trie node recorded for `FSamples` in `ProfileToNodeMap`.
// Returns nullptr when the profile has no entry in the map.
ContextTrieNode *
getContextNodeForProfile(const FunctionSamples *FSamples) const {
  const auto Found = ProfileToNodeMap.find(FSamples);
  return Found != ProfileToNodeMap.end() ? Found->second : nullptr;
}
// Accessor for the `FuncToCtxtProfiles` map. The map is returned by non-const
// reference, so callers can modify it in place.
StringMap<ContextSamplesTy> &getFuncToCtxtProfiles() {
return FuncToCtxtProfiles;
}

class Iterator : public std::iterator<std::forward_iterator_tag,
const ContextTrieNode *> {
std::queue<ContextTrieNode *> NodeQueue;

public:
explicit Iterator() = default;
explicit Iterator(ContextTrieNode *Node) { NodeQueue.push(Node); }
Iterator &operator++() {
assert(!NodeQueue.empty() && "Iterator already at the end");
ContextTrieNode *Node = NodeQueue.front();
NodeQueue.pop();
for (auto &It : Node->getAllChildContext())
NodeQueue.push(&It.second);
return *this;
}

Iterator operator++(int) {
assert(!NodeQueue.empty() && "Iterator already at the end");
Iterator Ret = *this;
++(*this);
return Ret;
}
bool operator==(const Iterator &Other) const {
if (NodeQueue.empty() && Other.NodeQueue.empty())
return true;
if (NodeQueue.empty() || Other.NodeQueue.empty())
return false;
return NodeQueue.front() == Other.NodeQueue.front();
}
bool operator!=(const Iterator &Other) const { return !(*this == Other); }
ContextTrieNode *operator*() const {
assert(!NodeQueue.empty() && "Invalid access to end iterator");
return NodeQueue.front();
}
};

// Returns an iterator positioned at the root node (`RootContext`) of the
// context trie.
Iterator begin() { return Iterator(&RootContext); }
// Returns the end iterator, i.e. a default-constructed `Iterator`.
Iterator end() { return Iterator(); }

#ifndef NDEBUG
// Get a context string from root to current node.
std::string getContextString(const FunctionSamples &FSamples) const;
std::string getContextString(ContextTrieNode *Node) const;
#endif
// Dump the internal context profile trie.
void dump();

Expand All @@ -155,15 +197,23 @@ class SampleContextTracker {
ContextTrieNode *getTopLevelContextNode(StringRef FName);
ContextTrieNode &addTopLevelContextNode(StringRef FName);
ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo);
void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode,
uint32_t ContextFramesToRemove);
void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode);
ContextTrieNode &
promoteMergeContextSamplesTree(ContextTrieNode &FromNode,
ContextTrieNode &ToNodeParent,
uint32_t ContextFramesToRemove);
ContextTrieNode &ToNodeParent);
ContextTrieNode &moveContextSamples(ContextTrieNode &ToNodeParent,
const LineLocation &CallSite,
ContextTrieNode &&NodeToMove);
// Record `Node` as the context trie node associated with `FSample` in
// `ProfileToNodeMap`. An existing entry for `FSample` is overwritten.
void setContextNode(const FunctionSamples *FSample, ContextTrieNode *Node) {
ProfileToNodeMap[FSample] = Node;
}
// Map from function name to context profiles (excluding base profile)
StringMap<ContextSamplesTy> FuncToCtxtProfiles;

// Map from each FunctionSamples to the context trie node it currently belongs to.
std::unordered_map<const FunctionSamples *, ContextTrieNode *>
ProfileToNodeMap;

// Map from function guid to real function names. Only used in md5 mode.
const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap;

Expand Down

0 comments on commit 7e86b13

Please sign in to comment.