diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index 6f1c3696ca78c..aa582cfef1ad1 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -157,6 +157,34 @@ class CtxProfAnalysisPrinterPass const PrintMode Mode; }; +/// Utility that propagates counter values to each basic block and to each edge +/// when a basic block has more than one outgoing edge, using an adaptation of +/// PGOUseFunc::populateCounters. +// FIXME(mtrofin): look into factoring the code to share one implementation. +class ProfileAnnotatorImpl; +class ProfileAnnotator { + std::unique_ptr PImpl; + +public: + ProfileAnnotator(const Function &F, ArrayRef RawCounters); + uint64_t getBBCount(const BasicBlock &BB) const; + + // Finds the true and false counts for the given select instruction. Returns + // false if the select doesn't have instrumentation or if the count of the + // parent BB is 0. + bool getSelectInstrProfile(SelectInst &SI, uint64_t &TrueCount, + uint64_t &FalseCount) const; + // Clears Profile and populates it with the edge weights, in the same order as + // they need to appear in the MD_prof metadata. Also computes the max of those + // weights an returns it in MaxCount. Returs false if: + // - the BB has less than 2 successors + // - the counts are 0 + bool getOutgoingBranchWeights(BasicBlock &BB, + SmallVectorImpl &Profile, + uint64_t &MaxCount) const; + ~ProfileAnnotator(); +}; + /// Assign a GUID to functions as metadata. GUID calculation takes linkage into /// account, which may change especially through and after thinlto. By /// pre-computing and assigning as metadata, this mechanism is resilient to such diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index d203e277546ea..391631e15aa89 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -14,7 +14,9 @@ #include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/CFG.h" #include "llvm/IR/Analysis.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" @@ -22,6 +24,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include +#include #define DEBUG_TYPE "ctx_prof" @@ -46,6 +50,374 @@ static cl::opt ForceIsInSpecializedModule( const char *AssignGUIDPass::GUIDMetadataName = "guid"; +namespace llvm { +class ProfileAnnotatorImpl final { + friend class ProfileAnnotator; + class BBInfo; + struct EdgeInfo { + BBInfo *const Src; + BBInfo *const Dest; + std::optional Count; + + explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {} + }; + + class BBInfo { + std::optional Count; + // OutEdges is dimensioned to match the number of terminator operands. + // Entries in the vector match the index in the terminator operand list. In + // some cases - see `shouldExcludeEdge` and its implementation - an entry + // will be nullptr. + // InEdges doesn't have the above constraint. + SmallVector OutEdges; + SmallVector InEdges; + size_t UnknownCountOutEdges = 0; + size_t UnknownCountInEdges = 0; + + // Pass AssumeAllKnown when we try to propagate counts from edges to BBs - + // because all the edge counters must be known. + // Return std::nullopt if there were no edges to sum. The user can decide + // how to interpret that. + std::optional getEdgeSum(const SmallVector &Edges, + bool AssumeAllKnown) const { + std::optional Sum; + for (const auto *E : Edges) { + // `Edges` may be `OutEdges`, case in which `E` could be nullptr. + if (E) { + if (!Sum.has_value()) + Sum = 0; + *Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U)); + } + } + return Sum; + } + + bool computeCountFrom(const SmallVector &Edges) { + assert(!Count.has_value()); + Count = getEdgeSum(Edges, true); + return Count.has_value(); + } + + void setSingleUnknownEdgeCount(SmallVector &Edges) { + uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U); + uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U; + EdgeInfo *E = nullptr; + for (auto *I : Edges) + if (I && !I->Count.has_value()) { + E = I; +#ifdef NDEBUG + break; +#else + assert((!E || E == I) && + "Expected exactly one edge to have an unknown count, " + "found a second one"); + continue; +#endif + } + assert(E && "Expected exactly one edge to have an unknown count"); + assert(!E->Count.has_value()); + E->Count = EdgeVal; + assert(E->Src->UnknownCountOutEdges > 0); + assert(E->Dest->UnknownCountInEdges > 0); + --E->Src->UnknownCountOutEdges; + --E->Dest->UnknownCountInEdges; + } + + public: + BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional Count) + : Count(Count) { + // For in edges, we just want to pre-allocate enough space, since we know + // it at this stage. For out edges, we will insert edges at the indices + // corresponding to positions in this BB's terminator instruction, so we + // construct a default (nullptr values)-initialized vector. A nullptr edge + // corresponds to those that are excluded (see shouldExcludeEdge). + InEdges.reserve(NumInEdges); + OutEdges.resize(NumOutEdges); + } + + bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) { + if (!UnknownCountOutEdges) { + return computeCountFrom(OutEdges); + } + return false; + } + + bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) { + if (!UnknownCountInEdges) { + return computeCountFrom(InEdges); + } + return false; + } + + void addInEdge(EdgeInfo &Info) { + InEdges.push_back(&Info); + ++UnknownCountInEdges; + } + + // For the out edges, we care about the position we place them in, which is + // the position in terminator instruction's list (at construction). Later, + // we build branch_weights metadata with edge frequency values matching + // these positions. + void addOutEdge(size_t Index, EdgeInfo &Info) { + OutEdges[Index] = &Info; + ++UnknownCountOutEdges; + } + + bool hasCount() const { return Count.has_value(); } + + uint64_t getCount() const { return *Count; } + + bool trySetSingleUnknownInEdgeCount() { + if (UnknownCountInEdges == 1) { + setSingleUnknownEdgeCount(InEdges); + return true; + } + return false; + } + + bool trySetSingleUnknownOutEdgeCount() { + if (UnknownCountOutEdges == 1) { + setSingleUnknownEdgeCount(OutEdges); + return true; + } + return false; + } + size_t getNumOutEdges() const { return OutEdges.size(); } + + uint64_t getEdgeCount(size_t Index) const { + if (auto *E = OutEdges[Index]) + return *E->Count; + return 0U; + } + }; + + const Function &F; + ArrayRef Counters; + // To be accessed through getBBInfo() after construction. + std::map BBInfos; + std::vector EdgeInfos; + + // The only criteria for exclusion is faux suspend -> exit edges in presplit + // coroutines. The API serves for readability, currently. + bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const { + return llvm::isPresplitCoroSuspendExitEdge(Src, Dest); + } + + BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; } + + const BBInfo &getBBInfo(const BasicBlock &BB) const { + return BBInfos.find(&BB)->second; + } + + // validation function after we propagate the counters: all BBs and edges' + // counters must have a value. + bool allCountersAreAssigned() const { + for (const auto &BBInfo : BBInfos) + if (!BBInfo.second.hasCount()) + return false; + for (const auto &EdgeInfo : EdgeInfos) + if (!EdgeInfo.Count.has_value()) + return false; + return true; + } + + /// Check that all paths from the entry basic block that use edges with + /// non-zero counts arrive at a basic block with no successors (i.e. "exit") + bool allTakenPathsExit() const { + std::deque Worklist; + DenseSet Visited; + Worklist.push_back(&F.getEntryBlock()); + bool HitExit = false; + while (!Worklist.empty()) { + const auto *BB = Worklist.front(); + Worklist.pop_front(); + if (!Visited.insert(BB).second) + continue; + if (succ_size(BB) == 0) { + if (isa(BB->getTerminator())) + return false; + HitExit = true; + continue; + } + if (succ_size(BB) == 1) { + Worklist.push_back(BB->getUniqueSuccessor()); + continue; + } + const auto &BBInfo = getBBInfo(*BB); + bool HasAWayOut = false; + for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) { + const auto *Succ = BB->getTerminator()->getSuccessor(I); + if (!shouldExcludeEdge(*BB, *Succ)) { + if (BBInfo.getEdgeCount(I) > 0) { + HasAWayOut = true; + Worklist.push_back(Succ); + } + } + } + if (!HasAWayOut) + return false; + } + return HitExit; + } + + bool allNonColdSelectsHaveProfile() const { + for (const auto &BB : F) { + if (getBBInfo(BB).getCount() > 0) { + for (const auto &I : BB) { + if (const auto *SI = dyn_cast(&I)) { + if (const auto *Inst = CtxProfAnalysis::getSelectInstrumentation( + *const_cast(SI))) { + auto Index = Inst->getIndex()->getZExtValue(); + assert(Index < Counters.size()); + if (Counters[Index] == 0) + return false; + } + } + } + } + } + return true; + } + + // This is an adaptation of PGOUseFunc::populateCounters. + // FIXME(mtrofin): look into factoring the code to share one implementation. + void propagateCounterValues() { + bool KeepGoing = true; + while (KeepGoing) { + KeepGoing = false; + for (const auto &BB : F) { + auto &Info = getBBInfo(BB); + if (!Info.hasCount()) + KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) || + Info.tryTakeCountFromKnownInEdges(BB); + if (Info.hasCount()) { + KeepGoing |= Info.trySetSingleUnknownOutEdgeCount(); + KeepGoing |= Info.trySetSingleUnknownInEdgeCount(); + } + } + } + assert(allCountersAreAssigned() && + "[ctx-prof] Expected all counters have been assigned."); + assert(allTakenPathsExit() && + "[ctx-prof] Encountered a BB with more than one successor, where " + "all outgoing edges have a 0 count. This occurs in non-exiting " + "functions (message pumps, usually) which are not supported in the " + "contextual profiling case"); + assert(allNonColdSelectsHaveProfile() && + "[ctx-prof] All non-cold select instructions were expected to have " + "a profile."); + } + +public: + ProfileAnnotatorImpl(const Function &F, ArrayRef Counters) + : F(F), Counters(Counters) { + assert(!F.isDeclaration()); + assert(!Counters.empty()); + size_t NrEdges = 0; + for (const auto &BB : F) { + std::optional Count; + if (auto *Ins = CtxProfAnalysis::getBBInstrumentation( + const_cast(BB))) { + auto Index = Ins->getIndex()->getZExtValue(); + assert(Index < Counters.size() && + "The index must be inside the counters vector by construction - " + "tripping this assertion indicates a bug in how the contextual " + "profile is managed by IPO transforms"); + (void)Index; + Count = Counters[Ins->getIndex()->getZExtValue()]; + } else if (isa(BB.getTerminator())) { + // The program presumably didn't crash. + Count = 0; + } + auto [It, Ins] = + BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}}); + (void)Ins; + assert(Ins && "We iterate through the function's BBs, no reason to " + "insert one more than once"); + NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) { + return !shouldExcludeEdge(BB, *Succ); + }); + } + // Pre-allocate the vector, we want references to its contents to be stable. + EdgeInfos.reserve(NrEdges); + for (const auto &BB : F) { + auto &Info = getBBInfo(BB); + for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) { + const auto *Succ = BB.getTerminator()->getSuccessor(I); + if (!shouldExcludeEdge(BB, *Succ)) { + auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ)); + Info.addOutEdge(I, EI); + getBBInfo(*Succ).addInEdge(EI); + } + } + } + assert(EdgeInfos.capacity() == NrEdges && + "The capacity of EdgeInfos should have stayed unchanged it was " + "populated, because we need pointers to its contents to be stable"); + propagateCounterValues(); + } + + uint64_t getBBCount(const BasicBlock &BB) { return getBBInfo(BB).getCount(); } +}; + +} // namespace llvm + +ProfileAnnotator::ProfileAnnotator(const Function &F, + ArrayRef RawCounters) + : PImpl(std::make_unique(F, RawCounters)) {} + +ProfileAnnotator::~ProfileAnnotator() = default; + +uint64_t ProfileAnnotator::getBBCount(const BasicBlock &BB) const { + return PImpl->getBBCount(BB); +} + +bool ProfileAnnotator::getSelectInstrProfile(SelectInst &SI, + uint64_t &TrueCount, + uint64_t &FalseCount) const { + const auto &BBInfo = PImpl->getBBInfo(*SI.getParent()); + TrueCount = FalseCount = 0; + if (BBInfo.getCount() == 0) + return false; + + auto *Step = CtxProfAnalysis::getSelectInstrumentation(SI); + if (!Step) + return false; + auto Index = Step->getIndex()->getZExtValue(); + assert(Index < PImpl->Counters.size() && + "The index of the step instruction must be inside the " + "counters vector by " + "construction - tripping this assertion indicates a bug in " + "how the contextual profile is managed by IPO transforms"); + auto TotalCount = BBInfo.getCount(); + TrueCount = PImpl->Counters[Index]; + FalseCount = (TotalCount > TrueCount ? TotalCount - TrueCount : 0U); + return true; +} + +bool ProfileAnnotator::getOutgoingBranchWeights( + BasicBlock &BB, SmallVectorImpl &Profile, + uint64_t &MaxCount) const { + Profile.clear(); + + if (succ_size(&BB) < 2) + return false; + + auto *Term = BB.getTerminator(); + Profile.resize(Term->getNumSuccessors()); + + const auto &BBInfo = PImpl->getBBInfo(BB); + MaxCount = 0; + for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size; + ++SuccIdx) { + uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx); + if (EdgeCount > MaxCount) + MaxCount = EdgeCount; + Profile[SuccIdx] = EdgeCount; + } + return MaxCount > 0; +} + PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) { for (auto &F : M.functions()) { if (F.isDeclaration()) diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp index 508a41684ed20..e47c9ab75ffe1 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp @@ -45,358 +45,33 @@ using namespace llvm; namespace { -class ProfileAnnotator final { - class BBInfo; - struct EdgeInfo { - BBInfo *const Src; - BBInfo *const Dest; - std::optional Count; +/// Assign branch weights and function entry count. Also update the PSI +/// builder. +void assignProfileData(Function &F, ArrayRef RawCounters) { + assert(!RawCounters.empty()); + ProfileAnnotator PA(F, RawCounters); - explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {} - }; + F.setEntryCount(RawCounters[0]); + SmallVector ProfileHolder; - class BBInfo { - std::optional Count; - // OutEdges is dimensioned to match the number of terminator operands. - // Entries in the vector match the index in the terminator operand list. In - // some cases - see `shouldExcludeEdge` and its implementation - an entry - // will be nullptr. - // InEdges doesn't have the above constraint. - SmallVector OutEdges; - SmallVector InEdges; - size_t UnknownCountOutEdges = 0; - size_t UnknownCountInEdges = 0; - - // Pass AssumeAllKnown when we try to propagate counts from edges to BBs - - // because all the edge counters must be known. - // Return std::nullopt if there were no edges to sum. The user can decide - // how to interpret that. - std::optional getEdgeSum(const SmallVector &Edges, - bool AssumeAllKnown) const { - std::optional Sum; - for (const auto *E : Edges) { - // `Edges` may be `OutEdges`, case in which `E` could be nullptr. - if (E) { - if (!Sum.has_value()) - Sum = 0; - *Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U)); - } - } - return Sum; - } - - bool computeCountFrom(const SmallVector &Edges) { - assert(!Count.has_value()); - Count = getEdgeSum(Edges, true); - return Count.has_value(); - } - - void setSingleUnknownEdgeCount(SmallVector &Edges) { - uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U); - uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U; - EdgeInfo *E = nullptr; - for (auto *I : Edges) - if (I && !I->Count.has_value()) { - E = I; -#ifdef NDEBUG - break; -#else - assert((!E || E == I) && - "Expected exactly one edge to have an unknown count, " - "found a second one"); - continue; -#endif - } - assert(E && "Expected exactly one edge to have an unknown count"); - assert(!E->Count.has_value()); - E->Count = EdgeVal; - assert(E->Src->UnknownCountOutEdges > 0); - assert(E->Dest->UnknownCountInEdges > 0); - --E->Src->UnknownCountOutEdges; - --E->Dest->UnknownCountInEdges; - } - - public: - BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional Count) - : Count(Count) { - // For in edges, we just want to pre-allocate enough space, since we know - // it at this stage. For out edges, we will insert edges at the indices - // corresponding to positions in this BB's terminator instruction, so we - // construct a default (nullptr values)-initialized vector. A nullptr edge - // corresponds to those that are excluded (see shouldExcludeEdge). - InEdges.reserve(NumInEdges); - OutEdges.resize(NumOutEdges); - } - - bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) { - if (!UnknownCountOutEdges) { - return computeCountFrom(OutEdges); - } - return false; - } - - bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) { - if (!UnknownCountInEdges) { - return computeCountFrom(InEdges); - } - return false; - } - - void addInEdge(EdgeInfo &Info) { - InEdges.push_back(&Info); - ++UnknownCountInEdges; - } - - // For the out edges, we care about the position we place them in, which is - // the position in terminator instruction's list (at construction). Later, - // we build branch_weights metadata with edge frequency values matching - // these positions. - void addOutEdge(size_t Index, EdgeInfo &Info) { - OutEdges[Index] = &Info; - ++UnknownCountOutEdges; - } - - bool hasCount() const { return Count.has_value(); } - - uint64_t getCount() const { return *Count; } - - bool trySetSingleUnknownInEdgeCount() { - if (UnknownCountInEdges == 1) { - setSingleUnknownEdgeCount(InEdges); - return true; - } - return false; - } - - bool trySetSingleUnknownOutEdgeCount() { - if (UnknownCountOutEdges == 1) { - setSingleUnknownEdgeCount(OutEdges); - return true; - } - return false; - } - size_t getNumOutEdges() const { return OutEdges.size(); } - - uint64_t getEdgeCount(size_t Index) const { - if (auto *E = OutEdges[Index]) - return *E->Count; - return 0U; - } - }; - - Function &F; - const SmallVectorImpl &Counters; - // To be accessed through getBBInfo() after construction. - std::map BBInfos; - std::vector EdgeInfos; - - // This is an adaptation of PGOUseFunc::populateCounters. - // FIXME(mtrofin): look into factoring the code to share one implementation. - void propagateCounterValues(const SmallVectorImpl &Counters) { - bool KeepGoing = true; - while (KeepGoing) { - KeepGoing = false; - for (const auto &BB : F) { - auto &Info = getBBInfo(BB); - if (!Info.hasCount()) - KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) || - Info.tryTakeCountFromKnownInEdges(BB); - if (Info.hasCount()) { - KeepGoing |= Info.trySetSingleUnknownOutEdgeCount(); - KeepGoing |= Info.trySetSingleUnknownInEdgeCount(); - } - } - } - } - // The only criteria for exclusion is faux suspend -> exit edges in presplit - // coroutines. The API serves for readability, currently. - bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const { - return llvm::isPresplitCoroSuspendExitEdge(Src, Dest); - } - - BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; } - - const BBInfo &getBBInfo(const BasicBlock &BB) const { - return BBInfos.find(&BB)->second; - } - - // validation function after we propagate the counters: all BBs and edges' - // counters must have a value. - bool allCountersAreAssigned() const { - for (const auto &BBInfo : BBInfos) - if (!BBInfo.second.hasCount()) - return false; - for (const auto &EdgeInfo : EdgeInfos) - if (!EdgeInfo.Count.has_value()) - return false; - return true; - } - - /// Check that all paths from the entry basic block that use edges with - /// non-zero counts arrive at a basic block with no successors (i.e. "exit") - bool allTakenPathsExit() const { - std::deque Worklist; - DenseSet Visited; - Worklist.push_back(&F.getEntryBlock()); - bool HitExit = false; - while (!Worklist.empty()) { - const auto *BB = Worklist.front(); - Worklist.pop_front(); - if (!Visited.insert(BB).second) - continue; - if (succ_size(BB) == 0) { - if (isa(BB->getTerminator())) - return false; - HitExit = true; - continue; - } - if (succ_size(BB) == 1) { - Worklist.push_back(BB->getUniqueSuccessor()); - continue; - } - const auto &BBInfo = getBBInfo(*BB); - bool HasAWayOut = false; - for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) { - const auto *Succ = BB->getTerminator()->getSuccessor(I); - if (!shouldExcludeEdge(*BB, *Succ)) { - if (BBInfo.getEdgeCount(I) > 0) { - HasAWayOut = true; - Worklist.push_back(Succ); - } - } - } - if (!HasAWayOut) - return false; - } - return HitExit; - } - - bool allNonColdSelectsHaveProfile() const { - for (const auto &BB : F) { - if (getBBInfo(BB).getCount() > 0) { - for (const auto &I : BB) { - if (const auto *SI = dyn_cast(&I)) { - if (!SI->getMetadata(LLVMContext::MD_prof)) { - return false; - } - } - } - } - } - return true; - } - -public: - ProfileAnnotator(Function &F, const SmallVectorImpl &Counters) - : F(F), Counters(Counters) { - assert(!F.isDeclaration()); - assert(!Counters.empty()); - size_t NrEdges = 0; - for (const auto &BB : F) { - std::optional Count; - if (auto *Ins = CtxProfAnalysis::getBBInstrumentation( - const_cast(BB))) { - auto Index = Ins->getIndex()->getZExtValue(); - assert(Index < Counters.size() && - "The index must be inside the counters vector by construction - " - "tripping this assertion indicates a bug in how the contextual " - "profile is managed by IPO transforms"); - (void)Index; - Count = Counters[Ins->getIndex()->getZExtValue()]; - } else if (isa(BB.getTerminator())) { - // The program presumably didn't crash. - Count = 0; - } - auto [It, Ins] = - BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}}); - (void)Ins; - assert(Ins && "We iterate through the function's BBs, no reason to " - "insert one more than once"); - NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) { - return !shouldExcludeEdge(BB, *Succ); - }); - } - // Pre-allocate the vector, we want references to its contents to be stable. - EdgeInfos.reserve(NrEdges); - for (const auto &BB : F) { - auto &Info = getBBInfo(BB); - for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) { - const auto *Succ = BB.getTerminator()->getSuccessor(I); - if (!shouldExcludeEdge(BB, *Succ)) { - auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ)); - Info.addOutEdge(I, EI); - getBBInfo(*Succ).addInEdge(EI); - } - } - } - assert(EdgeInfos.capacity() == NrEdges && - "The capacity of EdgeInfos should have stayed unchanged it was " - "populated, because we need pointers to its contents to be stable"); - } - - void setProfileForSelectInstructions(BasicBlock &BB, const BBInfo &BBInfo) { - if (BBInfo.getCount() == 0) - return; - - for (auto &I : BB) { + for (auto &BB : F) { + for (auto &I : BB) if (auto *SI = dyn_cast(&I)) { - if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) { - auto Index = Step->getIndex()->getZExtValue(); - assert(Index < Counters.size() && - "The index of the step instruction must be inside the " - "counters vector by " - "construction - tripping this assertion indicates a bug in " - "how the contextual profile is managed by IPO transforms"); - auto TotalCount = BBInfo.getCount(); - auto TrueCount = Counters[Index]; - auto FalseCount = - (TotalCount > TrueCount ? TotalCount - TrueCount : 0U); - setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount}, - std::max(TrueCount, FalseCount)); - } - } - } - } - - /// Assign branch weights and function entry count. Also update the PSI - /// builder. - void assignProfileData() { - assert(!Counters.empty()); - propagateCounterValues(Counters); - F.setEntryCount(Counters[0]); - - for (auto &BB : F) { - const auto &BBInfo = getBBInfo(BB); - setProfileForSelectInstructions(BB, BBInfo); - if (succ_size(&BB) < 2) - continue; - auto *Term = BB.getTerminator(); - SmallVector EdgeCounts(Term->getNumSuccessors(), 0); - uint64_t MaxCount = 0; - - for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size; - ++SuccIdx) { - uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx); - if (EdgeCount > MaxCount) - MaxCount = EdgeCount; - EdgeCounts[SuccIdx] = EdgeCount; + uint64_t TrueCount, FalseCount = 0; + if (!PA.getSelectInstrProfile(*SI, TrueCount, FalseCount)) + continue; + setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount}, + std::max(TrueCount, FalseCount)); } - - if (MaxCount != 0) - setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount); - } - assert(allCountersAreAssigned() && - "[ctx-prof] Expected all counters have been assigned."); - assert(allTakenPathsExit() && - "[ctx-prof] Encountered a BB with more than one successor, where " - "all outgoing edges have a 0 count. This occurs in non-exiting " - "functions (message pumps, usually) which are not supported in the " - "contextual profiling case"); - assert(allNonColdSelectsHaveProfile() && - "[ctx-prof] All non-cold select instructions were expected to have " - "a profile."); + if (succ_size(&BB) < 2) + continue; + uint64_t MaxCount = 0; + if (!PA.getOutgoingBranchWeights(BB, ProfileHolder, MaxCount)) + continue; + assert(MaxCount > 0); + setProfMetadata(F.getParent(), BB.getTerminator(), ProfileHolder, MaxCount); } -}; +} [[maybe_unused]] bool areAllBBsReachable(const Function &F, FunctionAnalysisManager &FAM) { @@ -510,10 +185,8 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M, // If this function didn't appear in the contextual profile, it's cold. if (It == FlattenedProfile.end()) clearColdFunctionProfile(F); - else { - ProfileAnnotator S(F, It->second); - S.assignProfileData(); - } + else + assignProfileData(F, It->second); } InstrProfSummaryBuilder PB(ProfileSummaryBuilder::DefaultCutoffs); // use here the flat profiles just so the importer doesn't complain about