From 480d48e42720915de2f9958b69ad05df7c0bf802 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Sun, 31 Aug 2025 09:03:27 -0400 Subject: [PATCH 1/2] [BOLT] Optimize basic block loops to avoid n^2 loop This improves BOLT runtime when optimizing rustc_driver.so from 15 minutes to 7 minutes (49 minutes to 37 minutes of userspace time). --- bolt/lib/Core/BinaryFunction.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 6cac2d0cca2cb..a86e204cae974 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -3591,6 +3591,18 @@ void BinaryFunction::fixBranches() { auto &MIB = BC.MIB; MCContext *Ctx = BC.Ctx.get(); + // Caches `FunctionLayout::nextBasicBlock(IgnoreSplits = false)`. + // nextBasicBlock uses linear search to find the next block, so the loop + // below becomes O(n^2). This avoids that. + DenseMap nextBasicBlock( + Layout.block_size()); + for (size_t i = 0; i + 1 < Layout.block_size(); i++) { + auto current = Layout.block_begin() + i; + auto next = Layout.block_begin() + i + 1; + if (next != Layout.getFragment((*current)->getFragmentNum()).end()) + nextBasicBlock.insert(std::pair(*current, *next)); + } + for (BinaryBasicBlock *BB : BasicBlocks) { const MCSymbol *TBB = nullptr; const MCSymbol *FBB = nullptr; @@ -3605,7 +3617,7 @@ void BinaryFunction::fixBranches() { // Basic block that follows the current one in the final layout. 
const BinaryBasicBlock *const NextBB = - Layout.getBasicBlockAfter(BB, /*IgnoreSplits=*/false); + nextBasicBlock.lookup_or(BB, nullptr); if (BB->succ_size() == 1) { // __builtin_unreachable() could create a conditional branch that From db72cc241e075fac0f201f24f36627950434e1f8 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Sun, 21 Sep 2025 12:32:56 -0400 Subject: [PATCH 2/2] [BOLT] Split getBasicBlocksAfter cache into a distinct function This enables future re-use in other code that calls getBasicBlockAfter in loops, though for now those uses aren't introduced. --- bolt/include/bolt/Core/FunctionLayout.h | 9 +++++++++ bolt/lib/Core/BinaryFunction.cpp | 14 ++------------ bolt/lib/Core/FunctionLayout.cpp | 16 ++++++++++++++++ 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/bolt/include/bolt/Core/FunctionLayout.h b/bolt/include/bolt/Core/FunctionLayout.h index ee4dd689b8dd6..f600d8ab0dabf 100644 --- a/bolt/include/bolt/Core/FunctionLayout.h +++ b/bolt/include/bolt/Core/FunctionLayout.h @@ -243,9 +243,18 @@ class FunctionLayout { /// Returns the basic block after the given basic block in the layout or /// nullptr if the last basic block is given. + /// + /// Note that this performs a linear search for BB. const BinaryBasicBlock *getBasicBlockAfter(const BinaryBasicBlock *BB, bool IgnoreSplits = true) const; + /// Returns a mapping from BB -> getBasicBlockAfter(BB). + /// + /// This should be preferred in loops that call getBasicBlockAfter without + /// changes to the function layout. Caching results avoids n^2 lookup cost. + DenseMap<const BinaryBasicBlock *, const BinaryBasicBlock *> + getBasicBlocksAfter(bool IgnoreSplits = true) const; + /// True if the layout contains at least two non-empty fragments. 
bool isSplit() const; diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index a86e204cae974..35c00a8012426 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -3591,17 +3591,7 @@ void BinaryFunction::fixBranches() { auto &MIB = BC.MIB; MCContext *Ctx = BC.Ctx.get(); - // Caches `FunctionLayout::nextBasicBlock(IgnoreSplits = false)`. - // nextBasicBlock uses linear search to find the next block, so the loop - // below becomes O(n^2). This avoids that. - DenseMap nextBasicBlock( - Layout.block_size()); - for (size_t i = 0; i + 1 < Layout.block_size(); i++) { - auto current = Layout.block_begin() + i; - auto next = Layout.block_begin() + i + 1; - if (next != Layout.getFragment((*current)->getFragmentNum()).end()) - nextBasicBlock.insert(std::pair(*current, *next)); - } + auto NextBasicBlock = Layout.getBasicBlocksAfter(/* IgnoreSplits */ false); for (BinaryBasicBlock *BB : BasicBlocks) { const MCSymbol *TBB = nullptr; @@ -3617,7 +3607,7 @@ void BinaryFunction::fixBranches() { // Basic block that follows the current one in the final layout. 
const BinaryBasicBlock *const NextBB = - nextBasicBlock.lookup_or(BB, nullptr); + NextBasicBlock.lookup_or(BB, nullptr); if (BB->succ_size() == 1) { // __builtin_unreachable() could create a conditional branch that diff --git a/bolt/lib/Core/FunctionLayout.cpp b/bolt/lib/Core/FunctionLayout.cpp index 4498fc44da954..4f8d75585b4e4 100644 --- a/bolt/lib/Core/FunctionLayout.cpp +++ b/bolt/lib/Core/FunctionLayout.cpp @@ -241,6 +241,22 @@ FunctionLayout::getBasicBlockAfter(const BinaryBasicBlock *BB, return *BlockAfter; } +DenseMap<const BinaryBasicBlock *, const BinaryBasicBlock *> +FunctionLayout::getBasicBlocksAfter(bool IgnoreSplits) const { + DenseMap<const BinaryBasicBlock *, const BinaryBasicBlock *> NextBasicBlock(block_size()); + for (size_t i = 0; i + 1 < block_size(); i++) { + auto Current = block_begin() + i; + auto Next = block_begin() + i + 1; + + if (IgnoreSplits) { + NextBasicBlock.insert(std::pair(*Current, *Next)); + } else if (Next != getFragment((*Current)->getFragmentNum()).end()) { + NextBasicBlock.insert(std::pair(*Current, *Next)); + } + } + return NextBasicBlock; +} + bool FunctionLayout::isSplit() const { const unsigned NonEmptyFragCount = llvm::count_if( fragments(), [](const FunctionFragment &FF) { return !FF.empty(); });