Skip to content

Commit

Permalink
[BOLT] Fix double jump peephole, remove useless conditional branches.
Browse files Browse the repository at this point in the history
Summary:
I split some of this out from the jumptable diff since it fixes the
double jump peephole.

I've changed the pass manager so that UCE and peepholes are not called
after SCTC.  I've incorporated a call to the double jump fixer to SCTC
since it is needed to fix things up afterwards.

While working on fixing the double jump peephole I discovered a few
useless conditional branches that could be removed as well.  I highly
doubt that removing them will improve perf at all but it does seem
odd to leave in useless conditional branches.

There are also some minor logging improvements.

(cherry picked from FBD4751875)
  • Loading branch information
Bill Nell authored and maksfb committed Mar 21, 2017
1 parent f7d32f7 commit 6c5c65e
Show file tree
Hide file tree
Showing 7 changed files with 281 additions and 133 deletions.
30 changes: 29 additions & 1 deletion bolt/BinaryBasicBlock.cpp
Expand Up @@ -55,6 +55,8 @@ BinaryBasicBlock::reverse_iterator BinaryBasicBlock::getLastNonPseudo() {
}

bool BinaryBasicBlock::validateSuccessorInvariants() {
auto *Func = getFunction();
auto &BC = Func->getBinaryContext();
const MCSymbol *TBB = nullptr;
const MCSymbol *FBB = nullptr;
MCInst *CondBranch = nullptr;
Expand All @@ -67,7 +69,9 @@ bool BinaryBasicBlock::validateSuccessorInvariants() {
case 0:
return !CondBranch && !UncondBranch;
case 1:
return !CondBranch;
return !CondBranch ||
(CondBranch &&
!Func->getBasicBlockForLabel(BC.MIA->getTargetSymbol(*CondBranch)));
case 2:
return
(!CondBranch ||
Expand Down Expand Up @@ -185,6 +189,7 @@ void BinaryBasicBlock::replaceSuccessor(BinaryBasicBlock *Succ,
BinaryBasicBlock *NewSucc,
uint64_t Count,
uint64_t MispredictedCount) {
Succ->removePredecessor(this);
auto I = succ_begin();
auto BI = BranchInfo.begin();
for (; I != succ_end(); ++I) {
Expand All @@ -197,6 +202,7 @@ void BinaryBasicBlock::replaceSuccessor(BinaryBasicBlock *Succ,

*I = NewSucc;
*BI = BinaryBranchInfo{Count, MispredictedCount};
NewSucc->addPredecessor(this);
}

void BinaryBasicBlock::removeSuccessor(BinaryBasicBlock *Succ) {
Expand Down Expand Up @@ -225,6 +231,28 @@ void BinaryBasicBlock::removePredecessor(BinaryBasicBlock *Pred) {
Predecessors.erase(I);
}

void BinaryBasicBlock::removeDuplicateConditionalSuccessor(MCInst *CondBranch) {
assert(succ_size() == 2);

auto *Succ = Successors[0];
assert(Succ == Successors[1]);

const auto CondBI = BranchInfo[0];
const auto UncondBI = BranchInfo[1];

eraseInstruction(CondBranch);

Successors.clear();
BranchInfo.clear();

Successors.push_back(Succ);
BranchInfo.push_back({CondBI.Count + UncondBI.Count,
CondBI.MispredictedCount + UncondBI.MispredictedCount});

assert(isSuccessor(Succ));
assert(Succ->isPredecessor(this));
}

void BinaryBasicBlock::addLandingPad(BinaryBasicBlock *LPBlock) {
if (std::find(LandingPads.begin(), LandingPads.end(), LPBlock) == LandingPads.end()) {
LandingPads.push_back(LPBlock);
Expand Down
11 changes: 11 additions & 0 deletions bolt/BinaryBasicBlock.h
Expand Up @@ -522,6 +522,17 @@ class BinaryBasicBlock {
}
}

/// Remove useless duplicate successors. When the conditional
/// successor is the same as the unconditional successor, we can
/// remove the conditional successor and branch instruction.
void removeDuplicateConditionalSuccessor(MCInst *CondBranch);

/// Test if BB is a predecessor of this block.
bool isPredecessor(const BinaryBasicBlock *BB) const {
auto Itr = std::find(Predecessors.begin(), Predecessors.end(), BB);
return Itr != Predecessors.end();
}

/// Test if BB is a successor of this block.
bool isSuccessor(const BinaryBasicBlock *BB) const {
auto Itr = std::find(Successors.begin(), Successors.end(), BB);
Expand Down
19 changes: 17 additions & 2 deletions bolt/BinaryFunction.cpp
Expand Up @@ -322,10 +322,22 @@ std::pair<unsigned, uint64_t> BinaryFunction::eraseInvalidBBs() {
}

bool BinaryFunction::isForwardCall(const MCSymbol *CalleeSymbol) const {
// TODO: Once we start reordering functions this has to change. #15031238
// This function should work properly before and after function reordering.
// In order to accomplish this, we use the function index (if it is valid).
// If the function indices are not valid, we fall back to the original
// addresses. This should be ok because the functions without valid indices
// should have been ordered with a stable sort.
const auto *CalleeBF = BC.getFunctionForSymbol(CalleeSymbol);
if (CalleeBF) {
return CalleeBF->getAddress() > getAddress();
if (hasValidIndex() && CalleeBF->hasValidIndex()) {
return getIndex() < CalleeBF->getIndex();
} else if (hasValidIndex() && !CalleeBF->hasValidIndex()) {
return true;
} else if (!hasValidIndex() && CalleeBF->hasValidIndex()) {
return false;
} else {
return getAddress() < CalleeBF->getAddress();
}
} else {
// Absolute symbol.
auto const CalleeSI = BC.GlobalSymbols.find(CalleeSymbol->getName());
Expand Down Expand Up @@ -2888,6 +2900,9 @@ void BinaryFunction::fixBranches() {
} else {
MIA->replaceBranchTarget(*CondBranch, TSuccessor->getLabel(), Ctx);
}
if (TSuccessor == FSuccessor) {
BB->removeDuplicateConditionalSuccessor(CondBranch);
}
if (!NextBB || (NextBB != TSuccessor && NextBB != FSuccessor)) {
BB->addBranchInstruction(FSuccessor);
}
Expand Down
15 changes: 10 additions & 5 deletions bolt/BinaryFunction.h
Expand Up @@ -337,11 +337,6 @@ class BinaryFunction : public AddressRangesOwner {
return BB->getIndex();
}

BinaryBasicBlock *getBasicBlockForLabel(const MCSymbol *Label) const {
auto I = LabelToBB.find(Label);
return I == LabelToBB.end() ? nullptr : I->second;
}

/// Return basic block that originally contained offset \p Offset
/// from the function start.
BinaryBasicBlock *getBasicBlockContainingOffset(uint64_t Offset);
Expand Down Expand Up @@ -913,6 +908,16 @@ class BinaryFunction : public AddressRangesOwner {
/// fixBranches().
DynoStats getDynoStats() const;

BinaryBasicBlock *getBasicBlockForLabel(const MCSymbol *Label) {
auto I = LabelToBB.find(Label);
return I == LabelToBB.end() ? nullptr : I->second;
}

const BinaryBasicBlock *getBasicBlockForLabel(const MCSymbol *Label) const {
auto I = LabelToBB.find(Label);
return I == LabelToBB.end() ? nullptr : I->second;
}

/// Returns the basic block after the given basic block in the layout or
/// nullptr the last basic block is given.
const BinaryBasicBlock *getBasicBlockAfter(const BinaryBasicBlock *BB) const {
Expand Down
51 changes: 36 additions & 15 deletions bolt/BinaryPassManager.cpp
Expand Up @@ -13,6 +13,7 @@
#include "Passes/FrameOptimizer.h"
#include "Passes/Inliner.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <numeric>

using namespace llvm;
Expand All @@ -21,10 +22,17 @@ namespace opts {

extern cl::OptionCategory BoltOptCategory;

extern cl::opt<unsigned> Verbosity;
extern cl::opt<bool> PrintAll;
extern cl::opt<bool> DumpDotAll;
extern cl::opt<bool> DynoStatsAll;

static cl::opt<bool>
ICF("icf",
cl::desc("fold functions with identical code"),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));

static cl::opt<bool>
EliminateUnreachable("eliminate-unreachable",
cl::desc("eliminate unreachable code"),
Expand Down Expand Up @@ -223,6 +231,10 @@ void BinaryFunctionPassManager::runPasses() {

auto &Pass = OptPassPair.second;

if (opts::Verbosity > 0) {
outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
}

NamedRegionTimer T(Pass->getName(), TimerGroupName, TimeOpts);

callWithDynoStats(
Expand All @@ -247,6 +259,10 @@ void BinaryFunctionPassManager::runPasses() {
exit(1);
}

if (opts::Verbosity > 0) {
outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
}

if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
continue;

Expand Down Expand Up @@ -282,7 +298,8 @@ void BinaryFunctionPassManager::runAllPasses(
Manager.registerPass(llvm::make_unique<StripRepRet>(NeverPrint),
opts::StripRepRet);

Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(PrintICF));
Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(PrintICF),
opts::ICF);

Manager.registerPass(llvm::make_unique<IndirectCallPromotion>(PrintICP),
opts::IndirectCallPromotion);
Expand All @@ -301,7 +318,8 @@ void BinaryFunctionPassManager::runAllPasses(
llvm::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads),
opts::SimplifyRODataLoads);

Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(PrintICF));
Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(PrintICF),
opts::ICF);

Manager.registerPass(llvm::make_unique<ReorderBasicBlocks>(PrintReordered));

Expand All @@ -320,27 +338,30 @@ void BinaryFunctionPassManager::runAllPasses(
Manager.registerPass(llvm::make_unique<FrameOptimizerPass>(PrintFOP),
OptimizeFrameAccesses);

// This pass should come close to last since it uses the estimated hot
// size of a function to determine the order. It should definitely
// also happen after any changes to the call graph are made, e.g. inlining.
Manager.registerPass(
llvm::make_unique<ReorderFunctions>(PrintReorderedFunctions));

// This pass introduces conditional jumps into external functions.
// Between extending CFG to support this and isolating this pass we chose
// the latter. Thus this pass will do unreachable code elimination
// if necessary and wouldn't rely on UCE for this.
// More generally this pass should be the last optimization pass.
// the latter. Thus this pass will do double jump removal and unreachable
// code elimination if necessary and won't rely on peepholes/UCE for these
// optimizations.
// More generally this pass should be the last optimization pass that
// modifies branches/control flow. This pass is run after function
// reordering so that it can tell whether calls are forward/backward
// accurately.
Manager.registerPass(
llvm::make_unique<SimplifyConditionalTailCalls>(PrintSCTC),
opts::SimplifyConditionalTailCalls);

Manager.registerPass(llvm::make_unique<Peepholes>(PrintPeepholes),
opts::Peepholes);

Manager.registerPass(
llvm::make_unique<EliminateUnreachableBlocks>(PrintUCE),
opts::EliminateUnreachable);

Manager.registerPass(
llvm::make_unique<ReorderFunctions>(PrintReorderedFunctions));

// This pass should always run last.*
Manager.registerPass(llvm::make_unique<FinalizeFunctions>(PrintFinalized));

// *except for this pass. TODO: figure out why moving this before function
// reordering breaks things badly.
Manager.registerPass(
llvm::make_unique<InstructionLowering>(PrintAfterLowering));

Expand Down

0 comments on commit 6c5c65e

Please sign in to comment.