Skip to content

Commit

Permalink
[BOLT] Add shrink wrapping pass
Browse files Browse the repository at this point in the history
Summary:
Add an implementation for shrink wrapping, a frame optimization
that moves callee-saved register spills from hot prologues to cold
successors.

(cherry picked from FBD4983706)
  • Loading branch information
rafaelauler authored and maksfb committed May 1, 2017
1 parent 4b485f4 commit d850ca3
Show file tree
Hide file tree
Showing 32 changed files with 3,608 additions and 843 deletions.
37 changes: 35 additions & 2 deletions bolt/BinaryBasicBlock.cpp
Expand Up @@ -148,8 +148,9 @@ BinaryBasicBlock *BinaryBasicBlock::getLandingPad(const MCSymbol *Label) const {
}

int32_t BinaryBasicBlock::getCFIStateAtInstr(const MCInst *Instr) const {
assert(getFunction()->getState() == BinaryFunction::State::CFG &&
"can only calculate CFI state when function is in active CFG state");
assert(
getFunction()->getState() >= BinaryFunction::State::CFG &&
"can only calculate CFI state when function is in or past the CFG state");

const auto &FDEProgram = getFunction()->getFDEProgram();

Expand Down Expand Up @@ -316,6 +317,38 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB,
return MIA->analyzeBranch(Instructions, TBB, FBB, CondBranch, UncondBranch);
}

/// Walk this block's instructions from last to first and return the
/// earliest-positioned terminator that precedes \p Pos. When \p Pos is null
/// every instruction in the block is considered. Returns nullptr if no
/// terminator qualifies (including when \p Pos is not found in the block).
MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) {
  auto &BC = Function->getBinaryContext();
  // Instructions only start counting once the reverse walk has moved past Pos;
  // with a null Pos they count from the very end of the block.
  bool PastPos = (Pos == nullptr);
  MCInst *Earliest = nullptr;
  for (auto RI = rbegin(), RE = rend(); RI != RE; ++RI) {
    MCInst &Inst = *RI;
    if (PastPos) {
      // Keep overwriting so the terminator closest to the block start wins.
      if (BC.MIA->isTerminator(Inst))
        Earliest = &Inst;
    } else if (&Inst == Pos) {
      // Pos itself is never reported; everything before it is eligible.
      PastPos = true;
    }
  }
  return Earliest;
}

/// Return true if some instruction located after \p Pos in this block is a
/// terminator. The block is scanned from its last instruction backwards and
/// the scan stops as soon as \p Pos is reached, so \p Pos itself is never
/// tested. If \p Pos is not in the block, the result is true when the block
/// contains any terminator at all.
bool BinaryBasicBlock::hasTerminatorAfter(MCInst *Pos) {
  auto &BC = Function->getBinaryContext();
  for (auto RI = rbegin(), RE = rend(); RI != RE; ++RI) {
    // Reached Pos without seeing a terminator: nothing after it terminates.
    if (&*RI == Pos)
      break;
    if (BC.MIA->isTerminator(*RI))
      return true;
  }
  return false;
}

bool BinaryBasicBlock::swapConditionalSuccessors() {
if (succ_size() != 2)
return false;
Expand Down
43 changes: 33 additions & 10 deletions bolt/BinaryBasicBlock.h
Expand Up @@ -617,20 +617,26 @@ class BinaryBasicBlock {
return Instructions.erase(II);
}

/// Map the instruction pointer \p Inst back to an iterator into this block's
/// instruction list. The position is derived from the pointer's distance to
/// the first stored instruction; the end iterator is returned when the block
/// is empty or the computed position falls outside the list.
decltype(Instructions)::iterator findInstruction(const MCInst *Inst) {
  if (Instructions.empty())
    return Instructions.end();
  // A pointer located before the first element yields a negative distance,
  // which wraps to a huge unsigned value and is rejected by the bound check.
  const size_t Position = Inst - &Instructions[0];
  if (Position < Instructions.size())
    return Instructions.begin() + Position;
  return Instructions.end();
}

/// Replace an instruction with a sequence of instructions. Returns true
/// if the instruction to be replaced was found and replaced.
template <typename Itr>
bool replaceInstruction(const MCInst *Inst, Itr Begin, Itr End) {
auto I = Instructions.end();
auto B = Instructions.begin();
while (I > B) {
--I;
if (&*I == Inst) {
adjustNumPseudos(*Inst, -1);
Instructions.insert(Instructions.erase(I), Begin, End);
adjustNumPseudos(Begin, End, 1);
return true;
}
auto I = findInstruction(Inst);
if (I != Instructions.end()) {
adjustNumPseudos(*Inst, -1);
Instructions.insert(Instructions.erase(I), Begin, End);
adjustNumPseudos(Begin, End, 1);
return true;
}
return false;
}
Expand All @@ -640,6 +646,23 @@ class BinaryBasicBlock {
return replaceInstruction(Inst, Replacement.begin(), Replacement.end());
}

/// Insert \p NewInst before \p At, which must be an existing instruction in
/// this BB. Returns the iterator produced by the insertion, i.e. the one
/// referring to the newly inserted instruction.
iterator insertInstruction(iterator At, MCInst &&NewInst) {
  // Update the pseudo-instruction count while NewInst is still intact, i.e.
  // before it is moved into the container.
  adjustNumPseudos(NewInst, 1);
  auto Inserted = Instructions.emplace(At, std::move(NewInst));
  return Inserted;
}

/// Helper to retrieve any terminators in \p BB before \p Pos. This is used
/// to skip CFI instructions and to retrieve the first terminator instruction
/// in basic blocks with two terminators (conditional jump and unconditional
/// jump).
MCInst *getTerminatorBefore(MCInst *Pos);

/// Used to identify whether an instruction is before a terminator and whether
/// moving it to the end of the BB would render it dead code.
bool hasTerminatorAfter(MCInst *Pos);

/// Split apart the instructions in this basic block starting at Inst.
/// The instructions following Inst are removed and returned in a vector.
std::vector<MCInst> splitInstructions(const MCInst *Inst) {
Expand Down
71 changes: 52 additions & 19 deletions bolt/BinaryContext.cpp
Expand Up @@ -239,24 +239,57 @@ void BinaryContext::preprocessDebugInfo(
}
}

void BinaryContext::printCFI(raw_ostream &OS, uint32_t Operation) {
switch(Operation) {
case MCCFIInstruction::OpSameValue: OS << "OpSameValue"; break;
case MCCFIInstruction::OpRememberState: OS << "OpRememberState"; break;
case MCCFIInstruction::OpRestoreState: OS << "OpRestoreState"; break;
case MCCFIInstruction::OpOffset: OS << "OpOffset"; break;
case MCCFIInstruction::OpDefCfaRegister: OS << "OpDefCfaRegister"; break;
case MCCFIInstruction::OpDefCfaOffset: OS << "OpDefCfaOffset"; break;
case MCCFIInstruction::OpDefCfa: OS << "OpDefCfa"; break;
case MCCFIInstruction::OpRelOffset: OS << "OpRelOffset"; break;
case MCCFIInstruction::OpAdjustCfaOffset: OS << "OfAdjustCfaOffset"; break;
case MCCFIInstruction::OpEscape: OS << "OpEscape"; break;
case MCCFIInstruction::OpRestore: OS << "OpRestore"; break;
case MCCFIInstruction::OpUndefined: OS << "OpUndefined"; break;
case MCCFIInstruction::OpRegister: OS << "OpRegister"; break;
case MCCFIInstruction::OpWindowSave: OS << "OpWindowSave"; break;
case MCCFIInstruction::OpGnuArgsSize: OS << "OpGnuArgsSize"; break;
default: OS << "Op#" << Operation; break;
/// Print a short textual description of the CFI instruction \p Inst to \p OS.
/// The operation name is always printed; for the operations that have a
/// dedicated operand-printing case below, the register ("Reg<N>") and/or the
/// offset follow the name. Operations without a case are printed as
/// "Op#<number>".
void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
  uint32_t Operation = Inst.getOperation();
  switch (Operation) {
  case MCCFIInstruction::OpSameValue:
    OS << "OpSameValue Reg" << Inst.getRegister();
    break;
  case MCCFIInstruction::OpRememberState:
    OS << "OpRememberState";
    break;
  case MCCFIInstruction::OpRestoreState:
    OS << "OpRestoreState";
    break;
  case MCCFIInstruction::OpOffset:
    OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpDefCfaRegister:
    OS << "OpDefCfaRegister Reg" << Inst.getRegister();
    break;
  case MCCFIInstruction::OpDefCfaOffset:
    OS << "OpDefCfaOffset " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpDefCfa:
    OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
    break;
  case MCCFIInstruction::OpRelOffset:
    OS << "OpRelOffset";
    break;
  case MCCFIInstruction::OpAdjustCfaOffset:
    // Spelled "Op..." like every other operation name (was "OfAdjustCfaOffset").
    OS << "OpAdjustCfaOffset";
    break;
  case MCCFIInstruction::OpEscape:
    OS << "OpEscape";
    break;
  case MCCFIInstruction::OpRestore:
    OS << "OpRestore";
    break;
  case MCCFIInstruction::OpUndefined:
    OS << "OpUndefined";
    break;
  case MCCFIInstruction::OpRegister:
    OS << "OpRegister";
    break;
  case MCCFIInstruction::OpWindowSave:
    OS << "OpWindowSave";
    break;
  case MCCFIInstruction::OpGnuArgsSize:
    OS << "OpGnuArgsSize";
    break;
  default:
    OS << "Op#" << Operation;
    break;
  }
}

Expand All @@ -274,7 +307,7 @@ void BinaryContext::printInstruction(raw_ostream &OS,
uint32_t Offset = Instruction.getOperand(0).getImm();
OS << "\t!CFI\t$" << Offset << "\t; ";
if (Function)
printCFI(OS, Function->getCFIFor(Instruction)->getOperation());
printCFI(OS, *Function->getCFIFor(Instruction));
OS << "\n";
return;
}
Expand Down
19 changes: 18 additions & 1 deletion bolt/BinaryContext.h
Expand Up @@ -143,6 +143,12 @@ class BinaryContext {

const DataReader &DR;

/// Sum of execution count of all functions
uint64_t SumExecutionCount{0};

/// Number of functions with profile information
uint64_t NumProfiledFuncs{0};

BinaryContext(std::unique_ptr<MCContext> Ctx,
std::unique_ptr<DWARFContext> DwCtx,
std::unique_ptr<Triple> TheTriple,
Expand Down Expand Up @@ -262,8 +268,19 @@ class BinaryContext {
return Size;
}

/// Return a function execution count threshold for determining whether the
/// function is 'hot'. Consider it hot if count is above the average exec
/// count of profiled functions. When no function has profile information the
/// threshold is 1.
///
/// The value is computed from this context's counters on every call rather
/// than cached in a function-local static: such a static would be shared by
/// all BinaryContext instances, would be written without synchronization from
/// a const method, and would permanently keep the fallback value if the
/// threshold were queried before the profile counters were filled in.
uint64_t getHotThreshold() const {
  if (NumProfiledFuncs == 0)
    return 1;
  return SumExecutionCount / NumProfiledFuncs;
}

/// Print the string name for a CFI operation.
static void printCFI(raw_ostream &OS, uint32_t Operation);
static void printCFI(raw_ostream &OS, const MCCFIInstruction &Inst);

/// Print a single MCInst in native format. If Function is non-null,
/// the instruction will be annotated with CFI and possibly DWARF line table
Expand Down
71 changes: 67 additions & 4 deletions bolt/BinaryFunction.cpp
Expand Up @@ -150,7 +150,7 @@ constexpr unsigned NoRegister = 0;

constexpr const char *DynoStats::Desc[];
constexpr unsigned BinaryFunction::MinAlign;

namespace {

/// Gets debug line information for the instruction located at the given
Expand Down Expand Up @@ -535,16 +535,15 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation,
for (auto &Elmt : OffsetToCFI) {
OS << format(" %08x:\t", Elmt.first);
assert(Elmt.second < FrameInstructions.size() && "Incorrect CFI offset");
BinaryContext::printCFI(OS,
FrameInstructions[Elmt.second].getOperation());
BinaryContext::printCFI(OS, FrameInstructions[Elmt.second]);
OS << "\n";
}
} else {
// Post-buildCFG information
for (uint32_t I = 0, E = FrameInstructions.size(); I != E; ++I) {
const MCCFIInstruction &CFI = FrameInstructions[I];
OS << format(" %d:\t", I);
BinaryContext::printCFI(OS, CFI.getOperation());
BinaryContext::printCFI(OS, CFI);
OS << "\n";
}
}
Expand Down Expand Up @@ -3442,6 +3441,54 @@ void BinaryFunction::updateLayout(LayoutType Type,
updateLayoutIndices();
}

/// Retarget the jump table used by the indirect branch that ends \p BB so
/// that entries pointing to \p OldDest point to \p NewDest instead.
///
/// Returns false when \p BB has no last non-pseudo instruction, when that
/// instruction is not an indirect branch, or when no entry was replaced.
/// Failing to find the entry is treated as a programming error and asserts in
/// builds with assertions enabled.
bool BinaryFunction::replaceJumpTableEntryIn(BinaryBasicBlock *BB,
                                             BinaryBasicBlock *OldDest,
                                             BinaryBasicBlock *NewDest) {
  auto *Instr = BB->getLastNonPseudoInstr();
  if (!Instr || !BC.MIA->isIndirectBranch(*Instr))
    return false;
  auto JTAddress = BC.MIA->getJumpTable(*Instr);
  assert(JTAddress && "Invalid jump table address");
  auto *JT = getJumpTableContainingAddress(JTAddress);
  assert(JT && "No jump table structure for this indirect branch");
  const bool Patched = JT->replaceDestination(JTAddress, OldDest->getLabel(),
                                              NewDest->getLabel());
  assert(Patched && "Invalid entry to be replaced in jump table");
  // Report the actual outcome so that builds without assertions neither leave
  // Patched unused nor claim success when nothing was replaced.
  return Patched;
}

/// Split the CFG edge \p From -> \p To by placing a new basic block on it and
/// return that block. The new block takes over the edge's execution and
/// misprediction counts, gets \p To as its only successor and inherits the
/// cold flag of \p From. Any jump table entry at the end of \p From that
/// targets \p To is redirected to the new block as well.
BinaryBasicBlock *BinaryFunction::splitEdge(BinaryBasicBlock *From,
                                            BinaryBasicBlock *To) {
  // Build the block that will sit in the middle of the edge.
  MCSymbol *MiddleLabel = BC.Ctx->createTempSymbol("SplitEdge", true);
  auto MiddleOwner = createBasicBlock(0, MiddleLabel);
  auto Middle = MiddleOwner.get();

  // Find the From -> To edge, advancing the branch-info iterator in lockstep
  // with the successor iterator.
  auto SuccIt = From->succ_begin();
  auto InfoIt = From->branch_info_begin();
  while (SuccIt != From->succ_end() && *SuccIt != To) {
    ++SuccIt;
    ++InfoIt;
  }
  assert(SuccIt != From->succ_end() && "Invalid CFG edge in splitEdge!");
  const uint64_t EdgeCount = InfoIt->Count;
  const uint64_t EdgeMispreds = InfoIt->MispredictedCount;

  // Redirect "From" to the new block; the jump table is patched before the
  // successor list is rewritten.
  replaceJumpTableEntryIn(From, To, Middle);
  From->replaceSuccessor(To, Middle, EdgeCount, EdgeMispreds);

  // Wire the new block up to the original destination.
  Middle->addSuccessor(To, EdgeCount, EdgeMispreds);
  Middle->setExecutionCount(EdgeCount);
  Middle->setIsCold(From->isCold());

  // Hand ownership of the new block to the function, inserting it relative to
  // "From" so that CFI and BB layout are updated for it.
  std::vector<std::unique_ptr<BinaryBasicBlock>> Inserted;
  Inserted.emplace_back(std::move(MiddleOwner));
  insertBasicBlocks(From, std::move(Inserted), true, true);
  return Middle;
}

bool BinaryFunction::isSymbolValidInScope(const SymbolRef &Symbol,
uint64_t SymbolSize) const {
// Some symbols are tolerated inside function bodies, others are not.
Expand Down Expand Up @@ -3578,6 +3625,22 @@ BinaryFunction::JumpTable::getEntriesForAddress(const uint64_t Addr) const {
return std::make_pair(StartIndex, EndIndex);
}

bool BinaryFunction::JumpTable::replaceDestination(uint64_t JTAddress,
const MCSymbol *OldDest,
MCSymbol *NewDest) {
bool Patched{false};
const auto Range = getEntriesForAddress(JTAddress);
for (auto I = &Entries[Range.first], E = &Entries[Range.second];
I != E; ++I) {
auto &Entry = *I;
if (Entry == OldDest) {
Patched = true;
Entry = NewDest;
}
}
return Patched;
}

void BinaryFunction::JumpTable::updateOriginal(BinaryContext &BC) {
// In non-relocation mode we have to emit jump tables in local sections.
// This way we only overwrite them when a corresponding function is
Expand Down
20 changes: 20 additions & 0 deletions bolt/BinaryFunction.h
Expand Up @@ -624,6 +624,11 @@ class BinaryFunction {
/// Total number of times this jump table was used.
uint64_t Count{0};

/// Change all entries of the jump table in \p JTAddress pointing to
/// \p OldDest to \p NewDest. Return false if unsuccessful.
bool replaceDestination(uint64_t JTAddress, const MCSymbol *OldDest,
MCSymbol *NewDest);

/// Update jump table at its original location.
void updateOriginal(BinaryContext &BC);

Expand Down Expand Up @@ -1368,6 +1373,21 @@ class BinaryFunction {
/// new blocks into the CFG. This must be called after updateLayout.
void updateCFIState(BinaryBasicBlock *Start, const unsigned NumNewBlocks);

/// Change \p OldDest to \p NewDest in the jump table used at the end of
/// \p BB. Returns false if \p OldDest couldn't be found as a valid target
/// and no replacement took place.
bool replaceJumpTableEntryIn(BinaryBasicBlock *BB,
BinaryBasicBlock *OldDest,
BinaryBasicBlock *NewDest);

/// Split the CFG edge <From, To> by inserting an intermediate basic block.
/// Returns a pointer to this new intermediate basic block. BB "From" will be
/// updated to jump to the intermediate block, which in turn will have an
/// unconditional branch to BB "To".
/// User needs to manually call fixBranches(). This function only creates the
/// correct CFG edges.
BinaryBasicBlock *splitEdge(BinaryBasicBlock *From, BinaryBasicBlock *To);

/// Determine direction of the branch based on the current layout.
/// Callee is responsible of updating basic block indices prior to using
/// this function (e.g. by calling BinaryFunction::updateLayoutIndices()).
Expand Down

0 comments on commit d850ca3

Please sign in to comment.