45 changes: 26 additions & 19 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include <algorithm>
#include <functional>
#include <iterator>
#include <numeric>
#include <unordered_set>

using namespace llvm;
Expand Down Expand Up @@ -2189,27 +2190,31 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
// Create symbols in the LocalCtx so that they get destroyed with it.
MCSymbol *StartLabel = LocalCtx->createTempSymbol();
MCSymbol *EndLabel = LocalCtx->createTempSymbol();
MCSymbol *ColdStartLabel = LocalCtx->createTempSymbol();
MCSymbol *ColdEndLabel = LocalCtx->createTempSymbol();

Streamer->switchSection(Section);
Streamer->emitLabel(StartLabel);
emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/false,
emitFunctionBody(*Streamer, BF, BF.getLayout().getMainFragment(),
/*EmitCodeOnly=*/true);
Streamer->emitLabel(EndLabel);

if (BF.isSplit()) {
MCSectionELF *ColdSection =
LocalCtx->getELFSection(BF.getColdCodeSectionName(), ELF::SHT_PROGBITS,
ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
ColdSection->setHasInstructions(true);

Streamer->switchSection(ColdSection);
Streamer->emitLabel(ColdStartLabel);
emitFunctionBody(*Streamer, BF, /*EmitColdPart=*/true,
/*EmitCodeOnly=*/true);
Streamer->emitLabel(ColdEndLabel);
// To avoid calling MCObjectStreamer::flushPendingLabels() which is private
using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
SmallVector<LabelRange> SplitLabels;
for (const FunctionFragment FF : BF.getLayout().getSplitFragments()) {
MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
SplitLabels.emplace_back(SplitStartLabel, SplitEndLabel);

MCSectionELF *const SplitSection = LocalCtx->getELFSection(
BF.getCodeSectionName(FF.getFragmentNum()), ELF::SHT_PROGBITS,
ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
SplitSection->setHasInstructions(true);
Streamer->switchSection(SplitSection);

Streamer->emitLabel(SplitStartLabel);
emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
Streamer->emitLabel(SplitEndLabel);
// Emit an empty byte string below instead of calling
// MCObjectStreamer::flushPendingLabels(), which is private.
Streamer->emitBytes(StringRef(""));
Streamer->switchSection(Section);
}
Expand All @@ -2225,10 +2230,12 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {

const uint64_t HotSize =
Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
const uint64_t ColdSize = BF.isSplit()
? Layout.getSymbolOffset(*ColdEndLabel) -
Layout.getSymbolOffset(*ColdStartLabel)
: 0ULL;
const uint64_t ColdSize =
std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL,
[&](const uint64_t Accu, const LabelRange &Labels) {
return Accu + Layout.getSymbolOffset(*Labels.second) -
Layout.getSymbolOffset(*Labels.first);
});

// Clean-up the effect of the code emission.
for (const MCSymbol &Symbol : Assembler.symbols()) {
Expand Down
74 changes: 43 additions & 31 deletions bolt/lib/Core/BinaryEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,15 @@ class BinaryEmitter {

/// Emit function code. The caller is responsible for emitting function
/// symbol(s) and setting the section to emit the code to.
void emitFunctionBody(BinaryFunction &BF, bool EmitColdPart,
void emitFunctionBody(BinaryFunction &BF, const FunctionFragment &FF,
bool EmitCodeOnly = false);

private:
/// Emit function code.
void emitFunctions();

/// Emit a single function.
bool emitFunction(BinaryFunction &BF, bool EmitColdPart);
bool emitFunction(BinaryFunction &BF, const FunctionFragment &FF);

/// Helper for emitFunctionBody to write data inside a function
/// (used for AArch64)
Expand Down Expand Up @@ -234,13 +234,24 @@ void BinaryEmitter::emitFunctions() {
!Function->hasValidProfile())
Streamer.setAllowAutoPadding(false);

Emitted |= emitFunction(*Function, /*EmitColdPart=*/false);
const FunctionLayout &Layout = Function->getLayout();
Emitted |= emitFunction(*Function, Layout.getMainFragment());

if (Function->isSplit()) {
if (opts::X86AlignBranchBoundaryHotOnly)
Streamer.setAllowAutoPadding(false);
Emitted |= emitFunction(*Function, /*EmitColdPart=*/true);

assert((Layout.fragment_size() == 1 || Function->isSimple()) &&
"Only simple functions can have fragments");
for (const FunctionFragment FF : Layout.getSplitFragments()) {
// Skip empty fragments (unless the function has a constant island) so
// that no symbols or sections are generated for them.
if (FF.empty() && !Function->hasConstantIsland())
continue;
Emitted |= emitFunction(*Function, FF);
}
}

Streamer.setAllowAutoPadding(OriginalAllowAutoPadding);

if (Emitted)
Expand Down Expand Up @@ -268,16 +279,16 @@ void BinaryEmitter::emitFunctions() {
}
}

bool BinaryEmitter::emitFunction(BinaryFunction &Function, bool EmitColdPart) {
bool BinaryEmitter::emitFunction(BinaryFunction &Function,
const FunctionFragment &FF) {
if (Function.size() == 0 && !Function.hasIslandsInfo())
return false;

if (Function.getState() == BinaryFunction::State::Empty)
return false;

MCSection *Section =
BC.getCodeSection(EmitColdPart ? Function.getColdCodeSectionName()
: Function.getCodeSectionName());
BC.getCodeSection(Function.getCodeSectionName(FF.getFragmentNum()));
Streamer.switchSection(Section);
Section->setHasInstructions(true);
BC.Ctx->addGenDwarfSection(Section);
Expand All @@ -290,8 +301,9 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, bool EmitColdPart) {
Section->setAlignment(Align(opts::AlignFunctions));

Streamer.emitCodeAlignment(BinaryFunction::MinAlign, &*BC.STI);
uint16_t MaxAlignBytes = EmitColdPart ? Function.getMaxColdAlignmentBytes()
: Function.getMaxAlignmentBytes();
uint16_t MaxAlignBytes = FF.isSplitFragment()
? Function.getMaxColdAlignmentBytes()
: Function.getMaxAlignmentBytes();
if (MaxAlignBytes > 0)
Streamer.emitCodeAlignment(Function.getAlignment(), &*BC.STI,
MaxAlignBytes);
Expand All @@ -302,17 +314,15 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, bool EmitColdPart) {
MCContext &Context = Streamer.getContext();
const MCAsmInfo *MAI = Context.getAsmInfo();

MCSymbol *StartSymbol = nullptr;
MCSymbol *const StartSymbol = Function.getSymbol(FF.getFragmentNum());

// Emit all symbols associated with the main function entry.
if (!EmitColdPart) {
StartSymbol = Function.getSymbol();
if (FF.isMainFragment()) {
for (MCSymbol *Symbol : Function.getSymbols()) {
Streamer.emitSymbolAttribute(Symbol, MCSA_ELF_TypeFunction);
Streamer.emitLabel(Symbol);
}
} else {
StartSymbol = Function.getColdSymbol();
Streamer.emitSymbolAttribute(StartSymbol, MCSA_ELF_TypeFunction);
Streamer.emitLabel(StartSymbol);
}
Expand All @@ -323,8 +333,9 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, bool EmitColdPart) {
if (Function.getPersonalityFunction() != nullptr)
Streamer.emitCFIPersonality(Function.getPersonalityFunction(),
Function.getPersonalityEncoding());
MCSymbol *LSDASymbol =
EmitColdPart ? Function.getColdLSDASymbol() : Function.getLSDASymbol();
MCSymbol *LSDASymbol = FF.isSplitFragment()
? Function.getColdLSDASymbol(FF.getFragmentNum())
: Function.getLSDASymbol();
if (LSDASymbol)
Streamer.emitCFILsda(LSDASymbol, BC.LSDAEncoding);
else
Expand Down Expand Up @@ -353,7 +364,7 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, bool EmitColdPart) {
}

// Emit code.
emitFunctionBody(Function, EmitColdPart, /*EmitCodeOnly=*/false);
emitFunctionBody(Function, FF, /*EmitCodeOnly=*/false);

// Emit padding if requested.
if (size_t Padding = opts::padFunction(Function)) {
Expand All @@ -369,8 +380,7 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, bool EmitColdPart) {
if (Function.hasCFI())
Streamer.emitCFIEndProc();

MCSymbol *EndSymbol = EmitColdPart ? Function.getFunctionColdEndLabel()
: Function.getFunctionEndLabel();
MCSymbol *EndSymbol = Function.getFunctionEndLabel(FF.getFragmentNum());
Streamer.emitLabel(EndSymbol);

if (MAI->hasDotTypeDotSizeDirective()) {
Expand All @@ -384,21 +394,22 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, bool EmitColdPart) {
emitLineInfoEnd(Function, EndSymbol);

// Exception handling info for the function.
emitLSDA(Function, EmitColdPart);
emitLSDA(Function, FF.isSplitFragment());

if (!EmitColdPart && opts::JumpTables > JTS_NONE)
if (FF.isMainFragment() && opts::JumpTables > JTS_NONE)
emitJumpTables(Function);

return true;
}

void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, bool EmitColdPart,
void BinaryEmitter::emitFunctionBody(BinaryFunction &BF,
const FunctionFragment &FF,
bool EmitCodeOnly) {
if (!EmitCodeOnly && EmitColdPart && BF.hasConstantIsland())
if (!EmitCodeOnly && FF.isSplitFragment() && BF.hasConstantIsland()) {
assert(BF.getLayout().isHotColdSplit() &&
"Constant island support only with hot/cold split");
BF.duplicateConstantIslands();

const FunctionFragment FF = BF.getLayout().getFragment(
EmitColdPart ? FragmentNum::cold() : FragmentNum::hot());
}

if (!FF.empty() && FF.front()->isLandingPad()) {
assert(!FF.front()->isEntryPoint() &&
Expand Down Expand Up @@ -488,7 +499,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, bool EmitColdPart,
}

if (!EmitCodeOnly)
emitConstantIslands(BF, EmitColdPart);
emitConstantIslands(BF, FF.isSplitFragment());
}

void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart,
Expand Down Expand Up @@ -897,14 +908,15 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, bool EmitColdPart) {
Streamer.emitValueToAlignment(TTypeAlignment);

// Emit the LSDA label.
MCSymbol *LSDASymbol =
EmitColdPart ? BF.getColdLSDASymbol() : BF.getLSDASymbol();
MCSymbol *LSDASymbol = EmitColdPart
? BF.getColdLSDASymbol(FragmentNum::cold())
: BF.getLSDASymbol();
assert(LSDASymbol && "no LSDA symbol set");
Streamer.emitLabel(LSDASymbol);

// Corresponding FDE start.
const MCSymbol *StartSymbol =
EmitColdPart ? BF.getColdSymbol() : BF.getSymbol();
BF.getSymbol(EmitColdPart ? FragmentNum::cold() : FragmentNum::main());

// Emit the LSDA header.

Expand Down Expand Up @@ -1148,9 +1160,9 @@ void emitBinaryContext(MCStreamer &Streamer, BinaryContext &BC,
}

void emitFunctionBody(MCStreamer &Streamer, BinaryFunction &BF,
bool EmitColdPart, bool EmitCodeOnly) {
const FunctionFragment &FF, bool EmitCodeOnly) {
BinaryEmitter(Streamer, BF.getBinaryContext())
.emitFunctionBody(BF, EmitColdPart, EmitCodeOnly);
.emitFunctionBody(BF, FF, EmitCodeOnly);
}

} // namespace bolt
Expand Down
115 changes: 70 additions & 45 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -1572,8 +1573,9 @@ bool BinaryFunction::scanExternalRefs() {
if (BC.HasRelocations) {
for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
BC.UndefinedSymbols.insert(LI.second);
if (FunctionEndLabel)
BC.UndefinedSymbols.insert(FunctionEndLabel);
for (MCSymbol *const EndLabel : FunctionEndLabels)
if (EndLabel)
BC.UndefinedSymbols.insert(EndLabel);
}

clearList(Relocations);
Expand Down Expand Up @@ -2843,8 +2845,9 @@ void BinaryFunction::clearDisasmState() {
if (BC.HasRelocations) {
for (std::pair<const uint32_t, MCSymbol *> &LI : Labels)
BC.UndefinedSymbols.insert(LI.second);
if (FunctionEndLabel)
BC.UndefinedSymbols.insert(FunctionEndLabel);
for (MCSymbol *const EndLabel : FunctionEndLabels)
if (EndLabel)
BC.UndefinedSymbols.insert(EndLabel);
}
}

Expand Down Expand Up @@ -3260,8 +3263,8 @@ void BinaryFunction::fixBranches() {
// could be "short", then prioritize short for "taken". This will
// generate a sequence 1 byte shorter on x86.
if (IsSupported && BC.isX86() &&
TSuccessor->isCold() != FSuccessor->isCold() &&
BB->isCold() != TSuccessor->isCold()) {
TSuccessor->getFragmentNum() != FSuccessor->getFragmentNum() &&
BB->getFragmentNum() != TSuccessor->getFragmentNum()) {
std::swap(TSuccessor, FSuccessor);
{
auto L = BC.scopeLock();
Expand Down Expand Up @@ -4035,9 +4038,6 @@ void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
}

const uint64_t BaseAddress = getCodeSection()->getOutputAddress();
ErrorOr<BinarySection &> ColdSection = getColdCodeSection();
const uint64_t ColdBaseAddress =
isSplit() ? ColdSection->getOutputAddress() : 0;
if (BC.HasRelocations || isInjected()) {
const uint64_t StartOffset = Layout.getSymbolOffset(*getSymbol());
const uint64_t EndOffset = Layout.getSymbolOffset(*getFunctionEndLabel());
Expand All @@ -4049,20 +4049,35 @@ void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
setOutputDataAddress(BaseAddress + DataOffset);
}
if (isSplit()) {
const MCSymbol *ColdStartSymbol = getColdSymbol();
assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
"split function should have defined cold symbol");
const MCSymbol *ColdEndSymbol = getFunctionColdEndLabel();
assert(ColdEndSymbol && ColdEndSymbol->isDefined() &&
"split function should have defined cold end symbol");
const uint64_t ColdStartOffset = Layout.getSymbolOffset(*ColdStartSymbol);
const uint64_t ColdEndOffset = Layout.getSymbolOffset(*ColdEndSymbol);
cold().setAddress(ColdBaseAddress + ColdStartOffset);
cold().setImageSize(ColdEndOffset - ColdStartOffset);
if (hasConstantIsland()) {
const uint64_t DataOffset =
Layout.getSymbolOffset(*getFunctionColdConstantIslandLabel());
setOutputColdDataAddress(ColdBaseAddress + DataOffset);
for (const FunctionFragment &FF : getLayout().getSplitFragments()) {
ErrorOr<BinarySection &> ColdSection =
getCodeSection(FF.getFragmentNum());
// If fragment is empty, cold section might not exist
if (FF.empty() && ColdSection.getError())
continue;
const uint64_t ColdBaseAddress = ColdSection->getOutputAddress();

const MCSymbol *ColdStartSymbol = getSymbol(FF.getFragmentNum());
// If fragment is empty, symbol might not have been emitted
if (FF.empty() && (!ColdStartSymbol || !ColdStartSymbol->isDefined()) &&
!hasConstantIsland())
continue;
assert(ColdStartSymbol && ColdStartSymbol->isDefined() &&
"split function should have defined cold symbol");
const MCSymbol *ColdEndSymbol =
getFunctionEndLabel(FF.getFragmentNum());
assert(ColdEndSymbol && ColdEndSymbol->isDefined() &&
"split function should have defined cold end symbol");
const uint64_t ColdStartOffset =
Layout.getSymbolOffset(*ColdStartSymbol);
const uint64_t ColdEndOffset = Layout.getSymbolOffset(*ColdEndSymbol);
cold().setAddress(ColdBaseAddress + ColdStartOffset);
cold().setImageSize(ColdEndOffset - ColdStartOffset);
if (hasConstantIsland()) {
const uint64_t DataOffset =
Layout.getSymbolOffset(*getFunctionColdConstantIslandLabel());
setOutputColdDataAddress(ColdBaseAddress + DataOffset);
}
}
}
} else {
Expand All @@ -4084,32 +4099,42 @@ void BinaryFunction::updateOutputValues(const MCAsmLayout &Layout) {
if (getLayout().block_empty())
return;

assert((getLayout().isHotColdSplit() ||
(cold().getAddress() == 0 && cold().getImageSize() == 0 &&
BC.HasRelocations)) &&
"Function must be split two ways or cold fragment must have no "
"address (only in relocation mode)");

BinaryBasicBlock *PrevBB = nullptr;
for (BinaryBasicBlock *BB : this->Layout.blocks()) {
assert(BB->getLabel()->isDefined() && "symbol should be defined");
const uint64_t BBBaseAddress = BB->isCold() ? ColdBaseAddress : BaseAddress;
if (!BC.HasRelocations) {
if (BB->isCold()) {
assert(BBBaseAddress == cold().getAddress());
} else {
assert(BBBaseAddress == getOutputAddress());
for (const FunctionFragment &FF : getLayout().fragments()) {
const uint64_t FragmentBaseAddress =
getCodeSection(isSimple() ? FF.getFragmentNum() : FragmentNum::main())
->getOutputAddress();
for (BinaryBasicBlock *const BB : FF) {
assert(BB->getLabel()->isDefined() && "symbol should be defined");
if (!BC.HasRelocations) {
if (BB->isSplit()) {
assert(FragmentBaseAddress == cold().getAddress());
} else {
assert(FragmentBaseAddress == getOutputAddress());
}
}
}
const uint64_t BBOffset = Layout.getSymbolOffset(*BB->getLabel());
const uint64_t BBAddress = BBBaseAddress + BBOffset;
BB->setOutputStartAddress(BBAddress);

if (PrevBB) {
uint64_t PrevBBEndAddress = BBAddress;
if (BB->isCold() != PrevBB->isCold())
PrevBBEndAddress = getOutputAddress() + getOutputSize();
PrevBB->setOutputEndAddress(PrevBBEndAddress);
}
PrevBB = BB;
const uint64_t BBOffset = Layout.getSymbolOffset(*BB->getLabel());
const uint64_t BBAddress = FragmentBaseAddress + BBOffset;
BB->setOutputStartAddress(BBAddress);

if (PrevBB) {
uint64_t PrevBBEndAddress = BBAddress;
if (BB->isSplit() != PrevBB->isSplit())
PrevBBEndAddress = getOutputAddress() + getOutputSize();
PrevBB->setOutputEndAddress(PrevBBEndAddress);
}
PrevBB = BB;

BB->updateOutputValues(Layout);
BB->updateOutputValues(Layout);
}
}
PrevBB->setOutputEndAddress(PrevBB->isCold()
PrevBB->setOutputEndAddress(PrevBB->isSplit()
? cold().getAddress() + cold().getImageSize()
: getOutputAddress() + getOutputSize());
}
Expand Down
184 changes: 83 additions & 101 deletions bolt/lib/Core/Exceptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -367,115 +367,97 @@ void BinaryFunction::updateEHRanges() {
uint64_t Action;
};

// If previous call can throw, this is its exception handler.
EHInfo PreviousEH = {nullptr, 0};

// Marker for the beginning of exceptions range.
const MCSymbol *StartRange = nullptr;

// Indicates whether the start range is located in a cold part.
bool IsStartInCold = false;

// Have we crossed hot/cold border for split functions?
bool SeenCold = false;

// Sites to update - either regular or cold.
CallSitesType *Sites = &CallSites;
for (const FunctionFragment FF : getLayout().fragments()) {
// Sites to update - either regular or cold.
CallSitesType &Sites = FF.isMainFragment() ? CallSites : ColdCallSites;

// If previous call can throw, this is its exception handler.
EHInfo PreviousEH = {nullptr, 0};

// Marker for the beginning of exceptions range.
const MCSymbol *StartRange = nullptr;

for (BinaryBasicBlock *const BB : FF) {
for (auto II = BB->begin(); II != BB->end(); ++II) {
if (!BC.MIB->isCall(*II))
continue;

// Instruction can throw an exception that should be handled.
const bool Throws = BC.MIB->isInvoke(*II);

// Ignore the call if it's a continuation of a no-throw gap.
if (!Throws && !StartRange)
continue;

assert(getLayout().isHotColdSplit() &&
"Exceptions only supported for hot/cold splitting");

// Extract exception handling information from the instruction.
const MCSymbol *LP = nullptr;
uint64_t Action = 0;
if (const Optional<MCPlus::MCLandingPad> EHInfo =
BC.MIB->getEHInfo(*II))
std::tie(LP, Action) = *EHInfo;

// No action if the exception handler has not changed.
if (Throws && StartRange && PreviousEH.LP == LP &&
PreviousEH.Action == Action)
continue;

// Same symbol is used for the beginning and the end of the range.
const MCSymbol *EHSymbol;
MCInst EHLabel;
{
std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
EHSymbol = BC.Ctx->createNamedTempSymbol("EH");
BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get());
}

for (BinaryBasicBlock *BB : getLayout().blocks()) {
II = std::next(BB->insertPseudoInstr(II, EHLabel));

// At this point we could be in one of the following states:
//
// I. Exception handler has changed and we need to close previous range
// and start a new one.
//
// II. Start a new exception range after the gap.
//
// III. Close current exception range and start a new gap.
const MCSymbol *EndRange;
if (StartRange) {
// I, III:
EndRange = EHSymbol;
} else {
// II:
StartRange = EHSymbol;
EndRange = nullptr;
}

if (BB->isCold() && !SeenCold) {
SeenCold = true;
// Close the previous range.
if (EndRange) {
Sites.emplace_back(
CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
}

// Close the range (if any) and change the target call sites.
if (StartRange) {
Sites->emplace_back(CallSite{StartRange, getFunctionEndLabel(),
PreviousEH.LP, PreviousEH.Action});
if (Throws) {
// I, II:
StartRange = EHSymbol;
PreviousEH = EHInfo{LP, Action};
} else {
StartRange = nullptr;
}
}
Sites = &ColdCallSites;

// Reset the range.
StartRange = nullptr;
PreviousEH = {nullptr, 0};
}

for (auto II = BB->begin(); II != BB->end(); ++II) {
if (!BC.MIB->isCall(*II))
continue;

// Instruction can throw an exception that should be handled.
const bool Throws = BC.MIB->isInvoke(*II);

// Ignore the call if it's a continuation of a no-throw gap.
if (!Throws && !StartRange)
continue;

// Extract exception handling information from the instruction.
const MCSymbol *LP = nullptr;
uint64_t Action = 0;
if (const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(*II))
std::tie(LP, Action) = *EHInfo;

// No action if the exception handler has not changed.
if (Throws && StartRange && PreviousEH.LP == LP &&
PreviousEH.Action == Action)
continue;

// Same symbol is used for the beginning and the end of the range.
const MCSymbol *EHSymbol;
MCInst EHLabel;
{
std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
EHSymbol = BC.Ctx->createNamedTempSymbol("EH");
BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get());
}

II = std::next(BB->insertPseudoInstr(II, EHLabel));

// At this point we could be in one of the following states:
//
// I. Exception handler has changed and we need to close previous range
// and start a new one.
//
// II. Start a new exception range after the gap.
//
// III. Close current exception range and start a new gap.
const MCSymbol *EndRange;
if (StartRange) {
// I, III:
EndRange = EHSymbol;
} else {
// II:
StartRange = EHSymbol;
IsStartInCold = SeenCold;
EndRange = nullptr;
}

// Close the previous range.
if (EndRange) {
Sites->emplace_back(
CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
}

if (Throws) {
// I, II:
StartRange = EHSymbol;
IsStartInCold = SeenCold;
PreviousEH = EHInfo{LP, Action};
} else {
StartRange = nullptr;
}
// Check if we need to close the range.
if (StartRange) {
assert((FF.isMainFragment() || &Sites == &ColdCallSites) &&
"sites mismatch");
const MCSymbol *EndRange = getFunctionEndLabel(FF.getFragmentNum());
Sites.emplace_back(
CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
}
}

// Check if we need to close the range.
if (StartRange) {
assert((!isSplit() || Sites == &ColdCallSites) && "sites mismatch");
const MCSymbol *EndRange =
IsStartInCold ? getFunctionColdEndLabel() : getFunctionEndLabel();
Sites->emplace_back(
CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
}
}

const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
Expand Down
20 changes: 19 additions & 1 deletion bolt/lib/Core/FunctionLayout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/edit_distance.h"
#include <algorithm>
#include <cstddef>
#include <functional>

using namespace llvm;
Expand Down Expand Up @@ -61,18 +62,35 @@ void FunctionLayout::eraseBasicBlocks(
NewFragments.emplace_back(0);
for (const FunctionFragment FF : fragments()) {
unsigned ErasedBlocks = count_if(FF, IsErased);
// Always record the fragment's new end index, even if the fragment is
// empty after removing blocks; empty trailing fragments are dropped below.
unsigned NewFragment = NewFragments.back() + FF.size() - ErasedBlocks;
NewFragments.emplace_back(NewFragment);
}
llvm::erase_if(Blocks, IsErased);
Fragments = std::move(NewFragments);

// Remove empty fragments at the end
const_iterator EmptyTailBegin =
llvm::find_if_not(reverse(fragments()), [](const FunctionFragment &FF) {
return FF.empty();
}).base();
if (EmptyTailBegin != fragment_end()) {
// Add +1 for one-past-the-end entry
const FunctionFragment TailBegin = *EmptyTailBegin;
unsigned NewFragmentSize = TailBegin.getFragmentNum().get() + 1;
Fragments.resize(NewFragmentSize);
}

updateLayoutIndices();
}

void FunctionLayout::updateLayoutIndices() const {
unsigned BlockIndex = 0;
for (const FunctionFragment FF : fragments()) {
for (BinaryBasicBlock *const BB : FF)
for (BinaryBasicBlock *const BB : FF) {
BB->setLayoutIndex(BlockIndex++);
BB->setFragmentNum(FF.getFragmentNum());
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Passes/Aligner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void alignCompact(BinaryFunction &Function, const MCCodeEmitter *Emitter) {
size_t ColdSize = 0;

for (const BinaryBasicBlock &BB : Function)
if (BB.isCold())
if (BB.isSplit())
ColdSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);
else
HotSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter);
Expand Down
8 changes: 4 additions & 4 deletions bolt/lib/Passes/BinaryFunctionCallGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {

BinaryFunctionCallGraph
buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
bool IncludeColdCalls, bool UseFunctionHotSize,
bool IncludeSplitCalls, bool UseFunctionHotSize,
bool UseSplitHotSize, bool UseEdgeCounts,
bool IgnoreRecursiveCalls) {
NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
Expand Down Expand Up @@ -216,16 +216,16 @@ buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
}
} else {
for (BinaryBasicBlock *BB : Function->getLayout().blocks()) {
// Don't count calls from cold blocks unless requested.
if (BB->isCold() && !IncludeColdCalls)
// Don't count calls from split blocks unless requested.
if (BB->isSplit() && !IncludeSplitCalls)
continue;

// Determine whether the block is included in Function's (hot) size
// See BinaryFunction::estimateHotSize
bool BBIncludedInFunctionSize = false;
if (UseFunctionHotSize && Function->isSplit()) {
if (UseSplitHotSize)
BBIncludedInFunctionSize = !BB->isCold();
BBIncludedInFunctionSize = !BB->isSplit();
else
BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
} else {
Expand Down
60 changes: 29 additions & 31 deletions bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -592,41 +592,39 @@ void LowerAnnotations::runOnFunctions(BinaryContext &BC) {

for (auto &It : BC.getBinaryFunctions()) {
BinaryFunction &BF = It.second;
int64_t CurrentGnuArgsSize = 0;

// Have we crossed hot/cold border for split functions?
bool SeenCold = false;

for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
if (BB->isCold() && !SeenCold) {
SeenCold = true;
CurrentGnuArgsSize = 0;
}

// First convert GnuArgsSize annotations into CFIs. This may change instr
// pointers, so do it before recording ptrs for preserved annotations
if (BF.usesGnuArgsSize()) {
for (auto II = BB->begin(); II != BB->end(); ++II) {
if (!BC.MIB->isInvoke(*II))
continue;
const int64_t NewGnuArgsSize = BC.MIB->getGnuArgsSize(*II);
assert(NewGnuArgsSize >= 0 && "expected non-negative GNU_args_size");
if (NewGnuArgsSize != CurrentGnuArgsSize) {
auto InsertII = BF.addCFIInstruction(
BB, II,
MCCFIInstruction::createGnuArgsSize(nullptr, NewGnuArgsSize));
CurrentGnuArgsSize = NewGnuArgsSize;
II = std::next(InsertII);
for (const FunctionFragment FF : BF.getLayout().fragments()) {
int64_t CurrentGnuArgsSize = 0;

for (BinaryBasicBlock *const BB : FF) {
// First convert GnuArgsSize annotations into CFIs. This may change
// instr pointers, so do it before recording ptrs for preserved
// annotations
if (BF.usesGnuArgsSize()) {
for (auto II = BB->begin(); II != BB->end(); ++II) {
if (!BC.MIB->isInvoke(*II))
continue;
const int64_t NewGnuArgsSize = BC.MIB->getGnuArgsSize(*II);
assert(NewGnuArgsSize >= 0 &&
"expected non-negative GNU_args_size");
if (NewGnuArgsSize != CurrentGnuArgsSize) {
auto InsertII = BF.addCFIInstruction(
BB, II,
MCCFIInstruction::createGnuArgsSize(nullptr, NewGnuArgsSize));
CurrentGnuArgsSize = NewGnuArgsSize;
II = std::next(InsertII);
}
}
}
}

// Now record preserved annotations separately and then strip annotations.
for (auto II = BB->begin(); II != BB->end(); ++II) {
if (BF.requiresAddressTranslation() && BC.MIB->getOffset(*II))
PreservedOffsetAnnotations.emplace_back(&(*II),
*BC.MIB->getOffset(*II));
BC.MIB->stripAnnotations(*II);
// Now record preserved annotations separately and then strip
// annotations.
for (auto II = BB->begin(); II != BB->end(); ++II) {
if (BF.requiresAddressTranslation() && BC.MIB->getOffset(*II))
PreservedOffsetAnnotations.emplace_back(&(*II),
*BC.MIB->getOffset(*II));
BC.MIB->stripAnnotations(*II);
}
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions bolt/lib/Passes/IndirectCallPromotion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,9 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB,
MCSymbol *Entry = JT->Entries[I];
assert(BF.getBasicBlockForLabel(Entry) ||
Entry == BF.getFunctionEndLabel() ||
Entry == BF.getFunctionColdEndLabel());
Entry == BF.getFunctionEndLabel(FragmentNum::cold()));
if (Entry == BF.getFunctionEndLabel() ||
Entry == BF.getFunctionColdEndLabel())
Entry == BF.getFunctionEndLabel(FragmentNum::cold()))
continue;
const Location To(Entry);
const BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(Entry);
Expand Down
3 changes: 2 additions & 1 deletion bolt/lib/Passes/LoopInversionPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ bool LoopInversionPass::runOnFunction(BinaryFunction &BF) {
const unsigned BBIndex = BB->getLayoutIndex();
const unsigned SuccBBIndex = SuccBB->getLayoutIndex();
if (SuccBB == PredBB && BB != SuccBB && BBIndex != 0 && SuccBBIndex != 0 &&
SuccBB->succ_size() == 2 && BB->isCold() == SuccBB->isCold()) {
SuccBB->succ_size() == 2 &&
BB->getFragmentNum() == SuccBB->getFragmentNum()) {
// Get the second successor (after loop BB)
BinaryBasicBlock *SecondSucc = nullptr;
for (BinaryBasicBlock *Succ : SuccBB->successors()) {
Expand Down
27 changes: 13 additions & 14 deletions bolt/lib/Passes/ReorderFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,20 +266,19 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
if (opts::ReorderFunctions != RT_NONE &&
opts::ReorderFunctions != RT_EXEC_COUNT &&
opts::ReorderFunctions != RT_USER) {
Cg = buildCallGraph(BC,
[](const BinaryFunction &BF) {
if (!BF.hasProfile())
return true;
if (BF.getState() != BinaryFunction::State::CFG)
return true;
return false;
},
opts::CgFromPerfData,
false, // IncludeColdCalls
opts::ReorderFunctionsUseHotSize,
opts::CgUseSplitHotSize,
opts::UseEdgeCounts,
opts::CgIgnoreRecursiveCalls);
Cg = buildCallGraph(
BC,
[](const BinaryFunction &BF) {
if (!BF.hasProfile())
return true;
if (BF.getState() != BinaryFunction::State::CFG)
return true;
return false;
},
opts::CgFromPerfData,
/*IncludeSplitCalls=*/false, opts::ReorderFunctionsUseHotSize,
opts::CgUseSplitHotSize, opts::UseEdgeCounts,
opts::CgIgnoreRecursiveCalls);
Cg.normalizeArcWeights();
}

Expand Down
215 changes: 161 additions & 54 deletions bolt/lib/Passes/SplitFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,19 @@
//===----------------------------------------------------------------------===//

#include "bolt/Passes/SplitFunctions.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/FunctionLayout.h"
#include "bolt/Core/ParallelUtilities.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
#include <algorithm>
#include <iterator>
#include <numeric>
#include <random>
#include <vector>

Expand Down Expand Up @@ -66,7 +74,7 @@ static cl::opt<unsigned> SplitAlignThreshold(

static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser>
SplitFunctions("split-functions",
cl::desc("split functions into hot and cold regions"),
cl::desc("split functions into fragments"),
cl::cat(BoltOptCategory));

static cl::opt<unsigned> SplitThreshold(
Expand All @@ -77,14 +85,29 @@ static cl::opt<unsigned> SplitThreshold(
"increase after splitting."),
cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool>
RandomSplit("split-random",
cl::desc("split functions randomly into hot/cold regions"),
cl::Hidden);
// Command-line selector for how the split-functions pass partitions the blocks
// of each function into fragments. The default is the profile-driven hot/cold
// split ("profile2").
static cl::opt<SplitFunctionsStrategy> SplitStrategy(
    "split-strategy", cl::init(SplitFunctionsStrategy::Profile2),
    cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2",
                          "split each function into a hot and cold fragment "
                          "using profiling information")),
    cl::values(clEnumValN(
        SplitFunctionsStrategy::Random2, "random2",
        "split each function into a hot and cold fragment at a randomly chosen "
        "split point (ignoring any available profiling information)")),
    // Help text previously read "at a randomly chosen split points".
    cl::values(clEnumValN(
        SplitFunctionsStrategy::RandomN, "randomN",
        "split each function into N fragments at randomly chosen split "
        "points (ignoring any available profiling information)")),
    cl::values(clEnumValN(
        SplitFunctionsStrategy::All, "all",
        "split all basic blocks of each function into fragments such that each "
        "fragment contains exactly a single basic block")),
    cl::desc("strategy used to partition blocks into fragments"),
    cl::cat(BoltOptCategory));
} // namespace opts

namespace {
struct SplitCold {
struct SplitProfile2 {
bool canSplit(const BinaryFunction &BF) {
if (!BF.hasValidProfile())
return false;
Expand All @@ -106,48 +129,98 @@ struct SplitCold {
}

// Assign every block in [Start, End) to the cold fragment. Each block is
// asserted to be outlineable, so the caller must pass only blocks that may be
// moved out of the main fragment.
// NOTE(review): this span comes from a diff view; the index-style for loop
// directly below appears to be the pre-change body (it stops at the first
// non-outlineable block and uses setIsCold), and the std::for_each call after
// it the post-change body. Confirm against the real file.
template <typename It> void partition(const It Start, const It End) const {
for (auto I = Start; I != End; ++I) {
BinaryBasicBlock *BB = *I;
if (!BB->canOutline())
break;
BB->setIsCold(true);
}
std::for_each(Start, End, [](BinaryBasicBlock *const BB) {
assert(BB->canOutline() &&
"Moving a block that is not outlineable to cold fragment");
BB->setFragmentNum(FragmentNum::cold());
});
}
};

// Split strategy that ignores profile data: it draws a random number of blocks
// and moves that many blocks at the tail of the candidate range into the cold
// fragment.
// NOTE(review): this span comes from a diff view, so lines of the pre-change
// SplitRandom and the post-change SplitRandom2 appear side by side (duplicate
// struct header, constructor, DiffT alias, ...). The added comments describe
// the post-change lines; confirm against the real file.
struct SplitRandom {
struct SplitRandom2 {
// Non-owning: the constructor stores the address of the generator passed in by
// reference, so draws advance the caller's generator.
std::minstd_rand0 *Gen;

explicit SplitRandom(std::minstd_rand0 &Gen) : Gen(&Gen) {}
explicit SplitRandom2(std::minstd_rand0 &Gen) : Gen(&Gen) {}

// This strategy accepts every function and every block.
bool canSplit(const BinaryFunction &BF) { return true; }
bool canOutline(const BinaryBasicBlock &BB) { return true; }

// Move a randomly sized suffix of [Start, End) to the cold fragment.
template <typename It> void partition(It Start, It End) const {
using DiffT = typename It::difference_type;
using DiffT = typename std::iterator_traits<It>::difference_type;
const DiffT NumOutlineableBlocks = End - Start;

const It OutlineableBegin = Start;
const It OutlineableEnd =
std::find_if(OutlineableBegin, End, [](const BinaryBasicBlock *BB) {
return !BB->canOutline();
});
const DiffT NumOutlineableBlocks = OutlineableEnd - OutlineableBegin;

// We want to split at least one block unless there are not blocks that can
// We want to split at least one block unless there are no blocks that can
// be outlined
const auto MinimumSplit = std::min<DiffT>(NumOutlineableBlocks, 1);
// uniform_int_distribution draws from the closed interval
// [MinimumSplit, NumOutlineableBlocks], so all blocks may be chosen.
std::uniform_int_distribution<DiffT> Dist(MinimumSplit,
NumOutlineableBlocks);
const DiffT NumColdBlocks = Dist(*Gen);
const It ColdEnd = OutlineableBegin + NumColdBlocks;
// The last NumColdBlocks blocks of the range become cold.
std::for_each(End - NumColdBlocks, End, [](BinaryBasicBlock *BB) {
BB->setFragmentNum(FragmentNum::cold());
});

LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of "
"{1} possible) blocks to split\n",
ColdEnd - OutlineableBegin,
OutlineableEnd - OutlineableBegin));
NumColdBlocks, End - Start));
}
};

// Split strategy that ignores profile data: it picks a random number of
// distinct split points inside the candidate range and starts a new fragment
// at each of them.
struct SplitRandomN {
// Non-owning: the constructor stores the address of the generator passed in by
// reference, so draws advance the caller's generator.
std::minstd_rand0 *Gen;

explicit SplitRandomN(std::minstd_rand0 &Gen) : Gen(&Gen) {}

// NOTE(review): the std::for_each(OutlineableBegin, ColdEnd, ...) statement
// below appears to be a removed line of the pre-change SplitRandom that the
// diff view shows at this position; it does not look like part of this struct.
std::for_each(OutlineableBegin, ColdEnd,
[](BinaryBasicBlock *BB) { BB->setIsCold(true); });
// This strategy accepts every function and every block.
bool canSplit(const BinaryFunction &BF) { return true; }
bool canOutline(const BinaryBasicBlock &BB) { return true; }

// Assign fragment numbers to the blocks in [Start, End): blocks before the
// first drawn split point get number 0, and the number increases by one at
// every split point.
template <typename It> void partition(It Start, It End) const {
using DiffT = typename std::iterator_traits<It>::difference_type;
const DiffT NumOutlineableBlocks = End - Start;

// We want to split at least one fragment if possible
const auto MinimumSplits = std::min<DiffT>(NumOutlineableBlocks, 1);
// Closed interval: up to one split point per block.
std::uniform_int_distribution<DiffT> Dist(MinimumSplits,
NumOutlineableBlocks);
// Choose how many splits to perform
const DiffT NumSplits = Dist(*Gen);

// Draw split points from a lottery
// Shuffle all block positions 0..N-1, keep the first NumSplits of them and
// sort: the result is NumSplits distinct positions in ascending order.
SmallVector<unsigned, 0> Lottery(NumOutlineableBlocks);
std::iota(Lottery.begin(), Lottery.end(), 0u);
std::shuffle(Lottery.begin(), Lottery.end(), *Gen);
Lottery.resize(NumSplits);
llvm::sort(Lottery);

// Add one past the end entry to lottery
// BBPos never reaches this value, so it keeps Lottery[LotteryIndex] in bounds
// after the last real split point has been consumed.
Lottery.push_back(NumOutlineableBlocks);

unsigned LotteryIndex = 0;
unsigned BBPos = 0;
for (BinaryBasicBlock *const BB : make_range(Start, End)) {
// Check whether to start new fragment
if (BBPos >= Lottery[LotteryIndex])
++LotteryIndex;

// Because LotteryIndex is 0 based and cold fragments are 1 based, we can
// use the index to assign fragments.
BB->setFragmentNum(FragmentNum(LotteryIndex));

++BBPos;
}
}
};

struct SplitAll {
bool canSplit(const BinaryFunction &BF) { return true; }
bool canOutline(const BinaryBasicBlock &BB) { return true; }

template <typename It> void partition(It Start, It End) const {
unsigned Fragment = 1;
std::for_each(Start, End, [&](BinaryBasicBlock *const BB) {
assert(BB->canOutline() &&
"Moving a block that is not outlineable to cold fragment");
BB->setFragmentNum(FragmentNum(Fragment++));
});
}
};
} // namespace
Expand All @@ -167,23 +240,39 @@ void SplitFunctions::runOnFunctions(BinaryContext &BC) {
if (!opts::SplitFunctions)
return;

ParallelUtilities::WorkFuncTy WorkFun;
std::minstd_rand0 RandGen(opts::RandomSeed.getValue());
if (opts::RandomSplit)

ParallelUtilities::WorkFuncTy WorkFun;
bool ForceSequential = false;

switch (opts::SplitStrategy) {
case SplitFunctionsStrategy::Profile2:
WorkFun = [&](BinaryFunction &BF) { splitFunction<SplitProfile2>(BF); };
break;
case SplitFunctionsStrategy::Random2:
WorkFun = [&](BinaryFunction &BF) {
splitFunction(BF, SplitRandom2(RandGen));
};
// If we split functions randomly, we need to ensure that across runs with
// the same input, we generate random numbers for each function in the same
// order.
ForceSequential = true;
break;
case SplitFunctionsStrategy::RandomN:
WorkFun = [&](BinaryFunction &BF) {
splitFunction(BF, SplitRandom(RandGen));
splitFunction(BF, SplitRandomN(RandGen));
};
else
WorkFun = [&](BinaryFunction &BF) { splitFunction<SplitCold>(BF); };
ForceSequential = true;
break;
case SplitFunctionsStrategy::All:
WorkFun = [&](BinaryFunction &BF) { splitFunction<SplitAll>(BF); };
break;
}

ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
return !shouldOptimize(BF);
};

// If we split functions randomly, we need to ensure that across runs with the
// same input, we generate random numbers for each function in the same order.
const bool ForceSequential = opts::RandomSplit;

ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, WorkFun, SkipFunc,
"SplitFunctions", ForceSequential);
Expand All @@ -195,12 +284,12 @@ void SplitFunctions::runOnFunctions(BinaryContext &BC) {
100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold));
}

template <typename SplitStrategy>
void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy Strategy) {
template <typename Strategy>
void SplitFunctions::splitFunction(BinaryFunction &BF, Strategy S) {
if (BF.empty())
return;

if (!Strategy.canSplit(BF))
if (!S.canSplit(BF))
return;

FunctionLayout &Layout = BF.getLayout();
Expand All @@ -226,7 +315,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy Strategy) {
for (BinaryBasicBlock *const BB : NewLayout) {
if (!BB->canOutline())
continue;
if (!Strategy.canOutline(*BB)) {
if (!S.canOutline(*BB)) {
BB->setCanOutline(false);
continue;
}
Expand Down Expand Up @@ -278,8 +367,16 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy Strategy) {
});
}

// Separate hot from cold starting from the bottom.
Strategy.partition(NewLayout.rbegin(), NewLayout.rend());
// Identify the last block that must not be split into a fragment. Every block
// after this block can be split. Note that when the iterator points to the
// block that cannot be outlined, then reverse_iterator::base() points to the
// block after it.
const BinaryFunction::BasicBlockOrderType::reverse_iterator FirstOutlineable =
llvm::find_if(reverse(NewLayout), [](const BinaryBasicBlock *const BB) {
return !BB->canOutline();
});

S.partition(FirstOutlineable.base(), NewLayout.end());
BF.getLayout().update(NewLayout);

// For shared objects, invoke instructions and corresponding landing pads
Expand Down Expand Up @@ -309,7 +406,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy Strategy) {
PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines);

for (BinaryBasicBlock &BB : BF)
BB.setIsCold(false);
BB.setFragmentNum(FragmentNum::main());
BF.getLayout().update(PreSplitLayout);
} else {
SplitBytesHot += HotSize;
Expand All @@ -336,24 +433,25 @@ SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {

const MCSymbol *LPLabel = EHInfo->first;
BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel);
if (BB->isCold() == LPBlock->isCold())
if (BB->getFragmentNum() == LPBlock->getFragmentNum())
continue;

const MCSymbol *TrampolineLabel = nullptr;
auto Iter = LPTrampolines.find(LPLabel);
const TrampolineKey Key(BB->getFragmentNum(), LPLabel);
auto Iter = LPTrampolines.find(Key);
if (Iter != LPTrampolines.end()) {
TrampolineLabel = Iter->second;
} else {
// Create a trampoline basic block in the same fragment as the thrower.
// Note: there's no need to insert the jump instruction, it will be
// added by fixBranches().
BinaryBasicBlock *TrampolineBB = BF.addBasicBlock();
TrampolineBB->setIsCold(BB->isCold());
TrampolineBB->setFragmentNum(BB->getFragmentNum());
TrampolineBB->setExecutionCount(LPBlock->getExecutionCount());
TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount());
TrampolineBB->setCFIState(LPBlock->getCFIState());
TrampolineLabel = TrampolineBB->getLabel();
LPTrampolines.insert(std::make_pair(LPLabel, TrampolineLabel));
LPTrampolines.insert(std::make_pair(Key, TrampolineLabel));
}

// Substitute the landing pad with the trampoline.
Expand All @@ -370,7 +468,7 @@ SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(),
BF.getLayout().block_end());
stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
return A->isCold() < B->isCold();
return A->getFragmentNum() < B->getFragmentNum();
});
BF.getLayout().update(NewLayout);

Expand All @@ -386,13 +484,22 @@ SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines(
BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout,
const SplitFunctions::TrampolineSetType &Trampolines) const {
DenseMap<const MCSymbol *, SmallVector<const MCSymbol *, 0>>
IncomingTrampolines;
for (const auto &Entry : Trampolines) {
IncomingTrampolines[Entry.getFirst().Target].emplace_back(
Entry.getSecond());
}

BasicBlockOrderType MergedLayout;
for (BinaryBasicBlock *BB : Layout) {
auto Iter = Trampolines.find(BB->getLabel());
if (Iter != Trampolines.end()) {
BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Iter->second);
assert(LPBlock && "Could not find matching landing pad block.");
MergedLayout.push_back(LPBlock);
auto Iter = IncomingTrampolines.find(BB->getLabel());
if (Iter != IncomingTrampolines.end()) {
for (const MCSymbol *const Trampoline : Iter->getSecond()) {
BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Trampoline);
assert(LPBlock && "Could not find matching landing pad block.");
MergedLayout.push_back(LPBlock);
}
}
MergedLayout.push_back(BB);
}
Expand Down
100 changes: 58 additions & 42 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/DebugData.h"
#include "bolt/Core/Exceptions.h"
#include "bolt/Core/FunctionLayout.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Core/Relocation.h"
Expand Down Expand Up @@ -3181,12 +3182,15 @@ void RewriteInstance::emitAndLink() {
if (Section)
BC->deregisterSection(*Section);
assert(Function->getOriginSectionName() && "expected origin section");
Function->CodeSectionName = std::string(*Function->getOriginSectionName());
if (Function->isSplit()) {
if (ErrorOr<BinarySection &> ColdSection = Function->getColdCodeSection())
Function->CodeSectionName = Function->getOriginSectionName()->str();
for (const FunctionFragment FF :
Function->getLayout().getSplitFragments()) {
if (ErrorOr<BinarySection &> ColdSection =
Function->getCodeSection(FF.getFragmentNum()))
BC->deregisterSection(*ColdSection);
Function->ColdCodeSectionName = std::string(getBOLTTextSectionName());
}
if (Function->getLayout().isSplit())
Function->setColdCodeSectionName(getBOLTTextSectionName());
}

if (opts::PrintCacheMetrics) {
Expand Down Expand Up @@ -3722,34 +3726,37 @@ void RewriteInstance::mapCodeSections(RuntimeDyld &RTDyld) {
if (!Function.isSplit())
continue;

ErrorOr<BinarySection &> ColdSection = Function.getColdCodeSection();
assert(ColdSection && "cannot find section for cold part");
// Cold fragments are aligned at 16 bytes.
NextAvailableAddress = alignTo(NextAvailableAddress, 16);
BinaryFunction::FragmentInfo &ColdPart = Function.cold();
if (TooLarge) {
// The corresponding FDE will refer to address 0.
ColdPart.setAddress(0);
ColdPart.setImageAddress(0);
ColdPart.setImageSize(0);
ColdPart.setFileOffset(0);
} else {
ColdPart.setAddress(NextAvailableAddress);
ColdPart.setImageAddress(ColdSection->getAllocAddress());
ColdPart.setImageSize(ColdSection->getOutputSize());
ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress));
ColdSection->setOutputAddress(ColdPart.getAddress());
}
for (const FunctionFragment FF : Function.getLayout().getSplitFragments()) {
ErrorOr<BinarySection &> ColdSection =
Function.getCodeSection(FF.getFragmentNum());
assert(ColdSection && "cannot find section for cold part");
// Cold fragments are aligned at 16 bytes.
NextAvailableAddress = alignTo(NextAvailableAddress, 16);
BinaryFunction::FragmentInfo &ColdPart = Function.cold();
if (TooLarge) {
// The corresponding FDE will refer to address 0.
ColdPart.setAddress(0);
ColdPart.setImageAddress(0);
ColdPart.setImageSize(0);
ColdPart.setFileOffset(0);
} else {
ColdPart.setAddress(NextAvailableAddress);
ColdPart.setImageAddress(ColdSection->getAllocAddress());
ColdPart.setImageSize(ColdSection->getOutputSize());
ColdPart.setFileOffset(getFileOffsetForAddress(NextAvailableAddress));
ColdSection->setOutputAddress(ColdPart.getAddress());
}

LLVM_DEBUG(dbgs() << "BOLT: mapping cold fragment 0x"
<< Twine::utohexstr(ColdPart.getImageAddress())
<< " to 0x" << Twine::utohexstr(ColdPart.getAddress())
<< " with size "
<< Twine::utohexstr(ColdPart.getImageSize()) << '\n');
RTDyld.reassignSectionAddress(ColdSection->getSectionID(),
ColdPart.getAddress());
LLVM_DEBUG(dbgs() << "BOLT: mapping cold fragment 0x"
<< Twine::utohexstr(ColdPart.getImageAddress())
<< " to 0x" << Twine::utohexstr(ColdPart.getAddress())
<< " with size "
<< Twine::utohexstr(ColdPart.getImageSize()) << '\n');
RTDyld.reassignSectionAddress(ColdSection->getSectionID(),
ColdPart.getAddress());

NextAvailableAddress += ColdPart.getImageSize();
NextAvailableAddress += ColdPart.getImageSize();
}
}

// Add the new text section aggregating all existing code sections.
Expand Down Expand Up @@ -4511,17 +4518,20 @@ void RewriteInstance::updateELFSymbolTable(
Symbols.emplace_back(ICFSymbol);
}
if (Function.isSplit() && Function.cold().getAddress()) {
ELFSymTy NewColdSym = FunctionSymbol;
SmallVector<char, 256> Buf;
NewColdSym.st_name =
AddToStrTab(Twine(cantFail(FunctionSymbol.getName(StringSection)))
.concat(".cold.0")
.toStringRef(Buf));
NewColdSym.st_shndx = Function.getColdCodeSection()->getIndex();
NewColdSym.st_value = Function.cold().getAddress();
NewColdSym.st_size = Function.cold().getImageSize();
NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC);
Symbols.emplace_back(NewColdSym);
for (const FunctionFragment FF :
Function.getLayout().getSplitFragments()) {
ELFSymTy NewColdSym = FunctionSymbol;
const SmallString<256> SymbolName = formatv(
"{0}.cold.{1}", cantFail(FunctionSymbol.getName(StringSection)),
FF.getFragmentNum().get() - 1);
NewColdSym.st_name = AddToStrTab(SymbolName);
NewColdSym.st_shndx =
Function.getCodeSection(FF.getFragmentNum())->getIndex();
NewColdSym.st_value = Function.cold().getAddress();
NewColdSym.st_size = Function.cold().getImageSize();
NewColdSym.setBindingAndType(ELF::STB_LOCAL, ELF::STT_FUNC);
Symbols.emplace_back(NewColdSym);
}
}
if (Function.hasConstantIsland()) {
uint64_t DataMark = Function.getOutputDataAddress();
Expand Down Expand Up @@ -4636,6 +4646,9 @@ void RewriteInstance::updateELFSymbolTable(
: nullptr;

if (Function && Function->isEmitted()) {
assert(Function->getLayout().isHotColdSplit() &&
"Adding symbols based on cold fragment when there are more than "
"2 fragments");
const uint64_t OutputAddress =
Function->translateInputToOutputAddress(Symbol.st_value);

Expand All @@ -4645,7 +4658,7 @@ void RewriteInstance::updateELFSymbolTable(
NewSymbol.st_shndx =
OutputAddress >= Function->cold().getAddress() &&
OutputAddress < Function->cold().getImageSize()
? Function->getColdCodeSection()->getIndex()
? Function->getCodeSection(FragmentNum::cold())->getIndex()
: Function->getCodeSection()->getIndex();
} else {
// Check if the symbol belongs to moved data object and update it.
Expand Down Expand Up @@ -4743,6 +4756,9 @@ void RewriteInstance::updateELFSymbolTable(
Symbols.emplace_back(NewSymbol);

if (Function->isSplit()) {
assert(Function->getLayout().isHotColdSplit() &&
"Adding symbols based on cold fragment when there are more than "
"2 fragments");
ELFSymTy NewColdSym = NewSymbol;
NewColdSym.setType(ELF::STT_NOTYPE);
SmallVector<char, 256> Buf;
Expand Down
84 changes: 84 additions & 0 deletions bolt/test/X86/split-all.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Test split all block strategy

# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -o %t.bolt --split-functions --split-strategy=all \
# RUN: --print-split --print-only=chain \
# RUN: 2>&1 | FileCheck %s

# CHECK: Binary Function "chain"
# CHECK: IsSplit :
# CHECK-SAME: {{ 1$}}
# CHECK: {{^\.LBB00}}
# CHECK: ------- HOT-COLD SPLIT POINT -------
# CHECK: {{^\.LFT0}}
# CHECK: ------- HOT-COLD SPLIT POINT -------
# CHECK: {{^\.Ltmp0}}
# CHECK: ------- HOT-COLD SPLIT POINT -------
# CHECK: {{^\.Ltmp2}}
# CHECK: ------- HOT-COLD SPLIT POINT -------
# CHECK: {{^\.Ltmp3}}
# CHECK: ------- HOT-COLD SPLIT POINT -------
# CHECK: {{^\.Ltmp4}}
# CHECK: ------- HOT-COLD SPLIT POINT -------
# CHECK: {{^\.Ltmp5}}
# CHECK: ------- HOT-COLD SPLIT POINT -------
# CHECK: {{^\.Ltmp1}}
# CHECK: End of Function "chain"

.text
# chain: test input made of eight labelled blocks, most of them ending in an
# unconditional jump. With %edi < 2 (signed) it leaves 5 in %eax; otherwise it
# walks .Lchain_start through .Lchain4 and leaves 10 + 1 + 1 + 5 + 5 = 22 in %eax.
.globl chain
.type chain, @function
chain:
.Lchain_entry:
pushq %rbp
movq %rsp, %rbp
# Take the long path when %edi >= 2, else fall through to .Lfast.
cmpl $2, %edi
jge .Lchain_start
.Lfast:
movl $5, %eax
jmp .Lexit
.Lchain_start:
movl $10, %eax
jmp .Lchain1
.Lchain1:
addl $1, %eax
jmp .Lchain2
.Lchain2:
addl $1, %eax
jmp .Lchain3
.Lchain3:
addl $1, %eax
addl $1, %eax
addl $1, %eax
addl $1, %eax
addl $1, %eax
jmp .Lchain4
.Lchain4:
addl $1, %eax
addl $1, %eax
addl $1, %eax
addl $1, %eax
addl $1, %eax
jmp .Lexit
# Common exit reached from both paths.
.Lexit:
popq %rbp
ret
.Lchain_end:
.size chain, .Lchain_end-chain


# main: calls chain twice, once with %edi = 1 (the .Lfast path) and once with
# %edi = 4 (the .Lchain_start path), ignores the results, and leaves 0 in %eax.
.globl main
.type main, @function
main:
pushq %rbp
movq %rsp, %rbp
movl $1, %edi
call chain
movl $4, %edi
call chain
# Zero %eax before returning.
xorl %eax, %eax
popq %rbp
retq
.Lmain_end:
.size main, .Lmain_end-main
11 changes: 8 additions & 3 deletions bolt/test/X86/split-random.s
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# Test random function splitting option

# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o
# RUN: %clang %cflags %t.o -o %t.exe
# RUN: llvm-bolt %t.exe -o %t.out --split-functions --split-random \
# RUN: --print-finalized --print-only=two_block --bolt-seed=7 2>&1 | \
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -o %t.random2 --split-functions \
# RUN: --split-strategy=random2 --print-finalized \
# RUN: --print-only=two_block --bolt-seed=7 2>&1 | \
# RUN: FileCheck %s
# RUN: llvm-bolt %t.exe -o %t.randomN --split-functions \
# RUN: --split-strategy=randomN --print-finalized \
# RUN: --print-only=two_block --bolt-seed=7 2>&1 | \
# RUN: FileCheck %s

# CHECK: Binary Function "two_block"
Expand Down