8 changes: 7 additions & 1 deletion bolt/include/bolt/Core/DIEBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,13 @@ class DIEBuilder {
void updateReferences();

/// Update the Offset and Size of DIE, populate DebugNames table.
uint32_t finalizeDIEs(DWARFUnit &CU, DIE &Die, uint32_t &CurOffset);
/// Along with current CU, and DIE being processed and the new DIE offset to
/// be updated, it takes in Parents vector that can be empty if this DIE has
/// no parents.
uint32_t
finalizeDIEs(DWARFUnit &CU, DIE &Die,
std::vector<std::optional<BOLTDWARF5AccelTableData *>> &Parents,
uint32_t &CurOffset);

void registerUnit(DWARFUnit &DU, bool NeedSort);

Expand Down
28 changes: 23 additions & 5 deletions bolt/include/bolt/Core/DebugNames.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ class BOLTDWARF5AccelTableData : public DWARF5AccelTableData {
bool isTU() const { return DWARF5AccelTableData::isTU(); }
std::optional<unsigned> getSecondUnitID() const { return SecondUnitID; }

void setPatchOffset(uint64_t PatchOffset) { OffsetVal = PatchOffset; }
uint64_t getPatchOffset() const { return std::get<uint64_t>(OffsetVal); }

private:
std::optional<unsigned> SecondUnitID;
};
Expand All @@ -49,10 +52,12 @@ class DWARF5AcceleratorTable {
Abbrev->~DebugNamesAbbrev();
}
/// Add DWARF5 Accelerator table entry.
/// Input is DWARFUnit being processed, DIE that belongs to it, and potential
/// SkeletonCU if the Unit comes from a DWO section.
void addAccelTableEntry(DWARFUnit &Unit, const DIE &Die,
const std::optional<uint64_t> &DWOID);
/// Input is DWARFUnit being processed, DIE that belongs to it, potential
/// DWOID if the Unit comes from a DWO section, and potential parent entry.
std::optional<BOLTDWARF5AccelTableData *>
addAccelTableEntry(DWARFUnit &Unit, const DIE &Die,
const std::optional<uint64_t> &DWOID,
std::optional<BOLTDWARF5AccelTableData *> &Parent);
/// Set current unit being processed.
void setCurrentUnit(DWARFUnit &Unit, const uint64_t UnitStartOffset);
/// Emit Accelerator table.
Expand All @@ -63,6 +68,16 @@ class DWARF5AcceleratorTable {
std::unique_ptr<DebugBufferVector> releaseBuffer() {
return std::move(FullTableBuffer);
}
/// Adds a DIE that is referenced across CUs.
void addCrossCUDie(const DIE *Die) {
CrossCUDies.insert({Die->getOffset(), Die});
}
/// Returns true if the DIE can generate an entry for a cross cu reference.
/// This only checks TAGs of a DIE because when this is invoked DIE might not
/// be fully constructed.
bool canGenerateEntryWithCrossCUReference(
const DWARFUnit &Unit, const DIE &Die,
const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec);

private:
BinaryContext &BC;
Expand Down Expand Up @@ -121,6 +136,9 @@ class DWARF5AcceleratorTable {
llvm::DenseMap<llvm::hash_code, uint64_t> StrCacheToOffsetMap;
// Contains DWO ID to CUList Index.
llvm::DenseMap<uint64_t, uint32_t> CUOffsetsToPatch;
// Contains a map of Entry ID to Entry relative offset.
llvm::DenseMap<uint64_t, uint32_t> EntryRelativeOffsets;
llvm::DenseMap<uint64_t, const DIE *> CrossCUDies;
/// Adds Unit to either CUList, LocalTUList or ForeignTUList.
/// Input Unit being processed, and DWO ID if Unit is being processed comes
/// from a DWO section.
Expand All @@ -143,7 +161,7 @@ class DWARF5AcceleratorTable {
/// Write Entries.
void writeEntries();
/// Write an Entry.
void writeEntry(const BOLTDWARF5AccelTableData &Entry);
void writeEntry(BOLTDWARF5AccelTableData &Entry);
/// Write augmentation_string for BOLT.
void writeAugmentationString();
/// Emit out Header for DWARF5 Accelerator table.
Expand Down
1 change: 1 addition & 0 deletions bolt/include/bolt/Core/MCPlus.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class MCAnnotation {
kOffset, /// Offset in the function.
kLabel, /// MCSymbol pointing to this instruction.
kSize, /// Size of the instruction.
kDynamicBranch, /// Jit instruction patched at runtime.
kGeneric /// First generic annotation.
};

Expand Down
82 changes: 44 additions & 38 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -487,10 +487,9 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}

virtual bool createDirectCall(MCInst &Inst, const MCSymbol *Target,
virtual void createDirectCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx, bool IsTailCall) {
llvm_unreachable("not implemented");
return false;
}

virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
Expand Down Expand Up @@ -1200,6 +1199,16 @@ class MCPlusBuilder {
/// Set instruction size.
void setSize(MCInst &Inst, uint32_t Size) const;

/// Check if the branch instruction could be modified at runtime.
bool isDynamicBranch(const MCInst &Inst) const;

/// Return ID for runtime-modifiable instruction.
std::optional<uint32_t> getDynamicBranchID(const MCInst &Inst) const;

/// Mark instruction as a dynamic branch, i.e. a branch that can be
/// overwritten at runtime.
void setDynamicBranch(MCInst &Inst, uint32_t ID) const;

/// Return MCSymbol that represents a target of this instruction at a given
/// operand number \p OpNum. If there's no symbol associated with
/// the operand - return nullptr.
Expand Down Expand Up @@ -1534,15 +1543,13 @@ class MCPlusBuilder {
}

/// Create a no-op instruction.
virtual bool createNoop(MCInst &Inst) const {
virtual void createNoop(MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
}

/// Create a return instruction.
virtual bool createReturn(MCInst &Inst) const {
virtual void createReturn(MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
}

/// Store \p Target absolute address to \p RegName
Expand All @@ -1556,32 +1563,30 @@ class MCPlusBuilder {

/// Creates a new unconditional branch instruction in Inst and set its operand
/// to TBB.
///
/// Returns true on success.
virtual bool createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
virtual void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
MCContext *Ctx) const {
llvm_unreachable("not implemented");
return false;
}

/// Create a version of unconditional jump that has the largest span for a
/// single instruction with direct target.
virtual void createLongUncondBranch(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) const {
llvm_unreachable("not implemented");
}

/// Creates a new call instruction in Inst and sets its operand to
/// Target.
///
/// Returns true on success.
virtual bool createCall(MCInst &Inst, const MCSymbol *Target,
virtual void createCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) {
llvm_unreachable("not implemented");
return false;
}

/// Creates a new tail call instruction in Inst and sets its operand to
/// Target.
///
/// Returns true on success.
virtual bool createTailCall(MCInst &Inst, const MCSymbol *Target,
virtual void createTailCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) {
llvm_unreachable("not implemented");
return false;
}

virtual void createLongTailCall(InstructionListType &Seq,
Expand All @@ -1590,43 +1595,36 @@ class MCPlusBuilder {
}

/// Creates a trap instruction in Inst.
///
/// Returns true on success.
virtual bool createTrap(MCInst &Inst) const {
virtual void createTrap(MCInst &Inst) const {
llvm_unreachable("not implemented");
return false;
}

/// Creates an instruction to bump the stack pointer just like a call.
virtual bool createStackPointerIncrement(MCInst &Inst, int Size = 8,
virtual void createStackPointerIncrement(MCInst &Inst, int Size = 8,
bool NoFlagsClobber = false) const {
llvm_unreachable("not implemented");
return false;
}

/// Creates an instruction to move the stack pointer just like a ret.
virtual bool createStackPointerDecrement(MCInst &Inst, int Size = 8,
virtual void createStackPointerDecrement(MCInst &Inst, int Size = 8,
bool NoFlagsClobber = false) const {
llvm_unreachable("not implemented");
return false;
}

/// Create a store instruction using \p StackReg as the base register
/// and \p Offset as the displacement.
virtual bool createSaveToStack(MCInst &Inst, const MCPhysReg &StackReg,
virtual void createSaveToStack(MCInst &Inst, const MCPhysReg &StackReg,
int Offset, const MCPhysReg &SrcReg,
int Size) const {
llvm_unreachable("not implemented");
return false;
}

virtual bool createLoad(MCInst &Inst, const MCPhysReg &BaseReg, int64_t Scale,
virtual void createLoad(MCInst &Inst, const MCPhysReg &BaseReg, int64_t Scale,
const MCPhysReg &IndexReg, int64_t Offset,
const MCExpr *OffsetExpr,
const MCPhysReg &AddrSegmentReg,
const MCPhysReg &DstReg, int Size) const {
llvm_unreachable("not implemented");
return false;
}

virtual InstructionListType createLoadImmediate(const MCPhysReg Dest,
Expand All @@ -1636,32 +1634,27 @@ class MCPlusBuilder {

/// Create a fragment of code (sequence of instructions) that load a 32-bit
/// address from memory, zero-extends it to 64 and jump to it (indirect jump).
virtual bool
virtual void
createIJmp32Frag(SmallVectorImpl<MCInst> &Insts, const MCOperand &BaseReg,
const MCOperand &Scale, const MCOperand &IndexReg,
const MCOperand &Offset, const MCOperand &TmpReg) const {
llvm_unreachable("not implemented");
return false;
}

/// Create a load instruction using \p StackReg as the base register
/// and \p Offset as the displacement.
virtual bool createRestoreFromStack(MCInst &Inst, const MCPhysReg &StackReg,
virtual void createRestoreFromStack(MCInst &Inst, const MCPhysReg &StackReg,
int Offset, const MCPhysReg &DstReg,
int Size) const {
llvm_unreachable("not implemented");
return false;
}

/// Creates a call frame pseudo instruction. A single operand identifies which
/// MCCFIInstruction this MCInst is referring to.
///
/// Returns true on success.
virtual bool createCFI(MCInst &Inst, int64_t Offset) const {
virtual void createCFI(MCInst &Inst, int64_t Offset) const {
Inst.clear();
Inst.setOpcode(TargetOpcode::CFI_INSTRUCTION);
Inst.addOperand(MCOperand::createImm(Offset));
return true;
}

/// Create an inline version of memcpy(dest, src, 1).
Expand Down Expand Up @@ -1699,6 +1692,19 @@ class MCPlusBuilder {
return Inst.getOpcode() == TargetOpcode::CFI_INSTRUCTION;
}

/// Create a conditional branch with a target-specific conditional code \p CC.
virtual void createCondBranch(MCInst &Inst, const MCSymbol *Target,
unsigned CC, MCContext *Ctx) const {
llvm_unreachable("not implemented");
}

/// Create long conditional branch with a target-specific conditional code
/// \p CC.
virtual void createLongCondBranch(MCInst &Inst, const MCSymbol *Target,
unsigned CC, MCContext *Ctx) const {
llvm_unreachable("not implemented");
}

/// Reverses the branch condition in Inst and update its taken target to TBB.
///
/// Returns true on success.
Expand Down
117 changes: 114 additions & 3 deletions bolt/include/bolt/Profile/BoltAddressTranslation.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,16 @@ class BoltAddressTranslation {
/// Save function and basic block hashes used for metadata dump.
void saveMetadata(BinaryContext &BC);

/// True if a given \p Address is a function with translation table entry.
bool isBATFunction(uint64_t Address) const { return Maps.count(Address); }

private:
/// Helper to update \p Map by inserting one or more BAT entries reflecting
/// \p BB for function located at \p FuncAddress. At least one entry will be
/// emitted for the start of the BB. More entries may be emitted to cover
/// the location of calls or any instruction that may change control flow.
void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB,
uint64_t FuncAddress);
uint64_t FuncInputAddress, uint64_t FuncOutputAddress);

/// Write the serialized address translation table for a function.
template <bool Cold>
Expand All @@ -144,15 +147,123 @@ class BoltAddressTranslation {

std::map<uint64_t, MapTy> Maps;

using BBHashMap = std::unordered_map<uint32_t, size_t>;
std::unordered_map<uint64_t, std::pair<size_t, BBHashMap>> FuncHashes;
/// Map a function to its basic blocks count
std::unordered_map<uint64_t, size_t> NumBasicBlocksMap;

/// Links outlined cold bocks to their original function
std::map<uint64_t, uint64_t> ColdPartSource;

/// Links output address of a main fragment back to input address.
std::unordered_map<uint64_t, uint64_t> ReverseMap;

/// Identifies the address of a control-flow changing instructions in a
/// translation map entry
const static uint32_t BRANCHENTRY = 0x1;

public:
/// Map basic block input offset to a basic block index and hash pair.
class BBHashMapTy {
class EntryTy {
unsigned Index;
size_t Hash;

public:
unsigned getBBIndex() const { return Index; }
size_t getBBHash() const { return Hash; }
EntryTy(unsigned Index, size_t Hash) : Index(Index), Hash(Hash) {}
};

std::unordered_map<uint32_t, EntryTy> Map;
const EntryTy &getEntry(uint32_t BBInputOffset) const {
auto It = Map.find(BBInputOffset);
assert(It != Map.end());
return It->second;
}

public:
bool isInputBlock(uint32_t InputOffset) const {
return Map.count(InputOffset);
}

unsigned getBBIndex(uint32_t BBInputOffset) const {
return getEntry(BBInputOffset).getBBIndex();
}

size_t getBBHash(uint32_t BBInputOffset) const {
return getEntry(BBInputOffset).getBBHash();
}

void addEntry(uint32_t BBInputOffset, unsigned BBIndex, size_t BBHash) {
Map.emplace(BBInputOffset, EntryTy(BBIndex, BBHash));
}

size_t getNumBasicBlocks() const { return Map.size(); }
};

/// Map function output address to its hash and basic blocks hash map.
class FuncHashesTy {
class EntryTy {
size_t Hash;
BBHashMapTy BBHashMap;

public:
size_t getBFHash() const { return Hash; }
const BBHashMapTy &getBBHashMap() const { return BBHashMap; }
EntryTy(size_t Hash) : Hash(Hash) {}
};

std::unordered_map<uint64_t, EntryTy> Map;
const EntryTy &getEntry(uint64_t FuncOutputAddress) const {
auto It = Map.find(FuncOutputAddress);
assert(It != Map.end());
return It->second;
}

public:
size_t getBFHash(uint64_t FuncOutputAddress) const {
return getEntry(FuncOutputAddress).getBFHash();
}

const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
return getEntry(FuncOutputAddress).getBBHashMap();
}

void addEntry(uint64_t FuncOutputAddress, size_t BFHash) {
Map.emplace(FuncOutputAddress, EntryTy(BFHash));
}

size_t getNumFunctions() const { return Map.size(); };

size_t getNumBasicBlocks() const {
size_t NumBasicBlocks{0};
for (auto &I : Map)
NumBasicBlocks += I.second.getBBHashMap().getNumBasicBlocks();
return NumBasicBlocks;
}
};

/// Returns BF hash by function output address (after BOLT).
size_t getBFHash(uint64_t FuncOutputAddress) const {
return FuncHashes.getBFHash(FuncOutputAddress);
}

/// Returns BBHashMap by function output address (after BOLT).
const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
return FuncHashes.getBBHashMap(FuncOutputAddress);
}

BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) {
return const_cast<BBHashMapTy &>(
std::as_const(*this).getBBHashMap(FuncOutputAddress));
}

/// Returns the number of basic blocks in a function.
size_t getNumBasicBlocks(uint64_t OutputAddress) const {
return NumBasicBlocksMap.at(OutputAddress);
}

private:
FuncHashesTy FuncHashes;
};
} // namespace bolt

Expand Down
4 changes: 4 additions & 0 deletions bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,10 @@ class DataAggregator : public DataReader {
/// Dump data structures into a file readable by llvm-bolt
std::error_code writeAggregatedFile(StringRef OutputFilename) const;

/// Dump translated data structures into YAML
std::error_code writeBATYAML(BinaryContext &BC,
StringRef OutputFilename) const;

/// Filter out binaries based on PID
void filterBinaryMMapInfo();

Expand Down
4 changes: 4 additions & 0 deletions bolt/include/bolt/Profile/YAMLProfileWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#ifndef BOLT_PROFILE_YAML_PROFILE_WRITER_H
#define BOLT_PROFILE_YAML_PROFILE_WRITER_H

#include "bolt/Profile/ProfileYAMLMapping.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>

Expand All @@ -29,6 +30,9 @@ class YAMLProfileWriter {

/// Save execution profile for that instance.
std::error_code writeProfile(const RewriteInstance &RI);

static yaml::bolt::BinaryFunctionProfile convert(const BinaryFunction &BF,
bool UseDFS);
};

} // namespace bolt
Expand Down
8 changes: 7 additions & 1 deletion bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1939,7 +1939,13 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
OS << Endl;
return;
}
InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
if (std::optional<uint32_t> DynamicID =
MIB->getDynamicBranchID(Instruction)) {
OS << "\tjit\t" << MIB->getTargetSymbol(Instruction)->getName()
<< " # ID: " << DynamicID;
} else {
InstPrinter->printInst(&Instruction, 0, "", *STI, OS);
}
if (MIB->isCall(Instruction)) {
if (MIB->isTailCall(Instruction))
OS << " # TAILCALL ";
Expand Down
17 changes: 16 additions & 1 deletion bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3350,6 +3350,16 @@ void BinaryFunction::fixBranches() {

// Eliminate unnecessary conditional branch.
if (TSuccessor == FSuccessor) {
// FIXME: at the moment, we cannot safely remove static key branches.
if (MIB->isDynamicBranch(*CondBranch)) {
if (opts::Verbosity) {
BC.outs()
<< "BOLT-INFO: unable to remove redundant dynamic branch in "
<< *this << '\n';
}
continue;
}

BB->removeDuplicateConditionalSuccessor(CondBranch);
if (TSuccessor != NextBB)
BB->addBranchInstruction(TSuccessor);
Expand All @@ -3358,8 +3368,13 @@ void BinaryFunction::fixBranches() {

// Reverse branch condition and swap successors.
auto swapSuccessors = [&]() {
if (MIB->isUnsupportedBranch(*CondBranch))
if (MIB->isUnsupportedBranch(*CondBranch)) {
if (opts::Verbosity) {
BC.outs() << "BOLT-INFO: unable to swap successors in " << *this
<< '\n';
}
return false;
}
std::swap(TSuccessor, FSuccessor);
BB->swapConditionalSuccessors();
auto L = BC.scopeLock();
Expand Down
34 changes: 27 additions & 7 deletions bolt/lib/Core/DIEBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,20 +377,32 @@ getUnitForOffset(DIEBuilder &Builder, DWARFContext &DWCtx,
return nullptr;
}

uint32_t DIEBuilder::finalizeDIEs(DWARFUnit &CU, DIE &Die,
uint32_t &CurOffset) {
uint32_t DIEBuilder::finalizeDIEs(
DWARFUnit &CU, DIE &Die,
std::vector<std::optional<BOLTDWARF5AccelTableData *>> &Parents,
uint32_t &CurOffset) {
getState().DWARFDieAddressesParsed.erase(Die.getOffset());
uint32_t CurSize = 0;
Die.setOffset(CurOffset);
DebugNamesTable.addAccelTableEntry(
CU, Die, SkeletonCU ? SkeletonCU->getDWOId() : std::nullopt);
std::optional<BOLTDWARF5AccelTableData *> NameEntry =
DebugNamesTable.addAccelTableEntry(
CU, Die, SkeletonCU ? SkeletonCU->getDWOId() : std::nullopt,
Parents.back());
// It is possible that an indexed debugging information entry has a parent
// that is not indexed (for example, if its parent does not have a name
// attribute). In such a case, a parent attribute may point to a nameless
// index entry (that is, one that cannot be reached from any entry in the name
// table), or it may point to the nearest ancestor that does have an index
// entry.
if (NameEntry)
Parents.push_back(std::move(NameEntry));
for (DIEValue &Val : Die.values())
CurSize += Val.sizeOf(CU.getFormParams());
CurSize += getULEB128Size(Die.getAbbrevNumber());
CurOffset += CurSize;

for (DIE &Child : Die.children()) {
uint32_t ChildSize = finalizeDIEs(CU, Child, CurOffset);
uint32_t ChildSize = finalizeDIEs(CU, Child, Parents, CurOffset);
CurSize += ChildSize;
}
// for children end mark.
Expand All @@ -400,6 +412,8 @@ uint32_t DIEBuilder::finalizeDIEs(DWARFUnit &CU, DIE &Die,
}

Die.setSize(CurSize);
if (NameEntry)
Parents.pop_back();

return CurSize;
}
Expand All @@ -410,7 +424,9 @@ void DIEBuilder::finish() {
uint32_t HeaderSize = CU.getHeaderSize();
uint32_t CurOffset = HeaderSize;
DebugNamesTable.setCurrentUnit(CU, UnitStartOffset);
finalizeDIEs(CU, *UnitDIE, CurOffset);
std::vector<std::optional<BOLTDWARF5AccelTableData *>> Parents;
Parents.push_back(std::nullopt);
finalizeDIEs(CU, *UnitDIE, Parents, CurOffset);

DWARFUnitInfo &CurUnitInfo = getUnitInfoByDwarfUnit(CU);
CurUnitInfo.UnitOffset = UnitStartOffset;
Expand Down Expand Up @@ -529,6 +545,10 @@ void DIEBuilder::cloneDieReferenceAttribute(
NewRefDie = DieInfo.Die;

if (AttrSpec.Form == dwarf::DW_FORM_ref_addr) {
// Adding referenced DIE to DebugNames to be used when entries are created
// that contain cross cu references.
if (DebugNamesTable.canGenerateEntryWithCrossCUReference(U, Die, AttrSpec))
DebugNamesTable.addCrossCUDie(DieInfo.Die);
// no matter forward reference or backward reference, we are supposed
// to calculate them in `finish` due to the possible modification of
// the DIE.
Expand All @@ -538,7 +558,7 @@ void DIEBuilder::cloneDieReferenceAttribute(
std::make_pair(CurDieInfo, AddrReferenceInfo(&DieInfo, AttrSpec)));

Die.addValue(getState().DIEAlloc, AttrSpec.Attr, dwarf::DW_FORM_ref_addr,
DIEInteger(0xDEADBEEF));
DIEInteger(DieInfo.Die->getOffset()));
return;
}

Expand Down
217 changes: 168 additions & 49 deletions bolt/lib/Core/DebugNames.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/LEB128.h"
#include <cstdint>
#include <optional>

namespace llvm {
namespace bolt {
Expand Down Expand Up @@ -145,6 +146,55 @@ static bool shouldIncludeVariable(const DWARFUnit &Unit, const DIE &Die) {
return false;
}

bool static canProcess(const DWARFUnit &Unit, const DIE &Die,
std::string &NameToUse, const bool TagsOnly) {
switch (Die.getTag()) {
case dwarf::DW_TAG_base_type:
case dwarf::DW_TAG_class_type:
case dwarf::DW_TAG_enumeration_type:
case dwarf::DW_TAG_imported_declaration:
case dwarf::DW_TAG_pointer_type:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_typedef:
case dwarf::DW_TAG_unspecified_type:
if (TagsOnly || Die.findAttribute(dwarf::Attribute::DW_AT_name))
return true;
return false;
case dwarf::DW_TAG_namespace:
// According to DWARF5 spec namespaces without DW_AT_name needs to have
// "(anonymous namespace)"
if (!Die.findAttribute(dwarf::Attribute::DW_AT_name))
NameToUse = "(anonymous namespace)";
return true;
case dwarf::DW_TAG_inlined_subroutine:
case dwarf::DW_TAG_label:
case dwarf::DW_TAG_subprogram:
if (TagsOnly || Die.findAttribute(dwarf::Attribute::DW_AT_low_pc) ||
Die.findAttribute(dwarf::Attribute::DW_AT_high_pc) ||
Die.findAttribute(dwarf::Attribute::DW_AT_ranges) ||
Die.findAttribute(dwarf::Attribute::DW_AT_entry_pc))
return true;
return false;
case dwarf::DW_TAG_variable:
return TagsOnly || shouldIncludeVariable(Unit, Die);
default:
break;
}
return false;
}

bool DWARF5AcceleratorTable::canGenerateEntryWithCrossCUReference(
const DWARFUnit &Unit, const DIE &Die,
const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec) {
if (!isCreated())
return false;
std::string NameToUse = "";
if (!canProcess(Unit, Die, NameToUse, true))
return false;
return (AttrSpec.Attr == dwarf::Attribute::DW_AT_abstract_origin ||
AttrSpec.Attr == dwarf::Attribute::DW_AT_specification) &&
AttrSpec.Form == dwarf::DW_FORM_ref_addr;
}
/// Returns name offset in String Offset section.
static uint64_t getNameOffset(BinaryContext &BC, DWARFUnit &Unit,
const uint64_t Index) {
Expand All @@ -163,46 +213,17 @@ static uint64_t getNameOffset(BinaryContext &BC, DWARFUnit &Unit,
Index * DwarfOffsetByteSize);
}

void DWARF5AcceleratorTable::addAccelTableEntry(
DWARFUnit &Unit, const DIE &Die, const std::optional<uint64_t> &DWOID) {
static uint64_t getEntryID(const BOLTDWARF5AccelTableData &Entry) {
return reinterpret_cast<uint64_t>(&Entry);
}

std::optional<BOLTDWARF5AccelTableData *>
DWARF5AcceleratorTable::addAccelTableEntry(
DWARFUnit &Unit, const DIE &Die, const std::optional<uint64_t> &DWOID,
std::optional<BOLTDWARF5AccelTableData *> &Parent) {
if (Unit.getVersion() < 5 || !NeedToCreate)
return;
return std::nullopt;
std::string NameToUse = "";
auto canProcess = [&](const DIE &Die) -> bool {
switch (Die.getTag()) {
case dwarf::DW_TAG_base_type:
case dwarf::DW_TAG_class_type:
case dwarf::DW_TAG_enumeration_type:
case dwarf::DW_TAG_imported_declaration:
case dwarf::DW_TAG_pointer_type:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_typedef:
case dwarf::DW_TAG_unspecified_type:
if (Die.findAttribute(dwarf::Attribute::DW_AT_name))
return true;
return false;
case dwarf::DW_TAG_namespace:
// According to DWARF5 spec namespaces without DW_AT_name needs to have
// "(anonymous namespace)"
if (!Die.findAttribute(dwarf::Attribute::DW_AT_name))
NameToUse = "(anonymous namespace)";
return true;
case dwarf::DW_TAG_inlined_subroutine:
case dwarf::DW_TAG_label:
case dwarf::DW_TAG_subprogram:
if (Die.findAttribute(dwarf::Attribute::DW_AT_low_pc) ||
Die.findAttribute(dwarf::Attribute::DW_AT_high_pc) ||
Die.findAttribute(dwarf::Attribute::DW_AT_ranges) ||
Die.findAttribute(dwarf::Attribute::DW_AT_entry_pc))
return true;
return false;
case dwarf::DW_TAG_variable:
return shouldIncludeVariable(Unit, Die);
default:
break;
}
return false;
};

auto getUnitID = [&](const DWARFUnit &Unit, bool &IsTU,
uint32_t &DieTag) -> uint32_t {
Expand All @@ -216,8 +237,8 @@ void DWARF5AcceleratorTable::addAccelTableEntry(
return CUList.size() - 1;
};

if (!canProcess(Die))
return;
if (!canProcess(Unit, Die, NameToUse, false))
return std::nullopt;

// Addes a Unit to either CU, LocalTU or ForeignTU list the first time we
// encounter it.
Expand All @@ -227,10 +248,10 @@ void DWARF5AcceleratorTable::addAccelTableEntry(
addUnit(Unit, DWOID);
}

auto addEntry = [&](DIEValue ValName) -> void {
auto getName = [&](DIEValue ValName) -> std::optional<std::string> {
if ((!ValName || ValName.getForm() == dwarf::DW_FORM_string) &&
NameToUse.empty())
return;
return std::nullopt;
std::string Name = "";
uint64_t NameIndexOffset = 0;
if (NameToUse.empty()) {
Expand Down Expand Up @@ -260,7 +281,16 @@ void DWARF5AcceleratorTable::addAccelTableEntry(
// This the same hash function used in DWARF5AccelTableData.
It.HashValue = caseFoldingDjbHash(Name);
}
return Name;
};

auto addEntry =
[&](DIEValue ValName) -> std::optional<BOLTDWARF5AccelTableData *> {
std::optional<std::string> Name = getName(ValName);
if (!Name)
return std::nullopt;

auto &It = Entries[*Name];
bool IsTU = false;
uint32_t DieTag = 0;
uint32_t UnitID = getUnitID(Unit, IsTU, DieTag);
Expand All @@ -270,18 +300,66 @@ void DWARF5AcceleratorTable::addAccelTableEntry(
if (Iter == CUOffsetsToPatch.end())
BC.errs() << "BOLT-WARNING: [internal-dwarf-warning]: Could not find "
"DWO ID in CU offsets for second Unit Index "
<< Name << ". For DIE at offset: "
<< *Name << ". For DIE at offset: "
<< Twine::utohexstr(CurrentUnitOffset + Die.getOffset())
<< ".\n";
SecondIndex = Iter->second;
}
std::optional<uint64_t> ParentOffset =
(Parent ? std::optional<uint64_t>(getEntryID(**Parent)) : std::nullopt);
// This will be populated later in writeEntry.
// This way only parent entries get tracked.
// Keeping memory footprint down.
if (ParentOffset)
EntryRelativeOffsets.insert({*ParentOffset, 0});
It.Values.push_back(new (Allocator) BOLTDWARF5AccelTableData(
Die.getOffset(), std::nullopt, DieTag, UnitID, IsTU, SecondIndex));
Die.getOffset(), ParentOffset, DieTag, UnitID, IsTU, SecondIndex));
return It.Values.back();
};

// Minor optimization not to add entry twice for DW_TAG_namespace if it has no
// DW_AT_name.
if (!(Die.getTag() == dwarf::DW_TAG_namespace &&
!Die.findAttribute(dwarf::Attribute::DW_AT_name)))
addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_linkage_name));
// For the purposes of determining whether a debugging information entry has a
// particular attribute (such as DW_AT_name), if debugging information entry A
// has a DW_AT_specification or DW_AT_abstract_origin attribute pointing to
// another debugging information entry B, any attributes of B are considered
// to be part of A.
auto processReferencedDie = [&](const dwarf::Attribute &Attr)
-> std::optional<BOLTDWARF5AccelTableData *> {
const DIEValue Value = Die.findAttribute(Attr);
if (!Value)
return std::nullopt;
const DIE *EntryDie = nullptr;
if (Value.getForm() == dwarf::DW_FORM_ref_addr) {
auto Iter = CrossCUDies.find(Value.getDIEInteger().getValue());
if (Iter == CrossCUDies.end()) {
BC.errs() << "BOLT-WARNING: [internal-dwarf-warning]: Could not find "
"referenced DIE in CrossCUDies for "
<< Twine::utohexstr(Value.getDIEInteger().getValue())
<< ".\n";
return std::nullopt;
}
EntryDie = Iter->second;
} else {
const DIEEntry &DIEENtry = Value.getDIEEntry();
EntryDie = &DIEENtry.getEntry();
}

addEntry(EntryDie->findAttribute(dwarf::Attribute::DW_AT_linkage_name));
return addEntry(EntryDie->findAttribute(dwarf::Attribute::DW_AT_name));
};

addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_name));
addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_linkage_name));
return;
if (std::optional<BOLTDWARF5AccelTableData *> Entry =
processReferencedDie(dwarf::Attribute::DW_AT_abstract_origin))
return *Entry;
if (std::optional<BOLTDWARF5AccelTableData *> Entry =
processReferencedDie(dwarf::Attribute::DW_AT_specification))
return *Entry;

return addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_name));
}

/// Algorithm from llvm implementation.
Expand Down Expand Up @@ -382,6 +460,11 @@ void DWARF5AcceleratorTable::populateAbbrevsMap() {
if (SecondEntryRet)
Abbrev.addAttribute(SecondEntryRet->Encoding);
Abbrev.addAttribute({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
if (std::optional<uint64_t> Offset = Value->getParentDieOffset())
Abbrev.addAttribute({dwarf::DW_IDX_parent, dwarf::DW_FORM_ref4});
else
Abbrev.addAttribute(
{dwarf::DW_IDX_parent, dwarf::DW_FORM_flag_present});
FoldingSetNodeID ID;
Abbrev.Profile(ID);
void *InsertPos;
Expand All @@ -401,7 +484,11 @@ void DWARF5AcceleratorTable::populateAbbrevsMap() {
}
}

void DWARF5AcceleratorTable::writeEntry(const BOLTDWARF5AccelTableData &Entry) {
void DWARF5AcceleratorTable::writeEntry(BOLTDWARF5AccelTableData &Entry) {
const uint64_t EntryID = getEntryID(Entry);
if (EntryRelativeOffsets.find(EntryID) != EntryRelativeOffsets.end())
EntryRelativeOffsets[EntryID] = EntriesBuffer->size();

const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
getIndexForEntry(Entry);
// For forgeign type (FTU) units that need to refer to the FTU and to the CU.
Expand Down Expand Up @@ -456,6 +543,17 @@ void DWARF5AcceleratorTable::writeEntry(const BOLTDWARF5AccelTableData &Entry) {
llvm::endianness::little);
break;
}
case dwarf::DW_IDX_parent: {
assert(
(AttrEnc.Form == dwarf::DW_FORM_ref4 && Entry.getParentDieOffset()) ||
AttrEnc.Form == dwarf::DW_FORM_flag_present);
if (std::optional<uint64_t> ParentOffset = Entry.getParentDieOffset()) {
Entry.setPatchOffset(EntriesBuffer->size());
support::endian::write(*Entriestream, static_cast<uint32_t>(UINT32_MAX),
llvm::endianness::little);
}
break;
}
}
}
}
Expand All @@ -464,13 +562,34 @@ void DWARF5AcceleratorTable::writeEntries() {
for (auto &Bucket : getBuckets()) {
for (DWARF5AcceleratorTable::HashData *Hash : Bucket) {
Hash->EntryOffset = EntriesBuffer->size();
for (const BOLTDWARF5AccelTableData *Value : Hash->Values) {
for (BOLTDWARF5AccelTableData *Value : Hash->Values) {
writeEntry(*Value);
}
support::endian::write(*Entriestream, static_cast<uint8_t>(0),
llvm::endianness::little);
}
}
// Patching parent offsets.
for (auto &Bucket : getBuckets()) {
for (DWARF5AcceleratorTable::HashData *Hash : Bucket) {
for (BOLTDWARF5AccelTableData *Entry : Hash->Values) {
std::optional<uint64_t> ParentOffset = Entry->getParentDieOffset();
if (!ParentOffset)
continue;
if (const auto Iter = EntryRelativeOffsets.find(*ParentOffset);
Iter != EntryRelativeOffsets.end()) {
const uint64_t PatchOffset = Entry->getPatchOffset();
uint32_t *Ptr = reinterpret_cast<uint32_t *>(
&EntriesBuffer.get()->data()[PatchOffset]);
*Ptr = Iter->second;
} else {
BC.errs() << "BOLT-WARNING: [internal-dwarf-warning]: Could not find "
"entry with offset "
<< *ParentOffset << "\n";
}
}
}
}
}

void DWARF5AcceleratorTable::writeAugmentationString() {
Expand Down
22 changes: 22 additions & 0 deletions bolt/lib/Core/MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,28 @@ void MCPlusBuilder::setSize(MCInst &Inst, uint32_t Size) const {
setAnnotationOpValue(Inst, MCAnnotation::kSize, Size);
}

bool MCPlusBuilder::isDynamicBranch(const MCInst &Inst) const {
if (!hasAnnotation(Inst, MCAnnotation::kDynamicBranch))
return false;
assert(isBranch(Inst) && "Branch expected.");
return true;
}

std::optional<uint32_t>
MCPlusBuilder::getDynamicBranchID(const MCInst &Inst) const {
if (std::optional<int64_t> Value =
getAnnotationOpValue(Inst, MCAnnotation::kDynamicBranch)) {
assert(isBranch(Inst) && "Branch expected.");
return static_cast<uint32_t>(*Value);
}
return std::nullopt;
}

void MCPlusBuilder::setDynamicBranch(MCInst &Inst, uint32_t ID) const {
assert(isBranch(Inst) && "Branch expected.");
setAnnotationOpValue(Inst, MCAnnotation::kDynamicBranch, ID);
}

bool MCPlusBuilder::hasAnnotation(const MCInst &Inst, unsigned Index) const {
return (bool)getAnnotationOpValue(Inst, Index);
}
Expand Down
27 changes: 22 additions & 5 deletions bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ static cl::opt<unsigned>
cl::desc("print statistics about basic block ordering"),
cl::init(0), cl::cat(BoltOptCategory));

static cl::opt<bool> PrintLargeFunctions(
"print-large-functions",
cl::desc("print functions that could not be overwritten due to excessive "
"size"),
cl::init(false), cl::cat(BoltOptCategory));

static cl::list<bolt::DynoStats::Category>
PrintSortedBy("print-sorted-by", cl::CommaSeparated,
cl::desc("print functions sorted by order of dyno stats"),
Expand Down Expand Up @@ -570,8 +576,12 @@ Error CheckLargeFunctions::runOnFunctions(BinaryContext &BC) {
uint64_t HotSize, ColdSize;
std::tie(HotSize, ColdSize) =
BC.calculateEmittedSize(BF, /*FixBranches=*/false);
if (HotSize > BF.getMaxSize())
if (HotSize > BF.getMaxSize()) {
if (opts::PrintLargeFunctions)
BC.outs() << "BOLT-INFO: " << BF << " size exceeds allocated space by "
<< (HotSize - BF.getMaxSize()) << " bytes\n";
BF.setSimple(false);
}
};

ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
Expand Down Expand Up @@ -852,6 +862,10 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryFunction &BF) {
assert(Result && "internal error analyzing conditional branch");
assert(CondBranch && "conditional branch expected");

// Skip dynamic branches for now.
if (BF.getBinaryContext().MIB->isDynamicBranch(*CondBranch))
continue;

// It's possible that PredBB is also a successor to BB that may have
// been processed by a previous iteration of the SCTC loop, in which
// case it may have been marked invalid. We should skip rewriting in
Expand Down Expand Up @@ -1012,6 +1026,10 @@ uint64_t ShortenInstructions::shortenInstructions(BinaryFunction &Function) {
const BinaryContext &BC = Function.getBinaryContext();
for (BinaryBasicBlock &BB : Function) {
for (MCInst &Inst : BB) {
// Skip shortening instructions with Size annotation.
if (BC.MIB->getSize(Inst))
continue;

MCInst OriginalInst;
if (opts::Verbosity > 2)
OriginalInst = Inst;
Expand Down Expand Up @@ -1056,10 +1074,9 @@ void Peepholes::addTailcallTraps(BinaryFunction &Function) {
MCInst *Inst = BB.getLastNonPseudoInstr();
if (Inst && MIB->isTailCall(*Inst) && MIB->isIndirectBranch(*Inst)) {
MCInst Trap;
if (MIB->createTrap(Trap)) {
BB.addInstruction(Trap);
++TailCallTraps;
}
MIB->createTrap(Trap);
BB.addInstruction(Trap);
++TailCallTraps;
}
}
}
Expand Down
44 changes: 15 additions & 29 deletions bolt/lib/Passes/ShrinkWrapping.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -680,16 +680,15 @@ void StackLayoutModifier::performChanges() {
if (StackPtrReg != BC.MIB->getFramePointer())
Adjustment = -Adjustment;
if (IsLoad)
Success = BC.MIB->createRestoreFromStack(
Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
BC.MIB->createRestoreFromStack(Inst, StackPtrReg,
StackOffset + Adjustment, Reg, Size);
else if (IsStore)
Success = BC.MIB->createSaveToStack(
Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
BC.MIB->createSaveToStack(Inst, StackPtrReg, StackOffset + Adjustment,
Reg, Size);
LLVM_DEBUG({
dbgs() << "Adjusted instruction: ";
Inst.dump();
});
assert(Success);
}
}
}
Expand Down Expand Up @@ -1653,19 +1652,13 @@ Expected<MCInst> ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
if (SPVal != StackPointerTracking::SUPERPOSITION &&
SPVal != StackPointerTracking::EMPTY) {
if (FIE.IsLoad) {
if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
FIE.StackOffset - SPVal, FIE.RegOrImm,
FIE.Size)) {
return createFatalBOLTError(
"createRestoreFromStack: not supported on this platform\n");
}
} else {
if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
FIE.StackOffset - SPVal, FIE.RegOrImm,
FIE.Size)) {
return createFatalBOLTError(
"createSaveToStack: not supported on this platform\n");
}
FIE.Size);
} else {
BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
FIE.StackOffset - SPVal, FIE.RegOrImm,
FIE.Size);
}
if (CreatePushOrPop)
BC.MIB->changeToPushOrPop(NewInst);
Expand All @@ -1675,19 +1668,12 @@ Expected<MCInst> ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
FPVal != StackPointerTracking::EMPTY);

if (FIE.IsLoad) {
if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
FIE.StackOffset - FPVal, FIE.RegOrImm,
FIE.Size)) {
return createFatalBOLTError(
"createRestoreFromStack: not supported on this platform\n");
}
} else {
if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
FIE.StackOffset - FPVal, FIE.RegOrImm,
FIE.Size)) {
return createFatalBOLTError(
"createSaveToStack: not supported on this platform\n");
}
FIE.Size);
} else {
BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
FIE.StackOffset - FPVal, FIE.RegOrImm, FIE.Size);
}
return NewInst;
}
Expand Down
119 changes: 102 additions & 17 deletions bolt/lib/Profile/BoltAddressTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";

void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
const BinaryBasicBlock &BB,
uint64_t FuncAddress) {
uint64_t FuncInputAddress,
uint64_t FuncOutputAddress) {
const uint64_t BBOutputOffset =
BB.getOutputAddressRange().first - FuncAddress;
BB.getOutputAddressRange().first - FuncOutputAddress;
const uint32_t BBInputOffset = BB.getInputOffset();

// Every output BB must track back to an input BB for profile collection
Expand All @@ -39,6 +40,14 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
LLVM_DEBUG(dbgs() << "BB " << BB.getName() << "\n");
LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(BBOutputOffset)
<< " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
// NB: in `writeEntriesForBB` we use the input address because hashes are
// saved early in `saveMetadata` before output addresses are assigned.
const BBHashMapTy &BBHashMap = getBBHashMap(FuncInputAddress);
(void)BBHashMap;
LLVM_DEBUG(
dbgs() << formatv(" Hash: {0:x}\n", BBHashMap.getBBHash(BBInputOffset)));
LLVM_DEBUG(
dbgs() << formatv(" Index: {0}\n", BBHashMap.getBBIndex(BBInputOffset)));
// In case of conflicts (same Key mapping to different Vals), the last
// update takes precedence. Of course it is not ideal to have conflicts and
// those happen when we have an empty BB that either contained only
Expand All @@ -55,7 +64,7 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
const auto OutputAddress = IOAddressMap.lookup(InputAddress);
assert(OutputAddress && "Unknown instruction address");
const auto OutputOffset = *OutputAddress - FuncAddress;
const auto OutputOffset = *OutputAddress - FuncOutputAddress;

// Is this the first instruction in the BB? No need to duplicate the entry.
if (OutputOffset == BBOutputOffset)
Expand All @@ -72,20 +81,28 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
for (auto &BFI : BC.getBinaryFunctions()) {
const BinaryFunction &Function = BFI.second;
const uint64_t InputAddress = Function.getAddress();
const uint64_t OutputAddress = Function.getOutputAddress();
// We don't need a translation table if the body of the function hasn't
// changed
if (Function.isIgnored() || (!BC.HasRelocations && !Function.isSimple()))
continue;

// TBD: handle BAT functions w/multiple entry points.
if (Function.isMultiEntry())
continue;

LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
LLVM_DEBUG(dbgs() << " Address reference: 0x"
<< Twine::utohexstr(Function.getOutputAddress()) << "\n");
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n", getBFHash(OutputAddress)));

MapTy Map;
for (const BinaryBasicBlock *const BB :
Function.getLayout().getMainFragment())
writeEntriesForBB(Map, *BB, Function.getOutputAddress());
writeEntriesForBB(Map, *BB, InputAddress, OutputAddress);
Maps.emplace(Function.getOutputAddress(), std::move(Map));
ReverseMap.emplace(OutputAddress, InputAddress);

if (!Function.isSplit())
continue;
Expand All @@ -94,12 +111,12 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
LLVM_DEBUG(dbgs() << " Cold part\n");
for (const FunctionFragment &FF :
Function.getLayout().getSplitFragments()) {
ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
Map.clear();
for (const BinaryBasicBlock *const BB : FF)
writeEntriesForBB(Map, *BB, FF.getAddress());
writeEntriesForBB(Map, *BB, InputAddress, FF.getAddress());

Maps.emplace(FF.getAddress(), std::move(Map));
ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
}
}

Expand All @@ -109,6 +126,9 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
writeMaps</*Cold=*/true>(Maps, PrevAddress, OS);

BC.outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.getNumFunctions()
<< " function and " << FuncHashes.getNumBasicBlocks()
<< " basic block hashes\n";
}

APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
Expand Down Expand Up @@ -155,6 +175,10 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
// Only process cold fragments in cold mode, and vice versa.
if (Cold != ColdPartSource.count(Address))
continue;
// NB: in `writeMaps` we use the input address because hashes are saved
// early in `saveMetadata` before output addresses are assigned.
const uint64_t HotInputAddress =
ReverseMap[Cold ? ColdPartSource[Address] : Address];
MapTy &Map = MapEntry.second;
const uint32_t NumEntries = Map.size();
LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
Expand All @@ -166,6 +190,15 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
std::distance(ColdPartSource.begin(), ColdPartSource.find(Address));
encodeULEB128(HotIndex - PrevIndex, OS);
PrevIndex = HotIndex;
} else {
// Function hash
size_t BFHash = getBFHash(HotInputAddress);
LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", BFHash));
OS.write(reinterpret_cast<char *>(&BFHash), 8);
// Number of basic blocks
size_t NumBasicBlocks = getBBHashMap(HotInputAddress).getNumBasicBlocks();
LLVM_DEBUG(dbgs() << "Basic blocks: " << NumBasicBlocks << '\n');
encodeULEB128(NumBasicBlocks, OS);
}
encodeULEB128(NumEntries, OS);
// For hot fragments only: encode the number of equal offsets
Expand All @@ -187,8 +220,10 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
});
}
}
const BBHashMapTy &BBHashMap = getBBHashMap(HotInputAddress);
size_t Index = 0;
uint64_t InOffset = 0;
size_t PrevBBIndex = 0;
// Output and Input addresses and delta-encoded
for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
const uint64_t OutputAddress = KeyVal.first + Address;
Expand All @@ -197,6 +232,17 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
if (Index++ >= EqualElems)
encodeSLEB128(KeyVal.second - InOffset, OS);
InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
if ((InOffset & BRANCHENTRY) == 0) {
const bool IsBlock = BBHashMap.isInputBlock(InOffset >> 1);
unsigned BBIndex = IsBlock ? BBHashMap.getBBIndex(InOffset >> 1) : 0;
size_t BBHash = IsBlock ? BBHashMap.getBBHash(InOffset >> 1) : 0;
OS.write(reinterpret_cast<char *>(&BBHash), 8);
// Basic block index in the input binary
encodeULEB128(BBIndex - PrevBBIndex, OS);
PrevBBIndex = BBIndex;
LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x} {3}\n", KeyVal.first,
InOffset >> 1, BBHash, BBIndex));
}
}
}
}
Expand Down Expand Up @@ -239,12 +285,24 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
size_t HotIndex = 0;
for (uint32_t I = 0; I < NumFunctions; ++I) {
const uint64_t Address = PrevAddress + DE.getULEB128(&Offset, &Err);
uint64_t HotAddress = Cold ? 0 : Address;
PrevAddress = Address;
if (Cold) {
HotIndex += DE.getULEB128(&Offset, &Err);
ColdPartSource.emplace(Address, HotFuncs[HotIndex]);
HotAddress = HotFuncs[HotIndex];
ColdPartSource.emplace(Address, HotAddress);
} else {
HotFuncs.push_back(Address);
// Function hash
const size_t FuncHash = DE.getU64(&Offset, &Err);
FuncHashes.addEntry(Address, FuncHash);
LLVM_DEBUG(dbgs() << formatv("{0:x}: hash {1:x}\n", Address, FuncHash));
// Number of basic blocks
const size_t NumBasicBlocks = DE.getULEB128(&Offset, &Err);
NumBasicBlocksMap.emplace(Address, NumBasicBlocks);
LLVM_DEBUG(dbgs() << formatv("{0:x}: #bbs {1}, {2} bytes\n", Address,
NumBasicBlocks,
getULEB128Size(NumBasicBlocks)));
}
const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
// Equal offsets, hot fragments only.
Expand Down Expand Up @@ -275,6 +333,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
<< Twine::utohexstr(Address) << "\n");
uint64_t InputOffset = 0;
size_t BBIndex = 0;
for (uint32_t J = 0; J < NumEntries; ++J) {
const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err);
const uint64_t OutputAddress = PrevAddress + OutputDelta;
Expand All @@ -288,12 +347,27 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
InputOffset += InputDelta;
}
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset));
LLVM_DEBUG(
dbgs() << formatv("{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}\n",
OutputOffset, InputOffset, OutputDelta,
getULEB128Size(OutputDelta), InputDelta,
(J < EqualElems) ? 0 : getSLEB128Size(InputDelta),
OutputAddress));
size_t BBHash = 0;
size_t BBIndexDelta = 0;
const bool IsBranchEntry = InputOffset & BRANCHENTRY;
if (!IsBranchEntry) {
BBHash = DE.getU64(&Offset, &Err);
BBIndexDelta = DE.getULEB128(&Offset, &Err);
BBIndex += BBIndexDelta;
// Map basic block hash to hot fragment by input offset
getBBHashMap(HotAddress).addEntry(InputOffset >> 1, BBIndex, BBHash);
}
LLVM_DEBUG({
dbgs() << formatv(
"{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}", OutputOffset,
InputOffset, OutputDelta, getULEB128Size(OutputDelta), InputDelta,
(J < EqualElems) ? 0 : getSLEB128Size(InputDelta), OutputAddress);
if (!IsBranchEntry) {
dbgs() << formatv(" {0:x} {1}/{2}b", BBHash, BBIndex,
getULEB128Size(BBIndexDelta));
}
dbgs() << '\n';
});
}
Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
}
Expand All @@ -303,15 +377,24 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
const size_t NumTables = Maps.size();
OS << "BAT tables for " << NumTables << " functions:\n";
for (const auto &MapEntry : Maps) {
OS << "Function Address: 0x" << Twine::utohexstr(MapEntry.first) << "\n";
const uint64_t Address = MapEntry.first;
const uint64_t HotAddress = fetchParentAddress(Address);
OS << "Function Address: 0x" << Twine::utohexstr(Address);
if (HotAddress == 0)
OS << formatv(", hash: {0:x}", getBFHash(Address));
OS << "\n";
OS << "BB mappings:\n";
const BBHashMapTy &BBHashMap =
getBBHashMap(HotAddress ? HotAddress : Address);
for (const auto &Entry : MapEntry.second) {
const bool IsBranch = Entry.second & BRANCHENTRY;
const uint32_t Val = Entry.second >> 1; // dropping BRANCHENTRY bit
OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
<< "0x" << Twine::utohexstr(Val);
if (IsBranch)
OS << " (branch)";
else
OS << formatv(" hash: {0:x}", BBHashMap.getBBHash(Val));
OS << "\n";
}
OS << "\n";
Expand Down Expand Up @@ -432,12 +515,14 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
if (BF.isIgnored() || (!BC.HasRelocations && !BF.isSimple()))
continue;
// Prepare function and block hashes
FuncHashes[BF.getAddress()].first = BF.computeHash();
FuncHashes.addEntry(BF.getAddress(), BF.computeHash());
BF.computeBlockHashes();
BBHashMapTy &BBHashMap = getBBHashMap(BF.getAddress());
// Set BF/BB metadata
for (const BinaryBasicBlock &BB : BF)
FuncHashes[BF.getAddress()].second.emplace(BB.getInputOffset(),
BB.getHash());
BBHashMap.addEntry(BB.getInputOffset(), BB.getIndex(), BB.getHash());
}
}

} // namespace bolt
} // namespace llvm
114 changes: 110 additions & 4 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Profile/Heatmap.h"
#include "bolt/Profile/YAMLProfileWriter.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "bolt/Utils/Utils.h"
#include "llvm/ADT/STLExtras.h"
Expand Down Expand Up @@ -85,6 +86,7 @@ MaxSamples("max-samples",
cl::cat(AggregatorCategory));

extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
extern cl::opt<std::string> SaveProfile;

cl::opt<bool> ReadPreAggregated(
"pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
Expand Down Expand Up @@ -594,10 +596,21 @@ Error DataAggregator::readProfile(BinaryContext &BC) {
convertBranchData(Function);
}

if (opts::AggregateOnly &&
opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata) {
if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
report_error("cannot create output data file", EC);
if (opts::AggregateOnly) {
if (opts::ProfileFormat == opts::ProfileFormatKind::PF_Fdata)
if (std::error_code EC = writeAggregatedFile(opts::OutputFilename))
report_error("cannot create output data file", EC);

// BAT YAML is handled by DataAggregator since normal YAML output requires
// CFG which is not available in BAT mode.
if (usesBAT()) {
if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML)
if (std::error_code EC = writeBATYAML(BC, opts::OutputFilename))
report_error("cannot create output data file", EC);
if (!opts::SaveProfile.empty())
if (std::error_code EC = writeBATYAML(BC, opts::SaveProfile))
report_error("cannot create output data file", EC);
}
}

return Error::success();
Expand Down Expand Up @@ -2258,6 +2271,99 @@ DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
return std::error_code();
}

std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
StringRef OutputFilename) const {
std::error_code EC;
raw_fd_ostream OutFile(OutputFilename, EC, sys::fs::OpenFlags::OF_None);
if (EC)
return EC;

yaml::bolt::BinaryProfile BP;

// Fill out the header info.
BP.Header.Version = 1;
BP.Header.FileName = std::string(BC.getFilename());
std::optional<StringRef> BuildID = BC.getFileBuildID();
BP.Header.Id = BuildID ? std::string(*BuildID) : "<unknown>";
BP.Header.Origin = std::string(getReaderName());
// Only the input binary layout order is supported.
BP.Header.IsDFSOrder = false;
// FIXME: Need to match hash function used to produce BAT hashes.
BP.Header.HashFunction = HashFunction::Default;

ListSeparator LS(",");
raw_string_ostream EventNamesOS(BP.Header.EventNames);
for (const StringMapEntry<std::nullopt_t> &EventEntry : EventNames)
EventNamesOS << LS << EventEntry.first().str();

BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
: BinaryFunction::PF_LBR;

if (!opts::BasicAggregation) {
// Convert profile for functions not covered by BAT
for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &Function = BFI.second;
if (!Function.hasProfile())
continue;
if (BAT->isBATFunction(Function.getAddress()))
continue;
BP.Functions.emplace_back(
YAMLProfileWriter::convert(Function, /*UseDFS=*/false));
}

for (const auto &KV : NamesToBranches) {
const StringRef FuncName = KV.first;
const FuncBranchData &Branches = KV.second;
yaml::bolt::BinaryFunctionProfile YamlBF;
BinaryData *BD = BC.getBinaryDataByName(FuncName);
assert(BD);
uint64_t FuncAddress = BD->getAddress();
if (!BAT->isBATFunction(FuncAddress))
continue;
// Filter out cold fragments
if (!BD->getSectionName().equals(BC.getMainCodeSectionName()))
continue;
BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncAddress);
assert(BF);
YamlBF.Name = FuncName.str();
YamlBF.Id = BF->getFunctionNumber();
YamlBF.Hash = BAT->getBFHash(FuncAddress);
YamlBF.ExecCount = BF->getKnownExecutionCount();
YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
const BoltAddressTranslation::BBHashMapTy &BlockMap =
BAT->getBBHashMap(FuncAddress);

auto addSuccProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB,
uint64_t SuccOffset, unsigned SuccDataIdx) {
const llvm::bolt::BranchInfo &BI = Branches.Data.at(SuccDataIdx);
yaml::bolt::SuccessorInfo SI;
SI.Index = BlockMap.getBBIndex(SuccOffset);
SI.Count = BI.Branches;
SI.Mispreds = BI.Mispreds;
YamlBB.Successors.emplace_back(SI);
};

for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) {
yaml::bolt::BinaryBasicBlockProfile YamlBB;
if (!BlockMap.isInputBlock(FromOffset))
continue;
YamlBB.Index = BlockMap.getBBIndex(FromOffset);
YamlBB.Hash = BlockMap.getBBHash(FromOffset);
for (const auto &[SuccOffset, SuccDataIdx] : SuccKV)
addSuccProfile(YamlBB, SuccOffset, SuccDataIdx);
if (YamlBB.ExecCount || !YamlBB.Successors.empty())
YamlBF.Blocks.emplace_back(YamlBB);
}
BP.Functions.emplace_back(YamlBF);
}
}

// Write the profile.
yaml::Output Out(OutFile, nullptr, 0);
Out << BP;
return std::error_code();
}

void DataAggregator::dump() const { DataReader::dump(); }

void DataAggregator::dump(const LBREntry &LBR) const {
Expand Down
3 changes: 3 additions & 0 deletions bolt/lib/Profile/StaleProfileMatching.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,9 @@ void assignProfile(BinaryFunction &BF,

bool YAMLProfileReader::inferStaleProfile(
BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {
if (!BF.hasCFG())
return false;

LLVM_DEBUG(dbgs() << "BOLT-INFO: applying profile inference for "
<< "\"" << BF.getPrintName() << "\"\n");

Expand Down
35 changes: 13 additions & 22 deletions bolt/lib/Profile/YAMLProfileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Profile/ProfileReaderBase.h"
#include "bolt/Profile/ProfileYAMLMapping.h"
#include "bolt/Rewrite/RewriteInstance.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
Expand All @@ -26,15 +25,15 @@ extern llvm::cl::opt<bool> ProfileUseDFS;
namespace llvm {
namespace bolt {

namespace {
void convert(const BinaryFunction &BF,
yaml::bolt::BinaryFunctionProfile &YamlBF) {
yaml::bolt::BinaryFunctionProfile
YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) {
yaml::bolt::BinaryFunctionProfile YamlBF;
const BinaryContext &BC = BF.getBinaryContext();

const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;

// Prepare function and block hashes
BF.computeHash(opts::ProfileUseDFS);
BF.computeHash(UseDFS);
BF.computeBlockHashes();

YamlBF.Name = BF.getPrintName();
Expand All @@ -44,7 +43,7 @@ void convert(const BinaryFunction &BF,
YamlBF.ExecCount = BF.getKnownExecutionCount();

BinaryFunction::BasicBlockOrderType Order;
llvm::copy(opts::ProfileUseDFS ? BF.dfs() : BF.getLayout().blocks(),
llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
std::back_inserter(Order));

for (const BinaryBasicBlock *BB : Order) {
Expand Down Expand Up @@ -106,20 +105,14 @@ void convert(const BinaryFunction &BF,
TargetName = Callee->getOneName();
}

auto getAnnotationWithDefault = [&](const MCInst &Inst, StringRef Ann) {
return BC.MIB->getAnnotationWithDefault(Instr, Ann, 0ull);
};
if (BC.MIB->getConditionalTailCall(Instr)) {
auto CTCCount =
BC.MIB->tryGetAnnotationAs<uint64_t>(Instr, "CTCTakenCount");
if (CTCCount) {
CSI.Count = *CTCCount;
auto CTCMispreds =
BC.MIB->tryGetAnnotationAs<uint64_t>(Instr, "CTCMispredCount");
if (CTCMispreds)
CSI.Mispreds = *CTCMispreds;
}
CSI.Count = getAnnotationWithDefault(Instr, "CTCTakenCount");
CSI.Mispreds = getAnnotationWithDefault(Instr, "CTCMispredCount");
} else {
auto Count = BC.MIB->tryGetAnnotationAs<uint64_t>(Instr, "Count");
if (Count)
CSI.Count = *Count;
CSI.Count = getAnnotationWithDefault(Instr, "Count");
}

if (CSI.Count)
Expand Down Expand Up @@ -165,8 +158,8 @@ void convert(const BinaryFunction &BF,

YamlBF.Blocks.emplace_back(YamlBB);
}
return YamlBF;
}
} // end anonymous namespace

std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
const BinaryContext &BC = RI.getBinaryContext();
Expand Down Expand Up @@ -222,9 +215,7 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) {
if (!BF.hasValidProfile() && !RI.getProfileReader()->isTrustedSource())
continue;

yaml::bolt::BinaryFunctionProfile YamlBF;
convert(BF, YamlBF);
BP.Functions.emplace_back(YamlBF);
BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS));
}
}

Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Rewrite/BinaryPassManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ static cl::opt<bool> JTFootprintReductionFlag(
"instructions at jump sites"),
cl::cat(BoltOptCategory));

static cl::opt<bool>
cl::opt<bool>
KeepNops("keep-nops",
cl::desc("keep no-op instructions. By default they are removed."),
cl::Hidden, cl::cat(BoltOptCategory));
Expand Down
49 changes: 35 additions & 14 deletions bolt/lib/Rewrite/DWARFRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,10 +283,9 @@ class DIEStreamer : public DwarfStreamer {
DIEStreamer(DIEBuilder *DIEBldr, DWARFRewriter &Rewriter,
DWARFLinkerBase::OutputFileType OutFileType,
raw_pwrite_stream &OutFile,
std::function<StringRef(StringRef Input)> Translator,
DWARFLinkerBase::MessageHandlerTy Warning)
: DwarfStreamer(OutFileType, OutFile, Translator, Warning),
DIEBldr(DIEBldr), Rewriter(Rewriter){};
: DwarfStreamer(OutFileType, OutFile, Warning), DIEBldr(DIEBldr),
Rewriter(Rewriter){};

using DwarfStreamer::emitCompileUnitHeader;

Expand Down Expand Up @@ -376,12 +375,11 @@ static cl::opt<bool> AlwaysConvertToRanges(
extern cl::opt<std::string> CompDirOverride;
} // namespace opts

static bool getLowAndHighPC(const DIE &Die, const DWARFUnit &DU,
uint64_t &LowPC, uint64_t &HighPC,
uint64_t &SectionIndex) {
/// If DW_AT_low_pc exists sets LowPC and returns true.
static bool getLowPC(const DIE &Die, const DWARFUnit &DU, uint64_t &LowPC,
uint64_t &SectionIndex) {
DIEValue DvalLowPc = Die.findAttribute(dwarf::DW_AT_low_pc);
DIEValue DvalHighPc = Die.findAttribute(dwarf::DW_AT_high_pc);
if (!DvalLowPc || !DvalHighPc)
if (!DvalLowPc)
return false;

dwarf::Form Form = DvalLowPc.getForm();
Expand All @@ -404,14 +402,39 @@ static bool getLowAndHighPC(const DIE &Die, const DWARFUnit &DU,
LowPC = LowPcValue;
SectionIndex = 0;
}
return true;
}

/// If DW_AT_high_pc exists sets HighPC and returns true.
static bool getHighPC(const DIE &Die, const uint64_t LowPC, uint64_t &HighPC) {
DIEValue DvalHighPc = Die.findAttribute(dwarf::DW_AT_high_pc);
if (!DvalHighPc)
return false;
if (DvalHighPc.getForm() == dwarf::DW_FORM_addr)
HighPC = DvalHighPc.getDIEInteger().getValue();
else
HighPC = LowPC + DvalHighPc.getDIEInteger().getValue();

return true;
}

/// If DW_AT_low_pc and DW_AT_high_pc exist sets LowPC and HighPC and returns
/// true.
static bool getLowAndHighPC(const DIE &Die, const DWARFUnit &DU,
uint64_t &LowPC, uint64_t &HighPC,
uint64_t &SectionIndex) {
uint64_t TempLowPC = LowPC;
uint64_t TempHighPC = HighPC;
uint64_t TempSectionIndex = SectionIndex;
if (getLowPC(Die, DU, TempLowPC, TempSectionIndex) &&
getHighPC(Die, TempLowPC, TempHighPC)) {
LowPC = TempLowPC;
HighPC = TempHighPC;
SectionIndex = TempSectionIndex;
return true;
}
return false;
}

static Expected<llvm::DWARFAddressRangesVector>
getDIEAddressRanges(const DIE &Die, DWARFUnit &DU) {
uint64_t LowPC, HighPC, Index;
Expand Down Expand Up @@ -469,7 +492,6 @@ createDIEStreamer(const Triple &TheTriple, raw_pwrite_stream &OutFile,

std::unique_ptr<DIEStreamer> Streamer = std::make_unique<DIEStreamer>(
&DIEBldr, Rewriter, DWARFLinkerBase::OutputFileType::Object, OutFile,
[](StringRef Input) -> StringRef { return Input; },
[&](const Twine &Warning, StringRef Context, const DWARFDie *) {});
Error Err = Streamer->init(TheTriple, Swift5ReflectionSegmentName);
if (Err)
Expand Down Expand Up @@ -1250,10 +1272,9 @@ void DWARFRewriter::updateUnitDebugInfo(
}
}
} else if (LowPCAttrInfo) {
const std::optional<uint64_t> Result =
LowPCAttrInfo.getDIEInteger().getValue();
if (Result.has_value()) {
const uint64_t Address = Result.value();
uint64_t Address = 0;
uint64_t SectionIndex = 0;
if (getLowPC(*Die, Unit, Address, SectionIndex)) {
uint64_t NewAddress = 0;
if (const BinaryFunction *Function =
BC.getBinaryFunctionContainingAddress(Address)) {
Expand Down
560 changes: 526 additions & 34 deletions bolt/lib/Rewrite/LinuxKernelRewriter.cpp

Large diffs are not rendered by default.

27 changes: 14 additions & 13 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ extern cl::list<std::string> HotTextMoveSections;
extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
extern cl::opt<JumpTableSupportLevel> JumpTables;
extern cl::opt<bool> KeepNops;
extern cl::list<std::string> ReorderData;
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
extern cl::opt<bool> TimeBuild;
Expand Down Expand Up @@ -199,10 +200,7 @@ static cl::opt<cl::boolOrDefault> RelocationMode(
"relocs", cl::desc("use relocations in the binary (default=autodetect)"),
cl::cat(BoltCategory));

static cl::opt<std::string>
SaveProfile("w",
cl::desc("save recorded profile to a file"),
cl::cat(BoltOutputCategory));
extern cl::opt<std::string> SaveProfile;

static cl::list<std::string>
SkipFunctionNames("skip-funcs",
Expand Down Expand Up @@ -732,6 +730,13 @@ Error RewriteInstance::run() {
// Skip disassembling if we have a translation table and we are running an
// aggregation job.
if (opts::AggregateOnly && BAT->enabledFor(InputFile)) {
// YAML profile in BAT mode requires CFG for .bolt.org.text functions
if (!opts::SaveProfile.empty() ||
opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML) {
selectFunctionsToProcess();
disassembleFunctions();
buildFunctionsCFG();
}
processProfileData();
return Error::success();
}
Expand Down Expand Up @@ -2028,13 +2033,8 @@ void RewriteInstance::adjustCommandLineOptions() {
if (opts::Lite)
BC->outs() << "BOLT-INFO: enabling lite mode\n";

if (!opts::SaveProfile.empty() && BAT->enabledFor(InputFile)) {
BC->errs()
<< "BOLT-ERROR: unable to save profile in YAML format for input "
"file processed by BOLT. Please remove -w option and use branch "
"profile.\n";
exit(1);
}
if (BC->IsLinuxKernel && !opts::KeepNops.getNumOccurrences())
opts::KeepNops = true;
}

namespace {
Expand Down Expand Up @@ -3126,12 +3126,13 @@ void RewriteInstance::processProfileData() {
}
}

if (!opts::SaveProfile.empty()) {
if (!opts::SaveProfile.empty() && !BAT->enabledFor(InputFile)) {
YAMLProfileWriter PW(opts::SaveProfile);
PW.writeProfile(*this);
}
if (opts::AggregateOnly &&
opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML) {
opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML &&
!BAT->enabledFor(InputFile)) {
YAMLProfileWriter PW(opts::OutputFilename);
PW.writeProfile(*this);
}
Expand Down
24 changes: 9 additions & 15 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1026,7 +1026,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return Code;
}

bool createTailCall(MCInst &Inst, const MCSymbol *Target,
void createTailCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) override {
return createDirectCall(Inst, Target, Ctx, /*IsTailCall*/ true);
}
Expand All @@ -1036,11 +1036,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
createShortJmp(Seq, Target, Ctx, /*IsTailCall*/ true);
}

bool createTrap(MCInst &Inst) const override {
void createTrap(MCInst &Inst) const override {
Inst.clear();
Inst.setOpcode(AArch64::BRK);
Inst.addOperand(MCOperand::createImm(1));
return true;
}

bool convertJmpToTailCall(MCInst &Inst) override {
Expand Down Expand Up @@ -1068,16 +1067,15 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
Inst.getOperand(0).getImm() == 0;
}

bool createNoop(MCInst &Inst) const override {
void createNoop(MCInst &Inst) const override {
Inst.setOpcode(AArch64::HINT);
Inst.clear();
Inst.addOperand(MCOperand::createImm(0));
return true;
}

bool mayStore(const MCInst &Inst) const override { return false; }

bool createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx,
void createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx,
bool IsTailCall) override {
Inst.setOpcode(IsTailCall ? AArch64::B : AArch64::BL);
Inst.clear();
Expand All @@ -1086,7 +1084,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
*Ctx, 0)));
if (IsTailCall)
convertJmpToTailCall(Inst);
return true;
}

bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
Expand Down Expand Up @@ -1293,14 +1290,13 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return true;
}

bool createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
MCContext *Ctx) const override {
Inst.setOpcode(AArch64::B);
Inst.clear();
Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
Inst, MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx),
*Ctx, 0)));
return true;
}

bool shouldRecordCodeRelocation(uint64_t RelType) const override {
Expand Down Expand Up @@ -1353,14 +1349,13 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return StringRef("\0\0\0\0", 4);
}

bool createReturn(MCInst &Inst) const override {
void createReturn(MCInst &Inst) const override {
Inst.setOpcode(AArch64::RET);
Inst.clear();
Inst.addOperand(MCOperand::createReg(AArch64::LR));
return true;
}

bool createStackPointerIncrement(
void createStackPointerIncrement(
MCInst &Inst, int Size,
bool NoFlagsClobber = false /*unused for AArch64*/) const override {
Inst.setOpcode(AArch64::SUBXri);
Expand All @@ -1369,10 +1364,9 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
Inst.addOperand(MCOperand::createReg(AArch64::SP));
Inst.addOperand(MCOperand::createImm(Size));
Inst.addOperand(MCOperand::createImm(0));
return true;
}

bool createStackPointerDecrement(
void createStackPointerDecrement(
MCInst &Inst, int Size,
bool NoFlagsClobber = false /*unused for AArch64*/) const override {
Inst.setOpcode(AArch64::ADDXri);
Expand All @@ -1381,12 +1375,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
Inst.addOperand(MCOperand::createReg(AArch64::SP));
Inst.addOperand(MCOperand::createImm(Size));
Inst.addOperand(MCOperand::createImm(0));
return true;
}

void createIndirectBranch(MCInst &Inst, MCPhysReg MemBaseReg,
int64_t Disp) const {
Inst.setOpcode(AArch64::BR);
Inst.clear();
Inst.addOperand(MCOperand::createReg(MemBaseReg));
}

Expand Down
13 changes: 5 additions & 8 deletions bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,46 +219,43 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
return true;
}

bool createReturn(MCInst &Inst) const override {
void createReturn(MCInst &Inst) const override {
// TODO "c.jr ra" when RVC is enabled
Inst.setOpcode(RISCV::JALR);
Inst.clear();
Inst.addOperand(MCOperand::createReg(RISCV::X0));
Inst.addOperand(MCOperand::createReg(RISCV::X1));
Inst.addOperand(MCOperand::createImm(0));
return true;
}

bool createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
MCContext *Ctx) const override {
Inst.setOpcode(RISCV::JAL);
Inst.clear();
Inst.addOperand(MCOperand::createReg(RISCV::X0));
Inst.addOperand(MCOperand::createExpr(
MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx)));
return true;
}

StringRef getTrapFillValue() const override {
return StringRef("\0\0\0\0", 4);
}

bool createCall(unsigned Opcode, MCInst &Inst, const MCSymbol *Target,
void createCall(unsigned Opcode, MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) {
Inst.setOpcode(Opcode);
Inst.clear();
Inst.addOperand(MCOperand::createExpr(RISCVMCExpr::create(
MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx),
RISCVMCExpr::VK_RISCV_CALL, *Ctx)));
return true;
}

bool createCall(MCInst &Inst, const MCSymbol *Target,
void createCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) override {
return createCall(RISCV::PseudoCALL, Inst, Target, Ctx);
}

bool createTailCall(MCInst &Inst, const MCSymbol *Target,
void createTailCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) override {
return createCall(RISCV::PseudoTAIL, Inst, Target, Ctx);
}
Expand Down
92 changes: 59 additions & 33 deletions bolt/lib/Target/X86/X86MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,9 @@ class X86MCPlusBuilder : public MCPlusBuilder {
}

bool isUnsupportedBranch(const MCInst &Inst) const override {
if (isDynamicBranch(Inst))
return true;

switch (Inst.getOpcode()) {
default:
return false;
Expand Down Expand Up @@ -2230,7 +2233,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
return true;
}

bool createStackPointerIncrement(MCInst &Inst, int Size,
void createStackPointerIncrement(MCInst &Inst, int Size,
bool NoFlagsClobber) const override {
if (NoFlagsClobber) {
Inst.setOpcode(X86::LEA64r);
Expand All @@ -2241,17 +2244,16 @@ class X86MCPlusBuilder : public MCPlusBuilder {
Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
Inst.addOperand(MCOperand::createImm(-Size)); // Displacement
Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
return true;
return;
}
Inst.setOpcode(X86::SUB64ri8);
Inst.clear();
Inst.addOperand(MCOperand::createReg(X86::RSP));
Inst.addOperand(MCOperand::createReg(X86::RSP));
Inst.addOperand(MCOperand::createImm(Size));
return true;
}

bool createStackPointerDecrement(MCInst &Inst, int Size,
void createStackPointerDecrement(MCInst &Inst, int Size,
bool NoFlagsClobber) const override {
if (NoFlagsClobber) {
Inst.setOpcode(X86::LEA64r);
Expand All @@ -2262,22 +2264,22 @@ class X86MCPlusBuilder : public MCPlusBuilder {
Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
Inst.addOperand(MCOperand::createImm(Size)); // Displacement
Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
return true;
return;
}
Inst.setOpcode(X86::ADD64ri8);
Inst.clear();
Inst.addOperand(MCOperand::createReg(X86::RSP));
Inst.addOperand(MCOperand::createReg(X86::RSP));
Inst.addOperand(MCOperand::createImm(Size));
return true;
}

bool createSaveToStack(MCInst &Inst, const MCPhysReg &StackReg, int Offset,
void createSaveToStack(MCInst &Inst, const MCPhysReg &StackReg, int Offset,
const MCPhysReg &SrcReg, int Size) const override {
unsigned NewOpcode;
switch (Size) {
default:
return false;
llvm_unreachable("Invalid operand size");
return;
case 2: NewOpcode = X86::MOV16mr; break;
case 4: NewOpcode = X86::MOV32mr; break;
case 8: NewOpcode = X86::MOV64mr; break;
Expand All @@ -2290,25 +2292,25 @@ class X86MCPlusBuilder : public MCPlusBuilder {
Inst.addOperand(MCOperand::createImm(Offset)); // Displacement
Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
Inst.addOperand(MCOperand::createReg(SrcReg));
return true;
}

bool createRestoreFromStack(MCInst &Inst, const MCPhysReg &StackReg,
void createRestoreFromStack(MCInst &Inst, const MCPhysReg &StackReg,
int Offset, const MCPhysReg &DstReg,
int Size) const override {
return createLoad(Inst, StackReg, /*Scale=*/1, /*IndexReg=*/X86::NoRegister,
Offset, nullptr, /*AddrSegmentReg=*/X86::NoRegister,
DstReg, Size);
}

bool createLoad(MCInst &Inst, const MCPhysReg &BaseReg, int64_t Scale,
void createLoad(MCInst &Inst, const MCPhysReg &BaseReg, int64_t Scale,
const MCPhysReg &IndexReg, int64_t Offset,
const MCExpr *OffsetExpr, const MCPhysReg &AddrSegmentReg,
const MCPhysReg &DstReg, int Size) const override {
unsigned NewOpcode;
switch (Size) {
default:
return false;
llvm_unreachable("Invalid operand size");
return;
case 2: NewOpcode = X86::MOV16rm; break;
case 4: NewOpcode = X86::MOV32rm; break;
case 8: NewOpcode = X86::MOV64rm; break;
Expand All @@ -2324,7 +2326,6 @@ class X86MCPlusBuilder : public MCPlusBuilder {
else
Inst.addOperand(MCOperand::createImm(Offset)); // Displacement
Inst.addOperand(MCOperand::createReg(AddrSegmentReg)); // AddrSegmentReg
return true;
}

InstructionListType createLoadImmediate(const MCPhysReg Dest,
Expand All @@ -2338,7 +2339,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
return Insts;
}

bool createIJmp32Frag(SmallVectorImpl<MCInst> &Insts,
void createIJmp32Frag(SmallVectorImpl<MCInst> &Insts,
const MCOperand &BaseReg, const MCOperand &Scale,
const MCOperand &IndexReg, const MCOperand &Offset,
const MCOperand &TmpReg) const override {
Expand All @@ -2362,17 +2363,16 @@ class X86MCPlusBuilder : public MCPlusBuilder {

Insts.push_back(Load);
Insts.push_back(IJmp);
return true;
}

bool createNoop(MCInst &Inst) const override {
void createNoop(MCInst &Inst) const override {
Inst.setOpcode(X86::NOOP);
return true;
Inst.clear();
}

bool createReturn(MCInst &Inst) const override {
void createReturn(MCInst &Inst) const override {
Inst.setOpcode(X86::RET64);
return true;
Inst.clear();
}

InstructionListType createInlineMemcpy(bool ReturnEnd) const override {
Expand Down Expand Up @@ -2729,23 +2729,32 @@ class X86MCPlusBuilder : public MCPlusBuilder {
return FoundOne;
}

bool createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
void createUncondBranch(MCInst &Inst, const MCSymbol *TBB,
MCContext *Ctx) const override {
Inst.clear();
Inst.setOpcode(X86::JMP_1);
Inst.clear();
Inst.addOperand(MCOperand::createExpr(
MCSymbolRefExpr::create(TBB, MCSymbolRefExpr::VK_None, *Ctx)));
return true;
}

bool createCall(MCInst &Inst, const MCSymbol *Target,
void createLongUncondBranch(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) const override {
Inst.setOpcode(X86::JMP_4);
Inst.clear();
Inst.addOperand(MCOperand::createExpr(
MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
}

void createCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) override {
Inst.setOpcode(X86::CALL64pcrel32);
Inst.clear();
Inst.addOperand(MCOperand::createExpr(
MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
return true;
}

bool createTailCall(MCInst &Inst, const MCSymbol *Target,
void createTailCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) override {
return createDirectCall(Inst, Target, Ctx, /*IsTailCall*/ true);
}
Expand All @@ -2757,10 +2766,27 @@ class X86MCPlusBuilder : public MCPlusBuilder {
createDirectCall(Seq.back(), Target, Ctx, /*IsTailCall*/ true);
}

bool createTrap(MCInst &Inst) const override {
void createTrap(MCInst &Inst) const override {
Inst.clear();
Inst.setOpcode(X86::TRAP);
return true;
}

void createCondBranch(MCInst &Inst, const MCSymbol *Target, unsigned CC,
MCContext *Ctx) const override {
Inst.setOpcode(X86::JCC_1);
Inst.clear();
Inst.addOperand(MCOperand::createExpr(
MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
Inst.addOperand(MCOperand::createImm(CC));
}

void createLongCondBranch(MCInst &Inst, const MCSymbol *Target, unsigned CC,
MCContext *Ctx) const override {
Inst.setOpcode(X86::JCC_4);
Inst.clear();
Inst.addOperand(MCOperand::createExpr(
MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
Inst.addOperand(MCOperand::createImm(CC));
}

bool reverseBranchCondition(MCInst &Inst, const MCSymbol *TBB,
Expand Down Expand Up @@ -2862,15 +2888,14 @@ class X86MCPlusBuilder : public MCPlusBuilder {
Inst.setOpcode(X86::LFENCE);
}

bool createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx,
void createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx,
bool IsTailCall) override {
Inst.clear();
Inst.setOpcode(IsTailCall ? X86::JMP_4 : X86::CALL64pcrel32);
Inst.addOperand(MCOperand::createExpr(
MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
if (IsTailCall)
setTailCall(Inst);
return true;
}

void createShortJmp(InstructionListType &Seq, const MCSymbol *Target,
Expand Down Expand Up @@ -3066,6 +3091,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
void createSwap(MCInst &Inst, MCPhysReg Source, MCPhysReg MemBaseReg,
int64_t Disp) const {
Inst.setOpcode(X86::XCHG64rm);
Inst.clear();
Inst.addOperand(MCOperand::createReg(Source));
Inst.addOperand(MCOperand::createReg(Source));
Inst.addOperand(MCOperand::createReg(MemBaseReg)); // BaseReg
Expand All @@ -3078,6 +3104,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
void createIndirectBranch(MCInst &Inst, MCPhysReg MemBaseReg,
int64_t Disp) const {
Inst.setOpcode(X86::JMP64m);
Inst.clear();
Inst.addOperand(MCOperand::createReg(MemBaseReg)); // BaseReg
Inst.addOperand(MCOperand::createImm(1)); // ScaleAmt
Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
Expand Down Expand Up @@ -3540,9 +3567,10 @@ class X86MCPlusBuilder : public MCPlusBuilder {
}

private:
bool createMove(MCInst &Inst, const MCSymbol *Src, unsigned Reg,
void createMove(MCInst &Inst, const MCSymbol *Src, unsigned Reg,
MCContext *Ctx) const {
Inst.setOpcode(X86::MOV64rm);
Inst.clear();
Inst.addOperand(MCOperand::createReg(Reg));
Inst.addOperand(MCOperand::createReg(X86::RIP)); // BaseReg
Inst.addOperand(MCOperand::createImm(1)); // ScaleAmt
Expand All @@ -3551,13 +3579,12 @@ class X86MCPlusBuilder : public MCPlusBuilder {
MCSymbolRefExpr::create(Src, MCSymbolRefExpr::VK_None,
*Ctx))); // Displacement
Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg

return true;
}

bool createLea(MCInst &Inst, const MCSymbol *Src, unsigned Reg,
void createLea(MCInst &Inst, const MCSymbol *Src, unsigned Reg,
MCContext *Ctx) const {
Inst.setOpcode(X86::LEA64r);
Inst.clear();
Inst.addOperand(MCOperand::createReg(Reg));
Inst.addOperand(MCOperand::createReg(X86::RIP)); // BaseReg
Inst.addOperand(MCOperand::createImm(1)); // ScaleAmt
Expand All @@ -3566,7 +3593,6 @@ class X86MCPlusBuilder : public MCPlusBuilder {
MCSymbolRefExpr::create(Src, MCSymbolRefExpr::VK_None,
*Ctx))); // Displacement
Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
return true;
}
};

Expand Down
4 changes: 4 additions & 0 deletions bolt/lib/Utils/CommandLineOpts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@ cl::opt<ProfileFormatKind> ProfileFormat(
clEnumValN(PF_YAML, "yaml", "dense YAML representation")),
cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));

cl::opt<std::string> SaveProfile("w",
cl::desc("save recorded profile to a file"),
cl::cat(BoltOutputCategory));

cl::opt<bool> SplitEH("split-eh", cl::desc("split C++ exception handling code"),
cl::Hidden, cl::cat(BoltOptCategory));

Expand Down
81 changes: 81 additions & 0 deletions bolt/test/X86/Inputs/blarge_new.preagg.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
B 40164b 401608 109 0
B 401611 4017e0 115 0
B 4017f0 401616 117 0
B 401ba2 4015da 6 0
B 4015d5 401b60 1 0
B 40159a 401b60 5 0
B 401b9d 401b70 615 2
B 401b90 401b99 344 37
B 401ba2 40159f 8 0
B 4015b0 401070 9 0
B 401544 4014a0 6 0
B 40188a 401928 5 0
B 40152a 4014b0 21 0
B 40169e 40165b 2 0
B 4014dd 401070 12 1
B 401509 4014ec 2 2
B 401510 401030 673 0
B 4019de 401080 1 0
B 401500 401070 22 0
B 401921 4014d6 9 0
B 4019b3 401080 3 0
B 40162d 401070 113 0
B 4014d1 401800 27 0
B 401a3f 401080 1 0
B 4018d2 401050 17 0
B 401664 4017c0 2 0
B 401680 401070 2 0
B 4017d0 401669 2 0
B 4018f7 40190d 9 0
B 4015bc 401592 6 0
B 401964 401090 5 0
B 4015f8 4015cd 1 0
B 4015ec 401070 6 0
F 40165b 401664 2
F 4017c0 4017d0 2
F 401669 401680 2
F 40190d 401921 9
F 4014d6 4014dd 9
F 401800 4018d2 17
F 4018d7 4018f7 9
F 40159f 4015b0 8
F 401515 401544 6
F 401070 401500 1
F 401070 401070 157
F 4014a0 4014d1 6
F 401616 40162d 112
F 4019e3 401a3f 1
F 4014e2 401500 19
F 401090 401090 5
F 401030 401030 673
F 401505 401510 668
F 401616 4017f0 2
F 401070 4015b0 1
F 4015da 4015ec 6
F 401b60 401b90 6
F 4019b8 4019de 1
F 401969 4019b3 3
F 401505 401509 2
F 401515 40152a 21
F 401592 40159a 4
F 401050 401050 17
F 4015cd 4015d5 1
F 401070 4014dd 1
F 401b99 401ba2 8
F 401b70 401b90 326
F 401b99 401b9d 324
F 401592 4015bc 1
F 401608 401611 109
F 401b70 401b9d 268
F 4015b5 4015bc 5
F 401b99 401b90 1
F 401b70 401ba2 5
F 401632 40164b 108
F 401080 401080 5
F 4014b0 4014d1 21
F 4017e0 4017f0 115
F 4015f1 4015f8 1
F 401685 40169e 2
F 401928 401964 5
F 401800 40188a 5
F 4014ec 401500 2
Loading