18 changes: 8 additions & 10 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,9 @@ class BinaryFunction {
/// different parameters by every pass.
mutable uint64_t Hash{0};

/// Function GUID assigned externally.
uint64_t GUID{0};

/// For PLT functions it contains a symbol associated with a function
/// reference. It is nullptr for non-PLT functions.
const MCSymbol *PLTSymbol{nullptr};
Expand Down Expand Up @@ -1790,11 +1793,6 @@ class BinaryFunction {
return ParentFragments.contains(&Other);
}

/// Returns true if this function is a parent of \p Other, i.e. \p Other is
/// a member of this function's Fragments set.
bool isParentOf(const BinaryFunction &Other) const {
return Fragments.contains(&Other);
}

/// Return the child fragments of the parent function.
iterator_range<FragmentsSetTy::const_iterator> getFragments() const {
return iterator_range<FragmentsSetTy::const_iterator>(Fragments.begin(),
Expand All @@ -1804,11 +1802,6 @@ class BinaryFunction {
/// Return a pointer to this function's set of parent fragments (relevant for
/// split function fragments). The pointer refers to a member of this object.
FragmentsSetTy *getParentFragments() { return &ParentFragments; }

/// Returns true when this function and \p Other are directly related as
/// parent and child fragments, in either direction.
bool isParentOrChildOf(const BinaryFunction &Other) const {
  // Check the child relation first; fall back to the parent relation.
  if (isChildOf(Other))
    return true;
  return isParentOf(Other);
}

/// Set the profile data for the number of times the function was called.
BinaryFunction &setExecutionCount(uint64_t Count) {
ExecutionCount = Count;
Expand Down Expand Up @@ -2256,6 +2249,11 @@ class BinaryFunction {
/// Returns the last computed hash value of the function.
/// NOTE(review): Hash is stored as uint64_t but returned as size_t; on a host
/// where size_t is narrower than 64 bits the value would be truncated.
size_t getHash() const { return Hash; }

/// Returns the function GUID. The field is zero-initialized and is expected
/// to be assigned externally (see setGUID()).
uint64_t getGUID() const { return GUID; }

/// Sets the function GUID to \p Id.
void setGUID(uint64_t Id) { GUID = Id; }

using OperandHashFuncTy =
function_ref<typename std::string(const MCOperand &)>;

Expand Down
12 changes: 2 additions & 10 deletions bolt/include/bolt/Core/DIEBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,6 @@ class DIEBuilder {

/// Returns a reference to the current state of the DIEBuilder.
/// BuilderState is dereferenced unconditionally, so it must be set.
State &getState() { return *BuilderState.get(); }
/// Resolve the reference in DIE, if target is not loaded into IR,
/// pre-allocate it. \p RefCU will be updated to the Unit specific by \p
/// RefValue.
DWARFDie resolveDIEReference(
const DWARFFormValue &RefValue,
const DWARFAbbreviationDeclaration::AttributeSpec AttrSpec,
DWARFUnit *&RefCU, DWARFDebugInfoEntry &DwarfDebugInfoEntry);

/// Resolve the reference in DIE, if target is not loaded into IR,
/// pre-allocate it. \p RefCU will be updated to the Unit specific by \p
Expand All @@ -165,10 +158,9 @@ class DIEBuilder {
const DWARFFormValue &Val);

/// Clone an attribute in reference format.
void cloneDieReferenceAttribute(
void cloneDieOffsetReferenceAttribute(
DIE &Die, const DWARFUnit &U, const DWARFDie &InputDIE,
const DWARFAbbreviationDeclaration::AttributeSpec AttrSpec,
const DWARFFormValue &Val);
const DWARFAbbreviationDeclaration::AttributeSpec AttrSpec, uint64_t Ref);

/// Clone an attribute in block format.
void cloneBlockAttribute(
Expand Down
64 changes: 44 additions & 20 deletions bolt/include/bolt/Core/DebugData.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ class DebugRangeListsSectionWriter : public DebugRangesSectionWriter {
};
virtual ~DebugRangeListsSectionWriter(){};

static void setAddressWriter(DebugAddrWriter *AddrW) { AddrWriter = AddrW; }
void setAddressWriter(DebugAddrWriter *AddrW) { AddrWriter = AddrW; }

/// Add ranges with caching.
uint64_t addRanges(
Expand Down Expand Up @@ -284,7 +284,7 @@ class DebugRangeListsSectionWriter : public DebugRangesSectionWriter {
}

private:
static DebugAddrWriter *AddrWriter;
DebugAddrWriter *AddrWriter = nullptr;
/// Used to find unique CU ID.
DWARFUnit *CU;
/// Current relative offset of range list entry within this CUs rangelist
Expand Down Expand Up @@ -336,21 +336,36 @@ using AddressSectionBuffer = SmallVector<char, 4>;
class DebugAddrWriter {
public:
DebugAddrWriter() = delete;
DebugAddrWriter(BinaryContext *BC_);
DebugAddrWriter(BinaryContext *BC_) : DebugAddrWriter(BC_, UCHAR_MAX) {};
DebugAddrWriter(BinaryContext *BC_, uint8_t AddressByteSize);
virtual ~DebugAddrWriter(){};
/// Given an address returns an index in .debug_addr.
/// Adds Address to map.
uint32_t getIndexFromAddress(uint64_t Address, DWARFUnit &CU);

/// Write out entries in to .debug_addr section for CUs.
virtual void update(DIEBuilder &DIEBlder, DWARFUnit &CUs);
virtual std::optional<uint64_t> finalize(const size_t BufferSize);

/// Return buffer with all the entries in .debug_addr already written out using
/// update(...).
virtual AddressSectionBuffer &finalize() { return *Buffer; }
/// Transfers ownership of the address section buffer to the caller.
/// After this call Buffer is null, so members that dereference it
/// (e.g. getBufferSize()) must not be used on this writer anymore.
virtual std::unique_ptr<AddressSectionBuffer> releaseBuffer() {
return std::move(Buffer);
}

/// Returns buffer size.
virtual size_t getBufferSize() const { return Buffer->size(); }

/// Returns True if Buffer is not empty.
bool isInitialized() const { return !Buffer->empty(); }

/// Returns true if at least one address map has been created.
bool isInitialized() const { return !AddressMaps.empty(); }
/// Updates address base with the given Offset.
virtual void updateAddrBase(DIEBuilder &DIEBlder, DWARFUnit &CU,
const uint64_t Offset);

/// Appends the contents of \p Buffer to the address writer's own buffer by
/// streaming it into AddressStream.
/// Note: the parameter shadows the member named Buffer; inside this function
/// `Buffer` refers to the argument, not to the writer's storage.
void appendToAddressBuffer(const AddressSectionBuffer &Buffer) {
*AddressStream << Buffer;
}

protected:
class AddressForDWOCU {
Expand Down Expand Up @@ -407,23 +422,32 @@ class DebugAddrWriter {
}

BinaryContext *BC;
/// Maps DWOID to AddressForDWOCU.
std::unordered_map<uint64_t, AddressForDWOCU> AddressMaps;
/// Address for the DWO CU associated with the address writer.
AddressForDWOCU Map;
uint8_t AddressByteSize;
/// Mutex used for parallel processing of debug info.
std::mutex WriterMutex;
std::unique_ptr<AddressSectionBuffer> Buffer;
std::unique_ptr<raw_svector_ostream> AddressStream;
/// Used to track sections that were not modified so that they can be re-used.
DenseMap<uint64_t, uint64_t> UnmodifiedAddressOffsets;
static DenseMap<uint64_t, uint64_t> UnmodifiedAddressOffsets;
};

class DebugAddrWriterDwarf5 : public DebugAddrWriter {
public:
DebugAddrWriterDwarf5() = delete;
DebugAddrWriterDwarf5(BinaryContext *BC) : DebugAddrWriter(BC) {}
DebugAddrWriterDwarf5(BinaryContext *BC, uint8_t AddressByteSize,
std::optional<uint64_t> AddrOffsetSectionBase)
: DebugAddrWriter(BC, AddressByteSize),
AddrOffsetSectionBase(AddrOffsetSectionBase) {}

/// Write out entries in to .debug_addr section for CUs.
virtual void update(DIEBuilder &DIEBlder, DWARFUnit &CUs) override;
virtual std::optional<uint64_t> finalize(const size_t BufferSize) override;

/// Updates address base with the given Offset.
virtual void updateAddrBase(DIEBuilder &DIEBlder, DWARFUnit &CU,
const uint64_t Offset) override;

protected:
/// Given DWARFUnit \p Unit returns either DWO ID or its offset within
Expand All @@ -435,6 +459,10 @@ class DebugAddrWriterDwarf5 : public DebugAddrWriter {
}
return Unit.getOffset();
}

private:
std::optional<uint64_t> AddrOffsetSectionBase = std::nullopt;
static constexpr uint32_t HeaderSize = 8;
};

/// This class is NOT thread safe.
Expand Down Expand Up @@ -583,12 +611,10 @@ class DebugLoclistWriter : public DebugLocWriter {
public:
~DebugLoclistWriter() {}
DebugLoclistWriter() = delete;
DebugLoclistWriter(DWARFUnit &Unit, uint8_t DV, bool SD)
: DebugLocWriter(DV, LocWriterKind::DebugLoclistWriter), CU(Unit),
IsSplitDwarf(SD) {
assert(DebugLoclistWriter::AddrWriter &&
"Please use SetAddressWriter to initialize "
"DebugAddrWriter before instantiation.");
DebugLoclistWriter(DWARFUnit &Unit, uint8_t DV, bool SD,
DebugAddrWriter &AddrW)
: DebugLocWriter(DV, LocWriterKind::DebugLoclistWriter),
AddrWriter(AddrW), CU(Unit), IsSplitDwarf(SD) {
if (DwarfVersion >= 5) {
LocBodyBuffer = std::make_unique<DebugBufferVector>();
LocBodyStream = std::make_unique<raw_svector_ostream>(*LocBodyBuffer);
Expand All @@ -600,8 +626,6 @@ class DebugLoclistWriter : public DebugLocWriter {
}
}

static void setAddressWriter(DebugAddrWriter *AddrW) { AddrWriter = AddrW; }

/// Stores location lists internally to be written out during finalize phase.
virtual void addList(DIEBuilder &DIEBldr, DIE &Die, DIEValue &AttrInfo,
DebugLocationsVector &LocList) override;
Expand Down Expand Up @@ -630,7 +654,7 @@ class DebugLoclistWriter : public DebugLocWriter {
/// Writes out locations in to a local buffer and applies debug info patches.
void finalizeDWARF5(DIEBuilder &DIEBldr, DIE &Die);

static DebugAddrWriter *AddrWriter;
DebugAddrWriter &AddrWriter;
DWARFUnit &CU;
bool IsSplitDwarf{false};
// Used for DWARF5 to store location lists before being finalized.
Expand Down
13 changes: 7 additions & 6 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ enum class IndirectBranchType : char {
POSSIBLE_PIC_JUMP_TABLE, /// Possibly a jump table for PIC.
POSSIBLE_GOTO, /// Possibly a gcc's computed goto.
POSSIBLE_FIXED_BRANCH, /// Possibly an indirect branch to a fixed location.
POSSIBLE_PIC_FIXED_BRANCH, /// Possibly an indirect jump to a fixed entry in a
/// PIC jump table.
};

class MCPlusBuilder {
Expand Down Expand Up @@ -1474,12 +1476,11 @@ class MCPlusBuilder {
/// will be set to the different components of the branch. \p MemLocInstr
/// is the instruction that loads up the indirect function pointer. It may
/// or may not be same as \p Instruction.
virtual IndirectBranchType
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
InstructionIterator End, const unsigned PtrSize,
MCInst *&MemLocInstr, unsigned &BaseRegNum,
unsigned &IndexRegNum, int64_t &DispValue,
const MCExpr *&DispExpr, MCInst *&PCRelBaseOut) const {
virtual IndirectBranchType analyzeIndirectBranch(
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstr, unsigned &BaseRegNum,
unsigned &IndexRegNum, int64_t &DispValue, const MCExpr *&DispExpr,
MCInst *&PCRelBaseOut, MCInst *&FixedEntryLoadInst) const {
llvm_unreachable("not implemented");
return IndirectBranchType::UNKNOWN;
}
Expand Down
33 changes: 33 additions & 0 deletions bolt/include/bolt/Profile/ProfileYAMLMapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,36 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {
static const bool flow = true;
};

namespace bolt {
/// A single pseudo probe entry attached to a basic block profile.
struct PseudoProbeInfo {
  // Every field carries a default initializer so that a default-constructed
  // entry never holds indeterminate values.
  llvm::yaml::Hex64 GUID{0};
  uint64_t Index{0};
  uint8_t Type{0};

  /// Two entries compare equal when both GUID and Index match.
  /// NOTE(review): Type does not take part in the comparison - confirm that
  /// this is intentional.
  bool operator==(const PseudoProbeInfo &Other) const {
    return GUID == Other.GUID && Index == Other.Index;
  }
  /// Negation of operator==.
  bool operator!=(const PseudoProbeInfo &Other) const {
    return !(*this == Other);
  }
};
} // end namespace bolt

/// YAML mapping for a pseudo probe entry. All three keys ("guid", "id",
/// "type") are required.
template <> struct MappingTraits<bolt::PseudoProbeInfo> {
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
YamlIO.mapRequired("guid", PI.GUID);
YamlIO.mapRequired("id", PI.Index);
YamlIO.mapRequired("type", PI.Type);
}

// Same flow setting as the sibling SuccessorInfo traits above.
static const bool flow = true;
};
} // end namespace yaml
} // end namespace llvm

LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::CallSiteInfo)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::SuccessorInfo)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeInfo)

namespace llvm {
namespace yaml {
Expand All @@ -111,6 +136,7 @@ struct BinaryBasicBlockProfile {
uint64_t EventCount{0};
std::vector<CallSiteInfo> CallSites;
std::vector<SuccessorInfo> Successors;
std::vector<PseudoProbeInfo> PseudoProbes;

bool operator==(const BinaryBasicBlockProfile &Other) const {
return Index == Other.Index;
Expand All @@ -132,6 +158,8 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
std::vector<bolt::CallSiteInfo>());
YamlIO.mapOptional("succ", BBP.Successors,
std::vector<bolt::SuccessorInfo>());
YamlIO.mapOptional("pseudo_probes", BBP.PseudoProbes,
std::vector<bolt::PseudoProbeInfo>());
}
};

Expand All @@ -151,6 +179,8 @@ struct BinaryFunctionProfile {
llvm::yaml::Hex64 Hash{0};
uint64_t ExecCount{0};
std::vector<BinaryBasicBlockProfile> Blocks;
llvm::yaml::Hex64 GUID{0};
llvm::yaml::Hex64 PseudoProbeDescHash{0};
bool Used{false};
};
} // end namespace bolt
Expand All @@ -164,6 +194,9 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
YamlIO.mapOptional("blocks", BFP.Blocks,
std::vector<bolt::BinaryBasicBlockProfile>());
YamlIO.mapOptional("guid", BFP.GUID, (uint64_t)0);
YamlIO.mapOptional("pseudo_probe_desc_hash", BFP.PseudoProbeDescHash,
(uint64_t)0);
}
};

Expand Down
56 changes: 56 additions & 0 deletions bolt/include/bolt/Profile/YAMLProfileReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,59 @@ class YAMLProfileReader : public ProfileReaderBase {
using ProfileLookupMap =
DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>;

/// A class for matching binary functions in functions in the YAML profile.
/// First, a call graph is constructed for both profiled and binary functions.
/// Then functions are hashed based on the names of their callee/caller
/// functions. Finally, functions are matched based on these neighbor hashes.
class CallGraphMatcher {
public:
/// Constructs the call graphs for binary and profiled functions and
/// computes neighbor hashes for binary functions.
CallGraphMatcher(BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP,
ProfileLookupMap &IdToYAMLBF);

/// Returns the YamlBFs adjacent to the parameter YamlBF in the call graph.
std::optional<std::set<yaml::bolt::BinaryFunctionProfile *>>
getAdjacentYamlBFs(yaml::bolt::BinaryFunctionProfile &YamlBF) {
auto It = YamlBFAdjacencyMap.find(&YamlBF);
return It == YamlBFAdjacencyMap.end() ? std::nullopt
: std::make_optional(It->second);
}

/// Returns the binary functions with the parameter neighbor hash.
std::optional<std::vector<BinaryFunction *>>
getBFsWithNeighborHash(uint64_t NeighborHash) {
auto It = NeighborHashToBFs.find(NeighborHash);
return It == NeighborHashToBFs.end() ? std::nullopt
: std::make_optional(It->second);
}

private:
/// Adds edges to the binary function call graph given the callsites of the
/// parameter function.
void constructBFCG(BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP);

/// Using the constructed binary function call graph, computes and creates
/// mappings from "neighbor hash" (composed of the function names of callee
/// and caller functions of a function) to binary functions.
void computeBFNeighborHashes(BinaryContext &BC);

/// Constructs the call graph for profile functions.
void constructYAMLFCG(yaml::bolt::BinaryProfile &YamlBP,
ProfileLookupMap &IdToYAMLBF);

/// Adjacency map for binary functions in the call graph.
DenseMap<BinaryFunction *, std::set<BinaryFunction *>> BFAdjacencyMap;

/// Maps neighbor hashes to binary functions.
DenseMap<uint64_t, std::vector<BinaryFunction *>> NeighborHashToBFs;

/// Adjacency map for profile functions in the call graph.
DenseMap<yaml::bolt::BinaryFunctionProfile *,
std::set<yaml::bolt::BinaryFunctionProfile *>>
YamlBFAdjacencyMap;
};

private:
/// Adjustments for basic samples profiles (without LBR).
bool NormalizeByInsnCount{false};
Expand Down Expand Up @@ -100,6 +153,9 @@ class YAMLProfileReader : public ProfileReaderBase {
/// Matches functions using exact hash.
size_t matchWithHash(BinaryContext &BC);

/// Matches functions using the call graph.
size_t matchWithCallGraph(BinaryContext &BC);

/// Matches functions with similarly named profiled functions.
size_t matchWithNameSimilarity(BinaryContext &BC);

Expand Down
24 changes: 9 additions & 15 deletions bolt/include/bolt/Rewrite/DWARFRewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,6 @@ class DWARFRewriter {
/// .debug_aranges DWARF section.
std::unique_ptr<DebugARangesSectionWriter> ARangesSectionWriter;

/// Stores and serializes information that will be put into the
/// .debug_addr DWARF section.
std::unique_ptr<DebugAddrWriter> AddrWriter;

/// Stores and serializes information that will be put in to the
/// .debug_addr DWARF section.
/// Does not do de-duplication.
Expand All @@ -93,10 +89,11 @@ class DWARFRewriter {
std::unordered_map<uint64_t, std::unique_ptr<DebugRangesSectionWriter>>
LegacyRangesWritersByCU;

std::mutex LocListDebugInfoPatchesMutex;
/// Stores address writer for each CU.
std::unordered_map<uint64_t, std::unique_ptr<DebugAddrWriter>>
AddressWritersByCU;

/// Maps a DWO id to its RangesBase.
std::unordered_map<uint64_t, uint64_t> DwoRangesBase;
std::mutex LocListDebugInfoPatchesMutex;

std::unordered_map<DWARFUnit *, uint64_t> LineTablePatchMap;
std::unordered_map<const DWARFUnit *, uint64_t> TypeUnitRelocMap;
Expand All @@ -115,6 +112,7 @@ class DWARFRewriter {
void updateUnitDebugInfo(DWARFUnit &Unit, DIEBuilder &DIEBldr,
DebugLocWriter &DebugLocWriter,
DebugRangesSectionWriter &RangesSectionWriter,
DebugAddrWriter &AddressWriter,
std::optional<uint64_t> RangesBase = std::nullopt);

/// Patches the binary for an object's address ranges to be updated.
Expand All @@ -141,13 +139,15 @@ class DWARFRewriter {
/// Process and write out CUs that are passed in.
void finalizeCompileUnits(DIEBuilder &DIEBlder, DIEStreamer &Streamer,
CUOffsetMap &CUMap,
const std::list<DWARFUnit *> &CUs);
const std::list<DWARFUnit *> &CUs,
DebugAddrWriter &FinalAddrWriter);

/// Finalize debug sections in the main binary.
void finalizeDebugSections(DIEBuilder &DIEBlder,
DWARF5AcceleratorTable &DebugNamesTable,
DIEStreamer &Streamer, raw_svector_ostream &ObjOS,
CUOffsetMap &CUMap);
CUOffsetMap &CUMap,
DebugAddrWriter &FinalAddrWriter);

/// Patches the binary for DWARF address ranges (e.g. in functions and lexical
/// blocks) to be updated.
Expand Down Expand Up @@ -188,12 +188,6 @@ class DWARFRewriter {
/// Update stmt_list for CUs based on the new .debug_line \p Layout.
void updateLineTableOffsets(const MCAssembler &Asm);

/// Returns the RangesBase recorded for \p DWOId, or 0 when none was set.
/// Uses find() rather than operator[] so that looking up an unknown id does
/// not insert a default entry into DwoRangesBase.
uint64_t getDwoRangesBase(uint64_t DWOId) {
  const auto It = DwoRangesBase.find(DWOId);
  return It == DwoRangesBase.end() ? 0 : It->second;
}

/// Records \p RangesBase for \p DWOId, overwriting any previous value.
void setDwoRangesBase(uint64_t DWOId, uint64_t RangesBase) {
DwoRangesBase[DWOId] = RangesBase;
}

using OverriddenSectionsMap = std::unordered_map<DWARFSectionKind, StringRef>;
/// Output .dwo files.
void writeDWOFiles(DWARFUnit &, const OverriddenSectionsMap &,
Expand Down
2 changes: 1 addition & 1 deletion bolt/include/bolt/Rewrite/RewriteInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ class RewriteInstance {
std::unordered_map<const MCSymbol *, uint32_t> SymbolIndex;

/// Store all non-zero symbols in this map for a quick address lookup.
std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;
std::multimap<uint64_t, llvm::object::SymbolRef> FileSymRefs;

/// FILE symbols used for disambiguating split function parents.
std::vector<ELFSymbolRef> FileSymbols;
Expand Down
11 changes: 10 additions & 1 deletion bolt/include/bolt/RuntimeLibs/RuntimeLibrary.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,16 @@ class RuntimeLibrary {
uint64_t RuntimeFiniAddress{0};
uint64_t RuntimeStartAddress{0};

/// Get the full path to a runtime library specified by \p LibFileName.
/// Get the full path to a runtime library specified by \p LibFileName and \p
/// ToolPath.
static std::string getLibPathByToolPath(StringRef ToolPath,
StringRef LibFileName);

/// Get the full path to a runtime library by the install directory.
static std::string getLibPathByInstalled(StringRef LibFileName);

/// Gets the full path to a runtime library based on whether it exists
/// in the install libdir or runtime libdir.
static std::string getLibPath(StringRef ToolPath, StringRef LibFileName);

/// Load a static runtime library specified by \p LibPath.
Expand Down
2 changes: 2 additions & 0 deletions bolt/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
add_compile_definitions(CMAKE_INSTALL_FULL_LIBDIR="${CMAKE_INSTALL_FULL_LIBDIR}")

add_subdirectory(Core)
add_subdirectory(Passes)
add_subdirectory(Profile)
Expand Down
36 changes: 23 additions & 13 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ PrintMemData("print-mem-data",

cl::opt<std::string> CompDirOverride(
"comp-dir-override",
cl::desc("overrides DW_AT_comp_dir, and provides an alterantive base "
cl::desc("overrides DW_AT_comp_dir, and provides an alternative base "
"location, which is used with DW_AT_dwo_name to construct a path "
"to *.dwo files."),
cl::Hidden, cl::init(""), cl::cat(BoltCategory));
Expand Down Expand Up @@ -646,7 +646,7 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
const bool DoesBelongToFunction =
BF.containsAddress(Value) ||
(TargetBF && TargetBF->isParentOrChildOf(BF));
(TargetBF && areRelatedFragments(TargetBF, &BF));
if (!DoesBelongToFunction) {
LLVM_DEBUG({
if (!BF.containsAddress(Value)) {
Expand Down Expand Up @@ -839,9 +839,11 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
assert(Address == JT->getAddress() && "unexpected non-empty jump table");

// Prevent associating a jump table to a specific fragment twice.
// This simple check arises from the assumption: no more than 2 fragments.
if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
assert(JT->Parents[0]->isParentOrChildOf(Function) &&
if (!llvm::is_contained(JT->Parents, &Function)) {
assert(llvm::all_of(JT->Parents,
[&](const BinaryFunction *BF) {
return areRelatedFragments(&Function, BF);
}) &&
"cannot re-use jump table of a different function");
// Duplicate the entry for the parent function for easy access
JT->Parents.push_back(&Function);
Expand All @@ -852,8 +854,8 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
JT->print(this->outs());
}
Function.JumpTables.emplace(Address, JT);
JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
for (BinaryFunction *Parent : JT->Parents)
Parent->setHasIndirectTargetToSplitFragment(true);
}

bool IsJumpTableParent = false;
Expand Down Expand Up @@ -1209,12 +1211,13 @@ void BinaryContext::generateSymbolHashes() {
}

bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
BinaryFunction &Function) const {
BinaryFunction &Function) {
assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
if (TargetFunction.isChildOf(Function))
return true;
TargetFunction.addParentFragment(Function);
Function.addFragment(TargetFunction);
FragmentClasses.unionSets(&TargetFunction, &Function);
if (!HasRelocations) {
TargetFunction.setSimple(false);
Function.setSimple(false);
Expand Down Expand Up @@ -1336,7 +1339,7 @@ void BinaryContext::processInterproceduralReferences() {

if (TargetFunction) {
if (TargetFunction->isFragment() &&
!TargetFunction->isChildOf(Function)) {
!areRelatedFragments(TargetFunction, &Function)) {
this->errs()
<< "BOLT-WARNING: interprocedural reference between unrelated "
"fragments: "
Expand Down Expand Up @@ -2367,10 +2370,7 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
*TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
/*RelaxAll=*/false,
/*IncrementalLinkerCompatible=*/false,
/*DWARFMustBeAtTheEnd=*/false));
std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI));

Streamer->initSections(false, *STI);

Expand Down Expand Up @@ -2523,6 +2523,16 @@ BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
return nullptr;
}

/// Deregister JumpTable registered at a given \p Address and delete it.
void BinaryContext::deleteJumpTable(uint64_t Address) {
assert(JumpTables.count(Address) && "Must have a jump table at address");
JumpTable *JT = JumpTables.at(Address);
for (BinaryFunction *Parent : JT->Parents)
Parent->JumpTables.erase(Address);
JumpTables.erase(Address);
delete JT;
}

DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
const DWARFAddressRangesVector &InputRanges) const {
DebugAddressRangesVector OutputRanges;
Expand Down
47 changes: 45 additions & 2 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,9 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
// setting the value of the register used by the branch.
MCInst *MemLocInstr;

// The instruction loading the fixed PIC jump table entry value.
MCInst *FixedEntryLoadInstr;

// Address of the table referenced by MemLocInstr. Could be either an
// array of function pointers, or a jump table.
uint64_t ArrayStart = 0;
Expand Down Expand Up @@ -811,7 +814,7 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,

IndirectBranchType BranchType = BC.MIB->analyzeIndirectBranch(
Instruction, Begin, Instructions.end(), PtrSize, MemLocInstr, BaseRegNum,
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr, FixedEntryLoadInstr);

if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
return BranchType;
Expand Down Expand Up @@ -877,6 +880,43 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
if (BaseRegNum == BC.MRI->getProgramCounter())
ArrayStart += getAddress() + Offset + Size;

if (FixedEntryLoadInstr) {
assert(BranchType == IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH &&
"Invalid IndirectBranch type");
MCInst::iterator FixedEntryDispOperand =
BC.MIB->getMemOperandDisp(*FixedEntryLoadInstr);
assert(FixedEntryDispOperand != FixedEntryLoadInstr->end() &&
"Invalid memory instruction");
const MCExpr *FixedEntryDispExpr = FixedEntryDispOperand->getExpr();
const uint64_t EntryAddress = getExprValue(FixedEntryDispExpr);
uint64_t EntrySize = BC.getJumpTableEntrySize(JumpTable::JTT_PIC);
ErrorOr<int64_t> Value =
BC.getSignedValueAtAddress(EntryAddress, EntrySize);
if (!Value)
return IndirectBranchType::UNKNOWN;

BC.outs() << "BOLT-INFO: fixed PIC indirect branch detected in " << *this
<< " at 0x" << Twine::utohexstr(getAddress() + Offset)
<< " referencing data at 0x" << Twine::utohexstr(EntryAddress)
<< " the destination value is 0x"
<< Twine::utohexstr(ArrayStart + *Value) << '\n';

TargetAddress = ArrayStart + *Value;

// Remove spurious JumpTable at EntryAddress caused by PIC reference from
// the load instruction.
BC.deleteJumpTable(EntryAddress);

// Replace FixedEntryDispExpr used in target address calculation with outer
// jump table reference.
JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart);
assert(JT && "Must have a containing jump table for PIC fixed branch");
BC.MIB->replaceMemOperandDisp(*FixedEntryLoadInstr, JT->getFirstLabel(),
EntryAddress - ArrayStart, &*BC.Ctx);

return BranchType;
}

LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
<< Twine::utohexstr(ArrayStart) << '\n');

Expand Down Expand Up @@ -1126,6 +1166,7 @@ void BinaryFunction::handleIndirectBranch(MCInst &Instruction, uint64_t Size,
}
case IndirectBranchType::POSSIBLE_JUMP_TABLE:
case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH:
if (opts::JumpTables == JTS_NONE)
IsSimple = false;
break;
Expand Down Expand Up @@ -1878,9 +1919,11 @@ bool BinaryFunction::postProcessIndirectBranches(
int64_t DispValue;
const MCExpr *DispExpr;
MCInst *PCRelBaseInstr;
MCInst *FixedEntryLoadInstr;
IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
Instr, BB.begin(), II, PtrSize, MemLocInstr, BaseRegNum,
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr,
FixedEntryLoadInstr);
if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
continue;

Expand Down
27 changes: 10 additions & 17 deletions bolt/lib/Core/DIEBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -551,15 +551,6 @@ void DIEBuilder::finish() {
updateReferences();
}

/// Convenience overload: takes the reference offset out of \p RefValue and
/// forwards to the offset-based resolveDIEReference(). \p RefValue must be of
/// form class FC_Reference.
DWARFDie DIEBuilder::resolveDIEReference(
    const DWARFFormValue &RefValue,
    const DWARFAbbreviationDeclaration::AttributeSpec AttrSpec,
    DWARFUnit *&RefCU, DWARFDebugInfoEntry &DwarfDebugInfoEntry) {
  assert(RefValue.isFormClass(DWARFFormValue::FC_Reference));
  const uint64_t TargetOffset = *RefValue.getAsReference();
  return resolveDIEReference(AttrSpec, TargetOffset, RefCU,
                             DwarfDebugInfoEntry);
}

DWARFDie DIEBuilder::resolveDIEReference(
const DWARFAbbreviationDeclaration::AttributeSpec AttrSpec,
const uint64_t RefOffset, DWARFUnit *&RefCU,
Expand Down Expand Up @@ -603,17 +594,14 @@ DWARFDie DIEBuilder::resolveDIEReference(
return DWARFDie();
}

void DIEBuilder::cloneDieReferenceAttribute(
void DIEBuilder::cloneDieOffsetReferenceAttribute(
DIE &Die, const DWARFUnit &U, const DWARFDie &InputDIE,
const DWARFAbbreviationDeclaration::AttributeSpec AttrSpec,
const DWARFFormValue &Val) {
const uint64_t Ref = *Val.getAsReference();

const DWARFAbbreviationDeclaration::AttributeSpec AttrSpec, uint64_t Ref) {
DIE *NewRefDie = nullptr;
DWARFUnit *RefUnit = nullptr;

DWARFDebugInfoEntry DDIEntry;
const DWARFDie RefDie = resolveDIEReference(Val, AttrSpec, RefUnit, DDIEntry);
const DWARFDie RefDie = resolveDIEReference(AttrSpec, Ref, RefUnit, DDIEntry);

if (!RefDie)
return;
Expand Down Expand Up @@ -818,7 +806,7 @@ void DIEBuilder::cloneAddressAttribute(
void DIEBuilder::cloneRefsigAttribute(
DIE &Die, DWARFAbbreviationDeclaration::AttributeSpec AttrSpec,
const DWARFFormValue &Val) {
const std::optional<uint64_t> SigVal = Val.getRawUValue();
const std::optional<uint64_t> SigVal = Val.getAsSignatureReference();
Die.addValue(getState().DIEAlloc, AttrSpec.Attr, dwarf::DW_FORM_ref_sig8,
DIEInteger(*SigVal));
}
Expand Down Expand Up @@ -886,11 +874,16 @@ void DIEBuilder::cloneAttribute(
cloneStringAttribute(Die, U, AttrSpec, Val);
break;
case dwarf::DW_FORM_ref_addr:
cloneDieOffsetReferenceAttribute(Die, U, InputDIE, AttrSpec,
*Val.getAsDebugInfoReference());
break;
case dwarf::DW_FORM_ref1:
case dwarf::DW_FORM_ref2:
case dwarf::DW_FORM_ref4:
case dwarf::DW_FORM_ref8:
cloneDieReferenceAttribute(Die, U, InputDIE, AttrSpec, Val);
cloneDieOffsetReferenceAttribute(Die, U, InputDIE, AttrSpec,
Val.getUnit()->getOffset() +
*Val.getAsRelativeReference());
break;
case dwarf::DW_FORM_block:
case dwarf::DW_FORM_block1:
Expand Down
100 changes: 43 additions & 57 deletions bolt/lib/Core/DebugData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,6 @@ void DebugRangesSectionWriter::appendToRangeBuffer(
*RangesStream << CUBuffer;
}

DebugAddrWriter *DebugRangeListsSectionWriter::AddrWriter = nullptr;

uint64_t DebugRangeListsSectionWriter::addRanges(
DebugAddressRangesVector &&Ranges,
std::map<DebugAddressRangesVector, uint64_t> &CachedRanges) {
Expand Down Expand Up @@ -390,7 +388,9 @@ void DebugARangesSectionWriter::writeARangesSection(
}
}

DebugAddrWriter::DebugAddrWriter(BinaryContext *BC) : BC(BC) {
DebugAddrWriter::DebugAddrWriter(BinaryContext *BC,
const uint8_t AddressByteSize)
: BC(BC), AddressByteSize(AddressByteSize) {
Buffer = std::make_unique<AddressSectionBuffer>();
AddressStream = std::make_unique<raw_svector_ostream>(*Buffer);
}
Expand All @@ -405,11 +405,6 @@ void DebugAddrWriter::AddressForDWOCU::dump() {
}
uint32_t DebugAddrWriter::getIndexFromAddress(uint64_t Address, DWARFUnit &CU) {
std::lock_guard<std::mutex> Lock(WriterMutex);
const uint64_t CUID = getCUID(CU);
if (!AddressMaps.count(CUID))
AddressMaps[CUID] = AddressForDWOCU();

AddressForDWOCU &Map = AddressMaps[CUID];
auto Entry = Map.find(Address);
if (Entry == Map.end()) {
auto Index = Map.getNextIndex();
Expand Down Expand Up @@ -449,29 +444,23 @@ static void updateAddressBase(DIEBuilder &DIEBlder, DebugAddrWriter &AddrWriter,
}
}

void DebugAddrWriter::update(DIEBuilder &DIEBlder, DWARFUnit &CU) {
// Handling the case where debug information is a mix of Debug fission and
// monolithic.
if (!CU.getDWOId())
return;
const uint64_t CUID = getCUID(CU);
auto AM = AddressMaps.find(CUID);
// Adding to map even if it did not contribute to .debug_addr.
// The Skeleton CU might still have DW_AT_GNU_addr_base.
uint64_t Offset = Buffer->size();
// If does not exist this CUs DWO section didn't contribute to .debug_addr.
if (AM == AddressMaps.end())
return;
std::vector<IndexAddressPair> SortedMap(AM->second.indexToAddressBegin(),
AM->second.indexToAdddessEnd());
/// Forwards to the file-local updateAddressBase() helper, passing this writer
/// together with \p DIEBlder, \p CU and \p Offset unchanged.
void DebugAddrWriter::updateAddrBase(DIEBuilder &DIEBlder, DWARFUnit &CU,
const uint64_t Offset) {
updateAddressBase(DIEBlder, *this, CU, Offset);
}

std::optional<uint64_t> DebugAddrWriter::finalize(const size_t BufferSize) {
if (Map.begin() == Map.end())
return std::nullopt;
std::vector<IndexAddressPair> SortedMap(Map.indexToAddressBegin(),
Map.indexToAdddessEnd());
// Sorting address in increasing order of indices.
llvm::sort(SortedMap, llvm::less_first());

uint8_t AddrSize = CU.getAddressByteSize();
uint32_t Counter = 0;
auto WriteAddress = [&](uint64_t Address) -> void {
++Counter;
switch (AddrSize) {
switch (AddressByteSize) {
default:
assert(false && "Address Size is invalid.");
break;
Expand All @@ -490,10 +479,19 @@ void DebugAddrWriter::update(DIEBuilder &DIEBlder, DWARFUnit &CU) {
WriteAddress(0);
WriteAddress(Val.second);
}
updateAddressBase(DIEBlder, *this, CU, Offset);
return std::nullopt;
}

/// DWARF5 variant: forwards to the file-local updateAddressBase() helper with
/// \p Offset advanced by HeaderSize.
void DebugAddrWriterDwarf5::updateAddrBase(DIEBuilder &DIEBlder, DWARFUnit &CU,
const uint64_t Offset) {
// Header for DWARF5 has size 8, so we add it to the offset.
updateAddressBase(DIEBlder, *this, CU, Offset + HeaderSize);
}

void DebugAddrWriterDwarf5::update(DIEBuilder &DIEBlder, DWARFUnit &CU) {
DenseMap<uint64_t, uint64_t> DebugAddrWriter::UnmodifiedAddressOffsets;

std::optional<uint64_t>
DebugAddrWriterDwarf5::finalize(const size_t BufferSize) {
// Need to layout all sections within .debug_addr
// Within each section sort Address by index.
const endianness Endian = BC->DwCtx->isLittleEndian()
Expand All @@ -504,55 +502,44 @@ void DebugAddrWriterDwarf5::update(DIEBuilder &DIEBlder, DWARFUnit &CU) {
Endian == llvm::endianness::little, 0);
DWARFDebugAddrTable AddrTable;
DIDumpOptions DumpOpts;
constexpr uint32_t HeaderSize = 8;
const uint64_t CUID = getCUID(CU);
const uint8_t AddrSize = CU.getAddressByteSize();
auto AMIter = AddressMaps.find(CUID);
// A case where CU has entry in .debug_addr, but we don't modify addresses
// for it.
if (AMIter == AddressMaps.end()) {
AMIter = AddressMaps.insert({CUID, AddressForDWOCU()}).first;
std::optional<uint64_t> BaseOffset = CU.getAddrOffsetSectionBase();
if (!BaseOffset)
return;
if (Map.begin() == Map.end()) {
if (!AddrOffsetSectionBase)
return std::nullopt;
// Address base offset is to the first entry.
// The size of header is 8 bytes.
uint64_t Offset = *BaseOffset - HeaderSize;
uint64_t Offset = *AddrOffsetSectionBase - HeaderSize;
auto Iter = UnmodifiedAddressOffsets.find(Offset);
if (Iter != UnmodifiedAddressOffsets.end()) {
updateAddressBase(DIEBlder, *this, CU, Iter->getSecond());
return;
}
UnmodifiedAddressOffsets[Offset] = Buffer->size() + HeaderSize;
if (Error Err = AddrTable.extract(AddrData, &Offset, 5, AddrSize,
if (Iter != UnmodifiedAddressOffsets.end())
return Iter->second;
UnmodifiedAddressOffsets[Offset] = BufferSize;
if (Error Err = AddrTable.extract(AddrData, &Offset, 5, AddressByteSize,
DumpOpts.WarningHandler)) {
DumpOpts.RecoverableErrorHandler(std::move(Err));
return;
return std::nullopt;
}

uint32_t Index = 0;
for (uint64_t Addr : AddrTable.getAddressEntries())
AMIter->second.insert(Addr, Index++);
Map.insert(Addr, Index++);
}

updateAddressBase(DIEBlder, *this, CU, Buffer->size() + HeaderSize);

std::vector<IndexAddressPair> SortedMap(AMIter->second.indexToAddressBegin(),
AMIter->second.indexToAdddessEnd());
std::vector<IndexAddressPair> SortedMap(Map.indexToAddressBegin(),
Map.indexToAdddessEnd());
// Sorting address in increasing order of indices.
llvm::sort(SortedMap, llvm::less_first());
// Writing out Header
const uint32_t Length = SortedMap.size() * AddrSize + 4;
const uint32_t Length = SortedMap.size() * AddressByteSize + 4;
support::endian::write(*AddressStream, Length, Endian);
support::endian::write(*AddressStream, static_cast<uint16_t>(5), Endian);
support::endian::write(*AddressStream, static_cast<uint8_t>(AddrSize),
support::endian::write(*AddressStream, static_cast<uint8_t>(AddressByteSize),
Endian);
support::endian::write(*AddressStream, static_cast<uint8_t>(0), Endian);

uint32_t Counter = 0;
auto writeAddress = [&](uint64_t Address) -> void {
++Counter;
switch (AddrSize) {
switch (AddressByteSize) {
default:
llvm_unreachable("Address Size is invalid.");
break;
Expand All @@ -571,6 +558,7 @@ void DebugAddrWriterDwarf5::update(DIEBuilder &DIEBlder, DWARFUnit &CU) {
writeAddress(0);
writeAddress(Val.second);
}
return std::nullopt;
}

void DebugLocWriter::init() {
Expand Down Expand Up @@ -723,11 +711,11 @@ void DebugLoclistWriter::addList(DIEBuilder &DIEBldr, DIE &Die,
DIEValue &AttrInfo,
DebugLocationsVector &LocList) {
if (DwarfVersion < 5)
writeLegacyLocList(AttrInfo, LocList, DIEBldr, Die, *AddrWriter, *LocBuffer,
writeLegacyLocList(AttrInfo, LocList, DIEBldr, Die, AddrWriter, *LocBuffer,
CU, *LocStream);
else
writeDWARF5LocList(NumberOfEntries, AttrInfo, LocList, Die, DIEBldr,
*AddrWriter, *LocBodyBuffer, RelativeLocListOffsets, CU,
AddrWriter, *LocBodyBuffer, RelativeLocListOffsets, CU,
*LocBodyStream);
}

Expand Down Expand Up @@ -789,8 +777,6 @@ void DebugLoclistWriter::finalize(DIEBuilder &DIEBldr, DIE &Die) {
finalizeDWARF5(DIEBldr, Die);
}

DebugAddrWriter *DebugLoclistWriter::AddrWriter = nullptr;

static std::string encodeLE(size_t ByteSize, uint64_t NewValue) {
std::string LE64(ByteSize, 0);
for (size_t I = 0; I < ByteSize; ++I) {
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Core/Exceptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
"BOLT-ERROR: cannot find landing pad fragment");
BC.addInterproceduralReference(this, Fragment->getAddress());
BC.processInterproceduralReferences();
assert(isParentOrChildOf(*Fragment) &&
assert(BC.areRelatedFragments(this, Fragment) &&
"BOLT-ERROR: cannot have landing pads in different functions");
setHasIndirectTargetToSplitFragment(true);
BC.addFragmentsToSkip(this);
Expand Down
7 changes: 2 additions & 5 deletions bolt/lib/Passes/AsmDump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,8 @@ void dumpFunction(const BinaryFunction &BF) {
auto FOut = std::make_unique<formatted_raw_ostream>(OS);
FOut->SetUnbuffered();
std::unique_ptr<MCStreamer> AsmStreamer(
createAsmStreamer(*LocalCtx, std::move(FOut),
/*isVerboseAsm=*/true,
/*useDwarfDirectory=*/false, InstructionPrinter,
std::move(MCEInstance.MCE), std::move(MAB),
/*ShowInst=*/false));
createAsmStreamer(*LocalCtx, std::move(FOut), InstructionPrinter,
std::move(MCEInstance.MCE), std::move(MAB)));
AsmStreamer->initSections(true, *BC.STI);
std::unique_ptr<TargetMachine> TM(BC.TheTarget->createTargetMachine(
BC.TripleName, "", "", TargetOptions(), std::nullopt));
Expand Down
4 changes: 3 additions & 1 deletion bolt/lib/Passes/IndirectCallPromotion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,13 +386,15 @@ IndirectCallPromotion::maybeGetHotJumpTableTargets(BinaryBasicBlock &BB,
JumpTableInfoType HotTargets;
MCInst *MemLocInstr;
MCInst *PCRelBaseOut;
MCInst *FixedEntryLoadInstr;
unsigned BaseReg, IndexReg;
int64_t DispValue;
const MCExpr *DispExpr;
MutableArrayRef<MCInst> Insts(&BB.front(), &CallInst);
const IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
CallInst, Insts.begin(), Insts.end(), BC.AsmInfo->getCodePointerSize(),
MemLocInstr, BaseReg, IndexReg, DispValue, DispExpr, PCRelBaseOut);
MemLocInstr, BaseReg, IndexReg, DispValue, DispExpr, PCRelBaseOut,
FixedEntryLoadInstr);

assert(MemLocInstr && "There should always be a load for jump tables");
if (!MemLocInstr)
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Profile/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ add_llvm_library(LLVMBOLTProfile

LINK_COMPONENTS
Demangle
MC
Support
TransformUtils
)
Expand Down
31 changes: 31 additions & 0 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ MaxSamples("max-samples",
cl::cat(AggregatorCategory));

extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
extern cl::opt<bool> ProfileUsePseudoProbes;
extern cl::opt<std::string> SaveProfile;

cl::opt<bool> ReadPreAggregated(
Expand Down Expand Up @@ -2298,6 +2299,9 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,

yaml::bolt::BinaryProfile BP;

const MCPseudoProbeDecoder *PseudoProbeDecoder =
opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;

// Fill out the header info.
BP.Header.Version = 1;
BP.Header.FileName = std::string(BC.getFilename());
Expand Down Expand Up @@ -2398,6 +2402,33 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
const unsigned BlockIndex = BlockMap.getBBIndex(BI.To.Offset);
YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
}
if (PseudoProbeDecoder) {
if ((YamlBF.GUID = BF->getGUID())) {
const MCPseudoProbeFuncDesc *FuncDesc =
PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
}
// Fetch probes belonging to all fragments
const AddressProbesMap &ProbeMap =
PseudoProbeDecoder->getAddress2ProbesMap();
BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
Fragments.insert(BF);
for (const BinaryFunction *F : Fragments) {
const uint64_t FuncAddr = F->getAddress();
const auto &FragmentProbes =
llvm::make_range(ProbeMap.lower_bound(FuncAddr),
ProbeMap.lower_bound(FuncAddr + F->getSize()));
for (const auto &[OutputAddress, Probes] : FragmentProbes) {
const uint32_t InputOffset = BAT->translate(
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
const unsigned BlockIndex = getBlock(InputOffset).second;
for (const MCDecodedPseudoProbe &Probe : Probes)
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
Probe.getType()});
}
}
}
// Drop blocks without a hash, won't be useful for stale matching.
llvm::erase_if(YamlBF.Blocks,
[](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
Expand Down
162 changes: 155 additions & 7 deletions bolt/lib/Profile/YAMLProfileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,87 @@ llvm::cl::opt<bool>
MatchProfileWithFunctionHash("match-profile-with-function-hash",
cl::desc("Match profile with function hash"),
cl::Hidden, cl::cat(BoltOptCategory));
llvm::cl::opt<bool>
MatchWithCallGraph("match-with-call-graph",
cl::desc("Match functions with call graph"), cl::Hidden,
cl::cat(BoltOptCategory));

llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
cl::desc("use DFS order for YAML profile"),
cl::Hidden, cl::cat(BoltOptCategory));

llvm::cl::opt<bool> ProfileUsePseudoProbes(
"profile-use-pseudo-probes",
cl::desc("Use pseudo probes for profile generation and matching"),
cl::Hidden, cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
namespace bolt {

/// Builds everything the matcher needs at construction time: the binary
/// function call graph, the YAML profile call graph, and then the
/// neighbor-hash index derived from the binary function call graph.
YAMLProfileReader::CallGraphMatcher::CallGraphMatcher(
BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP,
ProfileLookupMap &IdToYAMLBF) {
constructBFCG(BC, YamlBP);
constructYAMLFCG(YamlBP, IdToYAMLBF);
// Reads BFAdjacencyMap, so it has to run after constructBFCG().
computeBFNeighborHashes(BC);
}

/// Populates BFAdjacencyMap with an undirected edge between every binary
/// function and each function it calls. A call instruction contributes an edge
/// only when its target symbol resolves, via BinaryData, to a BinaryFunction.
/// \p YamlBP is not consulted here.
void YAMLProfileReader::CallGraphMatcher::constructBFCG(
    BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP) {
  // Maps an instruction to the function it calls, or nullptr when it is not a
  // call or any step of the symbol -> data -> function lookup fails.
  auto ResolveCallee = [&BC](const MCInst &Inst) -> BinaryFunction * {
    if (!BC.MIB->isCall(Inst))
      return nullptr;
    const MCSymbol *TargetSym = BC.MIB->getTargetSymbol(Inst);
    if (!TargetSym)
      return nullptr;
    BinaryData *Data = BC.getBinaryDataByName(TargetSym->getName());
    if (!Data)
      return nullptr;
    return BC.getFunctionForSymbol(Data->getSymbol());
  };

  for (BinaryFunction *Caller : BC.getAllBinaryFunctions()) {
    for (const BinaryBasicBlock &Block : Caller->blocks()) {
      for (const MCInst &Inst : Block) {
        BinaryFunction *Callee = ResolveCallee(Inst);
        if (!Callee)
          continue;
        // Record the edge in both directions: adjacency is symmetric.
        BFAdjacencyMap[Callee].insert(Caller);
        BFAdjacencyMap[Caller].insert(Callee);
      }
    }
  }
}

/// Indexes binary functions by a hash of their call graph neighborhood: for
/// every function that has an entry in BFAdjacencyMap, the names of its
/// neighbors are concatenated in the adjacency container's iteration order,
/// hashed with std::hash<std::string>, and the function is appended to
/// NeighborHashToBFs under that hash.
void YAMLProfileReader::CallGraphMatcher::computeBFNeighborHashes(
    BinaryContext &BC) {
  for (BinaryFunction *Function : BC.getAllBinaryFunctions()) {
    auto NeighborsIt = BFAdjacencyMap.find(Function);
    // Functions without any recorded call edge are not indexed.
    if (NeighborsIt == BFAdjacencyMap.end())
      continue;

    std::string NeighborNames;
    for (BinaryFunction *Neighbor : NeighborsIt->second)
      NeighborNames += Neighbor->getOneName();

    const uint64_t NeighborHash = std::hash<std::string>{}(NeighborNames);
    NeighborHashToBFs[NeighborHash].push_back(Function);
  }
}

/// Populates YamlBFAdjacencyMap with an undirected edge between every profiled
/// function and each callee named by its call sites. Call sites whose DestId
/// has no entry in \p IdToYAMLBF are skipped.
void YAMLProfileReader::CallGraphMatcher::constructYAMLFCG(
    yaml::bolt::BinaryProfile &YamlBP, ProfileLookupMap &IdToYAMLBF) {
  for (auto &Caller : YamlBP.Functions) {
    for (auto &Block : Caller.Blocks) {
      for (auto &Site : Block.CallSites) {
        auto CalleeIt = IdToYAMLBF.find(Site.DestId);
        if (CalleeIt != IdToYAMLBF.end()) {
          // Record the edge in both directions: adjacency is symmetric.
          YamlBFAdjacencyMap[&Caller].insert(CalleeIt->second);
          YamlBFAdjacencyMap[CalleeIt->second].insert(&Caller);
        }
      }
    }
  }
}

bool YAMLProfileReader::isYAML(const StringRef Filename) {
if (auto MB = MemoryBuffer::getFileOrSTDIN(Filename)) {
StringRef Buffer = (*MB)->getBuffer();
Expand Down Expand Up @@ -350,7 +422,7 @@ bool YAMLProfileReader::profileMatches(
}

bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
if (opts::MatchProfileWithFunctionHash)
if (opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)
return true;
for (StringRef Name : BF.getNames())
if (ProfileFunctionNames.contains(Name))
Expand Down Expand Up @@ -446,6 +518,79 @@ size_t YAMLProfileReader::matchWithLTOCommonName() {
return MatchedWithLTOCommonName;
}

/// Matches not-yet-used YAML profiles to binary functions through their call
/// graph neighborhoods.
///
/// For each unused profile that has neighbors in the YAML call graph, the
/// neighbor names are concatenated and hashed. Among the binary functions
/// indexed under the same neighbor hash that are not already in
/// ProfiledFunctions, the one whose demangled base name shares the longest
/// common prefix with the profile's base name is matched; on ties the candidate
/// visited last wins.
///
/// \returns the number of profiles matched by this step, or 0 when
/// opts::MatchWithCallGraph is off.
size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
  if (!opts::MatchWithCallGraph)
    return 0;

  size_t MatchedWithCallGraph = 0;
  CallGraphMatcher CGMatcher(BC, YamlBP, IdToYamLBF);

  ItaniumPartialDemangler Demangler;
  // Returns the base name reported by the partial demangler, or an empty
  // string when demangling fails or no base name is produced.
  auto GetBaseName = [&](const std::string &FunctionName) -> std::string {
    if (Demangler.partialDemangle(FunctionName.c_str()))
      return std::string("");
    size_t BufferSize = 1;
    char *Buffer = static_cast<char *>(std::malloc(BufferSize));
    char *BaseName = Demangler.getFunctionBaseName(Buffer, &BufferSize);
    if (!BaseName) {
      std::free(Buffer);
      return std::string("");
    }
    // The demangler may hand back a different (reallocated) buffer than the
    // one passed in; the returned pointer is the one that owns the memory.
    // NOTE(review): the string is built from the size the demangler reports;
    // confirm whether that size counts a terminating NUL.
    std::string BaseNameStr(BaseName, BufferSize);
    std::free(BaseName);
    return BaseNameStr;
  };

  // Matches YAMLBF to BFs with neighbor hashes.
  for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
    if (YamlBF.Used)
      continue;
    auto AdjacentYamlBFsOpt = CGMatcher.getAdjacentYamlBFs(YamlBF);
    if (!AdjacentYamlBFsOpt)
      continue;
    // Bind by reference: the optional lives for the whole iteration, so there
    // is no need to deep-copy the set just to read it.
    const std::set<yaml::bolt::BinaryFunctionProfile *> &AdjacentYamlBFs =
        AdjacentYamlBFsOpt.value();
    std::string AdjacentYamlBFsHashStr;
    for (auto *AdjacentYamlBF : AdjacentYamlBFs)
      AdjacentYamlBFsHashStr += AdjacentYamlBF->Name;
    uint64_t Hash = std::hash<std::string>{}(AdjacentYamlBFsHashStr);
    auto BFsWithSameHashOpt = CGMatcher.getBFsWithNeighborHash(Hash);
    if (!BFsWithSameHashOpt)
      continue;
    // Same as above: read the candidate list in place instead of copying it.
    const std::vector<BinaryFunction *> &BFsWithSameHash =
        BFsWithSameHashOpt.value();
    // Finds the binary function with the longest common prefix to the profiled
    // function and matches.
    BinaryFunction *ClosestBF = nullptr;
    size_t LCP = 0;
    const std::string YamlBFBaseName = GetBaseName(YamlBF.Name);
    for (BinaryFunction *BF : BFsWithSameHash) {
      if (ProfiledFunctions.count(BF))
        continue;
      const std::string BFName = std::string(BF->getOneName());
      const std::string BFBaseName = GetBaseName(BFName);
      size_t PrefixLength = 0;
      const size_t N = std::min(YamlBFBaseName.size(), BFBaseName.size());
      for (size_t I = 0; I < N; ++I) {
        if (YamlBFBaseName[I] != BFBaseName[I])
          break;
        ++PrefixLength;
      }
      // ">=" keeps a candidate even with an empty common prefix, and lets a
      // later candidate replace an earlier one of equal prefix length.
      if (PrefixLength >= LCP) {
        LCP = PrefixLength;
        ClosestBF = BF;
      }
    }
    if (ClosestBF) {
      matchProfileToFunction(YamlBF, *ClosestBF);
      ++MatchedWithCallGraph;
    }
  }

  return MatchedWithCallGraph;
}

size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
if (opts::NameSimilarityFunctionMatchingThreshold == 0)
return 0;
Expand Down Expand Up @@ -581,9 +726,14 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
}
}

// Map profiled function ids to names.
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
IdToYamLBF[YamlBF.Id] = &YamlBF;

const size_t MatchedWithExactName = matchWithExactName();
const size_t MatchedWithHash = matchWithHash(BC);
const size_t MatchedWithLTOCommonName = matchWithLTOCommonName();
const size_t MatchedWithCallGraph = matchWithCallGraph(BC);
const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC);

for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
Expand All @@ -603,18 +753,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
<< " functions with hash\n";
outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
<< " functions with matching LTO common names\n";
outs() << "BOLT-INFO: matched " << MatchedWithCallGraph
<< " functions with call graph\n";
outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
<< " functions with similar names\n";
}

// Set for parseFunctionProfile().
NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
NormalizeByCalls = usesEvent("branches");

// Map profiled function ids to names.
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
IdToYamLBF[YamlBF.Id] = &YamlBF;

uint64_t NumUnused = 0;
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
if (YamlBF.Id >= YamlProfileToFunction.size()) {
Expand All @@ -630,7 +777,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {

BC.setNumUnusedProfiledObjects(NumUnused);

if (opts::Lite && opts::MatchProfileWithFunctionHash) {
if (opts::Lite &&
(opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)) {
for (BinaryFunction *BF : BC.getAllBinaryFunctions())
if (!BF->hasProfile())
BF->setIgnored();
Expand Down
25 changes: 25 additions & 0 deletions bolt/lib/Profile/YAMLProfileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

namespace opts {
extern llvm::cl::opt<bool> ProfileUseDFS;
extern llvm::cl::opt<bool> ProfileUsePseudoProbes;
} // namespace opts

namespace llvm {
Expand Down Expand Up @@ -57,6 +58,8 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const BoltAddressTranslation *BAT) {
yaml::bolt::BinaryFunctionProfile YamlBF;
const BinaryContext &BC = BF.getBinaryContext();
const MCPseudoProbeDecoder *PseudoProbeDecoder =
opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;

const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;

Expand All @@ -69,6 +72,13 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
YamlBF.ExecCount = BF.getKnownExecutionCount();
if (PseudoProbeDecoder) {
if ((YamlBF.GUID = BF.getGUID())) {
const MCPseudoProbeFuncDesc *FuncDesc =
PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
}
}

BinaryFunction::BasicBlockOrderType Order;
llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
Expand Down Expand Up @@ -177,6 +187,21 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
++BranchInfo;
}

if (PseudoProbeDecoder) {
const AddressProbesMap &ProbeMap =
PseudoProbeDecoder->getAddress2ProbesMap();
const uint64_t FuncAddr = BF.getAddress();
const std::pair<uint64_t, uint64_t> &BlockRange =
BB->getInputAddressRange();
const auto &BlockProbes =
llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first),
ProbeMap.lower_bound(FuncAddr + BlockRange.second));
for (const auto &[_, Probes] : BlockProbes)
for (const MCDecodedPseudoProbe &Probe : Probes)
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
Probe.getGuid(), Probe.getIndex(), Probe.getType()});
}

YamlBF.Blocks.emplace_back(YamlBB);
}
return YamlBF;
Expand Down
7 changes: 6 additions & 1 deletion bolt/lib/Rewrite/BinaryPassManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,10 @@ static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
cl::ReallyHidden,
cl::cat(BoltOptCategory));

// Command-line toggle passed alongside the ShortenInstructions pass when it is
// registered in runAllPasses(); initialized to true.
static cl::opt<bool> ShortenInstructions("shorten-instructions",
cl::desc("shorten instructions"),
cl::init(true),
cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
Expand Down Expand Up @@ -378,7 +382,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
else if (opts::Hugify)
Manager.registerPass(std::make_unique<HugePage>(NeverPrint));

Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint),
opts::ShortenInstructions);

Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint),
!opts::KeepNops);
Expand Down
250 changes: 135 additions & 115 deletions bolt/lib/Rewrite/DWARFRewriter.cpp

Large diffs are not rendered by default.

33 changes: 30 additions & 3 deletions bolt/lib/Rewrite/PseudoProbeRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LEB128.h"
#include <memory>

#undef DEBUG_TYPE
#define DEBUG_TYPE "pseudo-probe-rewriter"
Expand Down Expand Up @@ -48,6 +49,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
clEnumValN(PPP_All, "all", "enable all debugging printout")),
cl::Hidden, cl::cat(BoltCategory));

extern cl::opt<bool> ProfileUsePseudoProbes;
} // namespace opts

namespace {
Expand All @@ -72,23 +74,38 @@ class PseudoProbeRewriter final : public MetadataRewriter {
void parsePseudoProbe();

/// PseudoProbe decoder
MCPseudoProbeDecoder ProbeDecoder;
std::shared_ptr<MCPseudoProbeDecoder> ProbeDecoderPtr;

public:
PseudoProbeRewriter(BinaryContext &BC)
: MetadataRewriter("pseudo-probe-rewriter", BC) {}
: MetadataRewriter("pseudo-probe-rewriter", BC),
ProbeDecoderPtr(std::make_shared<MCPseudoProbeDecoder>()) {
BC.setPseudoProbeDecoder(ProbeDecoderPtr);
}

Error preCFGInitializer() override;
Error postEmitFinalizer() override;

~PseudoProbeRewriter() override { ProbeDecoderPtr.reset(); }
};

/// Pre-CFG hook: parses the pseudo probe sections at this point only when
/// opts::ProfileUsePseudoProbes is set. Always reports success.
Error PseudoProbeRewriter::preCFGInitializer() {
if (opts::ProfileUsePseudoProbes)
parsePseudoProbe();

return Error::success();
}

Error PseudoProbeRewriter::postEmitFinalizer() {
parsePseudoProbe();
if (!opts::ProfileUsePseudoProbes)
parsePseudoProbe();
updatePseudoProbes();

return Error::success();
}

void PseudoProbeRewriter::parsePseudoProbe() {
MCPseudoProbeDecoder &ProbeDecoder(*ProbeDecoderPtr);
PseudoProbeDescSection = BC.getUniqueSectionByName(".pseudo_probe_desc");
PseudoProbeSection = BC.getUniqueSectionByName(".pseudo_probe");

Expand Down Expand Up @@ -138,9 +155,18 @@ void PseudoProbeRewriter::parsePseudoProbe() {
ProbeDecoder.printGUID2FuncDescMap(outs());
ProbeDecoder.printProbesForAllAddresses(outs());
}

for (const auto &[GUID, FuncDesc] : ProbeDecoder.getGUID2FuncDescMap()) {
if (!FuncStartAddrs.contains(GUID))
continue;
BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncStartAddrs[GUID]);
assert(BF);
BF->setGUID(GUID);
}
}

void PseudoProbeRewriter::updatePseudoProbes() {
MCPseudoProbeDecoder &ProbeDecoder(*ProbeDecoderPtr);
// check if there is pseudo probe section decoded
if (ProbeDecoder.getAddress2ProbesMap().empty())
return;
Expand Down Expand Up @@ -241,6 +267,7 @@ void PseudoProbeRewriter::updatePseudoProbes() {
}

void PseudoProbeRewriter::encodePseudoProbes() {
MCPseudoProbeDecoder &ProbeDecoder(*ProbeDecoderPtr);
// Buffer for new pseudo probes section
SmallString<8> Contents;
MCDecodedPseudoProbe *LastProbe = nullptr;
Expand Down
18 changes: 14 additions & 4 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,7 @@ Error RewriteInstance::run() {
opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML) {
selectFunctionsToProcess();
disassembleFunctions();
processMetadataPreCFG();
buildFunctionsCFG();
}
processProfileData();
Expand Down Expand Up @@ -886,7 +887,7 @@ void RewriteInstance::discoverFileObjects() {
if (SymName == "__hot_start" || SymName == "__hot_end")
continue;

FileSymRefs[SymbolAddress] = Symbol;
FileSymRefs.emplace(SymbolAddress, Symbol);

// Skip section symbols that will be registered by disassemblePLT().
if (SymbolType == SymbolRef::ST_Debug) {
Expand Down Expand Up @@ -1052,7 +1053,9 @@ void RewriteInstance::discoverFileObjects() {

// Remove the symbol from FileSymRefs so that we can skip it
// in the future.
auto SI = FileSymRefs.find(SymbolAddress);
auto SI = llvm::find_if(
llvm::make_range(FileSymRefs.equal_range(SymbolAddress)),
[&](auto SymIt) { return SymIt.second == Symbol; });
assert(SI != FileSymRefs.end() && "symbol expected to be present");
assert(SI->second == Symbol && "wrong symbol found");
FileSymRefs.erase(SI);
Expand Down Expand Up @@ -1260,6 +1263,7 @@ void RewriteInstance::discoverFileObjects() {

registerFragments();
FileSymbols.clear();
FileSymRefs.clear();

discoverBOLTReserved();
}
Expand Down Expand Up @@ -1429,11 +1433,17 @@ void RewriteInstance::registerFragments() {
// of the last local symbol.
ELFSymbolRef LocalSymEnd = ELF64LEFile->toSymbolRef(SymTab, SymTab->sh_info);

for (auto &[ParentName, BF] : AmbiguousFragments) {
for (auto &Fragment : AmbiguousFragments) {
const StringRef &ParentName = Fragment.first;
BinaryFunction *BF = Fragment.second;
const uint64_t Address = BF->getAddress();

// Get fragment's own symbol
const auto SymIt = FileSymRefs.find(Address);
const auto SymIt = llvm::find_if(
llvm::make_range(FileSymRefs.equal_range(Address)), [&](auto SI) {
StringRef Name = cantFail(SI.second.getName());
return Name.contains(ParentName);
});
if (SymIt == FileSymRefs.end()) {
BC->errs()
<< "BOLT-ERROR: symbol lookup failed for function at address 0x"
Expand Down
8 changes: 4 additions & 4 deletions bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ cl::opt<bool>
"(which is what --hot-text relies on)."),
cl::cat(BoltOptCategory));

static cl::opt<std::string> RuntimeHugifyLib(
"runtime-hugify-lib",
cl::desc("specify file name of the runtime hugify library"),
cl::init("libbolt_rt_hugify.a"), cl::cat(BoltOptCategory));
static cl::opt<std::string>
RuntimeHugifyLib("runtime-hugify-lib",
cl::desc("specify path of the runtime hugify library"),
cl::init("libbolt_rt_hugify.a"), cl::cat(BoltOptCategory));

} // namespace opts

Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace opts {

cl::opt<std::string> RuntimeInstrumentationLib(
"runtime-instrumentation-lib",
cl::desc("specify file name of the runtime instrumentation library"),
cl::desc("specify path of the runtime instrumentation library"),
cl::init("libbolt_rt_instr.a"), cl::cat(BoltOptCategory));

extern cl::opt<bool> InstrumentationFileAppendPID;
Expand Down
35 changes: 29 additions & 6 deletions bolt/lib/RuntimeLibs/RuntimeLibrary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ using namespace bolt;

void RuntimeLibrary::anchor() {}

std::string RuntimeLibrary::getLibPath(StringRef ToolPath,
StringRef LibFileName) {
std::string RuntimeLibrary::getLibPathByToolPath(StringRef ToolPath,
StringRef LibFileName) {
StringRef Dir = llvm::sys::path::parent_path(ToolPath);
SmallString<128> LibPath = llvm::sys::path::parent_path(Dir);
llvm::sys::path::append(LibPath, "lib" LLVM_LIBDIR_SUFFIX);
Expand All @@ -38,13 +38,36 @@ std::string RuntimeLibrary::getLibPath(StringRef ToolPath,
llvm::sys::path::append(LibPath, "lib" LLVM_LIBDIR_SUFFIX);
}
llvm::sys::path::append(LibPath, LibFileName);
if (!llvm::sys::fs::exists(LibPath)) {
errs() << "BOLT-ERROR: library not found: " << LibPath << "\n";
exit(1);
}
return std::string(LibPath);
}

/// Returns \p LibFileName appended to the CMAKE_INSTALL_FULL_LIBDIR path.
/// The result is not checked for existence here.
std::string RuntimeLibrary::getLibPathByInstalled(StringRef LibFileName) {
SmallString<128> LibPath(CMAKE_INSTALL_FULL_LIBDIR);
llvm::sys::path::append(LibPath, LibFileName);
return std::string(LibPath);
}

/// Locates a runtime library, trying candidates in this order and returning
/// the first one that exists:
///   1. \p LibFileName exactly as given,
///   2. the path derived from \p ToolPath by getLibPathByToolPath(),
///   3. the path under the install libdir from getLibPathByInstalled().
/// If none exists, prints a BOLT-ERROR listing the candidates and exits with
/// status 1.
std::string RuntimeLibrary::getLibPath(StringRef ToolPath,
                                       StringRef LibFileName) {
  if (llvm::sys::fs::exists(LibFileName))
    return std::string(LibFileName);

  std::string ToolRelative = getLibPathByToolPath(ToolPath, LibFileName);
  if (llvm::sys::fs::exists(ToolRelative))
    return ToolRelative;

  std::string Installed = getLibPathByInstalled(LibFileName);
  if (llvm::sys::fs::exists(Installed))
    return Installed;

  errs() << "BOLT-ERROR: library not found: " << ToolRelative << ", "
         << Installed << ", or " << LibFileName << "\n";
  exit(1);
}

void RuntimeLibrary::loadLibrary(StringRef LibPath, BOLTLinker &Linker,
BOLTLinker::SectionsMapper MapSections) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MaybeBuf =
Expand Down
13 changes: 8 additions & 5 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -852,16 +852,19 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return Uses;
}

IndirectBranchType analyzeIndirectBranch(
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override {
IndirectBranchType
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
InstructionIterator End, const unsigned PtrSize,
MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut,
MCInst *&FixedEntryLoadInstr) const override {
MemLocInstrOut = nullptr;
BaseRegNumOut = AArch64::NoRegister;
IndexRegNumOut = AArch64::NoRegister;
DispValueOut = 0;
DispExprOut = nullptr;
FixedEntryLoadInstr = nullptr;

// An instruction referencing memory used by jump instruction (directly or
// via register). This location could be an array of function pointers
Expand Down
12 changes: 12 additions & 0 deletions bolt/lib/Target/AArch64/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,18 @@ set(LLVM_LINK_COMPONENTS
AArch64Desc
)

# Standalone BOLT builds run TableGen here to produce the AArch64 .inc files
# listed below from the LLVM source tree's AArch64.td.
if(BOLT_BUILT_STANDALONE)
set(LLVM_TARGET_DEFINITIONS ${LLVM_MAIN_SRC_DIR}/lib/Target/AArch64/AArch64.td)
list(APPEND LLVM_TABLEGEN_FLAGS -I ${LLVM_MAIN_SRC_DIR}/lib/Target/AArch64)
tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info)
tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info)
tablegen(LLVM AArch64GenSystemOperands.inc -gen-searchable-tables)
tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget)

add_public_tablegen_target(AArch64CommonTableGen)
# Adds this directory's build dir to the include path (presumably where the
# generated .inc files are written).
include_directories(${CMAKE_CURRENT_BINARY_DIR})
endif()

add_llvm_library(LLVMBOLTTargetAArch64
AArch64MCPlusBuilder.cpp

Expand Down
13 changes: 13 additions & 0 deletions bolt/lib/Target/RISCV/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,19 @@ set(LLVM_LINK_COMPONENTS
RISCVDesc
)

# Standalone BOLT builds run TableGen here to produce the RISCV .inc files
# listed below from the LLVM source tree's RISCV.td.
if(BOLT_BUILT_STANDALONE)
# tablegen, copied from llvm/lib/Target/RISCV/CMakeLists.txt
set(LLVM_TARGET_DEFINITIONS ${LLVM_MAIN_SRC_DIR}/lib/Target/RISCV/RISCV.td)
list(APPEND LLVM_TABLEGEN_FLAGS -I ${LLVM_MAIN_SRC_DIR}/lib/Target/RISCV)
tablegen(LLVM RISCVGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM RISCVGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM RISCVGenSearchableTables.inc -gen-searchable-tables)
tablegen(LLVM RISCVGenSubtargetInfo.inc -gen-subtarget)

add_public_tablegen_target(RISCVCommonTableGen)
# Adds this directory's build dir to the include path (presumably where the
# generated .inc files are written).
include_directories(${CMAKE_CURRENT_BINARY_DIR})
endif()

add_llvm_library(LLVMBOLTTargetRISCV
RISCVMCPlusBuilder.cpp

Expand Down
3 changes: 2 additions & 1 deletion bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,14 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstr, unsigned &BaseRegNum,
unsigned &IndexRegNum, int64_t &DispValue, const MCExpr *&DispExpr,
MCInst *&PCRelBaseOut) const override {
MCInst *&PCRelBaseOut, MCInst *&FixedEntryLoadInst) const override {
MemLocInstr = nullptr;
BaseRegNum = 0;
IndexRegNum = 0;
DispValue = 0;
DispExpr = nullptr;
PCRelBaseOut = nullptr;
FixedEntryLoadInst = nullptr;

// Check for the following long tail call sequence:
// 1: auipc xi, %pcrel_hi(sym)
Expand Down
12 changes: 12 additions & 0 deletions bolt/lib/Target/X86/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ set(LLVM_LINK_COMPONENTS
X86Desc
)

# When BOLT_BUILT_STANDALONE is set, run TableGen from this directory on the
# X86 target description found in the LLVM source tree, producing the
# .inc files named below.
if(BOLT_BUILT_STANDALONE)
# Target description file and the include search path TableGen needs for it.
set(LLVM_TARGET_DEFINITIONS ${LLVM_MAIN_SRC_DIR}/lib/Target/X86/X86.td)
list(APPEND LLVM_TABLEGEN_FLAGS -I ${LLVM_MAIN_SRC_DIR}/lib/Target/X86)
# One tablegen() call per generated file; the trailing arguments select the
# backend and its options.
tablegen(LLVM X86GenInstrInfo.inc -gen-instr-info -instr-info-expand-mi-operand-info=0)
tablegen(LLVM X86GenMnemonicTables.inc -gen-x86-mnemonic-tables -asmwriternum=1)
tablegen(LLVM X86GenRegisterInfo.inc -gen-register-info)
tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)

# Register the outputs under the X86CommonTableGen target and add the
# current binary directory to the include path.
add_public_tablegen_target(X86CommonTableGen)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
endif()

add_llvm_library(LLVMBOLTTargetX86
X86MCPlusBuilder.cpp
X86MCSymbolizer.cpp
Expand Down
105 changes: 66 additions & 39 deletions bolt/lib/Target/X86/X86MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1866,8 +1866,11 @@ class X86MCPlusBuilder : public MCPlusBuilder {
return true;
}

/// Analyzes a PIC-style jump table code template and returns the identified
/// IndirectBranchType, MemLocInstr (all cases) and FixedEntryLoadInstr
/// (POSSIBLE_PIC_FIXED_BRANCH case).
template <typename Itr>
std::pair<IndirectBranchType, MCInst *>
std::tuple<IndirectBranchType, MCInst *, MCInst *>
analyzePICJumpTable(Itr II, Itr IE, MCPhysReg R1, MCPhysReg R2) const {
// Analyze PIC-style jump table code template:
//
Expand All @@ -1876,6 +1879,13 @@ class X86MCPlusBuilder : public MCPlusBuilder {
// add %r2, %r1
// jmp *%r1
//
// or a fixed indirect jump template:
//
// movslq En(%rip), {%r2|%r1} <- FixedEntryLoadInstr
// lea PIC_JUMP_TABLE(%rip), {%r1|%r2} <- MemLocInstr
// add %r2, %r1
// jmp *%r1
//
// (with any irrelevant instructions in-between)
//
// When we call this helper we've already determined %r1 and %r2, and
Expand Down Expand Up @@ -1916,8 +1926,13 @@ class X86MCPlusBuilder : public MCPlusBuilder {
MO.SegRegNum == X86::NoRegister;
};
LLVM_DEBUG(dbgs() << "Checking for PIC jump table\n");
MCInst *MemLocInstr = nullptr;
const MCInst *MovInstr = nullptr;
MCInst *FirstInstr = nullptr;
MCInst *SecondInstr = nullptr;
enum {
NOMATCH = 0,
MATCH_JUMP_TABLE,
MATCH_FIXED_BRANCH,
} MatchingState = NOMATCH;
while (++II != IE) {
MCInst &Instr = *II;
const MCInstrDesc &InstrDesc = Info->get(Instr.getOpcode());
Expand All @@ -1926,68 +1941,76 @@ class X86MCPlusBuilder : public MCPlusBuilder {
// Ignore instructions that don't affect R1, R2 registers.
continue;
}
if (!MovInstr) {
// Expect to see MOV instruction.
if (!isMOVSX64rm32(Instr)) {
LLVM_DEBUG(dbgs() << "MOV instruction expected.\n");
const bool IsMOVSXInstr = isMOVSX64rm32(Instr);
const bool IsLEAInstr = isLEA64r(Instr);
if (MatchingState == NOMATCH) {
if (IsMOVSXInstr)
MatchingState = MATCH_JUMP_TABLE;
else if (IsLEAInstr)
MatchingState = MATCH_FIXED_BRANCH;
else
break;
}

// Check if it's setting %r1 or %r2. In canonical form it sets %r2.
// If it sets %r1 - rename the registers so we have to only check
// a single form.
unsigned MovDestReg = Instr.getOperand(0).getReg();
if (MovDestReg != R2)
// Check if the first instruction is setting %r1 or %r2. In canonical
// form lea sets %r1 and mov sets %r2. If it's the opposite - rename so
// we have to only check a single form.
unsigned DestReg = Instr.getOperand(0).getReg();
MCPhysReg &ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R2 : R1;
if (DestReg != ExpectReg)
std::swap(R1, R2);
if (MovDestReg != R2) {
LLVM_DEBUG(dbgs() << "MOV instruction expected to set %r2\n");
if (DestReg != ExpectReg)
break;
}

// Verify operands for MOV.
// Verify operands
std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr);
if (!MO)
break;
if (!isIndexed(*MO, R1))
// POSSIBLE_PIC_JUMP_TABLE
if ((MatchingState == MATCH_JUMP_TABLE && isIndexed(*MO, R1)) ||
(MatchingState == MATCH_FIXED_BRANCH && isRIPRel(*MO)))
FirstInstr = &Instr;
else
break;
MovInstr = &Instr;
} else {
if (!InstrDesc.hasDefOfPhysReg(Instr, R1, *RegInfo))
unsigned ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R1 : R2;
if (!InstrDesc.hasDefOfPhysReg(Instr, ExpectReg, *RegInfo))
continue;
if (!isLEA64r(Instr)) {
LLVM_DEBUG(dbgs() << "LEA instruction expected\n");
if ((MatchingState == MATCH_JUMP_TABLE && !IsLEAInstr) ||
(MatchingState == MATCH_FIXED_BRANCH && !IsMOVSXInstr))
break;
}
if (Instr.getOperand(0).getReg() != R1) {
LLVM_DEBUG(dbgs() << "LEA instruction expected to set %r1\n");
if (Instr.getOperand(0).getReg() != ExpectReg)
break;
}

// Verify operands for LEA.
// Verify operands.
std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr);
if (!MO)
break;
if (!isRIPRel(*MO))
break;
MemLocInstr = &Instr;
SecondInstr = &Instr;
break;
}
}

if (!MemLocInstr)
return std::make_pair(IndirectBranchType::UNKNOWN, nullptr);
if (!SecondInstr)
return std::make_tuple(IndirectBranchType::UNKNOWN, nullptr, nullptr);

if (MatchingState == MATCH_FIXED_BRANCH) {
LLVM_DEBUG(dbgs() << "checking potential fixed indirect branch\n");
return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH,
FirstInstr, SecondInstr);
}
LLVM_DEBUG(dbgs() << "checking potential PIC jump table\n");
return std::make_pair(IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE,
MemLocInstr);
return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE,
SecondInstr, nullptr);
}

IndirectBranchType analyzeIndirectBranch(
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override {
IndirectBranchType
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
InstructionIterator End, const unsigned PtrSize,
MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut,
MCInst *&FixedEntryLoadInst) const override {
// Try to find a (base) memory location from where the address for
// the indirect branch is loaded. For X86-64 the memory will be specified
// in the following format:
Expand All @@ -2014,6 +2037,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
IndexRegNumOut = X86::NoRegister;
DispValueOut = 0;
DispExprOut = nullptr;
FixedEntryLoadInst = nullptr;

std::reverse_iterator<InstructionIterator> II(End);
std::reverse_iterator<InstructionIterator> IE(Begin);
Expand Down Expand Up @@ -2046,7 +2070,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {
unsigned R2 = PrevInstr.getOperand(2).getReg();
if (R1 == R2)
return IndirectBranchType::UNKNOWN;
std::tie(Type, MemLocInstr) = analyzePICJumpTable(PrevII, IE, R1, R2);
std::tie(Type, MemLocInstr, FixedEntryLoadInst) =
analyzePICJumpTable(PrevII, IE, R1, R2);
break;
}
return IndirectBranchType::UNKNOWN;
Expand Down Expand Up @@ -2090,6 +2115,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {
if (MO->ScaleImm != 1 || MO->BaseRegNum != RIPRegister)
return IndirectBranchType::UNKNOWN;
break;
case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH:
break;
default:
if (MO->ScaleImm != PtrSize)
return IndirectBranchType::UNKNOWN;
Expand Down
32 changes: 28 additions & 4 deletions bolt/lib/Utils/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,39 @@
# Look up a version-control file for the LLVM and BOLT source trees; the
# results (llvm_vc, bolt_vc) are used below as dependencies of the generated
# header.
find_first_existing_vc_file("${LLVM_MAIN_SRC_DIR}" llvm_vc)
find_first_existing_vc_file("${BOLT_SOURCE_DIR}" bolt_vc)

# The VC revision include that we want to generate.
set(version_inc "${CMAKE_CURRENT_BINARY_DIR}/VCSVersion.inc")

# Script run in CMake script mode (-P) to produce the header.
set(generate_vcs_version_script "${LLVM_CMAKE_DIR}/GenerateVersionFromVCS.cmake")

# Create custom command to generate the VC revision include. It is re-run when
# either VC file or the generator script changes.
add_custom_command(OUTPUT "${version_inc}"
DEPENDS "${llvm_vc}" "${bolt_vc}" "${generate_vcs_version_script}"
COMMAND ${CMAKE_COMMAND} "-DNAMES=BOLT"
"-DHEADER_FILE=${version_inc}"
"-DBOLT_SOURCE_DIR=${BOLT_SOURCE_DIR}"
"-DLLVM_VC_REPOSITORY=${llvm_vc_repository}"
"-DLLVM_VC_REVISION=${llvm_vc_revision}"
"-DLLVM_FORCE_VC_REVISION=${LLVM_FORCE_VC_REVISION}"
"-DLLVM_FORCE_VC_REPOSITORY=${LLVM_FORCE_VC_REPOSITORY}"
-P "${generate_vcs_version_script}")

# Mark the generated header as being generated.
set_source_files_properties("${version_inc}"
PROPERTIES GENERATED TRUE
HEADER_FILE_ONLY TRUE)

# VCSVersion.inc is written to the current binary directory, so add that
# directory to the include path.
include_directories(${CMAKE_CURRENT_BINARY_DIR})

add_llvm_library(LLVMBOLTUtils
CommandLineOpts.cpp
Utils.cpp

${version_inc}
DISABLE_LLVM_LINK_LLVM_DYLIB

LINK_LIBS
${LLVM_PTHREAD_LIB}

DEPENDS
llvm_vcsrevision_h

LINK_COMPONENTS
Support
)
6 changes: 3 additions & 3 deletions bolt/lib/Utils/CommandLineOpts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@
//===----------------------------------------------------------------------===//

#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/Support/VCSRevision.h"
#include "VCSVersion.inc"

using namespace llvm;

namespace llvm {
namespace bolt {
const char *BoltRevision =
#ifdef LLVM_REVISION
LLVM_REVISION;
#ifdef BOLT_REVISION
BOLT_REVISION;
#else
"<unknown>";
#endif
Expand Down
22 changes: 17 additions & 5 deletions bolt/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,19 @@ add_library(bolt_rt_instr STATIC
instr.cpp
${CMAKE_CURRENT_BINARY_DIR}/config.h
)
set_target_properties(bolt_rt_instr PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${LLVM_LIBRARY_DIR}")
set_target_properties(bolt_rt_instr PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}")
add_library(bolt_rt_hugify STATIC
hugify.cpp
${CMAKE_CURRENT_BINARY_DIR}/config.h
)
set_target_properties(bolt_rt_hugify PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${LLVM_LIBRARY_DIR}")
set_target_properties(bolt_rt_hugify PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}")

# When BOLT is not built standalone, copy each runtime archive into
# LLVM_LIBRARY_DIR as a post-build step of its target.
# NOTE(review): the copy source hard-codes "lib/", while the archive output
# directory set above is "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}" -
# confirm the two agree when the library directory is not plain "lib".
if(NOT BOLT_BUILT_STANDALONE)
add_custom_command(TARGET bolt_rt_instr POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_BINARY_DIR}/lib/libbolt_rt_instr.a" "${LLVM_LIBRARY_DIR}")
add_custom_command(TARGET bolt_rt_hugify POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_BINARY_DIR}/lib/libbolt_rt_hugify.a" "${LLVM_LIBRARY_DIR}")
endif()

set(BOLT_RT_FLAGS
-ffreestanding
Expand All @@ -46,8 +53,8 @@ target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS})
target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

install(TARGETS bolt_rt_instr DESTINATION "lib${LLVM_LIBDIR_SUFFIX}")
install(TARGETS bolt_rt_hugify DESTINATION "lib${LLVM_LIBDIR_SUFFIX}")
install(TARGETS bolt_rt_instr DESTINATION "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}")
install(TARGETS bolt_rt_hugify DESTINATION "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}")

if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*" AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
add_library(bolt_rt_instr_osx STATIC
Expand All @@ -59,5 +66,10 @@ if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*" AND CMAKE_SYSTEM_NAME STREQUAL "Da
target_compile_options(bolt_rt_instr_osx PRIVATE
-target x86_64-apple-darwin19.6.0
${BOLT_RT_FLAGS})
install(TARGETS bolt_rt_instr_osx DESTINATION "lib${LLVM_LIBDIR_SUFFIX}")
install(TARGETS bolt_rt_instr_osx DESTINATION "${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}")

if(NOT BOLT_BUILT_STANDALONE)
add_custom_command(TARGET bolt_rt_instr_osx POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_BINARY_DIR}/lib/libbolt_rt_instr_osx.a" "${LLVM_LIBRARY_DIR}")
endif()
endif()
2 changes: 1 addition & 1 deletion bolt/test/AArch64/update-debug-reloc.test
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# update-debug-sections option.

RUN: %clang %cflags -g %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe
RUN: llvm-bolt %t.exe -o %t --update-debug-sections
RUN: llvm-bolt %t.exe -o %t --update-debug-sections 2>&1 | FileCheck %s

CHECK: BOLT-INFO: Target architecture: aarch64
CHECK-NOT: Reloc num: 10
Expand Down
4 changes: 2 additions & 2 deletions bolt/test/AArch64/veneer-gold.s
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ dummy:
.type foo, %function
foo:
# CHECK: <foo>:
# CHECK-NEXT : {{.*}} bl {{.*}} <foo2>
# CHECK-NEXT: {{.*}} bl {{.*}} <foo2>
bl .L2
ret
.size foo, .-foo
Expand All @@ -38,7 +38,7 @@ foo:
.type foo2, %function
foo2:
# CHECK: <foo2>:
# CHECK-NEXT : {{.*}} bl {{.*}} <foo2>
# CHECK-NEXT: {{.*}} bl {{.*}} <foo2>
bl .L2
ret
.size foo2, .-foo2
Expand Down
54 changes: 54 additions & 0 deletions bolt/test/X86/Inputs/ambiguous_fragment.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#--- file1
.file "file1.cpp"
.section .text.cold
.type __func.cold.0, @function
__func.cold.0:
ud2
.size __func.cold.0, .-__func.cold.0
.section .text
.type __func, @function
__func:
ud2
.size __func, .-__func

#--- file2
.file "file2.cpp"
.section .text.cold
.type __func.cold.0, @function
__func.cold.0:
ud2
.size __func.cold.0, .-__func.cold.0
.section .text
.type __func, @function
__func:
ud2
.size __func, .-__func

#--- file3
.file "file3.cpp"
.section .text.cold
.type __func.cold.0, @function
__func.cold.0:
ud2
.size __func.cold.0, .-__func.cold.0
.section .text
.type __func, @function
__func:
ud2
.size __func, .-__func

#--- file4
.file "file4.cpp"
.section .text.cold
.type __func.cold.0, @function
__func.cold.0:
ud2
.size __func.cold.0, .-__func.cold.0
.section .text
.type __func, @function
__func:
ud2
.size __func, .-__func

#--- file5
.file "bolt-pseudo.o"
6 changes: 6 additions & 0 deletions bolt/test/X86/Inputs/ambiguous_fragment.script
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
SECTIONS {
. = 0x10000;
.text : { *(.text) }
. = 0x20000;
.text.cold : { *(.text.cold) }
}
2 changes: 1 addition & 1 deletion bolt/test/X86/Inputs/jump-table-fixed-ref-pic.s
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ main:
jae .L4
cmpq $0x1, %rdi
jne .L4
mov .Ljt_pic+8(%rip), %rax
movslq .Ljt_pic+8(%rip), %rax
lea .Ljt_pic(%rip), %rdx
add %rdx, %rax
jmpq *%rax
Expand Down
33 changes: 33 additions & 0 deletions bolt/test/X86/ambiguous_fragment.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
## This reproduces a bug with misidentification of a parent fragment.

RUN: split-file %p/Inputs/ambiguous_fragment.s %t

RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file1 -o %t1.o
RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file2 -o %t2.o
RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file3 -o %t3.o
RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file4 -o %t4.o
RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %t/file5 -o %t5.o

RUN: ld.lld %t1.o %t2.o %t3.o %t4.o %t5.o -o %t.exe \
RUN: --script %p/Inputs/ambiguous_fragment.script

RUN: llvm-objcopy %t.exe %t.exe2 \
RUN: --add-symbol=_Zfunc.cold.0=.text.cold:0x4,local,function \
RUN: --add-symbol=_Zfunc=.text:0xc,function

RUN: llvm-objdump --syms %t.exe2 | FileCheck %s --check-prefix=CHECK-SYMS

RUN: link_fdata %s %t.exe2 %t.preagg PREAGG
RUN: perf2bolt -v=1 %t.exe2 -p %t.preagg --pa -o %t.fdata -w %t.yaml | FileCheck %s

# PREAGG: B X:0 #__func# 1 0

CHECK-SYMS: 0000000000020004 {{.*}} __func.cold.0
CHECK-SYMS: 0000000000020004 {{.*}} _Zfunc.cold.0

CHECK-NOT: BOLT-ERROR: parent function not found for __func.cold.0
CHECK: BOLT-INFO: marking __func.cold.0/3(*4) as a fragment of __func/4(*3)
CHECK-NEXT: BOLT-INFO: marking __func.cold.0/1(*2) as a fragment of __func/1(*2)
CHECK-NEXT: BOLT-INFO: marking __func.cold.0/2(*2) as a fragment of __func/2(*2)
CHECK-NEXT: BOLT-INFO: marking __func.cold.0/3(*4) as a fragment of __func/3(*2)
CHECK-NEXT: BOLT-INFO: marking __func.cold.0/4(*2) as a fragment of __func/4(*3)
6 changes: 3 additions & 3 deletions bolt/test/X86/dwarf5-addr-section-reuse.s
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-main-addr-section-reuse.s -o %tmain.o
# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-helper1-addr-section-reuse.s -o %thelper1.o
# RUN: llvm-mc -dwarf-version=5 -filetype=obj -triple x86_64-unknown-linux %p/Inputs/dwarf5-helper2-addr-section-reuse.s -o %thelper2.o
# RUN: %clang %cflags -dwarf-5 %tmain.o %thelper1.o %thelper2.o -o %t.exe -Wl,-q
# RUN: %clang %cflags -dwarf-5 %thelper1.o %tmain.o %thelper2.o -o %t.exe -Wl,-q
# RUN: llvm-dwarfdump --debug-info %t.exe | FileCheck --check-prefix=PRECHECK %s
# RUN: llvm-bolt %t.exe -o %t.exe.bolt --update-debug-sections
# RUN: llvm-dwarfdump --debug-info %t.exe.bolt | FileCheck --check-prefix=POSTCHECK %s
Expand All @@ -14,5 +14,5 @@
# PRECHECK: DW_AT_addr_base (0x00000008)

# POSTCHECK: DW_AT_addr_base (0x00000008)
# POSTCHECK: DW_AT_addr_base (0x00000020)
# POSTCHECK: DW_AT_addr_base (0x00000020)
# POSTCHECK: DW_AT_addr_base (0x00000018)
# POSTCHECK: DW_AT_addr_base (0x00000008)
2 changes: 1 addition & 1 deletion bolt/test/X86/dwarf5-df-types-modify-dwo-name-mixed.test
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@
; BOLT-DWP: DW_TAG_compile_unit
; BOLT-DWP: DW_AT_dwo_name ("main.dwo.dwo")
; BOLT-DWP: DW_TAG_type_unit
; BOLT-DW-NOT: DW_AT_dwo_name
; BOLT-DWP-NOT: DW_AT_dwo_name
; BOLT-DWP: Contribution size = 68, Format = DWARF32, Version = 5
; BOLT-DWP-NEXT: "main"
; BOLT-DWP-NEXT: "int"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
# RUN: %clang %cflags %tmain.o %thelper.o -o %t.exe
# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt | FileCheck --check-prefix=POSTCHECK %s
# RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt | FileCheck --check-prefix=POSTCHECKADDR %s
# RUN: llvm-dwarfdump --show-form --verbose --debug-types %t.bolt | FileCheck --check-prefix=POSTCHECKTU %s

## This test checks that BOLT handles correctly backward and forward cross CU references
## for DWARF5 and DWARF4 with -fdebug-types-section
## for DWARF5 and DWARF4 with -fdebug-types-section and checks the address table is correct.

# POSTCHECK: version = 0x0005
# POSTCHECK: DW_TAG_type_unit
Expand All @@ -29,6 +30,15 @@
# POSTCHECK: DW_TAG_variable [20]
# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x{{[0-9a-f]+}} "Foo3a")

# POSTCHECKADDR: Addrs: [
# POSTCHECKADDR-NEXT: 0x0000000000001360
# POSTCHECKADDR-NEXT: 0x0000000000000000
# POSTCHECKADDR-NEXT: ]
# POSTCHECKADDR: Addrs: [
# POSTCHECKADDR-NEXT: 0x00000000000013e0
# POSTCHECKADDR-NEXT: 0x0000000000000000
# POSTCHECKADDR-NEXT: ]

# POSTCHECKTU: version = 0x0004
# POSTCHECKTU: DW_TAG_type_unit
# POSTCHECKTU: DW_TAG_structure_type
Expand Down
19 changes: 18 additions & 1 deletion bolt/test/X86/dwarf5-locexpr-referrence.test
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
# RUN: %clang %cflags -dwarf-5 %tmain.o %thelper.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt | FileCheck --check-prefix=CHECK %s
# RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt | FileCheck --check-prefix=CHECKADDR %s

## This test checks that we update relative DIE references with DW_OP_convert that are in locexpr.
## This test checks that we update relative DIE references with DW_OP_convert that are in locexpr
## and checks the address table is correct.

# CHECK: version = 0x0005
# CHECK: DW_TAG_variable
Expand All @@ -19,3 +21,18 @@
# CHECK-SAME: DW_OP_convert (0x00000028 -> 0x00000092)
# CHECK-SAME: DW_OP_convert (0x0000002c -> 0x00000096)
# CHECK: version = 0x0005

# CHECKADDR: Addrs: [
# CHECKADDR-NEXT: 0x0000000000001330
# CHECKADDR-NEXT: 0x0000000000000000
# CHECKADDR-NEXT: 0x0000000000001333
# CHECKADDR-NEXT: ]
# CHECKADDR: Addrs: [
# CHECKADDR-NEXT: 0x0000000000001340
# CHECKADDR-NEXT: 0x0000000000000000
# CHECKADDR-NEXT: 0x0000000000001343
# CHECKADDR-NEXT: ]
# CHECKADDR: Addrs: [
# CHECKADDR-NEXT: 0x0000000000001320
# CHECKADDR-NEXT: 0x0000000000000000
# CHECKADDR-NEXT: ]
2 changes: 1 addition & 1 deletion bolt/test/X86/dwarf5-one-loclists-two-bases.test
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
# POSTCHECK: version = 0x0005
# POSTCHECK: DW_AT_loclists_base [DW_FORM_sec_offset] (0x0000000c)
# POSTCHECK: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c)
# POSTCHECK-EMPTY
# POSTCHECK-EMPTY:
# POSTCHECK: DW_TAG_variable
# POSTCHECK: DW_AT_location [DW_FORM_loclistx]
# POSTCHECK-SAME: indexed (0x0)
Expand Down
2 changes: 1 addition & 1 deletion bolt/test/X86/dwarf5-two-loclists.test
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
# POSTCHECK: version = 0x0005
# POSTCHECK: DW_AT_loclists_base [DW_FORM_sec_offset] (0x0000000c)
# POSTCHECK: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c)
# POSTCHECK-EMPTY
# POSTCHECK-EMPTY:
# POSTCHECK: DW_TAG_variable
# POSTCHECK: DW_AT_location [DW_FORM_loclistx]
# POSTCHECK-SAME: indexed (0x0)
Expand Down
4 changes: 2 additions & 2 deletions bolt/test/X86/dwarf5-two-rnglists.test
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
# POSTCHECK-NEXT: DW_AT_addr_base [DW_FORM_sec_offset] (0x00000008)
# POSTCHECK-NEXT: DW_AT_loclists_base [DW_FORM_sec_offset] (0x0000000c)
# POSTCHECK-NEXT: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c)
# POSTCHECK-EMPTY
# POSTCHECK-EMPTY:
# POSTCHECK: DW_TAG_subprogram
# POSTCHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx]
# POSTCHECK-SAME: indexed (0x1)
Expand All @@ -75,7 +75,7 @@
# POSTCHECK-NEXT: DW_AT_addr_base [DW_FORM_sec_offset] (0x00000030)
# POSTCHECK-NEXT: DW_AT_loclists_base [DW_FORM_sec_offset] (0x00000045)
# POSTCHECK-NEXT: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x00000035)
# POSTCHECK-EMPTY
# POSTCHECK-EMPTY:

# POSTCHECK: DW_TAG_subprogram
# POSTCHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx]
Expand Down
12 changes: 8 additions & 4 deletions bolt/test/X86/jump-table-fixed-ref-pic.test
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
## Verify that BOLT detects fixed destination of indirect jump for PIC
## case.

XFAIL: *

RUN: %clang %cflags -no-pie %S/Inputs/jump-table-fixed-ref-pic.s -Wl,-q -o %t
RUN: llvm-bolt %t --relocs -o %t.null 2>&1 | FileCheck %s
RUN: llvm-bolt %t --relocs -o %t.null -print-cfg 2>&1 | FileCheck %s

CHECK: BOLT-INFO: fixed PIC indirect branch detected in main {{.*}} the destination value is 0x[[#TGT:]]
CHECK: Binary Function "main" after building cfg

CHECK: BOLT-INFO: fixed indirect branch detected in main
CHECK: movslq ".rodata/1"+8(%rip), %rax
CHECK-NEXT: leaq ".rodata/1"(%rip), %rdx
CHECK-NEXT: addq %rdx, %rax
CHECK-NEXT: jmpq *%rax # UNKNOWN CONTROL FLOW
103 changes: 103 additions & 0 deletions bolt/test/X86/match-functions-with-call-graph.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
## Tests function matching by call graph (--match-with-call-graph) in inferStaleProfile.

# REQUIRES: system-linux
# RUN: split-file %s %t
# RUN: %clang %cflags %t/main.cpp -o %t.exe -Wl,-q -nostdlib
# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml --profile-ignore-hash -v=1 \
# RUN: --dyno-stats --print-cfg --infer-stale-profile=1 --match-with-call-graph 2>&1 | FileCheck %s

# CHECK: BOLT-INFO: matched 1 functions with call graph

#--- main.cpp
void foo() {}

void bar() {}

void qux() {
foo();
bar();
}

void fred() {
foo();
qux();
bar();
bar();
foo();
}

int main() {
return 0;
}

#--- yaml
---
header:
profile-version: 1
binary-name: 'match-functions-with-calls-as-anchors.s.tmp.exe'
binary-build-id: '<unknown>'
profile-flags: [ lbr ]
profile-origin: branch profile reader
profile-events: ''
dfs-order: false
hash-func: xxh3
functions:
- name: main
fid: 0
hash: 0x0000000000000001
exec: 1
nblocks: 6
blocks:
- bid: 1
hash: 0x0000000000000001
insns: 1
succ: [ { bid: 3, cnt: 1} ]
- name: _Z3foov
fid: 1
hash: 0x0000000000000002
exec: 1
nblocks: 6
blocks:
- bid: 1
hash: 0x0000000000000002
insns: 1
succ: [ { bid: 3, cnt: 1} ]

- name: _Z3barv
fid: 2
hash: 0x0000000000000003
exec: 1
nblocks: 6
blocks:
- bid: 1
hash: 0x0000000000000003
insns: 1
succ: [ { bid: 3, cnt: 1} ]
- name: _Z3quxv
fid: 3
hash: 0x0000000000000004
exec: 4
nblocks: 6
blocks:
- bid: 1
hash: 0x0000000000000004
insns: 1
succ: [ { bid: 3, cnt: 1} ]
calls: [ { off : 0, fid : 1, cnt : 0},
{ off : 0, fid : 2, cnt : 0} ]
- name: _Z4bazv
fid: 4
hash: 0x0000000000000005
exec: 1
nblocks: 6
blocks:
- bid: 1
hash: 0x0000000000000005
insns: 1
succ: [ { bid: 3, cnt: 1} ]
calls: [ { off : 0, fid : 3, cnt : 0},
{ off : 0, fid : 1, cnt : 0},
{ off : 0, fid : 2, cnt : 0},
{ off : 0, fid : 1, cnt : 0},
{ off : 0, fid : 2, cnt : 0} ]
...
2 changes: 1 addition & 1 deletion bolt/test/X86/match-functions-with-calls-as-anchors.test
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Tests blocks matching by called function names in inferStaleProfile.

# REQUIRES: system-linux
# REQUIRES: system-linux, asserts
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
Expand Down
39 changes: 38 additions & 1 deletion bolt/test/X86/pseudoprobe-decoding-inline.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,42 @@
# REQUIRES: system-linux
# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt 2>&1 | FileCheck %s
# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt --lite=0 --enable-bat 2>&1 | FileCheck %s

# PREAGG: B X:0 #foo# 1 0
# PREAGG: B X:0 #bar# 1 0
# PREAGG: B X:0 #main# 1 0
## Check pseudo-probes in regular YAML profile (non-BOLTed binary)
# RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin %t.preagg PREAGG
# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-use-pseudo-probes
# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML
## Check pseudo-probes in BAT YAML profile (BOLTed binary)
# RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG
# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-use-pseudo-probes
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
# CHECK-YAML: name: bar
# CHECK-YAML: - bid: 0
# CHECK-YAML: pseudo_probes: [ { guid: 0xE413754A191DB537, id: 1, type: 0 }, { guid: 0xE413754A191DB537, id: 4, type: 0 } ]
# CHECK-YAML: guid: 0xE413754A191DB537
# CHECK-YAML: pseudo_probe_desc_hash: 0x10E852DA94
#
# CHECK-YAML: name: foo
# CHECK-YAML: - bid: 0
# CHECK-YAML: pseudo_probes: [ { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ]
# CHECK-YAML: guid: 0x5CF8C24CDB18BDAC
# CHECK-YAML: pseudo_probe_desc_hash: 0x200205A19C5B4
#
# CHECK-YAML: name: main
# CHECK-YAML: - bid: 0
# CHECK-YAML: pseudo_probes: [ { guid: 0xDB956436E78DD5FA, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ]
# CHECK-YAML: guid: 0xDB956436E78DD5FA
# CHECK-YAML: pseudo_probe_desc_hash: 0x10000FFFFFFFF
#
## Check that without --profile-use-pseudo-probes option, no pseudo probes are
## generated
# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
# CHECK-NO-OPT-NOT: pseudo_probes
# CHECK-NO-OPT-NOT: guid
# CHECK-NO-OPT-NOT: pseudo_probe_desc_hash

CHECK: Report of decoding input pseudo probe binaries

Expand Down
95 changes: 95 additions & 0 deletions bolt/test/X86/three-way-split-jt.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
## This reproduces an issue where the function is split into three fragments
## and all fragments access the same jump table.

# REQUIRES: system-linux

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -o %t.out -v=1 -print-only=main.warm -print-cfg 2>&1 | FileCheck %s

# CHECK-DAG: BOLT-INFO: marking main.warm as a fragment of main
# CHECK-DAG: BOLT-INFO: marking main.cold as a fragment of main
# CHECK-DAG: BOLT-INFO: processing main.warm as a sibling of non-ignored function
# CHECK-DAG: BOLT-INFO: processing main.cold as a sibling of non-ignored function
# CHECK-DAG: BOLT-WARNING: Ignoring main.cold
# CHECK-DAG: BOLT-WARNING: Ignoring main.warm
# CHECK-DAG: BOLT-WARNING: Ignoring main
# CHECK: BOLT-WARNING: skipped 3 functions due to cold fragments

# CHECK: PIC Jump table JUMP_TABLE for function main, main.warm, main.cold
# CHECK-NEXT: 0x0000 : __ENTRY_main@0x[[#]]
# CHECK-NEXT: 0x0004 : __ENTRY_main@0x[[#]]
# CHECK-NEXT: 0x0008 : __ENTRY_main.cold@0x[[#]]
# CHECK-NEXT: 0x000c : __ENTRY_main@0x[[#]]
.globl main
.type main, %function
.p2align 2
main:
LBB0:
andl $0xf, %ecx
cmpb $0x4, %cl
## exit through ret
ja LBB3

## jump table dispatch, jumping to label indexed by val in %ecx
LBB1:
leaq JUMP_TABLE(%rip), %r8
movzbl %cl, %ecx
movslq (%r8,%rcx,4), %rax
addq %rax, %r8
jmpq *%r8

LBB2:
xorq %rax, %rax
LBB3:
addq $0x8, %rsp
ret
.size main, .-main

.globl main.warm
.type main.warm, %function
.p2align 2
main.warm:
LBB20:
andl $0xb, %ebx
cmpb $0x1, %cl
## exit through ret
ja LBB23

## jump table dispatch, jumping to label indexed by val in %ecx
LBB21:
leaq JUMP_TABLE(%rip), %r8
movzbl %cl, %ecx
movslq (%r8,%rcx,4), %rax
addq %rax, %r8
jmpq *%r8

LBB22:
xorq %rax, %rax
LBB23:
addq $0x8, %rsp
ret
.size main.warm, .-main.warm

## cold fragment is only reachable through jump table
.globl main.cold
.type main.cold, %function
main.cold:
leaq JUMP_TABLE(%rip), %r8
movzbl %cl, %ecx
movslq (%r8,%rcx,4), %rax
addq %rax, %r8
jmpq *%r8
LBB4:
callq abort
.size main.cold, .-main.cold

.rodata
## jmp table, entries must be R_X86_64_PC32 relocs
.globl JUMP_TABLE
JUMP_TABLE:
.long LBB2-JUMP_TABLE
.long LBB3-JUMP_TABLE
.long LBB4-JUMP_TABLE
.long LBB3-JUMP_TABLE
14 changes: 13 additions & 1 deletion bolt/test/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,22 @@

tool_dirs = [config.llvm_tools_dir, config.test_source_root]

# Extra arguments for the llvm-bolt tool substitution, collected from the
# site configuration.
llvm_bolt_args = []

# Only pass a runtime library path when the site config provides a non-empty one.
if config.libbolt_rt_instr:
llvm_bolt_args.append(f"--runtime-instrumentation-lib={config.libbolt_rt_instr}")

if config.libbolt_rt_hugify:
llvm_bolt_args.append(f"--runtime-hugify-lib={config.libbolt_rt_hugify}")

tools = [
ToolSubst("llc", unresolved="fatal"),
ToolSubst("llvm-dwarfdump", unresolved="fatal"),
ToolSubst("llvm-bolt", unresolved="fatal"),
ToolSubst(
"llvm-bolt",
unresolved="fatal",
extra_args=llvm_bolt_args,
),
ToolSubst("llvm-boltdiff", unresolved="fatal"),
ToolSubst("llvm-bolt-heatmap", unresolved="fatal"),
ToolSubst("llvm-bat-dump", unresolved="fatal"),
Expand Down
2 changes: 2 additions & 0 deletions bolt/test/lit.site.cfg.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ config.bolt_clang = "@BOLT_CLANG_EXE@"
config.bolt_lld = "@BOLT_LLD_EXE@"
config.targets_to_build = "@BOLT_TARGETS_TO_BUILD@"
config.gnu_ld = "@GNU_LD_EXECUTABLE@"
config.libbolt_rt_instr = "@LIBBOLT_RT_INSTR@"
config.libbolt_rt_hugify = "@LIBBOLT_RT_HUGIFY@"

import lit.llvm
lit.llvm.initialize(lit_config, config)
Expand Down
2 changes: 1 addition & 1 deletion bolt/test/perf2bolt/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import shutil

if shutil.which("perf") != None:
if shutil.which("perf") is not None:
config.available_features.add("perf")
4 changes: 2 additions & 2 deletions bolt/utils/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:20.04 AS builder
FROM ubuntu:24.04 AS builder

ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=UTC
Expand Down Expand Up @@ -26,6 +26,6 @@ RUN mkdir build && \
ninja install-llvm-bolt install-perf2bolt install-merge-fdata \
install-llvm-boltdiff install-bolt_rt

FROM ubuntu:20.04
FROM ubuntu:24.04

COPY --from=builder /home/bolt/install /usr/local
13 changes: 13 additions & 0 deletions clang-tools-extra/clang-doc/HTMLGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "llvm/Support/JSON.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <optional>
#include <string>

Expand Down Expand Up @@ -979,6 +980,18 @@ static llvm::Error serializeIndex(ClangDocContext &CDCtx) {
"error creating index file: " +
FileErr.message());
}
llvm::SmallString<128> RootPath(CDCtx.OutDirectory);
if (llvm::sys::path::is_relative(RootPath)) {
llvm::sys::fs::make_absolute(RootPath);
}
// Replace every backslash in the path with a forward slash. It shouldn't
// matter when rendering the webpage in a web browser. This helps to prevent
// the JavaScript from treating backslashes as escape sequences and
// introducing bad paths in the URLs.
std::string RootPathEscaped = RootPath.str().str();
std::replace(RootPathEscaped.begin(), RootPathEscaped.end(), '\\', '/');
OS << "var RootPath = \"" << RootPathEscaped << "\";\n";

CDCtx.Idx.sort();
llvm::json::OStream J(OS, 2);
std::function<void(Index)> IndexToJSON = [&](const Index &I) {
Expand Down
Loading