23 changes: 7 additions & 16 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
Expand Down Expand Up @@ -2404,32 +2403,23 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
Streamer->emitLabel(SplitStartLabel);
emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
Streamer->emitLabel(SplitEndLabel);
// To avoid calling MCObjectStreamer::flushPendingLabels() which is
// private
Streamer->emitBytes(StringRef(""));
Streamer->switchSection(Section);
}

// To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
// MCStreamer::Finish(), which does more than we want
Streamer->emitBytes(StringRef(""));

MCAssembler &Assembler =
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
MCAsmLayout Layout(Assembler);
Assembler.layout(Layout);
Assembler.layout();

// Obtain fragment sizes.
std::vector<uint64_t> FragmentSizes;
// Main fragment size.
const uint64_t HotSize =
Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) -
Assembler.getSymbolOffset(*StartLabel);
FragmentSizes.push_back(HotSize);
// Split fragment sizes.
uint64_t ColdSize = 0;
for (const auto &Labels : SplitLabels) {
uint64_t Size = Layout.getSymbolOffset(*Labels.second) -
Layout.getSymbolOffset(*Labels.first);
uint64_t Size = Assembler.getSymbolOffset(*Labels.second) -
Assembler.getSymbolOffset(*Labels.first);
FragmentSizes.push_back(Size);
ColdSize += Size;
}
Expand All @@ -2439,7 +2429,8 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
for (FunctionFragment &FF : BF.getLayout().fragments()) {
BinaryBasicBlock *PrevBB = nullptr;
for (BinaryBasicBlock *BB : FF) {
const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel()));
const uint64_t BBStartOffset =
Assembler.getSymbolOffset(*(BB->getLabel()));
BB->setOutputStartAddress(BBStartOffset);
if (PrevBB)
PrevBB->setOutputEndAddress(BBStartOffset);
Expand Down
37 changes: 0 additions & 37 deletions bolt/lib/Core/BinaryEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,6 @@ extern cl::opt<bool> PreserveBlocksAlignment;
cl::opt<bool> AlignBlocks("align-blocks", cl::desc("align basic blocks"),
cl::cat(BoltOptCategory));

cl::opt<MacroFusionType>
AlignMacroOpFusion("align-macro-fusion",
cl::desc("fix instruction alignment for macro-fusion (x86 relocation mode)"),
cl::init(MFT_HOT),
cl::values(clEnumValN(MFT_NONE, "none",
"do not insert alignment no-ops for macro-fusion"),
clEnumValN(MFT_HOT, "hot",
"only insert alignment no-ops on hot execution paths (default)"),
clEnumValN(MFT_ALL, "all",
"always align instructions to allow macro-fusion")),
cl::ZeroOrMore,
cl::cat(BoltRelocCategory));

static cl::list<std::string>
BreakFunctionNames("break-funcs",
cl::CommaSeparated,
Expand Down Expand Up @@ -453,20 +440,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
Streamer.emitLabel(EntrySymbol);
}

// Check if special alignment for macro-fusion is needed.
bool MayNeedMacroFusionAlignment =
(opts::AlignMacroOpFusion == MFT_ALL) ||
(opts::AlignMacroOpFusion == MFT_HOT && BB->getKnownExecutionCount());
BinaryBasicBlock::const_iterator MacroFusionPair;
if (MayNeedMacroFusionAlignment) {
MacroFusionPair = BB->getMacroOpFusionPair();
if (MacroFusionPair == BB->end())
MayNeedMacroFusionAlignment = false;
}

SMLoc LastLocSeen;
// Remember if the last instruction emitted was a prefix.
bool LastIsPrefix = false;
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
MCInst &Instr = *I;

Expand All @@ -479,16 +453,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
continue;
}

// Handle macro-fusion alignment. If we emitted a prefix as
// the last instruction, we should've already emitted the associated
// alignment hint, so don't emit it twice.
if (MayNeedMacroFusionAlignment && !LastIsPrefix &&
I == MacroFusionPair) {
// This assumes the second instruction in the macro-op pair will get
// assigned to its own MCRelaxableFragment. Since all JCC instructions
// are relaxable, we should be safe.
}

if (!EmitCodeOnly) {
// A symbol to be emitted before the instruction to mark its location.
MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr);
Expand Down Expand Up @@ -525,7 +489,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
}

Streamer.emitInstruction(Instr, *BC.STI);
LastIsPrefix = BC.MIB->isPrefix(Instr);
}
}

Expand Down
54 changes: 23 additions & 31 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
else
OS << "<unknown>\n";
}
if (BB->getCFIState() >= 0)
if (hasCFI())
OS << " CFI State : " << BB->getCFIState() << '\n';
if (opts::EnableBAT) {
OS << " Input offset: 0x" << Twine::utohexstr(BB->getInputOffset())
Expand Down Expand Up @@ -611,7 +611,7 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
}

// In CFG_Finalized state we can miscalculate CFI state at exit.
if (CurrentState == State::CFG) {
if (CurrentState == State::CFG && hasCFI()) {
const int32_t CFIStateAtExit = BB->getCFIStateAtExit();
if (CFIStateAtExit >= 0)
OS << " CFI State: " << CFIStateAtExit << '\n';
Expand Down Expand Up @@ -1276,6 +1276,10 @@ Error BinaryFunction::disassemble() {
}
}

bool IsUnsupported = BC.MIB->isUnsupportedInstruction(Instruction);
if (IsUnsupported)
setIgnored();

if (MIB->isBranch(Instruction) || MIB->isCall(Instruction)) {
uint64_t TargetAddress = 0;
if (MIB->evaluateBranch(Instruction, AbsoluteInstrAddr, Size,
Expand All @@ -1289,12 +1293,10 @@ Error BinaryFunction::disassemble() {
const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
MCSymbol *TargetSymbol = nullptr;

if (!BC.MIB->isReversibleBranch(Instruction)) {
setIgnored();
if (BinaryFunction *TargetFunc =
if (IsUnsupported)
if (auto *TargetFunc =
BC.getBinaryFunctionContainingAddress(TargetAddress))
TargetFunc->setIgnored();
}

if (IsCall && containsAddress(TargetAddress)) {
if (TargetAddress == getAddress()) {
Expand Down Expand Up @@ -2277,8 +2279,6 @@ void BinaryFunction::postProcessCFG() {
postProcessBranches();
}

calculateMacroOpFusionStats();

// The final cleanup of intermediate structures.
clearList(IgnoredBranches);

Expand All @@ -2295,29 +2295,6 @@ void BinaryFunction::postProcessCFG() {
"invalid CFG detected after post-processing");
}

// Accumulates statistics about macro-op fusion pairs of this function whose
// second instruction starts at an address that is a multiple of 64. The
// counters BC.Stats.MissedMacroFusionPairs and
// BC.Stats.MissedMacroFusionExecCount are updated; nothing is done for
// non-x86 binaries.
void BinaryFunction::calculateMacroOpFusionStats() {
  if (!getBinaryContext().isX86())
    return;

  for (const BinaryBasicBlock &BB : blocks()) {
    // end() signals that the block has no macro-op fusion pair.
    const auto FusionPair = BB.getMacroOpFusionPair();
    if (FusionPair == BB.end())
      continue;

    // Check offset of the second instruction.
    // FIXME: arch-specific.
    const uint32_t SecondOffset =
        BC.MIB->getOffsetWithDefault(*std::next(FusionPair), 0);
    // A zero offset (also the default when none is recorded) is not counted.
    if (SecondOffset == 0)
      continue;

    // Only a second instruction starting on a 64-byte boundary is counted.
    const auto SecondAddress = getAddress() + SecondOffset;
    if (SecondAddress % 64 != 0)
      continue;

    LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
                      << Twine::utohexstr(SecondAddress) << " in function "
                      << *this << "; executed " << BB.getKnownExecutionCount()
                      << " times.\n");
    ++BC.Stats.MissedMacroFusionPairs;
    BC.Stats.MissedMacroFusionExecCount += BB.getKnownExecutionCount();
  }
}

void BinaryFunction::removeTagsFromProfile() {
for (BinaryBasicBlock *BB : BasicBlocks) {
if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
Expand Down Expand Up @@ -2561,6 +2538,7 @@ struct CFISnapshot {
case MCCFIInstruction::OpWindowSave:
case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpLLVMDefAspaceCfa:
case MCCFIInstruction::OpLabel:
llvm_unreachable("unsupported CFI opcode");
break;
case MCCFIInstruction::OpRememberState:
Expand Down Expand Up @@ -2698,6 +2676,7 @@ struct CFISnapshotDiff : public CFISnapshot {
case MCCFIInstruction::OpWindowSave:
case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpLLVMDefAspaceCfa:
case MCCFIInstruction::OpLabel:
llvm_unreachable("unsupported CFI opcode");
return false;
case MCCFIInstruction::OpRememberState:
Expand Down Expand Up @@ -2846,6 +2825,7 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
case MCCFIInstruction::OpWindowSave:
case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpLLVMDefAspaceCfa:
case MCCFIInstruction::OpLabel:
llvm_unreachable("unsupported CFI opcode");
break;
case MCCFIInstruction::OpGnuArgsSize:
Expand Down Expand Up @@ -4472,6 +4452,18 @@ MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
}
}

// Returns the instruction with the greatest start offset that is less than or
// equal to \p Offset, or nullptr when \p Offset is greater than the function
// size. Note that an \p Offset equal to Size is not rejected. Must only be
// called while the function is in the Disassembled state.
MCInst *BinaryFunction::getInstructionContainingOffset(uint64_t Offset) {
  assert(CurrentState == State::Disassembled && "Wrong function state");

  if (Offset > Size)
    return nullptr;

  // upper_bound() yields the first instruction starting strictly after
  // Offset, so the wanted instruction is its immediate predecessor.
  auto NextInstr = Instructions.upper_bound(Offset);
  assert(NextInstr != Instructions.begin() &&
         "First instruction not at offset 0");
  return &std::prev(NextInstr)->second;
}

void BinaryFunction::printLoopInfo(raw_ostream &OS) const {
if (!opts::shouldPrint(*this))
return;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//===- bolt/Passes/BinaryFunctionCallGraph.cpp ----------------------------===//
//===- bolt/Core/BinaryFunctionCallGraph.cpp ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
Expand All @@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/BinaryFunctionCallGraph.h"
#include "bolt/Core/BinaryFunctionCallGraph.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
Expand Down
3 changes: 3 additions & 0 deletions bolt/lib/Core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@ add_llvm_library(LLVMBOLTCore
BinaryData.cpp
BinaryEmitter.cpp
BinaryFunction.cpp
BinaryFunctionCallGraph.cpp
BinaryFunctionProfile.cpp
BinarySection.cpp
CallGraph.cpp
CallGraphWalker.cpp
DebugData.cpp
DebugNames.cpp
DIEBuilder.cpp
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//===- bolt/Passes/CallGraph.cpp ------------------------------------------===//
//===- bolt/Core/CallGraph.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
Expand All @@ -10,16 +10,16 @@
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/CallGraph.h"
#include "bolt/Core/CallGraph.h"

#define DEBUG_TYPE "callgraph"

#if defined(__x86_64__) && !defined(_MSC_VER)
# if (!defined USE_SSECRC)
# define USE_SSECRC
# endif
#if (!defined USE_SSECRC)
#define USE_SSECRC
#endif
#else
# undef USE_SSECRC
#undef USE_SSECRC
#endif

static LLVM_ATTRIBUTE_UNUSED inline size_t hash_int64_fallback(int64_t k) {
Expand Down Expand Up @@ -50,7 +50,7 @@ static inline size_t hash_int64_pair(int64_t k1, int64_t k2) {
// crc32 is commutative, so we need to perturb k1 so that (k1, k2) hashes
// differently from (k2, k1).
k1 += k1;
__asm("crc32q %1, %0\n" : "+r" (k1) : "rm"(k2));
__asm("crc32q %1, %0\n" : "+r"(k1) : "rm"(k2));
return k1;
#else
return (hash_int64(k1) << 1) ^ hash_int64(k2);
Expand Down Expand Up @@ -118,5 +118,5 @@ void CallGraph::adjustArcWeights() {
}
}

}
}
} // namespace bolt
} // namespace llvm
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//===- bolt/Passes/CallGraphWalker.cpp ------------------------------------===//
//===- bolt/Core/CallGraphWalker.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
Expand All @@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/CallGraphWalker.h"
#include "bolt/Passes/BinaryFunctionCallGraph.h"
#include "bolt/Core/CallGraphWalker.h"
#include "bolt/Core/BinaryFunctionCallGraph.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"
#include <queue>
Expand Down
6 changes: 6 additions & 0 deletions bolt/lib/Core/DebugData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,12 @@ uint64_t DebugRangesSectionWriter::getSectionOffset() {
return SectionOffset;
}

// Streams the contents of \p CUBuffer into RangesStream and then sets
// SectionOffset to the current size of RangesBuffer.
void DebugRangesSectionWriter::appendToRangeBuffer(
const DebugBufferVector &CUBuffer) {
*RangesStream << CUBuffer;
// NOTE(review): presumably RangesStream is backed by RangesBuffer, so the
// size read here already includes the bytes written above -- confirm.
SectionOffset = RangesBuffer->size();
}

DebugAddrWriter *DebugRangeListsSectionWriter::AddrWriter = nullptr;

uint64_t DebugRangeListsSectionWriter::addRanges(
Expand Down
15 changes: 12 additions & 3 deletions bolt/lib/Core/DebugNames.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,11 @@ void DWARF5AcceleratorTable::addUnit(DWARFUnit &Unit,
auto Iter = CUOffsetsToPatch.insert({*DWOID, CUList.size()});
if (Iter.second)
CUList.push_back(BADCUOFFSET);
ForeignTUList.push_back(cast<DWARFTypeUnit>(&Unit)->getTypeHash());
const uint64_t TUHash = cast<DWARFTypeUnit>(&Unit)->getTypeHash();
if (!TUHashToIndexMap.count(TUHash)) {
TUHashToIndexMap.insert({TUHash, ForeignTUList.size()});
ForeignTUList.push_back(TUHash);
}
} else {
LocalTUList.push_back(CurrentUnitOffset);
}
Expand Down Expand Up @@ -231,8 +235,13 @@ DWARF5AcceleratorTable::addAccelTableEntry(
IsTU = Unit.isTypeUnit();
DieTag = Die.getTag();
if (IsTU) {
if (DWOID)
return ForeignTUList.size() - 1;
if (DWOID) {
const uint64_t TUHash = cast<DWARFTypeUnit>(&Unit)->getTypeHash();
auto Iter = TUHashToIndexMap.find(TUHash);
assert(Iter != TUHashToIndexMap.end() &&
"Could not find TU hash in map");
return Iter->second;
}
return LocalTUList.size() - 1;
}
return CUList.size() - 1;
Expand Down
20 changes: 0 additions & 20 deletions bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ namespace opts {
extern cl::OptionCategory BoltCategory;
extern cl::OptionCategory BoltOptCategory;

extern cl::opt<bolt::MacroFusionType> AlignMacroOpFusion;
extern cl::opt<unsigned> Verbosity;
extern cl::opt<bool> EnableBAT;
extern cl::opt<unsigned> ExecutionCountThreshold;
Expand Down Expand Up @@ -1637,25 +1636,6 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
}
}

// Print information on missed macro-fusion opportunities seen on input.
if (BC.Stats.MissedMacroFusionPairs) {
BC.outs() << format(
"BOLT-INFO: the input contains %zu (dynamic count : %zu)"
" opportunities for macro-fusion optimization",
BC.Stats.MissedMacroFusionPairs, BC.Stats.MissedMacroFusionExecCount);
switch (opts::AlignMacroOpFusion) {
case MFT_NONE:
BC.outs() << ". Use -align-macro-fusion to fix.\n";
break;
case MFT_HOT:
BC.outs() << ". Will fix instances on a hot path.\n";
break;
case MFT_ALL:
BC.outs() << " that are going to be fixed\n";
break;
}
}

// Collect and print information about suboptimal code layout on input.
if (opts::ReportBadLayout) {
std::vector<BinaryFunction *> SuboptimalFuncs;
Expand Down
3 changes: 0 additions & 3 deletions bolt/lib/Passes/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,8 @@ add_llvm_library(LLVMBOLTPasses
AllocCombiner.cpp
AsmDump.cpp
BinaryPasses.cpp
BinaryFunctionCallGraph.cpp
CMOVConversion.cpp
CacheMetrics.cpp
CallGraph.cpp
CallGraphWalker.cpp
DataflowAnalysis.cpp
DataflowInfoManager.cpp
FrameAnalysis.cpp
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Passes/FrameAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//

#include "bolt/Passes/FrameAnalysis.h"
#include "bolt/Core/CallGraphWalker.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Passes/CallGraphWalker.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Timer.h"
#include <fstream>
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Passes/FrameOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//

#include "bolt/Passes/FrameOptimizer.h"
#include "bolt/Core/BinaryFunctionCallGraph.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Passes/BinaryFunctionCallGraph.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Passes/ShrinkWrapping.h"
#include "bolt/Passes/StackAvailableExpressions.h"
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Passes/IndirectCallPromotion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//

#include "bolt/Passes/IndirectCallPromotion.h"
#include "bolt/Passes/BinaryFunctionCallGraph.h"
#include "bolt/Core/BinaryFunctionCallGraph.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Passes/Inliner.h"
#include "llvm/ADT/STLExtras.h"
Expand Down
3 changes: 1 addition & 2 deletions bolt/lib/Passes/Instrumentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,8 +479,7 @@ void Instrumentation::instrumentFunction(BinaryFunction &Function,
HasJumpTable = true;
else if (BC.MIB->isUnconditionalBranch(Inst))
HasUnconditionalBranch = true;
else if ((!BC.MIB->isCall(Inst) && !BC.MIB->isConditionalBranch(Inst)) ||
!BC.MIB->isReversibleBranch(Inst))
else if (!(BC.MIB->isCall(Inst) || BC.MIB->isConditionalBranch(Inst)))
continue;

const uint32_t FromOffset = *BC.MIB->getOffset(Inst);
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Passes/JTFootprintReduction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//

#include "bolt/Passes/JTFootprintReduction.h"
#include "bolt/Passes/BinaryFunctionCallGraph.h"
#include "bolt/Core/BinaryFunctionCallGraph.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "llvm/Support/CommandLine.h"

Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Passes/RegAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

#include "bolt/Passes/RegAnalysis.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Passes/CallGraphWalker.h"
#include "bolt/Core/CallGraphWalker.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"

Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Passes/RegReAssign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//

#include "bolt/Passes/RegReAssign.h"
#include "bolt/Core/BinaryFunctionCallGraph.h"
#include "bolt/Core/MCPlus.h"
#include "bolt/Passes/BinaryFunctionCallGraph.h"
#include "bolt/Passes/DataflowAnalysis.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Utils/Utils.h"
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Passes/StokeInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//

#include "bolt/Passes/StokeInfo.h"
#include "bolt/Passes/BinaryFunctionCallGraph.h"
#include "bolt/Core/BinaryFunctionCallGraph.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "llvm/Support/CommandLine.h"

Expand Down
3 changes: 3 additions & 0 deletions bolt/lib/Profile/BoltAddressTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
LLVM_DEBUG(dbgs() << " Cold part\n");
for (const FunctionFragment &FF :
Function.getLayout().getSplitFragments()) {
// Skip empty fragments to avoid adding zero-address entries to maps.
if (FF.empty())
continue;
ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
Map.clear();
for (const BinaryBasicBlock *const BB : FF)
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Profile/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ add_llvm_library(LLVMBOLTProfile
DISABLE_LLVM_LINK_LLVM_DYLIB

LINK_COMPONENTS
Demangle
Support
TransformUtils
)
Expand Down
255 changes: 224 additions & 31 deletions bolt/lib/Profile/YAMLProfileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Profile/ProfileYAMLMapping.h"
#include "bolt/Utils/NameResolver.h"
#include "bolt/Utils/Utils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/edit_distance.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/CommandLine.h"
#include <cstdlib>

using namespace llvm;
Expand All @@ -22,12 +25,23 @@ namespace opts {
extern cl::opt<unsigned> Verbosity;
extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> InferStaleProfile;
extern cl::opt<bool> Lite;

cl::opt<unsigned> NameSimilarityFunctionMatchingThreshold(
"name-similarity-function-matching-threshold",
cl::desc("Match functions using namespace and edit distance"), cl::init(0),
cl::Hidden, cl::cat(BoltOptCategory));

static llvm::cl::opt<bool>
IgnoreHash("profile-ignore-hash",
cl::desc("ignore hash while reading function profile"),
cl::Hidden, cl::cat(BoltOptCategory));

llvm::cl::opt<bool>
MatchProfileWithFunctionHash("match-profile-with-function-hash",
cl::desc("Match profile with function hash"),
cl::Hidden, cl::cat(BoltOptCategory));

llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
cl::desc("use DFS order for YAML profile"),
cl::Hidden, cl::cat(BoltOptCategory));
Expand Down Expand Up @@ -328,7 +342,16 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
return Error::success();
}

// Decides whether \p Profile can be applied to \p BF. By default the profile
// hash has to equal the function hash; when opts::IgnoreHash is set, only the
// basic block counts are compared instead.
bool YAMLProfileReader::profileMatches(
    const yaml::bolt::BinaryFunctionProfile &Profile, const BinaryFunction &BF) {
  if (!opts::IgnoreHash) {
    const uint64_t FunctionHash = static_cast<uint64_t>(BF.getHash());
    return Profile.Hash == FunctionHash;
  }
  return Profile.NumBasicBlocks == BF.size();
}

bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
if (opts::MatchProfileWithFunctionHash)
return true;
for (StringRef Name : BF.getNames())
if (ProfileFunctionNames.contains(Name))
return true;
Expand All @@ -342,30 +365,9 @@ bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
return false;
}

Error YAMLProfileReader::readProfile(BinaryContext &BC) {
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: YAML profile with hash: ";
switch (YamlBP.Header.HashFunction) {
case HashFunction::StdHash:
outs() << "std::hash\n";
break;
case HashFunction::XXH3:
outs() << "xxh3\n";
break;
}
}
YamlProfileToFunction.resize(YamlBP.Functions.size() + 1);

auto profileMatches = [](const yaml::bolt::BinaryFunctionProfile &Profile,
BinaryFunction &BF) {
if (opts::IgnoreHash)
return Profile.NumBasicBlocks == BF.size();
return Profile.Hash == static_cast<uint64_t>(BF.getHash());
};

// We have to do 2 passes since LTO introduces an ambiguity in function
// names. The first pass assigns profiles that match 100% by name and
// by hash. The second pass allows name ambiguity for LTO private functions.
size_t YAMLProfileReader::matchWithExactName() {
size_t MatchedWithExactName = 0;
// This first pass assigns profiles that match 100% by name and by hash.
for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
if (!BF)
continue;
Expand All @@ -374,15 +376,43 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
// the profile.
Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);

// Recompute hash once per function.
if (!opts::IgnoreHash)
Function.computeHash(YamlBP.Header.IsDFSOrder,
YamlBP.Header.HashFunction);

if (profileMatches(YamlBF, Function))
if (profileMatches(YamlBF, Function)) {
matchProfileToFunction(YamlBF, Function);
++MatchedWithExactName;
}
}
return MatchedWithExactName;
}

// Matches not-yet-used profiles to binary functions purely by hash, which
// serves to match identical functions that were renamed. Does nothing unless
// opts::MatchProfileWithFunctionHash is set. Returns the number of profiles
// matched here.
size_t YAMLProfileReader::matchWithHash(BinaryContext &BC) {
  if (!opts::MatchProfileWithFunctionHash)
    return 0;

  // Index binary functions by hash. Several functions may share a hash; each
  // assignment overwrites the previous one, so the function visited last for
  // a given hash is the one kept in the map.
  DenseMap<size_t, BinaryFunction *> StrictHashToBF;
  StrictHashToBF.reserve(BC.getBinaryFunctions().size());
  for (auto &[_, Function] : BC.getBinaryFunctions())
    StrictHashToBF[Function.getHash()] = &Function;

  size_t MatchedWithHash = 0;
  for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
    // Profiles already claimed are left alone.
    if (YamlBF.Used)
      continue;

    auto It = StrictHashToBF.find(YamlBF.Hash);
    if (It == StrictHashToBF.end())
      continue;

    // Skip functions that already have a profile assigned.
    BinaryFunction *Candidate = It->second;
    if (ProfiledFunctions.count(Candidate))
      continue;

    matchProfileToFunction(YamlBF, *Candidate);
    ++MatchedWithHash;
  }
  return MatchedWithHash;
}

size_t YAMLProfileReader::matchWithLTOCommonName() {
// This second pass allows name ambiguity for LTO private functions.
size_t MatchedWithLTOCommonName = 0;
for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) {
if (!LTOCommonNameFunctionMap.contains(CommonName))
continue;
Expand All @@ -396,6 +426,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
for (BinaryFunction *BF : Functions) {
if (!ProfiledFunctions.count(BF) && profileMatches(*YamlBF, *BF)) {
matchProfileToFunction(*YamlBF, *BF);
++MatchedWithLTOCommonName;
return true;
}
}
Expand All @@ -407,19 +438,175 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
// partially.
if (!ProfileMatched && LTOProfiles.size() == 1 && Functions.size() == 1 &&
!LTOProfiles.front()->Used &&
!ProfiledFunctions.count(*Functions.begin()))
!ProfiledFunctions.count(*Functions.begin())) {
matchProfileToFunction(*LTOProfiles.front(), **Functions.begin());
++MatchedWithLTOCommonName;
}
}
return MatchedWithLTOCommonName;
}

// Matches not-yet-used profiles to binary functions whose demangled names are
// similar. A profile is only compared against functions that fall into the
// same derived namespace and have the same number of basic blocks; among
// those, the function with the smallest edit distance is chosen, provided the
// distance does not exceed opts::NameSimilarityFunctionMatchingThreshold.
// A threshold of 0 disables this matching. Returns the number of profiles
// matched here.
size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
  if (opts::NameSimilarityFunctionMatchingThreshold == 0)
    return 0;

  size_t MatchedWithNameSimilarity = 0;
  ItaniumPartialDemangler Demangler;

  // Demangle and derive namespace from function name.
  auto DemangleName = [&](const std::string &FunctionName) {
    StringRef RestoredName = NameResolver::restore(FunctionName);
    return demangle(RestoredName);
  };
  // NOTE(review): partialDemangle() is handed the already-demangled name
  // here, as before -- confirm whether the mangled name was intended.
  auto DeriveNameSpace = [&](const std::string &DemangledName) {
    if (Demangler.partialDemangle(DemangledName.c_str()))
      return std::string("");
    // The buffer arguments follow __cxa_demangle semantics: a non-null buffer
    // must be malloc'ed (it may be realloc'ed) and the size is read as its
    // capacity. Pass a null buffer so the demangler allocates the storage
    // itself, then release it once the result has been copied.
    size_t BufferSize = 0;
    char *NameSpace =
        Demangler.getFunctionDeclContextName(nullptr, &BufferSize);
    // Guard against a null result before constructing the string.
    if (!NameSpace)
      return std::string("");
    // The length reported by the demangler is used verbatim so that profile
    // and binary function namespaces are derived in exactly the same way.
    std::string Result(NameSpace, BufferSize);
    std::free(NameSpace);
    return Result;
  };

  // Maps namespaces to associated function block counts and gets profile
  // function names and namespaces to minimize the number of BFs to process and
  // avoid repeated name demangling/namespace derivation.
  StringMap<std::set<uint32_t>> NamespaceToProfiledBFSizes;
  std::vector<std::string> ProfileBFDemangledNames;
  ProfileBFDemangledNames.reserve(YamlBP.Functions.size());
  std::vector<std::string> ProfiledBFNamespaces;
  ProfiledBFNamespaces.reserve(YamlBP.Functions.size());

  for (auto &YamlBF : YamlBP.Functions) {
    std::string YamlBFDemangledName = DemangleName(YamlBF.Name);
    ProfileBFDemangledNames.push_back(YamlBFDemangledName);
    std::string YamlBFNamespace = DeriveNameSpace(YamlBFDemangledName);
    ProfiledBFNamespaces.push_back(YamlBFNamespace);
    NamespaceToProfiledBFSizes[YamlBFNamespace].insert(YamlBF.NumBasicBlocks);
  }

  StringMap<std::vector<BinaryFunction *>> NamespaceToBFs;

  // Maps namespaces to BFs excluding binary functions with no equal sized
  // profiled functions belonging to the same namespace.
  for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
    std::string DemangledName = BF->getDemangledName();
    std::string Namespace = DeriveNameSpace(DemangledName);

    auto NamespaceToProfiledBFSizesIt =
        NamespaceToProfiledBFSizes.find(Namespace);
    // Skip if there are no ProfileBFs with a given \p Namespace.
    if (NamespaceToProfiledBFSizesIt == NamespaceToProfiledBFSizes.end())
      continue;
    // Skip if there are no ProfileBFs in a given \p Namespace with
    // equal number of blocks.
    if (NamespaceToProfiledBFSizesIt->second.count(BF->size()) == 0)
      continue;
    // operator[] creates the (empty) bucket on first use.
    NamespaceToBFs[Namespace].push_back(BF);
  }

  // Iterates through all profiled functions and binary functions belonging to
  // the same namespace and matches based on edit distance threshold.
  assert(YamlBP.Functions.size() == ProfiledBFNamespaces.size() &&
         ProfiledBFNamespaces.size() == ProfileBFDemangledNames.size());
  for (size_t I = 0; I < YamlBP.Functions.size(); ++I) {
    yaml::bolt::BinaryFunctionProfile &YamlBF = YamlBP.Functions[I];
    if (YamlBF.Used)
      continue;
    // Skip if there are no BFs in a given \p Namespace.
    const std::string &YamlBFNamespace = ProfiledBFNamespaces[I];
    auto It = NamespaceToBFs.find(YamlBFNamespace);
    if (It == NamespaceToBFs.end())
      continue;

    const std::string &YamlBFDemangledName = ProfileBFDemangledNames[I];
    // Bind by reference: the candidate list is only read, so copying it for
    // every profile entry is unnecessary.
    const std::vector<BinaryFunction *> &BFs = It->second;
    unsigned MinEditDistance = UINT_MAX;
    BinaryFunction *ClosestNameBF = nullptr;

    // Determines BF the closest to the profiled function, in the
    // same namespace.
    for (BinaryFunction *BF : BFs) {
      if (ProfiledFunctions.count(BF))
        continue;
      if (BF->size() != YamlBF.NumBasicBlocks)
        continue;
      std::string BFDemangledName = BF->getDemangledName();
      unsigned BFEditDistance =
          StringRef(BFDemangledName).edit_distance(YamlBFDemangledName);
      // Strict comparison: on ties the candidate seen first is kept.
      if (BFEditDistance < MinEditDistance) {
        MinEditDistance = BFEditDistance;
        ClosestNameBF = BF;
      }
    }

    if (ClosestNameBF &&
        MinEditDistance <= opts::NameSimilarityFunctionMatchingThreshold) {
      matchProfileToFunction(YamlBF, *ClosestNameBF);
      ++MatchedWithNameSimilarity;
    }
  }

  return MatchedWithNameSimilarity;
}

/// Match the functions recorded in the YAML profile (YamlBP) against the
/// functions of the binary described by \p BC.
///
/// The visible part of this routine runs the matchers in a fixed order (exact
/// name, hash, LTO common name, name similarity), then falls back to the
/// positional pairing given by ProfileBFs, reports what was matched or ignored
/// when verbosity is enabled, and finally marks unprofiled functions as
/// ignored in lite mode with hash matching.
///
/// \param BC binary context whose functions are matched against the profile.
/// \returns Error::success() at the end of the visible code path.
Error YAMLProfileReader::readProfile(BinaryContext &BC) {
// Report which hash function the profile header declares.
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: YAML profile with hash: ";
switch (YamlBP.Header.HashFunction) {
case HashFunction::StdHash:
outs() << "std::hash\n";
break;
case HashFunction::XXH3:
outs() << "xxh3\n";
break;
}
}
// One slot more than the number of profile functions.
// NOTE(review): presumably function ids are 1-based - confirm.
YamlProfileToFunction.resize(YamlBP.Functions.size() + 1);

// Compute hashes for binary functions: for every function in the binary when
// hash-based matching is requested, otherwise only for the (non-null)
// entries of ProfileBFs, and not at all when hashes are ignored.
if (opts::MatchProfileWithFunctionHash) {
for (auto &[_, BF] : BC.getBinaryFunctions()) {
BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
}
} else if (!opts::IgnoreHash) {
for (BinaryFunction *BF : ProfileBFs) {
if (!BF)
continue;
BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
}
}

// Run the matchers in order; each returns the number of profiles it matched.
const size_t MatchedWithExactName = matchWithExactName();
const size_t MatchedWithHash = matchWithHash(BC);
const size_t MatchedWithLTOCommonName = matchWithLTOCommonName();
const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC);

// Fallback: pair each still-unused profile with the binary function at the
// same position in ProfileBFs, provided that function exists and is not
// already in ProfiledFunctions.
for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF))
matchProfileToFunction(YamlBF, *BF);


// Warn about every profile that no step above consumed.
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
if (!YamlBF.Used && opts::Verbosity >= 1)
errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
<< '\n';

// Per-matcher statistics.
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: matched " << MatchedWithExactName
<< " functions with identical names\n";
outs() << "BOLT-INFO: matched " << MatchedWithHash
<< " functions with hash\n";
outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
<< " functions with matching LTO common names\n";
outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
<< " functions with similar names\n";
}

// Set for parseFunctionProfile().
NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
NormalizeByCalls = usesEvent("branches");
Expand All @@ -439,6 +626,12 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {

BC.setNumUnusedProfiledObjects(NumUnused);

// In lite mode with hash-based matching, any function that ended up without
// a profile is marked as ignored.
if (opts::Lite && opts::MatchProfileWithFunctionHash) {
for (BinaryFunction *BF : BC.getAllBinaryFunctions())
if (!BF->hasProfile())
BF->setIgnored();
}

return Error::success();
}

Expand Down
139 changes: 87 additions & 52 deletions bolt/lib/Rewrite/DWARFRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Object/ObjectFile.h"
Expand Down Expand Up @@ -646,6 +646,15 @@ void DWARFRewriter::updateDebugInfo() {

} else {
LocListWritersByCU[CUIndex] = std::make_unique<DebugLocWriter>();
if (std::optional<uint64_t> DWOId = CU.getDWOId()) {
assert(LegacyRangesWritersByCU.count(*DWOId) == 0 &&
"LegacyRangeLists writer for DWO unit already exists.");
auto LegacyRangesSectionWriterByCU =
std::make_unique<DebugRangesSectionWriter>();
LegacyRangesSectionWriterByCU->initSection(CU);
LegacyRangesWritersByCU[*DWOId] =
std::move(LegacyRangesSectionWriterByCU);
}
}
return LocListWritersByCU[CUIndex++].get();
};
Expand Down Expand Up @@ -693,6 +702,7 @@ void DWARFRewriter::updateDebugInfo() {
if (Unit->getVersion() >= 5) {
TempRangesSectionWriter = RangeListsWritersByCU[*DWOId].get();
} else {
TempRangesSectionWriter = LegacyRangesWritersByCU[*DWOId].get();
RangesBase = RangesSectionWriter->getSectionOffset();
setDwoRangesBase(*DWOId, *RangesBase);
}
Expand Down Expand Up @@ -1274,9 +1284,17 @@ void DWARFRewriter::updateDWARFObjectAddressRanges(
}

if (RangesBaseInfo) {
DIEBldr.replaceValue(&Die, RangesBaseInfo.getAttribute(),
RangesBaseInfo.getForm(),
DIEInteger(static_cast<uint32_t>(*RangesBase)));
if (RangesBaseInfo.getAttribute() == dwarf::DW_AT_GNU_ranges_base) {
auto RangesWriterIterator =
LegacyRangesWritersByCU.find(*Unit.getDWOId());
assert(RangesWriterIterator != LegacyRangesWritersByCU.end() &&
"RangesWriter does not exist for DWOId");
RangesWriterIterator->second->setDie(&Die);
} else {
DIEBldr.replaceValue(&Die, RangesBaseInfo.getAttribute(),
RangesBaseInfo.getForm(),
DIEInteger(static_cast<uint32_t>(*RangesBase)));
}
RangesBase = std::nullopt;
}
}
Expand All @@ -1294,20 +1312,12 @@ void DWARFRewriter::updateDWARFObjectAddressRanges(
RangesAttrInfo.getForm() == dwarf::DW_FORM_sec_offset)
NeedConverted = true;

uint64_t CurRangeBase = 0;
if (Unit.isDWOUnit()) {
if (std::optional<uint64_t> DWOId = Unit.getDWOId())
CurRangeBase = getDwoRangesBase(*DWOId);
else
errs() << "BOLT-WARNING: [internal-dwarf-error]: DWOId is not found "
"for DWO Unit.";
}
if (NeedConverted || RangesAttrInfo.getForm() == dwarf::DW_FORM_rnglistx)
DIEBldr.replaceValue(&Die, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx,
DIEInteger(DebugRangesOffset));
else
DIEBldr.replaceValue(&Die, dwarf::DW_AT_ranges, RangesAttrInfo.getForm(),
DIEInteger(DebugRangesOffset - CurRangeBase));
DIEInteger(DebugRangesOffset));

if (!RangesBase) {
if (LowPCAttrInfo &&
Expand All @@ -1324,15 +1334,21 @@ void DWARFRewriter::updateDWARFObjectAddressRanges(

// If we are at this point we are in the CU/Skeleton CU, and
// DW_AT_GNU_ranges_base or DW_AT_rnglists_base doesn't exist.
if (Unit.getVersion() <= 4)
if (Unit.getVersion() <= 4) {
DIEBldr.addValue(&Die, dwarf::DW_AT_GNU_ranges_base, dwarf::DW_FORM_data4,
DIEInteger(*RangesBase));
else if (Unit.getVersion() == 5)
DIEInteger(INT_MAX));
auto RangesWriterIterator =
LegacyRangesWritersByCU.find(*Unit.getDWOId());
assert(RangesWriterIterator != LegacyRangesWritersByCU.end() &&
"RangesWriter does not exist for DWOId");
RangesWriterIterator->second->setDie(&Die);
} else if (Unit.getVersion() == 5) {
DIEBldr.addValue(&Die, dwarf::DW_AT_rnglists_base,
dwarf::DW_FORM_sec_offset, DIEInteger(*RangesBase));
else
} else {
DIEBldr.addValue(&Die, dwarf::DW_AT_rnglists_base,
dwarf::DW_FORM_sec_offset, DIEInteger(*RangesBase));
}
return;
}

Expand All @@ -1351,7 +1367,7 @@ void DWARFRewriter::updateDWARFObjectAddressRanges(
}
}

void DWARFRewriter::updateLineTableOffsets(const MCAsmLayout &Layout) {
void DWARFRewriter::updateLineTableOffsets(const MCAssembler &Asm) {
ErrorOr<BinarySection &> DbgInfoSection =
BC.getUniqueSectionByName(".debug_info");
ErrorOr<BinarySection &> TypeInfoSection =
Expand Down Expand Up @@ -1392,7 +1408,8 @@ void DWARFRewriter::updateLineTableOffsets(const MCAsmLayout &Layout) {
if (!StmtOffset)
continue;

const uint64_t LineTableOffset = Layout.getSymbolOffset(*Label);
const uint64_t LineTableOffset =
Asm.getSymbolOffset(*Label);
DebugLineOffsetMap[*StmtOffset] = LineTableOffset;
assert(DbgInfoSection && ".debug_info section must exist");
LineTablePatchMap[CU.get()] = LineTableOffset;
Expand Down Expand Up @@ -1610,6 +1627,30 @@ void DWARFRewriter::finalizeCompileUnits(DIEBuilder &DIEBlder,
DIEStreamer &Streamer,
CUOffsetMap &CUMap,
const std::list<DWARFUnit *> &CUs) {
// For every DWARF 4 unit that has a DWO id, patch the DW_AT_GNU_ranges_base
// attribute of the DIE remembered by that unit's legacy ranges writer, then
// append the writer's buffered ranges to the shared legacy ranges section.
for (DWARFUnit *CU : CUs) {
// Only version 4 units are handled here.
if (CU->getVersion() != 4)
continue;
std::optional<uint64_t> DWOId = CU->getDWOId();
if (!DWOId)
continue;
auto RangesWriterIterator = LegacyRangesWritersByCU.find(*DWOId);
assert(RangesWriterIterator != LegacyRangesWritersByCU.end() &&
"RangesWriter does not exist for DWOId");
std::unique_ptr<DebugRangesSectionWriter> &LegacyRangesWriter =
RangesWriterIterator->second;
// Skip writers for which no DIE was recorded via setDie().
std::optional<DIE *> Die = LegacyRangesWriter->getDie();
if (!Die || !Die.value())
continue;
DIEValue DvalGNUBase =
Die.value()->findAttribute(dwarf::DW_AT_GNU_ranges_base);
assert(DvalGNUBase && "GNU_ranges_base attribute does not exist for DWOId");
// The section offset is read BEFORE this unit's buffer is appended below,
// so the base written here is the offset at which this unit's ranges will
// start. NOTE(review): assumes getSectionOffset() reports the current end of
// the shared section - confirm.
DIEBlder.replaceValue(
Die.value(), dwarf::DW_AT_GNU_ranges_base, DvalGNUBase.getForm(),
DIEInteger(LegacyRangesSectionWriter->getSectionOffset()));
// Take the per-unit buffer from the writer and append it to the shared
// section.
std::unique_ptr<DebugBufferVector> RangesWritersContents =
LegacyRangesWriter->releaseBuffer();
LegacyRangesSectionWriter->appendToRangeBuffer(*RangesWritersContents);
}
DIEBlder.generateAbbrevs();
DIEBlder.finish();
// generate debug_info and CUMap
Expand Down Expand Up @@ -2097,7 +2138,6 @@ void DWARFRewriter::convertToRangesPatchDebugInfo(
DWARFUnit &Unit, DIEBuilder &DIEBldr, DIE &Die,
uint64_t RangesSectionOffset, DIEValue &LowPCAttrInfo,
DIEValue &HighPCAttrInfo, std::optional<uint64_t> RangesBase) {
uint32_t BaseOffset = 0;
dwarf::Form LowForm = LowPCAttrInfo.getForm();
dwarf::Attribute RangeBaseAttribute = dwarf::DW_AT_GNU_ranges_base;
dwarf::Form RangesForm = dwarf::DW_FORM_sec_offset;
Expand All @@ -2112,45 +2152,40 @@ void DWARFRewriter::convertToRangesPatchDebugInfo(
Die.getTag() == dwarf::DW_TAG_skeleton_unit;
if (!IsUnitDie)
DIEBldr.deleteValue(&Die, LowPCAttrInfo.getAttribute());
// In DWARF4 for DW_AT_low_pc in binary DW_FORM_addr is used. In the DWO
// section DW_FORM_GNU_addr_index is used. So for if we are converting
// DW_AT_low_pc/DW_AT_high_pc and see DW_FORM_GNU_addr_index. We are
// converting in DWO section, and DW_AT_ranges [DW_FORM_sec_offset] is
// relative to DW_AT_GNU_ranges_base.
if (LowForm == dwarf::DW_FORM_GNU_addr_index) {
// Ranges are relative to DW_AT_GNU_ranges_base.
uint64_t CurRangeBase = 0;
if (std::optional<uint64_t> DWOId = Unit.getDWOId()) {
CurRangeBase = getDwoRangesBase(*DWOId);
}
BaseOffset = CurRangeBase;
} else {
// In DWARF 5 we can have DW_AT_low_pc either as DW_FORM_addr, or
// DW_FORM_addrx. Former is when DW_AT_rnglists_base is present. Latter is
// when it's absent.
if (IsUnitDie) {
if (LowForm == dwarf::DW_FORM_addrx) {
const uint32_t Index = AddrWriter->getIndexFromAddress(0, Unit);
DIEBldr.replaceValue(&Die, LowPCAttrInfo.getAttribute(),
LowPCAttrInfo.getForm(), DIEInteger(Index));
} else {
DIEBldr.replaceValue(&Die, LowPCAttrInfo.getAttribute(),
LowPCAttrInfo.getForm(), DIEInteger(0));
}

// In DWARF 5 we can have DW_AT_low_pc either as DW_FORM_addr, or
// DW_FORM_addrx. Former is when DW_AT_rnglists_base is present. Latter is
// when it's absent.
if (IsUnitDie) {
if (LowForm == dwarf::DW_FORM_addrx) {
const uint32_t Index = AddrWriter->getIndexFromAddress(0, Unit);
DIEBldr.replaceValue(&Die, LowPCAttrInfo.getAttribute(),
LowPCAttrInfo.getForm(), DIEInteger(Index));
} else {
DIEBldr.replaceValue(&Die, LowPCAttrInfo.getAttribute(),
LowPCAttrInfo.getForm(), DIEInteger(0));
}
// Original CU didn't have DW_AT_*_base. We converted it's children (or
// dwo), so need to insert it into CU.
if (RangesBase)
}
// Original CU didn't have DW_AT_*_base. We converted its children (or
// dwo), so need to insert it into CU.
if (RangesBase) {
if (Unit.getVersion() >= 5) {
DIEBldr.addValue(&Die, RangeBaseAttribute, dwarf::DW_FORM_sec_offset,
DIEInteger(*RangesBase));
} else {
DIEBldr.addValue(&Die, RangeBaseAttribute, dwarf::DW_FORM_sec_offset,
DIEInteger(INT_MAX));
auto RangesWriterIterator =
LegacyRangesWritersByCU.find(*Unit.getDWOId());
assert(RangesWriterIterator != LegacyRangesWritersByCU.end() &&
"RangesWriter does not exist for DWOId");
RangesWriterIterator->second->setDie(&Die);
}
}

uint64_t RangeAttrVal = RangesSectionOffset - BaseOffset;
if (Unit.getVersion() >= 5)
RangeAttrVal = RangesSectionOffset;
// HighPC was converted into DW_AT_ranges.
// For DWARF5 we only access ranges through index.

DIEBldr.replaceValue(&Die, HighPCAttrInfo.getAttribute(), dwarf::DW_AT_ranges,
RangesForm, DIEInteger(RangeAttrVal));
RangesForm, DIEInteger(RangesSectionOffset));
}
31 changes: 25 additions & 6 deletions bolt/lib/Rewrite/LinuxKernelRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,6 @@ class LinuxKernelRewriter final : public MetadataRewriter {
if (Error E = processSMPLocks())
return E;

if (Error E = readORCTables())
return E;

if (Error E = readStaticCalls())
return E;

Expand All @@ -313,6 +310,11 @@ class LinuxKernelRewriter final : public MetadataRewriter {
if (Error E = readAltInstructions())
return E;

// Some ORC entries could be linked to alternative instruction
// sequences. Hence, we read ORC after .altinstructions.
if (Error E = readORCTables())
return E;

if (Error E = readPCIFixupTable())
return E;

Expand Down Expand Up @@ -563,11 +565,28 @@ Error LinuxKernelRewriter::readORCTables() {
if (!BF->hasInstructions())
continue;

MCInst *Inst = BF->getInstructionAtOffset(IP - BF->getAddress());
if (!Inst)
// Offset of the ORC entry's IP from the start of the containing function.
const uint64_t Offset = IP - BF->getAddress();
MCInst *Inst = BF->getInstructionAtOffset(Offset);
if (!Inst) {
  // No instruction starts exactly at this IP. Check if there is an
  // alternative instruction(s) at this IP. Multiple alternative instructions
  // can take the place of a single original instruction and each alternative
  // can have a separate ORC entry. Since the ORC table is shared between all
  // alternative sequences, there's a requirement that only one (out of many)
  // sequences can have an instruction from the ORC table to avoid
  // ambiguities/conflicts.
  //
  // For now, we have limited support for alternatives. I.e. we still print
  // functions with them, but will not change the code in the output binary.
  // As such, we can ignore alternative ORC entries. They will be preserved
  // in the binary, but will not get printed in the instruction stream.
  Inst = BF->getInstructionContainingOffset(Offset);
  // Both conditions must hold: an instruction has to cover this offset AND it
  // has to carry the "AltInst" annotation. Using `||` here would dereference
  // a null Inst whenever no instruction covers the offset, and would skip the
  // annotation check whenever one does.
  if (Inst && BC.MIB->hasAnnotation(*Inst, "AltInst"))
    continue;

  // The IP is neither an instruction boundary nor inside an instruction with
  // alternatives: the ORC table does not match the disassembled function.
  return createStringError(
      errc::executable_format_error,
      "no instruction at address 0x%" PRIx64 " in .orc_unwind_ip", IP);
}

// Some addresses will have two entries associated with them. The first
// one being a "weak" section terminator. Since we ignore the terminator,
Expand Down Expand Up @@ -1440,7 +1459,7 @@ Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize,
AltBF->setIgnored();
}

if (!BF || !BC.shouldEmit(*BF))
if (!BF || !BF->hasInstructions())
continue;

if (OrgInstAddress + OrgSize > BF->getAddress() + BF->getSize())
Expand Down
32 changes: 3 additions & 29 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
Expand Down Expand Up @@ -76,12 +75,12 @@ extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;

namespace opts {

extern cl::opt<MacroFusionType> AlignMacroOpFusion;
extern cl::list<std::string> HotTextMoveSections;
extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
extern cl::opt<JumpTableSupportLevel> JumpTables;
extern cl::opt<bool> KeepNops;
extern cl::opt<bool> Lite;
extern cl::list<std::string> ReorderData;
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
extern cl::opt<bool> TerminalTrap;
Expand Down Expand Up @@ -140,9 +139,6 @@ KeepTmp("keep-tmp",
cl::Hidden,
cl::cat(BoltCategory));

cl::opt<bool> Lite("lite", cl::desc("skip processing of cold functions"),
cl::cat(BoltCategory));

static cl::opt<unsigned>
LiteThresholdPct("lite-threshold-pct",
cl::desc("threshold (in percent) for selecting functions to process in lite "
Expand Down Expand Up @@ -1972,12 +1968,6 @@ void RewriteInstance::adjustCommandLineOptions() {
if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
RtLibrary->adjustCommandLineOptions(*BC);

if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
BC->outs()
<< "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
opts::AlignMacroOpFusion = MFT_NONE;
}

if (BC->isX86() && BC->MAB->allowAutoPadding()) {
if (!BC->HasRelocations) {
BC->errs()
Expand All @@ -1988,13 +1978,6 @@ void RewriteInstance::adjustCommandLineOptions() {
BC->outs()
<< "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
"may take several minutes\n";
opts::AlignMacroOpFusion = MFT_NONE;
}

if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) {
BC->outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
"mode\n";
opts::AlignMacroOpFusion = MFT_NONE;
}

if (opts::SplitEH && !BC->HasRelocations) {
Expand All @@ -2016,14 +1999,6 @@ void RewriteInstance::adjustCommandLineOptions() {
opts::StrictMode = true;
}

if (BC->isX86() && BC->HasRelocations &&
opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) {
BC->outs()
<< "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
"was specified\n";
opts::AlignMacroOpFusion = MFT_ALL;
}

if (!BC->HasRelocations &&
opts::ReorderFunctions != ReorderFunctions::RT_NONE) {
BC->errs() << "BOLT-ERROR: function reordering only works when "
Expand Down Expand Up @@ -3510,9 +3485,8 @@ void RewriteInstance::emitAndLink() {
updateOutputValues(*Linker);

if (opts::UpdateDebugSections) {
MCAsmLayout FinalLayout(
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler());
DebugInfoRewriter->updateLineTableOffsets(FinalLayout);
DebugInfoRewriter->updateLineTableOffsets(
static_cast<MCObjectStreamer &>(*Streamer).getAssembler());
}

if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
Expand Down
33 changes: 27 additions & 6 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
*AArch64ExprB.getSubExpr(), Comp);
}

bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
return false;
}

bool shortenInstruction(MCInst &, const MCSubtargetInfo &) const override {
return false;
}
Expand Down Expand Up @@ -706,8 +702,20 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
unsigned ShiftVal = AArch64_AM::getArithShiftValue(OperandExtension);
AArch64_AM::ShiftExtendType ExtendType =
AArch64_AM::getArithExtendType(OperandExtension);
if (ShiftVal != 2)
llvm_unreachable("Failed to match indirect branch! (fragment 2)");
if (ShiftVal != 2) {
// TODO: Handle the pattern where ShiftVal != 2.
// The following code sequence below has no shift amount,
// the range could be 0 to 4.
// The pattern comes from libc, it occurs when the binary is static.
// adr x6, 0x219fb0 <sigall_set+0x88>
// add x6, x6, x14, lsl #2
// ldr w7, [x6]
// add x6, x6, w7, sxtw => no shift amount
// br x6
errs() << "BOLT-WARNING: "
"Failed to match indirect branch: ShiftVAL != 2 \n";
return false;
}

if (ExtendType == AArch64_AM::SXTB)
ScaleValue = 1LL;
Expand Down Expand Up @@ -752,6 +760,19 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return true;
}

if (DefJTBaseAdd->getOpcode() == AArch64::ADR) {
// TODO: Handle the pattern where there is no adrp/add pair.
// It also occurs when the binary is static.
// adr x13, 0x215a18 <_nl_value_type_LC_COLLATE+0x50>
// ldrh w13, [x13, w12, uxtw #1]
// adr x12, 0x247b30 <__gettextparse+0x5b0>
// add x13, x12, w13, sxth #2
// br x13
errs() << "BOLT-WARNING: Failed to match indirect branch: "
"nop/adr instead of adrp/add \n";
return false;
}

assert(DefJTBaseAdd->getOpcode() == AArch64::ADDXri &&
"Failed to match jump table base address pattern! (1)");

Expand Down
52 changes: 8 additions & 44 deletions bolt/lib/Target/X86/X86MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,19 +328,19 @@ class X86MCPlusBuilder : public MCPlusBuilder {
return false;
}

bool isReversibleBranch(const MCInst &Inst) const override {
if (isDynamicBranch(Inst))
return false;

bool isUnsupportedInstruction(const MCInst &Inst) const override {
switch (Inst.getOpcode()) {
default:
return true;
return false;

case X86::LOOP:
case X86::LOOPE:
case X86::LOOPNE:
case X86::JECXZ:
case X86::JRCXZ:
return false;
// These have a short displacement, and therefore (often) break after
// basic block relayout.
return true;
}
}

Expand Down Expand Up @@ -661,40 +661,6 @@ class X86MCPlusBuilder : public MCPlusBuilder {
return (Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX;
}

bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
const auto *I = Insts.begin();
while (I != Insts.end() && isPrefix(*I))
++I;
if (I == Insts.end())
return false;

const MCInst &FirstInst = *I;
++I;
while (I != Insts.end() && isPrefix(*I))
++I;
if (I == Insts.end())
return false;
const MCInst &SecondInst = *I;

if (!isConditionalBranch(SecondInst))
return false;
// Cannot fuse if the first instruction uses RIP-relative memory.
if (hasPCRelOperand(FirstInst))
return false;

const X86::FirstMacroFusionInstKind CmpKind =
X86::classifyFirstOpcodeInMacroFusion(FirstInst.getOpcode());
if (CmpKind == X86::FirstMacroFusionInstKind::Invalid)
return false;

X86::CondCode CC = static_cast<X86::CondCode>(getCondCode(SecondInst));
X86::SecondMacroFusionInstKind BranchKind =
X86::classifySecondCondCodeInMacroFusion(CC);
if (BranchKind == X86::SecondMacroFusionInstKind::Invalid)
return false;
return X86::isMacroFused(CmpKind, BranchKind);
}

std::optional<X86MemOperand>
evaluateX86MemoryOperand(const MCInst &Inst) const override {
int MemOpNo = getMemoryOperandNo(Inst);
Expand Down Expand Up @@ -1879,11 +1845,9 @@ class X86MCPlusBuilder : public MCPlusBuilder {
continue;
}

// Handle conditional branches and ignore indirect branches
if (isReversibleBranch(*I) && getCondCode(*I) == X86::COND_INVALID) {
// Indirect branch
// Ignore indirect branches
if (getCondCode(*I) == X86::COND_INVALID)
return false;
}

if (CondBranch == nullptr) {
const MCSymbol *TargetBB = getTargetSymbol(*I);
Expand Down
3 changes: 3 additions & 0 deletions bolt/lib/Utils/CommandLineOpts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ cl::opt<bool>
cl::desc("instrument code to generate accurate profile data"),
cl::cat(BoltOptCategory));

cl::opt<bool> Lite("lite", cl::desc("skip processing of cold functions"),
cl::cat(BoltCategory));

cl::opt<std::string>
OutputFilename("o",
cl::desc("<output file>"),
Expand Down
83 changes: 83 additions & 0 deletions bolt/test/AArch64/test-indirect-branch.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Test how BOLT handles indirect branch sequence of instructions in
// AArch64MCPlus builder.

// clang-format off

// REQUIRES: system-linux
// RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
// RUN: %clang %cflags --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
// RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg --strict\
// RUN: -v=1 2>&1 | FileCheck %s

// Pattern 1: there is no shift amount after the 'add' instruction.
//
// adr x6, 0x219fb0 <sigall_set+0x88>
// add x6, x6, x14, lsl #2
// ldr w7, [x6]
// add x6, x6, w7, sxtw => no shift amount
// br x6
//

// Pattern 2: nop/adr pair is used in place of adrp/add
//
// nop => nop/adr instead of adrp/add
// adr x13, 0x215a18 <_nl_value_type_LC_COLLATE+0x50>
// ldrh w13, [x13, w12, uxtw #1]
// adr x12, 0x247b30 <__gettextparse+0x5b0>
// add x13, x12, w13, sxth #2
// br x13

.section .text
.align 4
.globl _start
.type _start, %function
_start:
bl test1
bl test2
// mov x0, #4
// mov w8, #93
// svc #0

// Pattern 1
// CHECK: BOLT-WARNING: Failed to match indirect branch: ShiftVAL != 2
.globl test1
.type test1, %function
test1:
mov x1, #0
adr x3, datatable
add x3, x3, x1, lsl #2
ldr w2, [x3]
add x3, x3, w2, sxtw
br x3
test1_0:
ret
test1_1:
ret
test1_2:
ret

// Pattern 2
// CHECK: BOLT-WARNING: Failed to match indirect branch: nop/adr instead of adrp/add
.globl test2
.type test2, %function
test2:
nop
adr x3, jump_table
ldrh w3, [x3, x1, lsl #1]
adr x1, test2_0
add x3, x1, w3, sxth #2
br x3
test2_0:
ret
test2_1:
ret

.section .rodata,"a",@progbits
datatable:
.word test1_0-datatable
.word test1_1-datatable
.word test1_2-datatable

jump_table:
.hword (test2_0-test2_0)>>2
.hword (test2_1-test2_0)>>2
6 changes: 3 additions & 3 deletions bolt/test/X86/debug-fission-single-convert.s
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@
# CHECK-DWO-DWO: 00000010
# CHECK-DWO-DWO: 00000050
# CHECK-DWO-DWO: DW_TAG_subprogram
# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
# CHECK-DWO-DWO: DW_TAG_subprogram
# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000020
# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000030
# CHECK-DWO-DWO: DW_TAG_subprogram
# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000040
# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000050

# CHECK-ADDR-SEC: .debug_addr contents:
# CHECK-ADDR-SEC: 0x00000000: Addrs: [
Expand Down
40 changes: 21 additions & 19 deletions bolt/test/X86/dwarf4-df-dualcu.test
Original file line number Diff line number Diff line change
Expand Up @@ -37,36 +37,38 @@

; BOLT: .debug_ranges
; BOLT-NEXT: 00000000 <End of list>
; BOLT-NEXT: 00000010 [[#%.16x,ADDR:]] [[#%.16x,ADDRB:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 00000010 <End of list>
; BOLT-NEXT: 00000030 [[#%.16x,ADDR1:]] [[#%.16x,ADDR1B:]]
; BOLT-NEXT: 00000030 <End of list>
; BOLT-NEXT: 00000050 [[#%.16x,ADDR2:]] [[#%.16x,ADDR2B:]]
; BOLT-NEXT: 00000050 [[#%.16x,ADDR3:]] [[#%.16x,ADDR3B:]]
; BOLT-NEXT: 00000040 <End of list>
; BOLT-NEXT: 00000050 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 00000050 <End of list>
; BOLT-NEXT: 00000080 [[#%.16x,ADDR4:]] [[#%.16x,ADDR4B:]]
; BOLT-NEXT: 00000080 <End of list>
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR5:]] [[#%.16x,ADDR5B:]]
; BOLT-NEXT: 000000a0 <End of list>
; BOLT-NEXT: 00000070 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 00000070 <End of list>
; BOLT-NEXT: 00000090 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 00000090 <End of list>
; BOLT-NEXT: 000000b0 <End of list>
; BOLT-NEXT: 000000c0 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 000000c0 <End of list>

; BOLT: DW_TAG_compile_unit
; BOLT: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x00000016] = "main.dwo.dwo")
; BOLT-NEXT: DW_AT_GNU_dwo_id
; BOLT-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000050
; BOLT-NEXT: [0x[[#ADDR2]], 0x[[#ADDR2B]])
; BOLT-NEXT: [0x[[#ADDR3]], 0x[[#ADDR3B]]))
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-NEXT: [0x[[#ADDR1]], 0x[[#ADDRB1]])
; BOLT-NEXT: [0x[[#ADDR2]], 0x[[#ADDRB2]]))
; BOLT-NEXT: DW_AT_GNU_addr_base [DW_FORM_sec_offset] (0x00000000)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000010)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000040)
; BOLT-NEXT: Compile
; BOLT: DW_TAG_compile_unit
; BOLT: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x00000023] = "helper.dwo.dwo")
; BOLT-NEXT: DW_AT_GNU_dwo_id
; BOLT-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x000000a0
; BOLT-NEXT: [0x[[#ADDR5]], 0x[[#ADDR5B]])
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000090
; BOLT-NEXT: [0x[[#ADDR3]], 0x[[#ADDRB3]])
; BOLT-NEXT: DW_AT_GNU_addr_base [DW_FORM_sec_offset] (0x00000010)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000080)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x000000b0)

; PRE-BOLT-DWO-MAIN: version = 0x0004
; PRE-BOLT-DWO-MAIN: DW_TAG_compile_unit
Expand Down Expand Up @@ -113,13 +115,13 @@
; BOLT-DWO-MAIN-NEXT: DW_AT_decl_line
; BOLT-DWO-MAIN-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_GNU_addr_index 0x1)
; BOLT-DWO-MAIN: DW_TAG_subprogram [4]
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-DWO-MAIN-NEXT: )
; BOLT-DWO-MAIN-NEXT: DW_AT_frame_base
; BOLT-DWO-MAIN-NEXT: DW_AT_linkage_name [DW_FORM_GNU_str_index] (indexed (00000003) string = "_Z3usePiS_")
; BOLT-DWO-MAIN-NEXT: DW_AT_name [DW_FORM_GNU_str_index] (indexed (00000004) string = "use")
; BOLT-DWO-MAIN: DW_TAG_subprogram [6]
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000020
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000030
; BOLT-DWO-MAIN-NEXT: )
; BOLT-DWO-MAIN-NEXT: DW_AT_frame_base [DW_FORM_exprloc] (DW_OP_reg6 RBP)
; BOLT-DWO-MAIN-NEXT: DW_AT_name [DW_FORM_GNU_str_index] (indexed (00000005) string = "main")
Expand Down Expand Up @@ -160,4 +162,4 @@
; BOLT-DWO-HELPER-NEXT: DW_AT_decl_line
; BOLT-DWO-HELPER-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_GNU_addr_index 0x1)
; BOLT-DWO-HELPER: DW_TAG_subprogram [4]
; BOLT-DWO-HELPER-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
; BOLT-DWO-HELPER-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
80 changes: 42 additions & 38 deletions bolt/test/X86/dwarf4-df-input-lowpc-ranges-cus.test
Original file line number Diff line number Diff line change
Expand Up @@ -17,45 +17,47 @@

; BOLT: .debug_ranges
; BOLT-NEXT: 00000000 <End of list>
; BOLT-NEXT: 00000010
; BOLT-NEXT: 00000010
; BOLT-NEXT: 00000010
; BOLT-NEXT: 00000010 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR4:]] [[#%.16x,ADDRB4:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR5:]] [[#%.16x,ADDRB5:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR6:]] [[#%.16x,ADDRB6:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR7:]] [[#%.16x,ADDRB7:]]
; BOLT-NEXT: 00000010 <End of list>
; BOLT-NEXT: 00000050
; BOLT-NEXT: 00000050
; BOLT-NEXT: 00000050
; BOLT-NEXT: 00000050 <End of list>
; BOLT-NEXT: 00000090 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR4:]] [[#%.16x,ADDRB4:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR5:]] [[#%.16x,ADDRB5:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR6:]] [[#%.16x,ADDRB6:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR7:]] [[#%.16x,ADDRB7:]]
; BOLT-NEXT: 00000090 <End of list>
; BOLT-NEXT: 00000110
; BOLT-NEXT: 00000110
; BOLT-NEXT: 00000110
; BOLT-NEXT: 00000110 <End of list>
; BOLT-NEXT: 00000150
; BOLT-NEXT: 00000150
; BOLT-NEXT: 00000150
; BOLT-NEXT: 00000150 <End of list>
; BOLT-NEXT: 00000190 [[#%.16x,ADDR8:]] [[#%.16x,ADDRB8:]]
; BOLT-NEXT: 00000190 [[#%.16x,ADDR9:]] [[#%.16x,ADDRB9:]]
; BOLT-NEXT: 00000190 [[#%.16x,ADDR10:]] [[#%.16x,ADDRB10:]]
; BOLT-NEXT: 00000190 [[#%.16x,ADDR11:]] [[#%.16x,ADDRB11:]]
; BOLT-NEXT: 00000190 [[#%.16x,ADDR12:]] [[#%.16x,ADDRB12:]]
; BOLT-NEXT: 00000190 [[#%.16x,ADDR13:]] [[#%.16x,ADDRB13:]]
; BOLT-NEXT: 00000190 [[#%.16x,ADDR14:]] [[#%.16x,ADDRB14:]]
; BOLT-NEXT: 00000190 <End of list>
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 000000a0 <End of list>
; BOLT-NEXT: 000000e0 [[#%.16x,ADDR5:]] [[#%.16x,ADDRB5:]]
; BOLT-NEXT: 000000e0 [[#%.16x,ADDR6:]] [[#%.16x,ADDRB6:]]
; BOLT-NEXT: 000000e0 [[#%.16x,ADDR7:]] [[#%.16x,ADDRB7:]]
; BOLT-NEXT: 000000e0 <End of list>
; BOLT-NEXT: 00000120 [[#%.16x,ADDR8:]] [[#%.16x,ADDRB8:]]
; BOLT-NEXT: 00000120 [[#%.16x,ADDR9:]] [[#%.16x,ADDRB9:]]
; BOLT-NEXT: 00000120 [[#%.16x,ADDR10:]] [[#%.16x,ADDRB10:]]
; BOLT-NEXT: 00000120 [[#%.16x,ADDR11:]] [[#%.16x,ADDRB11:]]
; BOLT-NEXT: 00000120 [[#%.16x,ADDR12:]] [[#%.16x,ADDRB12:]]
; BOLT-NEXT: 00000120 [[#%.16x,ADDR13:]] [[#%.16x,ADDRB13:]]
; BOLT-NEXT: 00000120 [[#%.16x,ADDR14:]] [[#%.16x,ADDRB14:]]
; BOLT-NEXT: 00000120 <End of list>
; BOLT-NEXT: 000001a0 <End of list>
; BOLT-NEXT: 000001b0 [[#%.16x,ADDR8:]] [[#%.16x,ADDRB8:]]
; BOLT-NEXT: 000001b0 [[#%.16x,ADDR9:]] [[#%.16x,ADDRB9:]]
; BOLT-NEXT: 000001b0 [[#%.16x,ADDR10:]] [[#%.16x,ADDRB10:]]
; BOLT-NEXT: 000001b0 <End of list>
; BOLT-NEXT: 000001f0 [[#%.16x,ADDR12:]] [[#%.16x,ADDRB12:]]
; BOLT-NEXT: 000001f0 [[#%.16x,ADDR13:]] [[#%.16x,ADDRB13:]]
; BOLT-NEXT: 000001f0 [[#%.16x,ADDR14:]] [[#%.16x,ADDRB14:]]
; BOLT-NEXT: 000001f0 <End of list>

; BOLT: DW_TAG_compile_unit
; BOLT: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-fA-F]+}}] = "main.dwo.dwo")
; BOLT-NEXT: DW_AT_GNU_dwo_id
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000010)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000090)
; BOLT-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000090
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-NEXT: [0x[[#ADDR1]], 0x[[#ADDRB1]])
; BOLT-NEXT: [0x[[#ADDR2]], 0x[[#ADDRB2]])
; BOLT-NEXT: [0x[[#ADDR3]], 0x[[#ADDRB3]])
Expand All @@ -64,13 +66,14 @@
; BOLT-NEXT: [0x[[#ADDR6]], 0x[[#ADDRB6]])
; BOLT-NEXT: [0x[[#ADDR7]], 0x[[#ADDRB7]])
; BOLT-NEXT: DW_AT_GNU_addr_base [DW_FORM_sec_offset] (0x00000000)
; BOLT-NEXT: Compile Unit

; BOLT: DW_TAG_compile_unit
; BOLT: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-fA-F]+}}] = "mainOther.dwo.dwo")
; BOLT-NEXT: DW_AT_GNU_dwo_id
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000110)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x000001a0)
; BOLT-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000190
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000120
; BOLT-NEXT: [0x[[#ADDR8]], 0x[[#ADDRB8]])
; BOLT-NEXT: [0x[[#ADDR9]], 0x[[#ADDRB9]])
; BOLT-NEXT: [0x[[#ADDR10]], 0x[[#ADDRB10]])
Expand All @@ -79,19 +82,20 @@
; BOLT-NEXT: [0x[[#ADDR13]], 0x[[#ADDRB13]])
; BOLT-NEXT: [0x[[#ADDR14]], 0x[[#ADDRB14]])
; BOLT-NEXT: DW_AT_GNU_addr_base [DW_FORM_sec_offset] (0x00000018)
; BOLT: {{^$}}

; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000040
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000050

; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000040
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000050
37 changes: 19 additions & 18 deletions bolt/test/X86/dwarf4-df-input-lowpc-ranges.test
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,30 @@

; BOLT: .debug_ranges
; BOLT-NEXT: 00000000 <End of list>
; BOLT-NEXT: 00000010
; BOLT-NEXT: 00000010
; BOLT-NEXT: 00000010
; BOLT-NEXT: 00000010 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR4:]] [[#%.16x,ADDRB4:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR5:]] [[#%.16x,ADDRB5:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR6:]] [[#%.16x,ADDRB6:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR7:]] [[#%.16x,ADDRB7:]]
; BOLT-NEXT: 00000010 <End of list>
; BOLT-NEXT: 00000050
; BOLT-NEXT: 00000050
; BOLT-NEXT: 00000050
; BOLT-NEXT: 00000050 <End of list>
; BOLT-NEXT: 00000090 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR4:]] [[#%.16x,ADDRB4:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR5:]] [[#%.16x,ADDRB5:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR6:]] [[#%.16x,ADDRB6:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR7:]] [[#%.16x,ADDRB7:]]
; BOLT-NEXT: 00000090 <End of list>
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 000000a0 <End of list>
; BOLT-NEXT: 000000e0 [[#%.16x,ADDR5:]] [[#%.16x,ADDRB5:]]
; BOLT-NEXT: 000000e0 [[#%.16x,ADDR6:]] [[#%.16x,ADDRB6:]]
; BOLT-NEXT: 000000e0 [[#%.16x,ADDR7:]] [[#%.16x,ADDRB7:]]
; BOLT-NEXT: 000000e0 <End of list>

; BOLT: DW_TAG_compile_unit
; BOLT: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-fA-F]+}}] = "main.dwo.dwo")
; BOLT-NEXT: DW_AT_GNU_dwo_id
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000010)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000090)
; BOLT-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000090
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-NEXT: [0x[[#ADDR1]], 0x[[#ADDRB1]])
; BOLT-NEXT: [0x[[#ADDR2]], 0x[[#ADDRB2]])
; BOLT-NEXT: [0x[[#ADDR3]], 0x[[#ADDRB3]])
Expand All @@ -48,9 +49,9 @@
; BOLT-NEXT: DW_AT_GNU_addr_base [DW_FORM_sec_offset] (0x00000000)

; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000040
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000050
11 changes: 5 additions & 6 deletions bolt/test/X86/dwarf5-df-types-debug-names.test
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,19 @@
; BOLT: type_signature = [[TYPE1:0x[0-9a-f]*]]
; BOLT: Compile Unit
; BOLT: type_signature = [[TYPE2:0x[0-9a-f]*]]
; BOLT: type_signature = [[TYPE3:0x[0-9a-f]*]]
; BOLT: type_signature = [[TYPE1]]
; BOLT: Compile Unit
; BOLT: [[OFFSET:0x[0-9a-f]*]]: Compile Unit
; BOLT: [[OFFSET1:0x[0-9a-f]*]]: Compile Unit

; BOLT: Name Index @ 0x0 {
; BOLT-NEXT: Header {
; BOLT-NEXT: Length: 0x17E
; BOLT-NEXT: Length: 0x176
; BOLT-NEXT: Format: DWARF32
; BOLT-NEXT: Version: 5
; BOLT-NEXT: CU count: 2
; BOLT-NEXT: Local TU count: 0
; BOLT-NEXT: Foreign TU count: 4
; BOLT-NEXT: Foreign TU count: 3
; BOLT-NEXT: Bucket count: 9
; BOLT-NEXT: Name count: 9
; BOLT-NEXT: Abbreviations table size: 0x37
Expand All @@ -44,7 +44,6 @@
; BOLT-NEXT: ForeignTU[0]: [[TYPE]]
; BOLT-NEXT: ForeignTU[1]: [[TYPE1]]
; BOLT-NEXT: ForeignTU[2]: [[TYPE2]]
; BOLT-NEXT: ForeignTU[3]: [[TYPE3]]
; BOLT-NEXT: ]
; BOLT-NEXT: Abbreviations [
; BOLT-NEXT: Abbreviation [[ABBREV:0x[0-9a-f]*]] {
Expand Down Expand Up @@ -173,7 +172,7 @@
; BOLT-NEXT: Entry @ {{.+}} {
; BOLT-NEXT: Abbrev: [[ABBREV]]
; BOLT-NEXT: Tag: DW_TAG_structure_type
; BOLT-NEXT: DW_IDX_type_unit: 0x03
; BOLT-NEXT: DW_IDX_type_unit: 0x01
; BOLT-NEXT: DW_IDX_compile_unit: 0x01
; BOLT-NEXT: DW_IDX_die_offset: 0x00000021
; BOLT-NEXT: DW_IDX_parent: <parent not indexed>
Expand Down Expand Up @@ -237,7 +236,7 @@
; BOLT-NEXT: Entry @ {{.+}} {
; BOLT-NEXT: Abbrev: 0x5
; BOLT-NEXT: Tag: DW_TAG_base_type
; BOLT-NEXT: DW_IDX_type_unit: 0x03
; BOLT-NEXT: DW_IDX_type_unit: 0x01
; BOLT-NEXT: DW_IDX_compile_unit: 0x01
; BOLT-NEXT: DW_IDX_die_offset: 0x00000048
; BOLT-NEXT: DW_IDX_parent: <parent not indexed>
Expand Down
63 changes: 63 additions & 0 deletions bolt/test/X86/hashing-based-function-matching.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
## Tests function matching in YAMLProfileReader by function hash.

# REQUIRES: system-linux
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
# RUN: --print-cfg --match-profile-with-function-hash --profile-ignore-hash=0 2>&1 | FileCheck %s

# CHECK: BOLT-INFO: matched 1 functions with hash

#--- main.s
.globl main
.type main, @function
main:
.cfi_startproc
.LBB00:
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
testq %rax, %rax
js .LBB03
.LBB01:
jne .LBB04
.LBB02:
nop
.LBB03:
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
.LBB04:
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
## For relocations against .text
.reloc 0, R_X86_64_NONE
.cfi_endproc
.size main, .-main

#--- yaml
---
header:
profile-version: 1
binary-name: 'hashing-based-function-matching.s.tmp.exe'
binary-build-id: '<unknown>'
profile-flags: [ lbr ]
profile-origin: branch profile reader
profile-events: ''
dfs-order: false
hash-func: xxh3
functions:
- name: main2
fid: 0
hash: 0x6E7F15254DE2478
exec: 1
nblocks: 6
blocks:
- bid: 1
insns: 1
succ: [ { bid: 3, cnt: 1} ]
...
1 change: 0 additions & 1 deletion bolt/test/X86/issue20.s
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# CHECK-NOT: BOLT-INFO: UCE removed {{.*}} blocks and {{.*}} bytes of code
# CHECK: Binary Function "main"
# CHECK: .LFT{{.*}} (2 instructions, align : 1)
# CHECK-NEXT: CFI State : 0
# CHECK-NEXT: 00000004: andq
# CHECK-NEXT: 00000008: jmpq
# CHECK-NEXT: Successors: .Ltmp{{.*}}, .Ltmp{{.*}}, .Ltmp{{.*}}, .Ltmp{{.*}}
Expand Down
1 change: 0 additions & 1 deletion bolt/test/X86/issue20.test
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
CHECK-NOT: BOLT-INFO: UCE removed {{.*}} blocks and {{.*}}| bytes of code
CHECK: Binary Function "main"
CHECK: .LFT0 (2 instructions, align : 1)
CHECK-NEXT: CFI State : 0
CHECK-NEXT: 00000004: andq
CHECK-NEXT: 00000008: jmpq
CHECK-NEXT: Successors: .Ltmp1, .Ltmp2, .Ltmp3, .Ltmp4
49 changes: 47 additions & 2 deletions bolt/test/X86/linux-alt-instruction.s
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
# RUN: llvm-bolt %t.exe --print-cfg -o %t.fs4.out | FileCheck %s

# CHECK: BOLT-INFO: Linux kernel binary detected
# CHECK: BOLT-INFO: parsed 2 alternative instruction entries
# CHECK: BOLT-INFO: parsed 3 alternative instruction entries

.text
.globl _start
Expand All @@ -50,10 +50,12 @@ _start:
# CHECK: rdtsc
# CHECK-SAME: AltInst: 1
# CHECK-SAME: AltInst2: 2
# CHECK-SAME: AltInst3: 3
nop
# CHECK-NEXT: nop
# CHECK-SAME: AltInst: 1
# CHECK-SAME: AltInst2: 2
# CHECK-SAME: AltInst3: 3
nop
nop
.L1:
Expand All @@ -66,6 +68,9 @@ _start:
rdtsc
.A1:
rdtscp
.A2:
pushf
pop %rax
.Ae:

## Alternative instruction info.
Expand All @@ -92,11 +97,51 @@ _start:
.word 0x3b # feature flags
.endif
.byte .L1 - .L0 # org size
.byte .Ae - .A1 # alt size
.byte .A2 - .A1 # alt size
.ifdef PADLEN
.byte 0
.endif

.long .L0 - . # org instruction
.long .A2 - . # alt instruction
.ifdef FEATURE_SIZE_4
.long 0x110 # feature flags
.else
.word 0x110 # feature flags
.endif
.byte .L1 - .L0 # org size
.byte .Ae - .A2 # alt size
.ifdef PADLEN
.byte 0
.endif

## ORC unwind for "pushf; pop %rax" alternative sequence.
.section .orc_unwind,"a",@progbits
.align 4
.section .orc_unwind_ip,"a",@progbits
.align 4

.section .orc_unwind
.2byte 8
.2byte 0
.2byte 0x205
.section .orc_unwind_ip
.long _start - .

.section .orc_unwind
.2byte 16
.2byte 0
.2byte 0x205
.section .orc_unwind_ip
.long .L0 + 1 - .

.section .orc_unwind
.2byte 8
.2byte 0
.2byte 0x205
.section .orc_unwind_ip
.long .L0 + 2 - .

## Fake Linux Kernel sections.
.section __ksymtab,"a",@progbits
.section __ksymtab_gpl,"a",@progbits
63 changes: 63 additions & 0 deletions bolt/test/X86/name-similarity-function-matching.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
## Tests function matching in YAMLProfileReader by name similarity.

# REQUIRES: system-linux
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
# RUN: --print-cfg --name-similarity-function-matching-threshold=1 --funcs=main --profile-ignore-hash=0 2>&1 | FileCheck %s

# CHECK: BOLT-INFO: matched 1 functions with similar names

#--- main.s
.globl main
.type main, @function
main:
.cfi_startproc
.LBB00:
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
testq %rax, %rax
js .LBB03
.LBB01:
jne .LBB04
.LBB02:
nop
.LBB03:
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
.LBB04:
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
## For relocations against .text
.reloc 0, R_X86_64_NONE
.cfi_endproc
.size main, .-main

#--- yaml
---
header:
profile-version: 1
binary-name: 'hashing-based-function-matching.s.tmp.exe'
binary-build-id: '<unknown>'
profile-flags: [ lbr ]
profile-origin: branch profile reader
profile-events: ''
dfs-order: false
hash-func: xxh3
functions:
- name: main2
fid: 0
hash: 0x0000000000000001
exec: 1
nblocks: 5
blocks:
- bid: 1
insns: 1
succ: [ { bid: 3, cnt: 1} ]
...
1 change: 1 addition & 0 deletions bolt/test/X86/reader-stale-yaml-std.test
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# UNSUPPORTED: true
## This script checks that YamlProfileReader in llvm-bolt is reading data
## correctly and stale data is corrected by profile inference.

Expand Down
4 changes: 4 additions & 0 deletions bolt/test/X86/register-fragments-bolt-symbols.s
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,12 @@
# PREAGGWARM: B X:0 #chain.warm# 1 0
# RUN: perf2bolt %t.warm.bolt -p %t.preagg.warm --pa -o %t.warm.fdata -w %t.warm.yaml \
# RUN: -v=1 | FileCheck %s --check-prefix=CHECK-BOLT-WARM
# RUN: FileCheck %s --input-file %t.warm.fdata --check-prefix=CHECK-FDATA-WARM
# RUN: FileCheck %s --input-file %t.warm.yaml --check-prefix=CHECK-YAML-WARM

# CHECK-BOLT-WARM: marking chain.warm/1(*2) as a fragment of chain
# CHECK-FDATA-WARM: chain
# CHECK-YAML-WARM: chain

# RUN: sed -i 's|chain|chain/2|g' %t.fdata
# RUN: llvm-objcopy --localize-symbol=chain %t.main.o
Expand Down
5 changes: 0 additions & 5 deletions bolt/test/X86/sctc-bug4.test
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,19 @@ RUN: llvm-bolt %t -o %t.null --enable-bat \
RUN: -funcs=test_func -print-sctc -sequential-disassembly 2>&1 | FileCheck %s

CHECK: .Ltmp2 (3 instructions, align : 1)
CHECK-NEXT: CFI State : 0
CHECK-NEXT: Input offset: 0x24
CHECK-NEXT: Predecessors: .LFT1
CHECK-NEXT: 00000024: cmpq $0x20, %rsi
CHECK-NEXT: 00000028: ja dummy # TAILCALL # Offset: 53 # CTCTakenCount: 0
CHECK-NEXT: 0000002a: jmp .Ltmp4
CHECK-NEXT: Successors: .Ltmp4
CHECK-NEXT: CFI State: 0

CHECK: .Ltmp1 (2 instructions, align : 1)
CHECK-NEXT: CFI State : 0
CHECK-NEXT: Input offset: 0x2c
CHECK-NEXT: Predecessors: .LFT0
CHECK-NEXT: 0000002c: xorq %r11, %rax
CHECK-NEXT: 0000002f: retq
CHECK-NEXT: CFI State: 0

CHECK: .Ltmp4 (4 instructions, align : 1)
CHECK-NEXT: CFI State : 0
CHECK-NEXT: Input offset: 0x3a
CHECK-NEXT: Predecessors: .Ltmp2
118 changes: 63 additions & 55 deletions clang-tools-extra/clang-doc/HTMLGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class HTMLTag {
operator TagType() const { return Value; }
operator bool() = delete;

bool IsSelfClosing() const;
bool isSelfClosing() const;
llvm::SmallString<16> ToString() const;

private:
Expand All @@ -71,7 +71,7 @@ struct HTMLNode {
HTMLNode(NodeType Type) : Type(Type) {}
virtual ~HTMLNode() = default;

virtual void Render(llvm::raw_ostream &OS, int IndentationLevel) = 0;
virtual void render(llvm::raw_ostream &OS, int IndentationLevel) = 0;
NodeType Type; // Type of node
};

Expand All @@ -80,7 +80,7 @@ struct TextNode : public HTMLNode {
: HTMLNode(NodeType::NODE_TEXT), Text(Text.str()) {}

std::string Text; // Content of node
void Render(llvm::raw_ostream &OS, int IndentationLevel) override;
void render(llvm::raw_ostream &OS, int IndentationLevel) override;
};

struct TagNode : public HTMLNode {
Expand All @@ -94,25 +94,25 @@ struct TagNode : public HTMLNode {
std::vector<std::pair<std::string, std::string>>
Attributes; // List of key-value attributes for tag

void Render(llvm::raw_ostream &OS, int IndentationLevel) override;
void render(llvm::raw_ostream &OS, int IndentationLevel) override;
};

constexpr const char *kDoctypeDecl = "<!DOCTYPE html>";

struct HTMLFile {
std::vector<std::unique_ptr<HTMLNode>> Children; // List of child nodes
void Render(llvm::raw_ostream &OS) {
void render(llvm::raw_ostream &OS) {
OS << kDoctypeDecl << "\n";
for (const auto &C : Children) {
C->Render(OS, 0);
C->render(OS, 0);
OS << "\n";
}
}
};

} // namespace

bool HTMLTag::IsSelfClosing() const {
bool HTMLTag::isSelfClosing() const {
switch (Value) {
case HTMLTag::TAG_META:
case HTMLTag::TAG_LINK:
Expand Down Expand Up @@ -177,12 +177,12 @@ llvm::SmallString<16> HTMLTag::ToString() const {
llvm_unreachable("Unhandled HTMLTag::TagType");
}

void TextNode::Render(llvm::raw_ostream &OS, int IndentationLevel) {
void TextNode::render(llvm::raw_ostream &OS, int IndentationLevel) {
OS.indent(IndentationLevel * 2);
printHTMLEscaped(Text, OS);
}

void TagNode::Render(llvm::raw_ostream &OS, int IndentationLevel) {
void TagNode::render(llvm::raw_ostream &OS, int IndentationLevel) {
// Children nodes are rendered in the same line if all of them are text nodes
bool InlineChildren = true;
for (const auto &C : Children)
Expand All @@ -194,7 +194,7 @@ void TagNode::Render(llvm::raw_ostream &OS, int IndentationLevel) {
OS << "<" << Tag.ToString();
for (const auto &A : Attributes)
OS << " " << A.first << "=\"" << A.second << "\"";
if (Tag.IsSelfClosing()) {
if (Tag.isSelfClosing()) {
OS << "/>";
return;
}
Expand All @@ -205,7 +205,7 @@ void TagNode::Render(llvm::raw_ostream &OS, int IndentationLevel) {
for (const auto &C : Children) {
int ChildrenIndentation =
InlineChildren || !NewLineRendered ? 0 : IndentationLevel + 1;
C->Render(OS, ChildrenIndentation);
C->render(OS, ChildrenIndentation);
if (!InlineChildren && (C == Children.back() ||
(C->Type != NodeType::NODE_TEXT ||
(&C + 1)->get()->Type != NodeType::NODE_TEXT))) {
Expand All @@ -221,7 +221,7 @@ void TagNode::Render(llvm::raw_ostream &OS, int IndentationLevel) {

template <typename Derived, typename Base,
typename = std::enable_if<std::is_base_of<Derived, Base>::value>>
static void AppendVector(std::vector<Derived> &&New,
static void appendVector(std::vector<Derived> &&New,
std::vector<Base> &Original) {
std::move(New.begin(), New.end(), std::back_inserter(Original));
}
Expand Down Expand Up @@ -289,9 +289,18 @@ genStylesheetsHTML(StringRef InfoPath, const ClangDocContext &CDCtx) {
static std::vector<std::unique_ptr<TagNode>>
genJsScriptsHTML(StringRef InfoPath, const ClangDocContext &CDCtx) {
std::vector<std::unique_ptr<TagNode>> Out;

// index_json.js is part of every generated HTML file
SmallString<128> IndexJSONPath = computeRelativePath("", InfoPath);
auto IndexJSONNode = std::make_unique<TagNode>(HTMLTag::TAG_SCRIPT);
llvm::sys::path::append(IndexJSONPath, "index_json.js");
llvm::sys::path::native(IndexJSONPath, llvm::sys::path::Style::posix);
IndexJSONNode->Attributes.emplace_back("src", std::string(IndexJSONPath));
Out.emplace_back(std::move(IndexJSONNode));

for (const auto &FilePath : CDCtx.JsScripts) {
auto ScriptNode = std::make_unique<TagNode>(HTMLTag::TAG_SCRIPT);
SmallString<128> ScriptPath = computeRelativePath("", InfoPath);
auto ScriptNode = std::make_unique<TagNode>(HTMLTag::TAG_SCRIPT);
llvm::sys::path::append(ScriptPath, llvm::sys::path::filename(FilePath));
// Paths in HTML must be in posix-style
llvm::sys::path::native(ScriptPath, llvm::sys::path::Style::posix);
Expand All @@ -313,8 +322,7 @@ genReference(const Reference &Type, StringRef CurrentDirectory,
if (Type.Path.empty()) {
if (!JumpToSection)
return std::make_unique<TextNode>(Type.Name);
else
return genLink(Type.Name, "#" + *JumpToSection);
return genLink(Type.Name, "#" + *JumpToSection);
}
llvm::SmallString<64> Path = Type.getRelativeFilePath(CurrentDirectory);
llvm::sys::path::append(Path, Type.getFileBaseName() + ".html");
Expand Down Expand Up @@ -357,7 +365,7 @@ genEnumsBlock(const std::vector<EnumInfo> &Enums,
auto &DivBody = Out.back();
for (const auto &E : Enums) {
std::vector<std::unique_ptr<TagNode>> Nodes = genHTML(E, CDCtx);
AppendVector(std::move(Nodes), DivBody->Children);
appendVector(std::move(Nodes), DivBody->Children);
}
return Out;
}
Expand Down Expand Up @@ -388,7 +396,7 @@ genFunctionsBlock(const std::vector<FunctionInfo> &Functions,
for (const auto &F : Functions) {
std::vector<std::unique_ptr<TagNode>> Nodes =
genHTML(F, CDCtx, ParentInfoDir);
AppendVector(std::move(Nodes), DivBody->Children);
appendVector(std::move(Nodes), DivBody->Children);
}
return Out;
}
Expand Down Expand Up @@ -478,10 +486,10 @@ genFileHeadNodes(StringRef Title, StringRef InfoPath,
Out.emplace_back(std::make_unique<TagNode>(HTMLTag::TAG_TITLE, Title));
std::vector<std::unique_ptr<TagNode>> StylesheetsNodes =
genStylesheetsHTML(InfoPath, CDCtx);
AppendVector(std::move(StylesheetsNodes), Out);
appendVector(std::move(StylesheetsNodes), Out);
std::vector<std::unique_ptr<TagNode>> JsNodes =
genJsScriptsHTML(InfoPath, CDCtx);
AppendVector(std::move(JsNodes), Out);
appendVector(std::move(JsNodes), Out);
return Out;
}

Expand Down Expand Up @@ -513,15 +521,15 @@ static std::unique_ptr<TagNode> genInfoFileMainNode(
MainContentNode->Attributes.emplace_back("id", "main-content");
MainContentNode->Attributes.emplace_back(
"class", "col-xs-12 col-sm-9 col-md-8 main-content");
AppendVector(std::move(MainContentInnerNodes), MainContentNode->Children);
appendVector(std::move(MainContentInnerNodes), MainContentNode->Children);

auto RightSidebarNode = std::make_unique<TagNode>(HTMLTag::TAG_DIV);
RightSidebarNode->Attributes.emplace_back("id", "sidebar-right");
RightSidebarNode->Attributes.emplace_back(
"class", "col-xs-6 col-sm-6 col-md-2 sidebar sidebar-offcanvas-right");
std::vector<std::unique_ptr<TagNode>> InfoIndexHTML =
genHTML(InfoIndex, InfoPath, true);
AppendVector(std::move(InfoIndexHTML), RightSidebarNode->Children);
appendVector(std::move(InfoIndexHTML), RightSidebarNode->Children);

MainNode->Children.emplace_back(std::move(LeftSidebarNode));
MainNode->Children.emplace_back(std::move(MainContentNode));
Expand Down Expand Up @@ -555,7 +563,7 @@ genInfoFile(StringRef Title, StringRef InfoPath,
genInfoFileMainNode(InfoPath, MainContentNodes, InfoIndex);
std::unique_ptr<TagNode> FooterNode = genFileFooterNode();

AppendVector(std::move(HeadNodes), F.Children);
appendVector(std::move(HeadNodes), F.Children);
F.Children.emplace_back(std::move(HeaderNode));
F.Children.emplace_back(std::move(MainNode));
F.Children.emplace_back(std::move(FooterNode));
Expand Down Expand Up @@ -594,7 +602,7 @@ genHTML(const Index &Index, StringRef InfoPath, bool IsOutermostList) {
for (const auto &C : Index.Children) {
auto LiBody = std::make_unique<TagNode>(HTMLTag::TAG_LI);
std::vector<std::unique_ptr<TagNode>> Nodes = genHTML(C, InfoPath, false);
AppendVector(std::move(Nodes), LiBody->Children);
appendVector(std::move(Nodes), LiBody->Children);
UlBody->Children.emplace_back(std::move(LiBody));
}
return Out;
Expand All @@ -609,7 +617,9 @@ static std::unique_ptr<HTMLNode> genHTML(const CommentInfo &I) {
FullComment->Children.emplace_back(std::move(Node));
}
return std::move(FullComment);
} else if (I.Kind == "ParagraphComment") {
}

if (I.Kind == "ParagraphComment") {
auto ParagraphComment = std::make_unique<TagNode>(HTMLTag::TAG_P);
for (const auto &Child : I.Children) {
std::unique_ptr<HTMLNode> Node = genHTML(*Child);
Expand All @@ -619,7 +629,9 @@ static std::unique_ptr<HTMLNode> genHTML(const CommentInfo &I) {
if (ParagraphComment->Children.empty())
return nullptr;
return std::move(ParagraphComment);
} else if (I.Kind == "TextComment") {
}

if (I.Kind == "TextComment") {
if (I.Text == "")
return nullptr;
return std::make_unique<TextNode>(I.Text);
Expand All @@ -639,11 +651,7 @@ static std::unique_ptr<TagNode> genHTML(const std::vector<CommentInfo> &C) {
static std::vector<std::unique_ptr<TagNode>>
genHTML(const EnumInfo &I, const ClangDocContext &CDCtx) {
std::vector<std::unique_ptr<TagNode>> Out;
std::string EnumType;
if (I.Scoped)
EnumType = "enum class ";
else
EnumType = "enum ";
std::string EnumType = I.Scoped ? "enum class " : "enum ";

Out.emplace_back(
std::make_unique<TagNode>(HTMLTag::TAG_H3, EnumType + I.Name));
Expand Down Expand Up @@ -737,17 +745,17 @@ genHTML(const NamespaceInfo &I, Index &InfoIndex, const ClangDocContext &CDCtx,

std::vector<std::unique_ptr<TagNode>> ChildNamespaces =
genReferencesBlock(I.Children.Namespaces, "Namespaces", BasePath);
AppendVector(std::move(ChildNamespaces), Out);
appendVector(std::move(ChildNamespaces), Out);
std::vector<std::unique_ptr<TagNode>> ChildRecords =
genReferencesBlock(I.Children.Records, "Records", BasePath);
AppendVector(std::move(ChildRecords), Out);
appendVector(std::move(ChildRecords), Out);

std::vector<std::unique_ptr<TagNode>> ChildFunctions =
genFunctionsBlock(I.Children.Functions, CDCtx, BasePath);
AppendVector(std::move(ChildFunctions), Out);
appendVector(std::move(ChildFunctions), Out);
std::vector<std::unique_ptr<TagNode>> ChildEnums =
genEnumsBlock(I.Children.Enums, CDCtx);
AppendVector(std::move(ChildEnums), Out);
appendVector(std::move(ChildEnums), Out);

if (!I.Children.Namespaces.empty())
InfoIndex.Children.emplace_back("Namespaces", "Namespaces");
Expand Down Expand Up @@ -791,29 +799,29 @@ genHTML(const RecordInfo &I, Index &InfoIndex, const ClangDocContext &CDCtx,
auto &PBody = Out.back();
PBody->Children.emplace_back(std::make_unique<TextNode>("Inherits from "));
if (Parents.empty())
AppendVector(std::move(VParents), PBody->Children);
appendVector(std::move(VParents), PBody->Children);
else if (VParents.empty())
AppendVector(std::move(Parents), PBody->Children);
appendVector(std::move(Parents), PBody->Children);
else {
AppendVector(std::move(Parents), PBody->Children);
appendVector(std::move(Parents), PBody->Children);
PBody->Children.emplace_back(std::make_unique<TextNode>(", "));
AppendVector(std::move(VParents), PBody->Children);
appendVector(std::move(VParents), PBody->Children);
}
}

std::vector<std::unique_ptr<TagNode>> Members =
genRecordMembersBlock(I.Members, I.Path);
AppendVector(std::move(Members), Out);
appendVector(std::move(Members), Out);
std::vector<std::unique_ptr<TagNode>> ChildRecords =
genReferencesBlock(I.Children.Records, "Records", I.Path);
AppendVector(std::move(ChildRecords), Out);
appendVector(std::move(ChildRecords), Out);

std::vector<std::unique_ptr<TagNode>> ChildFunctions =
genFunctionsBlock(I.Children.Functions, CDCtx, I.Path);
AppendVector(std::move(ChildFunctions), Out);
appendVector(std::move(ChildFunctions), Out);
std::vector<std::unique_ptr<TagNode>> ChildEnums =
genEnumsBlock(I.Children.Enums, CDCtx);
AppendVector(std::move(ChildEnums), Out);
appendVector(std::move(ChildEnums), Out);

if (!I.Members.empty())
InfoIndex.Children.emplace_back("Members", "Members");
Expand Down Expand Up @@ -936,7 +944,7 @@ llvm::Error HTMLGenerator::generateDocForInfo(Info *I, llvm::raw_ostream &OS,

HTMLFile F = genInfoFile(InfoTitle, I->getRelativeFilePath(""),
MainContentNodes, InfoIndex, CDCtx);
F.Render(OS);
F.render(OS);

return llvm::Error::success();
}
Expand All @@ -959,7 +967,7 @@ static std::string getRefType(InfoType IT) {
llvm_unreachable("Unknown InfoType");
}

static llvm::Error SerializeIndex(ClangDocContext &CDCtx) {
static llvm::Error serializeIndex(ClangDocContext &CDCtx) {
std::error_code OK;
std::error_code FileErr;
llvm::SmallString<128> FilePath;
Expand All @@ -985,9 +993,9 @@ static llvm::Error SerializeIndex(ClangDocContext &CDCtx) {
});
});
};
OS << "var JsonIndex = `\n";
OS << "async function LoadIndex() {\nreturn";
IndexToJSON(CDCtx.Idx);
OS << "`;\n";
OS << ";\n}";
return llvm::Error::success();
}

Expand All @@ -1009,7 +1017,7 @@ static std::unique_ptr<TagNode> genIndexFileMainNode() {
return MainNode;
}

static llvm::Error GenIndex(const ClangDocContext &CDCtx) {
static llvm::Error genIndex(const ClangDocContext &CDCtx) {
std::error_code FileErr, OK;
llvm::SmallString<128> IndexPath;
llvm::sys::path::native(CDCtx.OutDirectory, IndexPath);
Expand All @@ -1029,17 +1037,17 @@ static llvm::Error GenIndex(const ClangDocContext &CDCtx) {
std::unique_ptr<TagNode> MainNode = genIndexFileMainNode();
std::unique_ptr<TagNode> FooterNode = genFileFooterNode();

AppendVector(std::move(HeadNodes), F.Children);
appendVector(std::move(HeadNodes), F.Children);
F.Children.emplace_back(std::move(HeaderNode));
F.Children.emplace_back(std::move(MainNode));
F.Children.emplace_back(std::move(FooterNode));

F.Render(IndexOS);
F.render(IndexOS);

return llvm::Error::success();
}

static llvm::Error CopyFile(StringRef FilePath, StringRef OutDirectory) {
static llvm::Error copyFile(StringRef FilePath, StringRef OutDirectory) {
llvm::SmallString<128> PathWrite;
llvm::sys::path::native(OutDirectory, PathWrite);
llvm::sys::path::append(PathWrite, llvm::sys::path::filename(FilePath));
Expand All @@ -1057,20 +1065,20 @@ static llvm::Error CopyFile(StringRef FilePath, StringRef OutDirectory) {
}

llvm::Error HTMLGenerator::createResources(ClangDocContext &CDCtx) {
auto Err = SerializeIndex(CDCtx);
auto Err = serializeIndex(CDCtx);
if (Err)
return Err;
Err = GenIndex(CDCtx);
Err = genIndex(CDCtx);
if (Err)
return Err;

for (const auto &FilePath : CDCtx.UserStylesheets) {
Err = CopyFile(FilePath, CDCtx.OutDirectory);
Err = copyFile(FilePath, CDCtx.OutDirectory);
if (Err)
return Err;
}
for (const auto &FilePath : CDCtx.FilesToCopy) {
Err = CopyFile(FilePath, CDCtx.OutDirectory);
for (const auto &FilePath : CDCtx.JsScripts) {
Err = copyFile(FilePath, CDCtx.OutDirectory);
if (Err)
return Err;
}
Expand Down
6 changes: 2 additions & 4 deletions clang-tools-extra/clang-doc/Representation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,11 +368,9 @@ ClangDocContext::ClangDocContext(tooling::ExecutionContext *ECtx,
StringRef ProjectName, bool PublicOnly,
StringRef OutDirectory, StringRef SourceRoot,
StringRef RepositoryUrl,
std::vector<std::string> UserStylesheets,
std::vector<std::string> JsScripts)
std::vector<std::string> UserStylesheets)
: ECtx(ECtx), ProjectName(ProjectName), PublicOnly(PublicOnly),
OutDirectory(OutDirectory), UserStylesheets(UserStylesheets),
JsScripts(JsScripts) {
OutDirectory(OutDirectory), UserStylesheets(UserStylesheets) {
llvm::SmallString<128> SourceRootDir(SourceRoot);
if (SourceRoot.empty())
// If no SourceRoot was provided the current path is used as the default
Expand Down
5 changes: 1 addition & 4 deletions clang-tools-extra/clang-doc/Representation.h
Original file line number Diff line number Diff line change
Expand Up @@ -482,8 +482,7 @@ struct ClangDocContext {
ClangDocContext(tooling::ExecutionContext *ECtx, StringRef ProjectName,
bool PublicOnly, StringRef OutDirectory, StringRef SourceRoot,
StringRef RepositoryUrl,
std::vector<std::string> UserStylesheets,
std::vector<std::string> JsScripts);
std::vector<std::string> UserStylesheets);
tooling::ExecutionContext *ECtx;
std::string ProjectName; // Name of project clang-doc is documenting.
bool PublicOnly; // Indicates if only public declarations are documented.
Expand All @@ -498,8 +497,6 @@ struct ClangDocContext {
std::vector<std::string> UserStylesheets;
// JavaScript files that will be imported in all HTML files.
std::vector<std::string> JsScripts;
// Other files that should be copied to OutDirectory, besides UserStylesheets.
std::vector<std::string> FilesToCopy;
Index Idx;
};

Expand Down
8 changes: 4 additions & 4 deletions clang-tools-extra/clang-doc/assets/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ function createIndex(Index) {

// Runs after DOM loads
document.addEventListener("DOMContentLoaded", function() {
// JsonIndex is a variable from another file that contains the index
// in JSON format
var Index = JSON.parse(JsonIndex);
createIndex(Index);
// LoadIndex is an asynchronous function that will be generated by clang-doc.
// It ensures that the function call will not block as soon as the page loads,
// since the index object is often huge and can contain thousands of lines.
LoadIndex().then((Index) => { createIndex(Index); });
});
103 changes: 82 additions & 21 deletions clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ static llvm::cl::list<std::string> UserStylesheets(
llvm::cl::desc("CSS stylesheets to extend the default styles."),
llvm::cl::cat(ClangDocCategory));

static llvm::cl::opt<std::string> UserAssetPath(
"asset",
llvm::cl::desc("User supplied asset path to "
"override the default css and js files for html output"),
llvm::cl::cat(ClangDocCategory));

static llvm::cl::opt<std::string> SourceRoot("source-root", llvm::cl::desc(R"(
Directory where processed files are stored.
Links to definition locations will only be
Expand Down Expand Up @@ -127,16 +133,84 @@ std::string getFormatString() {
// GetMainExecutable (since some platforms don't support taking the
// address of main, and some platforms can't implement GetMainExecutable
// without being given the address of a function in the main executable).
std::string GetExecutablePath(const char *Argv0, void *MainAddr) {
std::string getExecutablePath(const char *Argv0, void *MainAddr) {
// Forwards both arguments unchanged to llvm::sys::fs::getMainExecutable and
// returns its result.
return llvm::sys::fs::getMainExecutable(Argv0, MainAddr);
}

/// Collects asset files from the directory named by the UserAssetPath option.
///
/// Each regular file found directly in that directory is classified by its
/// extension: `.css` files are inserted at the front of
/// CDCtx.UserStylesheets, `.js` files are appended to CDCtx.JsScripts, and
/// anything else is ignored.
///
/// \param CDCtx context whose UserStylesheets and JsScripts lists are extended.
/// \returns a file error (carrying the most recently visited path, or the
/// asset path itself if nothing was visited) when directory iteration reports
/// an error; success otherwise.
llvm::Error getAssetFiles(clang::doc::ClangDocContext &CDCtx) {
  using DirIt = llvm::sys::fs::directory_iterator;
  std::error_code EC;
  llvm::SmallString<128> Current(UserAssetPath);

  DirIt Entry = DirIt(UserAssetPath, EC);
  DirIt End;
  while (!EC && Entry != End) {
    Current = Entry->path();
    if (llvm::sys::fs::is_regular_file(Current)) {
      // Look the extension up once and dispatch on it.
      const auto Ext = llvm::sys::path::extension(Current);
      if (Ext == ".css")
        CDCtx.UserStylesheets.insert(CDCtx.UserStylesheets.begin(),
                                     std::string(Current));
      else if (Ext == ".js")
        CDCtx.JsScripts.emplace_back(Current.str());
    }
    Entry.increment(EC);
  }

  if (EC)
    return llvm::createFileError(Current, EC);
  return llvm::Error::success();
}

/// Registers the default assets installed alongside the executable.
///
/// The asset directory is computed as `<dir of executable>/../share/clang-doc`.
/// Both `index.js` and `clang-doc-default-stylesheet.css` must exist there as
/// regular files; on success the stylesheet is inserted at the front of
/// CDCtx.UserStylesheets and the script is appended to CDCtx.JsScripts.
///
/// \param Argv0 argv[0] of the process, passed on to getExecutablePath.
/// \param CDCtx context whose asset lists are extended.
/// \returns a string error naming the missing file (index.js is checked
/// first), or success.
llvm::Error getDefaultAssetFiles(const char *Argv0,
                                 clang::doc::ClangDocContext &CDCtx) {
  // getExecutablePath needs the address of a function in the main executable;
  // it serves as its own anchor here.
  void *ExeAnchor = (void *)(intptr_t)getExecutablePath;
  std::string ExePath = getExecutablePath(Argv0, ExeAnchor);
  llvm::SmallString<128> NativeExePath;
  llvm::sys::path::native(ExePath, NativeExePath);

  llvm::SmallString<128> AssetDir;
  AssetDir = llvm::sys::path::parent_path(NativeExePath);
  llvm::sys::path::append(AssetDir, "..", "share", "clang-doc");

  llvm::SmallString<128> ScriptFile;
  llvm::sys::path::native(AssetDir, ScriptFile);
  llvm::sys::path::append(ScriptFile, "index.js");
  if (!llvm::sys::fs::is_regular_file(ScriptFile))
    return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                   "default index.js file missing at " +
                                       ScriptFile + "\n");

  llvm::SmallString<128> StylesheetFile;
  llvm::sys::path::native(AssetDir, StylesheetFile);
  llvm::sys::path::append(StylesheetFile,
                          "clang-doc-default-stylesheet.css");
  if (!llvm::sys::fs::is_regular_file(StylesheetFile))
    return llvm::createStringError(
        llvm::inconvertibleErrorCode(),
        "default clang-doc-default-stylesheet.css file missing at " +
            StylesheetFile + "\n");

  CDCtx.UserStylesheets.insert(CDCtx.UserStylesheets.begin(),
                               std::string(StylesheetFile));
  CDCtx.JsScripts.emplace_back(ScriptFile.str());

  return llvm::Error::success();
}

// Chooses where the HTML generator's css/js assets come from and registers
// them in CDCtx.
//
// - If UserAssetPath names an existing directory, its contents are used
//   (see getAssetFiles).
// - Otherwise the default assets located relative to the executable are used
//   (see getDefaultAssetFiles). If UserAssetPath was non-empty but is not a
//   directory, a notice about the fallback is printed first.
//
// Argv0 is forwarded to getDefaultAssetFiles. Returns whatever error the
// selected helper reports, or success.
llvm::Error getHtmlAssetFiles(const char *Argv0,
                              clang::doc::ClangDocContext &CDCtx) {
  // Query the filesystem exactly once so that the fallback notice and the
  // actual choice of asset source can never disagree (and to avoid a
  // redundant status lookup on the same path).
  const bool UserPathIsDir =
      llvm::sys::fs::is_directory(std::string(UserAssetPath));

  if (UserPathIsDir)
    return getAssetFiles(CDCtx);

  // NOTE(review): this notice goes to stdout, matching existing behavior;
  // consider whether llvm::errs() would be more appropriate for diagnostics.
  if (!UserAssetPath.empty())
    llvm::outs() << "Asset path supply is not a directory: " << UserAssetPath
                 << " falling back to default\n";

  return getDefaultAssetFiles(Argv0, CDCtx);
}

int main(int argc, const char **argv) {
llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
std::error_code OK;

const char *Overview =
R"(Generates documentation from source code and comments.
R"(Generates documentation from source code and comments.
Example usage for files without flags (default):
Expand Down Expand Up @@ -178,27 +252,14 @@ Example usage for a project using a compile commands database:
OutDirectory,
SourceRoot,
RepositoryUrl,
{UserStylesheets.begin(), UserStylesheets.end()},
{"index.js", "index_json.js"}};
{UserStylesheets.begin(), UserStylesheets.end()}
};

if (Format == "html") {
void *MainAddr = (void *)(intptr_t)GetExecutablePath;
std::string ClangDocPath = GetExecutablePath(argv[0], MainAddr);
llvm::SmallString<128> NativeClangDocPath;
llvm::sys::path::native(ClangDocPath, NativeClangDocPath);
llvm::SmallString<128> AssetsPath;
AssetsPath = llvm::sys::path::parent_path(NativeClangDocPath);
llvm::sys::path::append(AssetsPath, "..", "share", "clang-doc");
llvm::SmallString<128> DefaultStylesheet;
llvm::sys::path::native(AssetsPath, DefaultStylesheet);
llvm::sys::path::append(DefaultStylesheet,
"clang-doc-default-stylesheet.css");
llvm::SmallString<128> IndexJS;
llvm::sys::path::native(AssetsPath, IndexJS);
llvm::sys::path::append(IndexJS, "index.js");
CDCtx.UserStylesheets.insert(CDCtx.UserStylesheets.begin(),
std::string(DefaultStylesheet));
CDCtx.FilesToCopy.emplace_back(IndexJS.str());
if (auto Err = getHtmlAssetFiles(argv[0], CDCtx)) {
llvm::errs() << toString(std::move(Err)) << "\n";
return 1;
}
}

// Mapping phase
Expand Down
Loading