33 changes: 33 additions & 0 deletions bolt/include/bolt/Profile/ProfileYAMLMapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,36 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {
static const bool flow = true;
};

namespace bolt {
/// A single pseudo probe record attached to a basic block profile.
/// The YAML mapping serializes the fields as "guid", "id" and "type".
struct PseudoProbeInfo {
  llvm::yaml::Hex64 GUID;
  uint64_t Index;
  uint8_t Type;

  /// Two records compare equal when both GUID and Index agree.
  /// NOTE(review): Type does not take part in the comparison - confirm that
  /// this is intentional.
  bool operator==(const PseudoProbeInfo &Other) const {
    if (!(GUID == Other.GUID))
      return false;
    return Index == Other.Index;
  }

  /// Negation of operator==.
  bool operator!=(const PseudoProbeInfo &Other) const {
    return !operator==(Other);
  }
};
} // end namespace bolt

template <> struct MappingTraits<bolt::PseudoProbeInfo> {
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
YamlIO.mapRequired("guid", PI.GUID);
YamlIO.mapRequired("id", PI.Index);
YamlIO.mapRequired("type", PI.Type);
}

static const bool flow = true;
};
} // end namespace yaml
} // end namespace llvm

LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::CallSiteInfo)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::SuccessorInfo)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeInfo)

namespace llvm {
namespace yaml {
Expand All @@ -111,6 +136,7 @@ struct BinaryBasicBlockProfile {
uint64_t EventCount{0};
std::vector<CallSiteInfo> CallSites;
std::vector<SuccessorInfo> Successors;
std::vector<PseudoProbeInfo> PseudoProbes;

bool operator==(const BinaryBasicBlockProfile &Other) const {
return Index == Other.Index;
Expand All @@ -132,6 +158,8 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
std::vector<bolt::CallSiteInfo>());
YamlIO.mapOptional("succ", BBP.Successors,
std::vector<bolt::SuccessorInfo>());
YamlIO.mapOptional("pseudo_probes", BBP.PseudoProbes,
std::vector<bolt::PseudoProbeInfo>());
}
};

Expand All @@ -151,6 +179,8 @@ struct BinaryFunctionProfile {
llvm::yaml::Hex64 Hash{0};
uint64_t ExecCount{0};
std::vector<BinaryBasicBlockProfile> Blocks;
llvm::yaml::Hex64 GUID{0};
llvm::yaml::Hex64 PseudoProbeDescHash{0};
bool Used{false};
};
} // end namespace bolt
Expand All @@ -164,6 +194,9 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
YamlIO.mapOptional("blocks", BFP.Blocks,
std::vector<bolt::BinaryBasicBlockProfile>());
YamlIO.mapOptional("guid", BFP.GUID, (uint64_t)0);
YamlIO.mapOptional("pseudo_probe_desc_hash", BFP.PseudoProbeDescHash,
(uint64_t)0);
}
};

Expand Down
56 changes: 56 additions & 0 deletions bolt/include/bolt/Profile/YAMLProfileReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,59 @@ class YAMLProfileReader : public ProfileReaderBase {
using ProfileLookupMap =
DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>;

/// A class for matching binary functions in functions in the YAML profile.
/// First, a call graph is constructed for both profiled and binary functions.
/// Then functions are hashed based on the names of their callee/caller
/// functions. Finally, functions are matched based on these neighbor hashes.
class CallGraphMatcher {
public:
/// Constructs the call graphs for binary and profiled functions and
/// computes neighbor hashes for binary functions.
CallGraphMatcher(BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP,
ProfileLookupMap &IdToYAMLBF);

/// Returns the YamlBFs adjacent to the parameter YamlBF in the call graph.
std::optional<std::set<yaml::bolt::BinaryFunctionProfile *>>
getAdjacentYamlBFs(yaml::bolt::BinaryFunctionProfile &YamlBF) {
auto It = YamlBFAdjacencyMap.find(&YamlBF);
return It == YamlBFAdjacencyMap.end() ? std::nullopt
: std::make_optional(It->second);
}

/// Returns the binary functions with the parameter neighbor hash.
std::optional<std::vector<BinaryFunction *>>
getBFsWithNeighborHash(uint64_t NeighborHash) {
auto It = NeighborHashToBFs.find(NeighborHash);
return It == NeighborHashToBFs.end() ? std::nullopt
: std::make_optional(It->second);
}

private:
/// Adds edges to the binary function call graph given the callsites of the
/// parameter function.
void constructBFCG(BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP);

/// Using the constructed binary function call graph, computes and creates
/// mappings from "neighbor hash" (composed of the function names of callee
/// and caller functions of a function) to binary functions.
void computeBFNeighborHashes(BinaryContext &BC);

/// Constructs the call graph for profile functions.
void constructYAMLFCG(yaml::bolt::BinaryProfile &YamlBP,
ProfileLookupMap &IdToYAMLBF);

/// Adjacency map for binary functions in the call graph.
DenseMap<BinaryFunction *, std::set<BinaryFunction *>> BFAdjacencyMap;

/// Maps neighbor hashes to binary functions.
DenseMap<uint64_t, std::vector<BinaryFunction *>> NeighborHashToBFs;

/// Adjacency map for profile functions in the call graph.
DenseMap<yaml::bolt::BinaryFunctionProfile *,
std::set<yaml::bolt::BinaryFunctionProfile *>>
YamlBFAdjacencyMap;
};

private:
/// Adjustments for basic samples profiles (without LBR).
bool NormalizeByInsnCount{false};
Expand Down Expand Up @@ -100,6 +153,9 @@ class YAMLProfileReader : public ProfileReaderBase {
/// Matches functions using exact hash.
size_t matchWithHash(BinaryContext &BC);

/// Matches functions using the call graph.
size_t matchWithCallGraph(BinaryContext &BC);

/// Matches functions with similarly named profiled functions.
size_t matchWithNameSimilarity(BinaryContext &BC);

Expand Down
9 changes: 0 additions & 9 deletions bolt/include/bolt/Rewrite/DWARFRewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,6 @@ class DWARFRewriter {

std::mutex LocListDebugInfoPatchesMutex;

/// Maps a DWO id to its RangesBase.
std::unordered_map<uint64_t, uint64_t> DwoRangesBase;

std::unordered_map<DWARFUnit *, uint64_t> LineTablePatchMap;
std::unordered_map<const DWARFUnit *, uint64_t> TypeUnitRelocMap;

Expand Down Expand Up @@ -191,12 +188,6 @@ class DWARFRewriter {
/// Update stmt_list for CUs based on the new .debug_line \p Layout.
void updateLineTableOffsets(const MCAssembler &Asm);

/// Returns the RangesBase stored for \p DWOId. The lookup uses operator[],
/// so an id that was never set gets a zero entry inserted and returned.
uint64_t getDwoRangesBase(uint64_t DWOId) { return DwoRangesBase[DWOId]; }

/// Records \p RangesBase for \p DWOId, overwriting any previous value.
void setDwoRangesBase(uint64_t DWOId, uint64_t RangesBase) {
DwoRangesBase[DWOId] = RangesBase;
}

using OverriddenSectionsMap = std::unordered_map<DWARFSectionKind, StringRef>;
/// Output .dwo files.
void writeDWOFiles(DWARFUnit &, const OverriddenSectionsMap &,
Expand Down
36 changes: 23 additions & 13 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ PrintMemData("print-mem-data",

cl::opt<std::string> CompDirOverride(
"comp-dir-override",
cl::desc("overrides DW_AT_comp_dir, and provides an alterantive base "
cl::desc("overrides DW_AT_comp_dir, and provides an alternative base "
"location, which is used with DW_AT_dwo_name to construct a path "
"to *.dwo files."),
cl::Hidden, cl::init(""), cl::cat(BoltCategory));
Expand Down Expand Up @@ -646,7 +646,7 @@ bool BinaryContext::analyzeJumpTable(const uint64_t Address,
const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Value);
const bool DoesBelongToFunction =
BF.containsAddress(Value) ||
(TargetBF && TargetBF->isParentOrChildOf(BF));
(TargetBF && areRelatedFragments(TargetBF, &BF));
if (!DoesBelongToFunction) {
LLVM_DEBUG({
if (!BF.containsAddress(Value)) {
Expand Down Expand Up @@ -839,9 +839,11 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
assert(Address == JT->getAddress() && "unexpected non-empty jump table");

// Prevent associating a jump table to a specific fragment twice.
// This simple check arises from the assumption: no more than 2 fragments.
if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
assert(JT->Parents[0]->isParentOrChildOf(Function) &&
if (!llvm::is_contained(JT->Parents, &Function)) {
assert(llvm::all_of(JT->Parents,
[&](const BinaryFunction *BF) {
return areRelatedFragments(&Function, BF);
}) &&
"cannot re-use jump table of a different function");
// Duplicate the entry for the parent function for easy access
JT->Parents.push_back(&Function);
Expand All @@ -852,8 +854,8 @@ BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
JT->print(this->outs());
}
Function.JumpTables.emplace(Address, JT);
JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
for (BinaryFunction *Parent : JT->Parents)
Parent->setHasIndirectTargetToSplitFragment(true);
}

bool IsJumpTableParent = false;
Expand Down Expand Up @@ -1209,12 +1211,13 @@ void BinaryContext::generateSymbolHashes() {
}

bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
BinaryFunction &Function) const {
BinaryFunction &Function) {
assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
if (TargetFunction.isChildOf(Function))
return true;
TargetFunction.addParentFragment(Function);
Function.addFragment(TargetFunction);
FragmentClasses.unionSets(&TargetFunction, &Function);
if (!HasRelocations) {
TargetFunction.setSimple(false);
Function.setSimple(false);
Expand Down Expand Up @@ -1336,7 +1339,7 @@ void BinaryContext::processInterproceduralReferences() {

if (TargetFunction) {
if (TargetFunction->isFragment() &&
!TargetFunction->isChildOf(Function)) {
!areRelatedFragments(TargetFunction, &Function)) {
this->errs()
<< "BOLT-WARNING: interprocedural reference between unrelated "
"fragments: "
Expand Down Expand Up @@ -2367,10 +2370,7 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(VecOS);
std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
*TheTriple, *LocalCtx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI,
/*RelaxAll=*/false,
/*IncrementalLinkerCompatible=*/false,
/*DWARFMustBeAtTheEnd=*/false));
std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), *STI));

Streamer->initSections(false, *STI);

Expand Down Expand Up @@ -2523,6 +2523,16 @@ BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
return nullptr;
}

/// Deregister JumpTable registered at a given \p Address and delete it.
void BinaryContext::deleteJumpTable(uint64_t Address) {
assert(JumpTables.count(Address) && "Must have a jump table at address");
JumpTable *JT = JumpTables.at(Address);
for (BinaryFunction *Parent : JT->Parents)
Parent->JumpTables.erase(Address);
JumpTables.erase(Address);
delete JT;
}

DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
const DWARFAddressRangesVector &InputRanges) const {
DebugAddressRangesVector OutputRanges;
Expand Down
47 changes: 45 additions & 2 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,9 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
// setting the value of the register used by the branch.
MCInst *MemLocInstr;

// The instruction loading the fixed PIC jump table entry value.
MCInst *FixedEntryLoadInstr;

// Address of the table referenced by MemLocInstr. Could be either an
// array of function pointers, or a jump table.
uint64_t ArrayStart = 0;
Expand Down Expand Up @@ -811,7 +814,7 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,

IndirectBranchType BranchType = BC.MIB->analyzeIndirectBranch(
Instruction, Begin, Instructions.end(), PtrSize, MemLocInstr, BaseRegNum,
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr, FixedEntryLoadInstr);

if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
return BranchType;
Expand Down Expand Up @@ -877,6 +880,43 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
if (BaseRegNum == BC.MRI->getProgramCounter())
ArrayStart += getAddress() + Offset + Size;

if (FixedEntryLoadInstr) {
assert(BranchType == IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH &&
"Invalid IndirectBranch type");
MCInst::iterator FixedEntryDispOperand =
BC.MIB->getMemOperandDisp(*FixedEntryLoadInstr);
assert(FixedEntryDispOperand != FixedEntryLoadInstr->end() &&
"Invalid memory instruction");
const MCExpr *FixedEntryDispExpr = FixedEntryDispOperand->getExpr();
const uint64_t EntryAddress = getExprValue(FixedEntryDispExpr);
uint64_t EntrySize = BC.getJumpTableEntrySize(JumpTable::JTT_PIC);
ErrorOr<int64_t> Value =
BC.getSignedValueAtAddress(EntryAddress, EntrySize);
if (!Value)
return IndirectBranchType::UNKNOWN;

BC.outs() << "BOLT-INFO: fixed PIC indirect branch detected in " << *this
<< " at 0x" << Twine::utohexstr(getAddress() + Offset)
<< " referencing data at 0x" << Twine::utohexstr(EntryAddress)
<< " the destination value is 0x"
<< Twine::utohexstr(ArrayStart + *Value) << '\n';

TargetAddress = ArrayStart + *Value;

// Remove spurious JumpTable at EntryAddress caused by PIC reference from
// the load instruction.
BC.deleteJumpTable(EntryAddress);

// Replace FixedEntryDispExpr used in target address calculation with outer
// jump table reference.
JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart);
assert(JT && "Must have a containing jump table for PIC fixed branch");
BC.MIB->replaceMemOperandDisp(*FixedEntryLoadInstr, JT->getFirstLabel(),
EntryAddress - ArrayStart, &*BC.Ctx);

return BranchType;
}

LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
<< Twine::utohexstr(ArrayStart) << '\n');

Expand Down Expand Up @@ -1126,6 +1166,7 @@ void BinaryFunction::handleIndirectBranch(MCInst &Instruction, uint64_t Size,
}
case IndirectBranchType::POSSIBLE_JUMP_TABLE:
case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH:
if (opts::JumpTables == JTS_NONE)
IsSimple = false;
break;
Expand Down Expand Up @@ -1878,9 +1919,11 @@ bool BinaryFunction::postProcessIndirectBranches(
int64_t DispValue;
const MCExpr *DispExpr;
MCInst *PCRelBaseInstr;
MCInst *FixedEntryLoadInstr;
IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
Instr, BB.begin(), II, PtrSize, MemLocInstr, BaseRegNum,
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr,
FixedEntryLoadInstr);
if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
continue;

Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Core/Exceptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
"BOLT-ERROR: cannot find landing pad fragment");
BC.addInterproceduralReference(this, Fragment->getAddress());
BC.processInterproceduralReferences();
assert(isParentOrChildOf(*Fragment) &&
assert(BC.areRelatedFragments(this, Fragment) &&
"BOLT-ERROR: cannot have landing pads in different functions");
setHasIndirectTargetToSplitFragment(true);
BC.addFragmentsToSkip(this);
Expand Down
7 changes: 2 additions & 5 deletions bolt/lib/Passes/AsmDump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,8 @@ void dumpFunction(const BinaryFunction &BF) {
auto FOut = std::make_unique<formatted_raw_ostream>(OS);
FOut->SetUnbuffered();
std::unique_ptr<MCStreamer> AsmStreamer(
createAsmStreamer(*LocalCtx, std::move(FOut),
/*isVerboseAsm=*/true,
/*useDwarfDirectory=*/false, InstructionPrinter,
std::move(MCEInstance.MCE), std::move(MAB),
/*ShowInst=*/false));
createAsmStreamer(*LocalCtx, std::move(FOut), InstructionPrinter,
std::move(MCEInstance.MCE), std::move(MAB)));
AsmStreamer->initSections(true, *BC.STI);
std::unique_ptr<TargetMachine> TM(BC.TheTarget->createTargetMachine(
BC.TripleName, "", "", TargetOptions(), std::nullopt));
Expand Down
4 changes: 3 additions & 1 deletion bolt/lib/Passes/IndirectCallPromotion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,13 +386,15 @@ IndirectCallPromotion::maybeGetHotJumpTableTargets(BinaryBasicBlock &BB,
JumpTableInfoType HotTargets;
MCInst *MemLocInstr;
MCInst *PCRelBaseOut;
MCInst *FixedEntryLoadInstr;
unsigned BaseReg, IndexReg;
int64_t DispValue;
const MCExpr *DispExpr;
MutableArrayRef<MCInst> Insts(&BB.front(), &CallInst);
const IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
CallInst, Insts.begin(), Insts.end(), BC.AsmInfo->getCodePointerSize(),
MemLocInstr, BaseReg, IndexReg, DispValue, DispExpr, PCRelBaseOut);
MemLocInstr, BaseReg, IndexReg, DispValue, DispExpr, PCRelBaseOut,
FixedEntryLoadInstr);

assert(MemLocInstr && "There should always be a load for jump tables");
if (!MemLocInstr)
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Profile/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ add_llvm_library(LLVMBOLTProfile

LINK_COMPONENTS
Demangle
MC
Support
TransformUtils
)
Expand Down
31 changes: 31 additions & 0 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ MaxSamples("max-samples",
cl::cat(AggregatorCategory));

extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
extern cl::opt<bool> ProfileUsePseudoProbes;
extern cl::opt<std::string> SaveProfile;

cl::opt<bool> ReadPreAggregated(
Expand Down Expand Up @@ -2298,6 +2299,9 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,

yaml::bolt::BinaryProfile BP;

const MCPseudoProbeDecoder *PseudoProbeDecoder =
opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;

// Fill out the header info.
BP.Header.Version = 1;
BP.Header.FileName = std::string(BC.getFilename());
Expand Down Expand Up @@ -2398,6 +2402,33 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
const unsigned BlockIndex = BlockMap.getBBIndex(BI.To.Offset);
YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
}
if (PseudoProbeDecoder) {
if ((YamlBF.GUID = BF->getGUID())) {
const MCPseudoProbeFuncDesc *FuncDesc =
PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
}
// Fetch probes belonging to all fragments
const AddressProbesMap &ProbeMap =
PseudoProbeDecoder->getAddress2ProbesMap();
BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
Fragments.insert(BF);
for (const BinaryFunction *F : Fragments) {
const uint64_t FuncAddr = F->getAddress();
const auto &FragmentProbes =
llvm::make_range(ProbeMap.lower_bound(FuncAddr),
ProbeMap.lower_bound(FuncAddr + F->getSize()));
for (const auto &[OutputAddress, Probes] : FragmentProbes) {
const uint32_t InputOffset = BAT->translate(
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
const unsigned BlockIndex = getBlock(InputOffset).second;
for (const MCDecodedPseudoProbe &Probe : Probes)
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
Probe.getType()});
}
}
}
// Drop blocks without a hash, won't be useful for stale matching.
llvm::erase_if(YamlBF.Blocks,
[](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
Expand Down
162 changes: 155 additions & 7 deletions bolt/lib/Profile/YAMLProfileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,87 @@ llvm::cl::opt<bool>
MatchProfileWithFunctionHash("match-profile-with-function-hash",
cl::desc("Match profile with function hash"),
cl::Hidden, cl::cat(BoltOptCategory));
/// Hidden option "match-with-call-graph": when set, matchWithCallGraph()
/// attempts to match profiled functions to binary functions via call graph
/// neighbor hashes; when unset that step returns without matching anything.
llvm::cl::opt<bool>
MatchWithCallGraph("match-with-call-graph",
cl::desc("Match functions with call graph"), cl::Hidden,
cl::cat(BoltOptCategory));

/// Hidden option "profile-use-dfs": selects DFS block order for the YAML
/// profile.
llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
cl::desc("use DFS order for YAML profile"),
cl::Hidden, cl::cat(BoltOptCategory));

/// Hidden option "profile-use-pseudo-probes": when set, profile writers query
/// the pseudo probe decoder and attach pseudo probe information to the
/// emitted profile; when unset the decoder is not consulted.
llvm::cl::opt<bool> ProfileUsePseudoProbes(
"profile-use-pseudo-probes",
cl::desc("Use pseudo probes for profile generation and matching"),
cl::Hidden, cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
namespace bolt {

/// Populates all lookup structures of the matcher: the binary call graph,
/// the neighbor hashes derived from it, and the profile call graph.
YAMLProfileReader::CallGraphMatcher::CallGraphMatcher(
    BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP,
    ProfileLookupMap &IdToYAMLBF) {
  // Binary side: build the adjacency map, then hash each function's
  // neighborhood. The hashes only read the binary adjacency map.
  constructBFCG(BC, YamlBP);
  computeBFNeighborHashes(BC);

  // Profile side: build the adjacency map of profiled functions.
  constructYAMLFCG(YamlBP, IdToYAMLBF);
}

/// Walks every instruction of every binary function and, for each call whose
/// target symbol resolves to a binary function, records caller and callee as
/// neighbors of each other in BFAdjacencyMap. \p YamlBP is not used here.
void YAMLProfileReader::CallGraphMatcher::constructBFCG(
    BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP) {
  for (BinaryFunction *CallerBF : BC.getAllBinaryFunctions()) {
    for (const BinaryBasicBlock &Block : CallerBF->blocks()) {
      for (const MCInst &Inst : Block) {
        if (!BC.MIB->isCall(Inst))
          continue;

        // Resolve call target symbol -> binary data -> function; any
        // step that fails leaves CalleeBF null and the call is skipped.
        const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
        BinaryData *TargetData =
            TargetSymbol ? BC.getBinaryDataByName(TargetSymbol->getName())
                         : nullptr;
        BinaryFunction *CalleeBF =
            TargetData ? BC.getFunctionForSymbol(TargetData->getSymbol())
                       : nullptr;
        if (!CalleeBF)
          continue;

        // The graph is undirected: store the edge in both directions.
        BFAdjacencyMap[CalleeBF].insert(CallerBF);
        BFAdjacencyMap[CallerBF].insert(CalleeBF);
      }
    }
  }
}

/// For every binary function that has an entry in BFAdjacencyMap, hashes the
/// concatenation of its neighbors' names and appends the function to the
/// NeighborHashToBFs bucket for that hash.
void YAMLProfileReader::CallGraphMatcher::computeBFNeighborHashes(
    BinaryContext &BC) {
  for (BinaryFunction *Function : BC.getAllBinaryFunctions()) {
    auto Entry = BFAdjacencyMap.find(Function);
    if (Entry != BFAdjacencyMap.end()) {
      // NOTE(review): neighbors live in a std::set of pointers, so the
      // concatenation order follows pointer order - confirm this yields the
      // same order as the profile-side hash computed during matching.
      std::string NeighborNames;
      for (BinaryFunction *Neighbor : Entry->second)
        NeighborNames += Neighbor->getOneName();
      uint64_t NeighborHash = std::hash<std::string>{}(NeighborNames);
      NeighborHashToBFs[NeighborHash].push_back(Function);
    }
  }
}

/// Builds the call graph of profiled functions: for each call site whose
/// DestId is present in \p IdToYAMLBF, records the calling profile and the
/// destination profile as neighbors of each other in YamlBFAdjacencyMap.
void YAMLProfileReader::CallGraphMatcher::constructYAMLFCG(
    yaml::bolt::BinaryProfile &YamlBP, ProfileLookupMap &IdToYAMLBF) {
  for (auto &Caller : YamlBP.Functions) {
    for (auto &Block : Caller.Blocks) {
      for (auto &Site : Block.CallSites) {
        auto CalleeIt = IdToYAMLBF.find(Site.DestId);
        // Call sites whose destination id has no known profile are ignored.
        if (CalleeIt != IdToYAMLBF.end()) {
          // The graph is undirected: store the edge in both directions.
          YamlBFAdjacencyMap[&Caller].insert(CalleeIt->second);
          YamlBFAdjacencyMap[CalleeIt->second].insert(&Caller);
        }
      }
    }
  }
}

bool YAMLProfileReader::isYAML(const StringRef Filename) {
if (auto MB = MemoryBuffer::getFileOrSTDIN(Filename)) {
StringRef Buffer = (*MB)->getBuffer();
Expand Down Expand Up @@ -350,7 +422,7 @@ bool YAMLProfileReader::profileMatches(
}

bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
if (opts::MatchProfileWithFunctionHash)
if (opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)
return true;
for (StringRef Name : BF.getNames())
if (ProfileFunctionNames.contains(Name))
Expand Down Expand Up @@ -446,6 +518,79 @@ size_t YAMLProfileReader::matchWithLTOCommonName() {
return MatchedWithLTOCommonName;
}

/// Matches not-yet-used profiled functions to binary functions via the call
/// graph. For each profiled function, a hash of its neighbors' names selects
/// the binary functions with the same neighbor hash; among those that are not
/// already profiled, the one whose demangled base name shares the longest
/// common prefix with the profiled function's base name is matched.
///
/// \returns the number of functions matched by this step (0 when the
/// match-with-call-graph option is off).
size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
  if (!opts::MatchWithCallGraph)
    return 0;

  size_t MatchedWithCallGraph = 0;
  CallGraphMatcher CGMatcher(BC, YamlBP, IdToYamLBF);

  ItaniumPartialDemangler Demangler;
  // Returns the demangled function base name of FunctionName, or an empty
  // string when the name cannot be demangled or has no base name.
  auto GetBaseName = [&](std::string &FunctionName) {
    if (Demangler.partialDemangle(FunctionName.c_str()))
      return std::string("");
    size_t BufferSize = 1;
    char *Buffer = static_cast<char *>(std::malloc(BufferSize));
    char *BaseName = Demangler.getFunctionBaseName(Buffer, &BufferSize);
    if (!BaseName) {
      std::free(Buffer);
      return std::string("");
    }
    // The demangler may hand back a different buffer than the one passed in;
    // track the returned one so that it is the one released below.
    if (Buffer != BaseName)
      Buffer = BaseName;
    // NOTE(review): BufferSize as updated by the demangler appears to count
    // the terminating NUL, so the resulting string may carry it - confirm.
    std::string BaseNameStr(Buffer, BufferSize);
    std::free(Buffer);
    return BaseNameStr;
  };

  // Matches YAMLBF to BFs with neighbor hashes.
  for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
    if (YamlBF.Used)
      continue;
    auto AdjacentYamlBFsOpt = CGMatcher.getAdjacentYamlBFs(YamlBF);
    if (!AdjacentYamlBFsOpt)
      continue;
    // Bind to the set held by the optional instead of copying it again.
    const std::set<yaml::bolt::BinaryFunctionProfile *> &AdjacentYamlBFs =
        *AdjacentYamlBFsOpt;
    std::string AdjacentYamlBFsHashStr;
    for (auto *AdjacentYamlBF : AdjacentYamlBFs)
      AdjacentYamlBFsHashStr += AdjacentYamlBF->Name;
    uint64_t Hash = std::hash<std::string>{}(AdjacentYamlBFsHashStr);
    auto BFsWithSameHashOpt = CGMatcher.getBFsWithNeighborHash(Hash);
    if (!BFsWithSameHashOpt)
      continue;
    // Bind to the vector held by the optional instead of copying it again.
    const std::vector<BinaryFunction *> &BFsWithSameHash = *BFsWithSameHashOpt;
    // Finds the binary function with the longest common prefix to the profiled
    // function and matches.
    BinaryFunction *ClosestBF = nullptr;
    size_t LCP = 0;
    std::string YamlBFBaseName = GetBaseName(YamlBF.Name);
    for (BinaryFunction *BF : BFsWithSameHash) {
      if (ProfiledFunctions.count(BF))
        continue;
      std::string BFName = std::string(BF->getOneName());
      std::string BFBaseName = GetBaseName(BFName);
      size_t PrefixLength = 0;
      size_t N = std::min(YamlBFBaseName.size(), BFBaseName.size());
      for (size_t I = 0; I < N; ++I) {
        if (YamlBFBaseName[I] != BFBaseName[I])
          break;
        ++PrefixLength;
      }
      // '>=' lets a later candidate win ties, and a candidate with no common
      // prefix at all is still accepted when nothing better exists.
      if (PrefixLength >= LCP) {
        LCP = PrefixLength;
        ClosestBF = BF;
      }
    }
    if (ClosestBF) {
      matchProfileToFunction(YamlBF, *ClosestBF);
      ++MatchedWithCallGraph;
    }
  }

  return MatchedWithCallGraph;
}

size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
if (opts::NameSimilarityFunctionMatchingThreshold == 0)
return 0;
Expand Down Expand Up @@ -581,9 +726,14 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
}
}

// Map profiled function ids to their YAML function profiles.
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
IdToYamLBF[YamlBF.Id] = &YamlBF;

const size_t MatchedWithExactName = matchWithExactName();
const size_t MatchedWithHash = matchWithHash(BC);
const size_t MatchedWithLTOCommonName = matchWithLTOCommonName();
const size_t MatchedWithCallGraph = matchWithCallGraph(BC);
const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC);

for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
Expand All @@ -603,18 +753,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
<< " functions with hash\n";
outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
<< " functions with matching LTO common names\n";
outs() << "BOLT-INFO: matched " << MatchedWithCallGraph
<< " functions with call graph\n";
outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
<< " functions with similar names\n";
}

// Set for parseFunctionProfile().
NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
NormalizeByCalls = usesEvent("branches");

// Map profiled function ids to their YAML function profiles.
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
IdToYamLBF[YamlBF.Id] = &YamlBF;

uint64_t NumUnused = 0;
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
if (YamlBF.Id >= YamlProfileToFunction.size()) {
Expand All @@ -630,7 +777,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {

BC.setNumUnusedProfiledObjects(NumUnused);

if (opts::Lite && opts::MatchProfileWithFunctionHash) {
if (opts::Lite &&
(opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)) {
for (BinaryFunction *BF : BC.getAllBinaryFunctions())
if (!BF->hasProfile())
BF->setIgnored();
Expand Down
25 changes: 25 additions & 0 deletions bolt/lib/Profile/YAMLProfileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

namespace opts {
extern llvm::cl::opt<bool> ProfileUseDFS;
extern llvm::cl::opt<bool> ProfileUsePseudoProbes;
} // namespace opts

namespace llvm {
Expand Down Expand Up @@ -57,6 +58,8 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const BoltAddressTranslation *BAT) {
yaml::bolt::BinaryFunctionProfile YamlBF;
const BinaryContext &BC = BF.getBinaryContext();
const MCPseudoProbeDecoder *PseudoProbeDecoder =
opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;

const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;

Expand All @@ -69,6 +72,13 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
YamlBF.ExecCount = BF.getKnownExecutionCount();
if (PseudoProbeDecoder) {
if ((YamlBF.GUID = BF.getGUID())) {
const MCPseudoProbeFuncDesc *FuncDesc =
PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
}
}

BinaryFunction::BasicBlockOrderType Order;
llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
Expand Down Expand Up @@ -177,6 +187,21 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
++BranchInfo;
}

if (PseudoProbeDecoder) {
const AddressProbesMap &ProbeMap =
PseudoProbeDecoder->getAddress2ProbesMap();
const uint64_t FuncAddr = BF.getAddress();
const std::pair<uint64_t, uint64_t> &BlockRange =
BB->getInputAddressRange();
const auto &BlockProbes =
llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first),
ProbeMap.lower_bound(FuncAddr + BlockRange.second));
for (const auto &[_, Probes] : BlockProbes)
for (const MCDecodedPseudoProbe &Probe : Probes)
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
Probe.getGuid(), Probe.getIndex(), Probe.getType()});
}

YamlBF.Blocks.emplace_back(YamlBB);
}
return YamlBF;
Expand Down
7 changes: 6 additions & 1 deletion bolt/lib/Rewrite/BinaryPassManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,10 @@ static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
cl::ReallyHidden,
cl::cat(BoltOptCategory));

/// Option "shorten-instructions" (on by default). Its value is passed along
/// with the ShortenInstructions pass when that pass is registered.
static cl::opt<bool> ShortenInstructions("shorten-instructions",
cl::desc("shorten instructions"),
cl::init(true),
cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
Expand Down Expand Up @@ -378,7 +382,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
else if (opts::Hugify)
Manager.registerPass(std::make_unique<HugePage>(NeverPrint));

Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint));
Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint),
opts::ShortenInstructions);

Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint),
!opts::KeepNops);
Expand Down
175 changes: 80 additions & 95 deletions bolt/lib/Rewrite/DWARFRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,12 +326,6 @@ static cl::opt<bool> KeepARanges(
"keep or generate .debug_aranges section if .gdb_index is written"),
cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool> DeterministicDebugInfo(
"deterministic-debuginfo",
cl::desc("disables parallel execution of tasks that may produce "
"nondeterministic debug info"),
cl::init(true), cl::cat(BoltCategory));

static cl::opt<std::string> DwarfOutputPath(
"dwarf-output-path",
cl::desc("Path to where .dwo files or dwp file will be written out to."),
Expand Down Expand Up @@ -607,11 +601,6 @@ void DWARFRewriter::updateDebugInfo() {
StrWriter = std::make_unique<DebugStrWriter>(*BC.DwCtx, false);
StrOffstsWriter = std::make_unique<DebugStrOffsetsWriter>(BC);

if (!opts::DeterministicDebugInfo) {
opts::DeterministicDebugInfo = true;
errs() << "BOLT-WARNING: --deterministic-debuginfo is being deprecated\n";
}

/// Stores and serializes information that will be put into the
/// .debug_addr DWARF section.
std::unique_ptr<DebugAddrWriter> FinalAddrWriter;
Expand All @@ -631,8 +620,8 @@ void DWARFRewriter::updateDebugInfo() {
uint32_t CUIndex = 0;
std::mutex AccessMutex;
// Needs to be invoked in the same order as CUs are processed.
auto createRangeLocListAddressWriters =
[&](DWARFUnit &CU) -> DebugLocWriter * {
llvm::DenseMap<uint64_t, uint64_t> LocListWritersIndexByCU;
auto createRangeLocListAddressWriters = [&](DWARFUnit &CU) {
std::lock_guard<std::mutex> Lock(AccessMutex);
const uint16_t DwarfVersion = CU.getVersion();
if (DwarfVersion >= 5) {
Expand All @@ -652,7 +641,6 @@ void DWARFRewriter::updateDebugInfo() {
RangeListsWritersByCU[*DWOId] = std::move(DWORangeListsSectionWriter);
}
AddressWritersByCU[CU.getOffset()] = std::move(AddrW);

} else {
auto AddrW =
std::make_unique<DebugAddrWriter>(&BC, CU.getAddressByteSize());
Expand All @@ -668,7 +656,7 @@ void DWARFRewriter::updateDebugInfo() {
std::move(LegacyRangesSectionWriterByCU);
}
}
return LocListWritersByCU[CUIndex++].get();
LocListWritersIndexByCU[CU.getOffset()] = CUIndex++;
};

DWARF5AcceleratorTable DebugNamesTable(opts::CreateDebugNames, BC,
Expand All @@ -677,75 +665,62 @@ void DWARFRewriter::updateDebugInfo() {
DWPState State;
if (opts::WriteDWP)
initDWPState(State);
auto processUnitDIE = [&](DWARFUnit *Unit, DIEBuilder *DIEBlder) {
// Check if the unit is a skeleton and we need special updates for it and
// its matching split/DWO CU.
// Rewrites the debug info of a single split (DWO) compile unit.
//
//   Unit                     CU in the main binary that SplitCU belongs to
//                            (presumably its skeleton CU -- confirm at caller).
//   SplitCU                  the DWO compile unit to rebuild and emit.
//   DIEBlder                 not referenced in this body.
//   TempRangesSectionWriter  ranges writer used for SplitCU.
//   AddressWriter            address writer shared with the DWO loclist writer.
//   DWOName                  name passed on to the type-unit update and to
//                            emitDWOBuilder.
//   DwarfOutputPath          optional output directory forwarded to
//                            updateDWONameCompDirForTypes.
auto processSplitCU = [&](DWARFUnit &Unit, DWARFUnit &SplitCU,
DIEBuilder &DIEBlder,
DebugRangesSectionWriter &TempRangesSectionWriter,
DebugAddrWriter &AddressWriter,
const std::string &DWOName,
const std::optional<std::string> &DwarfOutputPath) {
// A dedicated DIEBuilder is created for the DWO unit, using SplitCU's own
// DWARF context.
DIEBuilder DWODIEBuilder(BC, &(SplitCU).getContext(), DebugNamesTable,
&Unit);
DWODIEBuilder.buildDWOUnit(SplitCU);
// String writers local to this DWO unit.
DebugStrOffsetsWriter DWOStrOffstsWriter(BC);
DebugStrWriter DWOStrWriter((SplitCU).getContext(), true);
DWODIEBuilder.updateDWONameCompDirForTypes(
DWOStrOffstsWriter, DWOStrWriter, SplitCU, DwarfOutputPath, DWOName);
// Location-list writer local to this DWO unit; it uses Unit's DWARF version
// and the caller-provided address writer.
DebugLoclistWriter DebugLocDWoWriter(Unit, Unit.getVersion(), true,
AddressWriter);

updateUnitDebugInfo(SplitCU, DWODIEBuilder, DebugLocDWoWriter,
TempRangesSectionWriter, AddressWriter);
// Finalize the loclist writer after the unit was updated and before emission.
DebugLocDWoWriter.finalize(DWODIEBuilder,
*DWODIEBuilder.getUnitDIEbyUnit(SplitCU));
// The ranges section is finalized here only for DWARF version 5 and later.
if (Unit.getVersion() >= 5)
TempRangesSectionWriter.finalizeSection();

emitDWOBuilder(DWOName, DWODIEBuilder, *this, SplitCU, Unit, State,
DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter,
GDBIndexSection);
};
auto processMainBinaryCU = [&](DWARFUnit &Unit, DIEBuilder &DIEBlder) {
std::optional<DWARFUnit *> SplitCU;
std::optional<uint64_t> RangesBase;
std::optional<uint64_t> DWOId = Unit->getDWOId();
std::optional<uint64_t> DWOId = Unit.getDWOId();
if (DWOId)
SplitCU = BC.getDWOCU(*DWOId);
DebugLocWriter *DebugLocWriter = createRangeLocListAddressWriters(*Unit);
DebugRangesSectionWriter *RangesSectionWriter =
Unit->getVersion() >= 5 ? RangeListsSectionWriter.get()
: LegacyRangesSectionWriter.get();
DebugAddrWriter *AddressWriter =
AddressWritersByCU[Unit->getOffset()].get();
// Skipping CUs that failed to load.
if (SplitCU) {
DIEBuilder DWODIEBuilder(BC, &(*SplitCU)->getContext(), DebugNamesTable,
Unit);
DWODIEBuilder.buildDWOUnit(**SplitCU);
std::string DWOName = "";
std::optional<std::string> DwarfOutputPath =
opts::DwarfOutputPath.empty()
? std::nullopt
: std::optional<std::string>(opts::DwarfOutputPath.c_str());
{
std::lock_guard<std::mutex> Lock(AccessMutex);
DWOName = DIEBlder->updateDWONameCompDir(
*StrOffstsWriter, *StrWriter, *Unit, DwarfOutputPath, std::nullopt);
}
DebugStrOffsetsWriter DWOStrOffstsWriter(BC);
DebugStrWriter DWOStrWriter((*SplitCU)->getContext(), true);
DWODIEBuilder.updateDWONameCompDirForTypes(DWOStrOffstsWriter,
DWOStrWriter, **SplitCU,
DwarfOutputPath, DWOName);
DebugLoclistWriter DebugLocDWoWriter(*Unit, Unit->getVersion(), true,
*AddressWriter);
DebugRangesSectionWriter *TempRangesSectionWriter = RangesSectionWriter;
if (Unit->getVersion() >= 5) {
TempRangesSectionWriter = RangeListsWritersByCU[*DWOId].get();
} else {
TempRangesSectionWriter = LegacyRangesWritersByCU[*DWOId].get();
RangesBase = RangesSectionWriter->getSectionOffset();
setDwoRangesBase(*DWOId, *RangesBase);
}

updateUnitDebugInfo(*(*SplitCU), DWODIEBuilder, DebugLocDWoWriter,
*TempRangesSectionWriter, *AddressWriter);
DebugLocDWoWriter.finalize(DWODIEBuilder,
*DWODIEBuilder.getUnitDIEbyUnit(**SplitCU));
if (Unit->getVersion() >= 5)
TempRangesSectionWriter->finalizeSection();

emitDWOBuilder(DWOName, DWODIEBuilder, *this, **SplitCU, *Unit, State,
DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter,
GDBIndexSection);
}

if (Unit->getVersion() >= 5) {
RangesBase = RangesSectionWriter->getSectionOffset() +
DebugLocWriter &DebugLocWriter =
*LocListWritersByCU[LocListWritersIndexByCU[Unit.getOffset()]].get();
DebugRangesSectionWriter &RangesSectionWriter =
Unit.getVersion() >= 5 ? *RangeListsSectionWriter.get()
: *LegacyRangesSectionWriter.get();
DebugAddrWriter &AddressWriter =
*AddressWritersByCU[Unit.getOffset()].get();
if (Unit.getVersion() >= 5)
RangeListsSectionWriter->setAddressWriter(&AddressWriter);
if (Unit.getVersion() >= 5) {
RangesBase = RangesSectionWriter.getSectionOffset() +
getDWARF5RngListLocListHeaderSize();
RangesSectionWriter->initSection(*Unit);
StrOffstsWriter->finalizeSection(*Unit, *DIEBlder);
RangesSectionWriter.initSection(Unit);
StrOffstsWriter->finalizeSection(Unit, DIEBlder);
} else if (SplitCU) {
RangesBase = LegacyRangesSectionWriter.get()->getSectionOffset();
}

updateUnitDebugInfo(*Unit, *DIEBlder, *DebugLocWriter, *RangesSectionWriter,
*AddressWriter, RangesBase);
DebugLocWriter->finalize(*DIEBlder, *DIEBlder->getUnitDIEbyUnit(*Unit));
if (Unit->getVersion() >= 5)
RangesSectionWriter->finalizeSection();
updateUnitDebugInfo(Unit, DIEBlder, DebugLocWriter, RangesSectionWriter,
AddressWriter, RangesBase);
DebugLocWriter.finalize(DIEBlder, *DIEBlder.getUnitDIEbyUnit(Unit));
if (Unit.getVersion() >= 5)
RangesSectionWriter.finalizeSection();
};

DIEBuilder DIEBlder(BC, BC.DwCtx.get(), DebugNamesTable);
Expand All @@ -760,25 +735,35 @@ void DWARFRewriter::updateDebugInfo() {
CUOffsetMap OffsetMap =
finalizeTypeSections(DIEBlder, *Streamer, GDBIndexSection);

const bool SingleThreadedMode =
opts::NoThreads || opts::DeterministicDebugInfo;
if (!SingleThreadedMode)
DIEBlder.buildCompileUnits();
if (SingleThreadedMode) {
CUPartitionVector PartVec = partitionCUs(*BC.DwCtx);
for (std::vector<DWARFUnit *> &Vec : PartVec) {
DIEBlder.buildCompileUnits(Vec);
for (DWARFUnit *CU : DIEBlder.getProcessedCUs())
processUnitDIE(CU, &DIEBlder);
finalizeCompileUnits(DIEBlder, *Streamer, OffsetMap,
DIEBlder.getProcessedCUs(), *FinalAddrWriter);
CUPartitionVector PartVec = partitionCUs(*BC.DwCtx);
for (std::vector<DWARFUnit *> &Vec : PartVec) {
DIEBlder.buildCompileUnits(Vec);
for (DWARFUnit *CU : DIEBlder.getProcessedCUs()) {
createRangeLocListAddressWriters(*CU);
std::optional<DWARFUnit *> SplitCU;
std::optional<uint64_t> DWOId = CU->getDWOId();
if (DWOId)
SplitCU = BC.getDWOCU(*DWOId);
if (!SplitCU)
continue;
DebugAddrWriter &AddressWriter =
*AddressWritersByCU[CU->getOffset()].get();
DebugRangesSectionWriter *TempRangesSectionWriter =
CU->getVersion() >= 5 ? RangeListsWritersByCU[*DWOId].get()
: LegacyRangesWritersByCU[*DWOId].get();
std::optional<std::string> DwarfOutputPath =
opts::DwarfOutputPath.empty()
? std::nullopt
: std::optional<std::string>(opts::DwarfOutputPath.c_str());
std::string DWOName = DIEBlder.updateDWONameCompDir(
*StrOffstsWriter, *StrWriter, *CU, DwarfOutputPath, std::nullopt);
processSplitCU(*CU, **SplitCU, DIEBlder, *TempRangesSectionWriter,
AddressWriter, DWOName, DwarfOutputPath);
}
} else {
// Update unit debug info in parallel
ThreadPoolInterface &ThreadPool = ParallelUtilities::getThreadPool();
for (std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units())
ThreadPool.async(processUnitDIE, CU.get(), &DIEBlder);
ThreadPool.wait();
for (DWARFUnit *CU : DIEBlder.getProcessedCUs())
processMainBinaryCU(*CU, DIEBlder);
finalizeCompileUnits(DIEBlder, *Streamer, OffsetMap,
DIEBlder.getProcessedCUs(), *FinalAddrWriter);
}

DebugNamesTable.emitAccelTable();
Expand Down
33 changes: 30 additions & 3 deletions bolt/lib/Rewrite/PseudoProbeRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LEB128.h"
#include <memory>

#undef DEBUG_TYPE
#define DEBUG_TYPE "pseudo-probe-rewriter"
Expand Down Expand Up @@ -48,6 +49,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
clEnumValN(PPP_All, "all", "enable all debugging printout")),
cl::Hidden, cl::cat(BoltCategory));

extern cl::opt<bool> ProfileUsePseudoProbes;
} // namespace opts

namespace {
Expand All @@ -72,23 +74,38 @@ class PseudoProbeRewriter final : public MetadataRewriter {
void parsePseudoProbe();

/// PseudoProbe decoder
MCPseudoProbeDecoder ProbeDecoder;
std::shared_ptr<MCPseudoProbeDecoder> ProbeDecoderPtr;

public:
PseudoProbeRewriter(BinaryContext &BC)
: MetadataRewriter("pseudo-probe-rewriter", BC) {}
: MetadataRewriter("pseudo-probe-rewriter", BC),
ProbeDecoderPtr(std::make_shared<MCPseudoProbeDecoder>()) {
BC.setPseudoProbeDecoder(ProbeDecoderPtr);
}

Error preCFGInitializer() override;
Error postEmitFinalizer() override;

~PseudoProbeRewriter() override { ProbeDecoderPtr.reset(); }
};

/// Pre-CFG hook. Decodes the pseudo probe sections at this early stage only
/// when opts::ProfileUsePseudoProbes is set; otherwise nothing is done here.
/// Always reports success.
Error PseudoProbeRewriter::preCFGInitializer() {
  if (!opts::ProfileUsePseudoProbes)
    return Error::success();

  parsePseudoProbe();
  return Error::success();
}

/// Post-emit hook. Ensures the pseudo probe sections have been decoded and
/// then runs updatePseudoProbes(). Always reports success.
Error PseudoProbeRewriter::postEmitFinalizer() {
  // With opts::ProfileUsePseudoProbes set, the sections were already decoded
  // in preCFGInitializer(). Decode here only in the remaining case so the
  // decoder held by ProbeDecoderPtr is never fed the same sections twice.
  if (!opts::ProfileUsePseudoProbes)
    parsePseudoProbe();
  updatePseudoProbes();

  return Error::success();
}

void PseudoProbeRewriter::parsePseudoProbe() {
MCPseudoProbeDecoder &ProbeDecoder(*ProbeDecoderPtr);
PseudoProbeDescSection = BC.getUniqueSectionByName(".pseudo_probe_desc");
PseudoProbeSection = BC.getUniqueSectionByName(".pseudo_probe");

Expand Down Expand Up @@ -138,9 +155,18 @@ void PseudoProbeRewriter::parsePseudoProbe() {
ProbeDecoder.printGUID2FuncDescMap(outs());
ProbeDecoder.printProbesForAllAddresses(outs());
}

for (const auto &[GUID, FuncDesc] : ProbeDecoder.getGUID2FuncDescMap()) {
if (!FuncStartAddrs.contains(GUID))
continue;
BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncStartAddrs[GUID]);
assert(BF);
BF->setGUID(GUID);
}
}

void PseudoProbeRewriter::updatePseudoProbes() {
MCPseudoProbeDecoder &ProbeDecoder(*ProbeDecoderPtr);
// check if there is pseudo probe section decoded
if (ProbeDecoder.getAddress2ProbesMap().empty())
return;
Expand Down Expand Up @@ -241,6 +267,7 @@ void PseudoProbeRewriter::updatePseudoProbes() {
}

void PseudoProbeRewriter::encodePseudoProbes() {
MCPseudoProbeDecoder &ProbeDecoder(*ProbeDecoderPtr);
// Buffer for new pseudo probes section
SmallString<8> Contents;
MCDecodedPseudoProbe *LastProbe = nullptr;
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,7 @@ Error RewriteInstance::run() {
opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML) {
selectFunctionsToProcess();
disassembleFunctions();
processMetadataPreCFG();
buildFunctionsCFG();
}
processProfileData();
Expand Down
13 changes: 8 additions & 5 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -852,16 +852,19 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return Uses;
}

IndirectBranchType analyzeIndirectBranch(
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override {
IndirectBranchType
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
InstructionIterator End, const unsigned PtrSize,
MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut,
MCInst *&FixedEntryLoadInstr) const override {
MemLocInstrOut = nullptr;
BaseRegNumOut = AArch64::NoRegister;
IndexRegNumOut = AArch64::NoRegister;
DispValueOut = 0;
DispExprOut = nullptr;
FixedEntryLoadInstr = nullptr;

// An instruction referencing memory used by jump instruction (directly or
// via register). This location could be an array of function pointers
Expand Down
3 changes: 2 additions & 1 deletion bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,14 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstr, unsigned &BaseRegNum,
unsigned &IndexRegNum, int64_t &DispValue, const MCExpr *&DispExpr,
MCInst *&PCRelBaseOut) const override {
MCInst *&PCRelBaseOut, MCInst *&FixedEntryLoadInst) const override {
MemLocInstr = nullptr;
BaseRegNum = 0;
IndexRegNum = 0;
DispValue = 0;
DispExpr = nullptr;
PCRelBaseOut = nullptr;
FixedEntryLoadInst = nullptr;

// Check for the following long tail call sequence:
// 1: auipc xi, %pcrel_hi(sym)
Expand Down
105 changes: 66 additions & 39 deletions bolt/lib/Target/X86/X86MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1866,8 +1866,11 @@ class X86MCPlusBuilder : public MCPlusBuilder {
return true;
}

/// Analyzes PIC-style jump table code template and return identified
/// IndirectBranchType, MemLocInstr (all cases) and FixedEntryLoadInstr
/// (POSSIBLE_PIC_FIXED_BRANCH case).
template <typename Itr>
std::pair<IndirectBranchType, MCInst *>
std::tuple<IndirectBranchType, MCInst *, MCInst *>
analyzePICJumpTable(Itr II, Itr IE, MCPhysReg R1, MCPhysReg R2) const {
// Analyze PIC-style jump table code template:
//
Expand All @@ -1876,6 +1879,13 @@ class X86MCPlusBuilder : public MCPlusBuilder {
// add %r2, %r1
// jmp *%r1
//
// or a fixed indirect jump template:
//
// movslq En(%rip), {%r2|%r1} <- FixedEntryLoadInstr
// lea PIC_JUMP_TABLE(%rip), {%r1|%r2} <- MemLocInstr
// add %r2, %r1
// jmp *%r1
//
// (with any irrelevant instructions in-between)
//
// When we call this helper we've already determined %r1 and %r2, and
Expand Down Expand Up @@ -1916,8 +1926,13 @@ class X86MCPlusBuilder : public MCPlusBuilder {
MO.SegRegNum == X86::NoRegister;
};
LLVM_DEBUG(dbgs() << "Checking for PIC jump table\n");
MCInst *MemLocInstr = nullptr;
const MCInst *MovInstr = nullptr;
MCInst *FirstInstr = nullptr;
MCInst *SecondInstr = nullptr;
enum {
NOMATCH = 0,
MATCH_JUMP_TABLE,
MATCH_FIXED_BRANCH,
} MatchingState = NOMATCH;
while (++II != IE) {
MCInst &Instr = *II;
const MCInstrDesc &InstrDesc = Info->get(Instr.getOpcode());
Expand All @@ -1926,68 +1941,76 @@ class X86MCPlusBuilder : public MCPlusBuilder {
// Ignore instructions that don't affect R1, R2 registers.
continue;
}
if (!MovInstr) {
// Expect to see MOV instruction.
if (!isMOVSX64rm32(Instr)) {
LLVM_DEBUG(dbgs() << "MOV instruction expected.\n");
const bool IsMOVSXInstr = isMOVSX64rm32(Instr);
const bool IsLEAInstr = isLEA64r(Instr);
if (MatchingState == NOMATCH) {
if (IsMOVSXInstr)
MatchingState = MATCH_JUMP_TABLE;
else if (IsLEAInstr)
MatchingState = MATCH_FIXED_BRANCH;
else
break;
}

// Check if it's setting %r1 or %r2. In canonical form it sets %r2.
// If it sets %r1 - rename the registers so we have to only check
// a single form.
unsigned MovDestReg = Instr.getOperand(0).getReg();
if (MovDestReg != R2)
// Check if the first instruction is setting %r1 or %r2. In canonical
// form lea sets %r1 and mov sets %r2. If it's the opposite - rename so
// we have to only check a single form.
unsigned DestReg = Instr.getOperand(0).getReg();
MCPhysReg &ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R2 : R1;
if (DestReg != ExpectReg)
std::swap(R1, R2);
if (MovDestReg != R2) {
LLVM_DEBUG(dbgs() << "MOV instruction expected to set %r2\n");
if (DestReg != ExpectReg)
break;
}

// Verify operands for MOV.
// Verify operands
std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr);
if (!MO)
break;
if (!isIndexed(*MO, R1))
// POSSIBLE_PIC_JUMP_TABLE
if ((MatchingState == MATCH_JUMP_TABLE && isIndexed(*MO, R1)) ||
(MatchingState == MATCH_FIXED_BRANCH && isRIPRel(*MO)))
FirstInstr = &Instr;
else
break;
MovInstr = &Instr;
} else {
if (!InstrDesc.hasDefOfPhysReg(Instr, R1, *RegInfo))
unsigned ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R1 : R2;
if (!InstrDesc.hasDefOfPhysReg(Instr, ExpectReg, *RegInfo))
continue;
if (!isLEA64r(Instr)) {
LLVM_DEBUG(dbgs() << "LEA instruction expected\n");
if ((MatchingState == MATCH_JUMP_TABLE && !IsLEAInstr) ||
(MatchingState == MATCH_FIXED_BRANCH && !IsMOVSXInstr))
break;
}
if (Instr.getOperand(0).getReg() != R1) {
LLVM_DEBUG(dbgs() << "LEA instruction expected to set %r1\n");
if (Instr.getOperand(0).getReg() != ExpectReg)
break;
}

// Verify operands for LEA.
// Verify operands.
std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr);
if (!MO)
break;
if (!isRIPRel(*MO))
break;
MemLocInstr = &Instr;
SecondInstr = &Instr;
break;
}
}

if (!MemLocInstr)
return std::make_pair(IndirectBranchType::UNKNOWN, nullptr);
if (!SecondInstr)
return std::make_tuple(IndirectBranchType::UNKNOWN, nullptr, nullptr);

if (MatchingState == MATCH_FIXED_BRANCH) {
LLVM_DEBUG(dbgs() << "checking potential fixed indirect branch\n");
return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH,
FirstInstr, SecondInstr);
}
LLVM_DEBUG(dbgs() << "checking potential PIC jump table\n");
return std::make_pair(IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE,
MemLocInstr);
return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE,
SecondInstr, nullptr);
}

IndirectBranchType analyzeIndirectBranch(
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override {
IndirectBranchType
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
InstructionIterator End, const unsigned PtrSize,
MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut,
MCInst *&FixedEntryLoadInst) const override {
// Try to find a (base) memory location from where the address for
// the indirect branch is loaded. For X86-64 the memory will be specified
// in the following format:
Expand All @@ -2014,6 +2037,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
IndexRegNumOut = X86::NoRegister;
DispValueOut = 0;
DispExprOut = nullptr;
FixedEntryLoadInst = nullptr;

std::reverse_iterator<InstructionIterator> II(End);
std::reverse_iterator<InstructionIterator> IE(Begin);
Expand Down Expand Up @@ -2046,7 +2070,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {
unsigned R2 = PrevInstr.getOperand(2).getReg();
if (R1 == R2)
return IndirectBranchType::UNKNOWN;
std::tie(Type, MemLocInstr) = analyzePICJumpTable(PrevII, IE, R1, R2);
std::tie(Type, MemLocInstr, FixedEntryLoadInst) =
analyzePICJumpTable(PrevII, IE, R1, R2);
break;
}
return IndirectBranchType::UNKNOWN;
Expand Down Expand Up @@ -2090,6 +2115,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {
if (MO->ScaleImm != 1 || MO->BaseRegNum != RIPRegister)
return IndirectBranchType::UNKNOWN;
break;
case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH:
break;
default:
if (MO->ScaleImm != PtrSize)
return IndirectBranchType::UNKNOWN;
Expand Down
2 changes: 1 addition & 1 deletion bolt/test/AArch64/update-debug-reloc.test
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# update-debug-sections option.

RUN: %clang %cflags -g %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe
RUN: llvm-bolt %t.exe -o %t --update-debug-sections
RUN: llvm-bolt %t.exe -o %t --update-debug-sections 2>&1 | FileCheck %s

CHECK: BOLT-INFO: Target architecture: aarch64
CHECK-NOT: Reloc num: 10
Expand Down
4 changes: 2 additions & 2 deletions bolt/test/AArch64/veneer-gold.s
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ dummy:
.type foo, %function
foo:
# CHECK: <foo>:
# CHECK-NEXT : {{.*}} bl {{.*}} <foo2>
# CHECK-NEXT: {{.*}} bl {{.*}} <foo2>
bl .L2
ret
.size foo, .-foo
Expand All @@ -38,7 +38,7 @@ foo:
.type foo2, %function
foo2:
# CHECK: <foo2>:
# CHECK-NEXT : {{.*}} bl {{.*}} <foo2>
# CHECK-NEXT: {{.*}} bl {{.*}} <foo2>
bl .L2
ret
.size foo2, .-foo2
Expand Down
2 changes: 1 addition & 1 deletion bolt/test/X86/Inputs/jump-table-fixed-ref-pic.s
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ main:
jae .L4
cmpq $0x1, %rdi
jne .L4
mov .Ljt_pic+8(%rip), %rax
movslq .Ljt_pic+8(%rip), %rax
lea .Ljt_pic(%rip), %rdx
add %rdx, %rax
jmpq *%rax
Expand Down
2 changes: 1 addition & 1 deletion bolt/test/X86/dwarf5-df-types-modify-dwo-name-mixed.test
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@
; BOLT-DWP: DW_TAG_compile_unit
; BOLT-DWP: DW_AT_dwo_name ("main.dwo.dwo")
; BOLT-DWP: DW_TAG_type_unit
; BOLT-DW-NOT: DW_AT_dwo_name
; BOLT-DWP-NOT: DW_AT_dwo_name
; BOLT-DWP: Contribution size = 68, Format = DWARF32, Version = 5
; BOLT-DWP-NEXT: "main"
; BOLT-DWP-NEXT: "int"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
# RUN: %clang %cflags %tmain.o %thelper.o -o %t.exe
# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt | FileCheck --check-prefix=POSTCHECK %s
# RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt | FileCheck --check-prefix=POSTCHECKADDR %s
# RUN: llvm-dwarfdump --show-form --verbose --debug-types %t.bolt | FileCheck --check-prefix=POSTCHECKTU %s

## This test checks that BOLT correctly handles backward and forward cross-CU references
## for DWARF5 and DWARF4 with -fdebug-types-section
## for DWARF5 and DWARF4 with -fdebug-types-section and checks the address table is correct.

# POSTCHECK: version = 0x0005
# POSTCHECK: DW_TAG_type_unit
Expand All @@ -29,6 +30,15 @@
# POSTCHECK: DW_TAG_variable [20]
# POSTCHECK: DW_AT_type [DW_FORM_ref_addr] (0x{{[0-9a-f]+}} "Foo3a")

# POSTCHECKADDR: Addrs: [
# POSTCHECKADDR-NEXT: 0x0000000000001360
# POSTCHECKADDR-NEXT: 0x0000000000000000
# POSTCHECKADDR-NEXT: ]
# POSTCHECKADDR: Addrs: [
# POSTCHECKADDR-NEXT: 0x00000000000013e0
# POSTCHECKADDR-NEXT: 0x0000000000000000
# POSTCHECKADDR-NEXT: ]

# POSTCHECKTU: version = 0x0004
# POSTCHECKTU: DW_TAG_type_unit
# POSTCHECKTU: DW_TAG_structure_type
Expand Down
19 changes: 18 additions & 1 deletion bolt/test/X86/dwarf5-locexpr-referrence.test
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
# RUN: %clang %cflags -dwarf-5 %tmain.o %thelper.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt | FileCheck --check-prefix=CHECK %s
# RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt | FileCheck --check-prefix=CHECKADDR %s

## This test checks that we update relative DIE references with DW_OP_convert that are in locexpr.
## This test checks that we update relative DIE references with DW_OP_convert that are in locexpr
## and checks the address table is correct.

# CHECK: version = 0x0005
# CHECK: DW_TAG_variable
Expand All @@ -19,3 +21,18 @@
# CHECK-SAME: DW_OP_convert (0x00000028 -> 0x00000092)
# CHECK-SAME: DW_OP_convert (0x0000002c -> 0x00000096)
# CHECK: version = 0x0005

# CHECKADDR: Addrs: [
# CHECKADDR-NEXT: 0x0000000000001330
# CHECKADDR-NEXT: 0x0000000000000000
# CHECKADDR-NEXT: 0x0000000000001333
# CHECKADDR-NEXT: ]
# CHECKADDR: Addrs: [
# CHECKADDR-NEXT: 0x0000000000001340
# CHECKADDR-NEXT: 0x0000000000000000
# CHECKADDR-NEXT: 0x0000000000001343
# CHECKADDR-NEXT: ]
# CHECKADDR: Addrs: [
# CHECKADDR-NEXT: 0x0000000000001320
# CHECKADDR-NEXT: 0x0000000000000000
# CHECKADDR-NEXT: ]
2 changes: 1 addition & 1 deletion bolt/test/X86/dwarf5-one-loclists-two-bases.test
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
# POSTCHECK: version = 0x0005
# POSTCHECK: DW_AT_loclists_base [DW_FORM_sec_offset] (0x0000000c)
# POSTCHECK: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c)
# POSTCHECK-EMPTY
# POSTCHECK-EMPTY:
# POSTCHECK: DW_TAG_variable
# POSTCHECK: DW_AT_location [DW_FORM_loclistx]
# POSTCHECK-SAME: indexed (0x0)
Expand Down
2 changes: 1 addition & 1 deletion bolt/test/X86/dwarf5-two-loclists.test
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
# POSTCHECK: version = 0x0005
# POSTCHECK: DW_AT_loclists_base [DW_FORM_sec_offset] (0x0000000c)
# POSTCHECK: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c)
# POSTCHECK-EMPTY
# POSTCHECK-EMPTY:
# POSTCHECK: DW_TAG_variable
# POSTCHECK: DW_AT_location [DW_FORM_loclistx]
# POSTCHECK-SAME: indexed (0x0)
Expand Down
4 changes: 2 additions & 2 deletions bolt/test/X86/dwarf5-two-rnglists.test
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
# POSTCHECK-NEXT: DW_AT_addr_base [DW_FORM_sec_offset] (0x00000008)
# POSTCHECK-NEXT: DW_AT_loclists_base [DW_FORM_sec_offset] (0x0000000c)
# POSTCHECK-NEXT: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c)
# POSTCHECK-EMPTY
# POSTCHECK-EMPTY:
# POSTCHECK: DW_TAG_subprogram
# POSTCHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx]
# POSTCHECK-SAME: indexed (0x1)
Expand All @@ -75,7 +75,7 @@
# POSTCHECK-NEXT: DW_AT_addr_base [DW_FORM_sec_offset] (0x00000030)
# POSTCHECK-NEXT: DW_AT_loclists_base [DW_FORM_sec_offset] (0x00000045)
# POSTCHECK-NEXT: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x00000035)
# POSTCHECK-EMPTY
# POSTCHECK-EMPTY:

# POSTCHECK: DW_TAG_subprogram
# POSTCHECK-NEXT: DW_AT_ranges [DW_FORM_rnglistx]
Expand Down
12 changes: 8 additions & 4 deletions bolt/test/X86/jump-table-fixed-ref-pic.test
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
## Verify that BOLT detects fixed destination of indirect jump for PIC
## case.

XFAIL: *

RUN: %clang %cflags -no-pie %S/Inputs/jump-table-fixed-ref-pic.s -Wl,-q -o %t
RUN: llvm-bolt %t --relocs -o %t.null 2>&1 | FileCheck %s
RUN: llvm-bolt %t --relocs -o %t.null -print-cfg 2>&1 | FileCheck %s

CHECK: BOLT-INFO: fixed PIC indirect branch detected in main {{.*}} the destination value is 0x[[#TGT:]]
CHECK: Binary Function "main" after building cfg

CHECK: BOLT-INFO: fixed indirect branch detected in main
CHECK: movslq ".rodata/1"+8(%rip), %rax
CHECK-NEXT: leaq ".rodata/1"(%rip), %rdx
CHECK-NEXT: addq %rdx, %rax
CHECK-NEXT: jmpq *%rax # UNKNOWN CONTROL FLOW
103 changes: 103 additions & 0 deletions bolt/test/X86/match-functions-with-call-graph.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
## Tests matching functions by their call graph (--match-with-call-graph) in inferStaleProfile.

# REQUIRES: system-linux
# RUN: split-file %s %t
# RUN: %clang %cflags %t/main.cpp -o %t.exe -Wl,-q -nostdlib
# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml --profile-ignore-hash -v=1 \
# RUN: --dyno-stats --print-cfg --infer-stale-profile=1 --match-with-call-graph 2>&1 | FileCheck %s

# CHECK: BOLT-INFO: matched 1 functions with call graph

#--- main.cpp
void foo() {}

void bar() {}

void qux() {
foo();
bar();
}

void fred() {
foo();
qux();
bar();
bar();
foo();
}

int main() {
return 0;
}

#--- yaml
---
header:
profile-version: 1
binary-name: 'match-functions-with-calls-as-anchors.s.tmp.exe'
binary-build-id: '<unknown>'
profile-flags: [ lbr ]
profile-origin: branch profile reader
profile-events: ''
dfs-order: false
hash-func: xxh3
functions:
- name: main
fid: 0
hash: 0x0000000000000001
exec: 1
nblocks: 6
blocks:
- bid: 1
hash: 0x0000000000000001
insns: 1
succ: [ { bid: 3, cnt: 1} ]
- name: _Z3foov
fid: 1
hash: 0x0000000000000002
exec: 1
nblocks: 6
blocks:
- bid: 1
hash: 0x0000000000000002
insns: 1
succ: [ { bid: 3, cnt: 1} ]

- name: _Z3barv
fid: 2
hash: 0x0000000000000003
exec: 1
nblocks: 6
blocks:
- bid: 1
hash: 0x0000000000000003
insns: 1
succ: [ { bid: 3, cnt: 1} ]
- name: _Z3quxv
fid: 3
hash: 0x0000000000000004
exec: 4
nblocks: 6
blocks:
- bid: 1
hash: 0x0000000000000004
insns: 1
succ: [ { bid: 3, cnt: 1} ]
calls: [ { off : 0, fid : 1, cnt : 0},
{ off : 0, fid : 2, cnt : 0} ]
- name: _Z4bazv
fid: 4
hash: 0x0000000000000005
exec: 1
nblocks: 6
blocks:
- bid: 1
hash: 0x0000000000000005
insns: 1
succ: [ { bid: 3, cnt: 1} ]
calls: [ { off : 0, fid : 3, cnt : 0},
{ off : 0, fid : 1, cnt : 0},
{ off : 0, fid : 2, cnt : 0},
{ off : 0, fid : 1, cnt : 0},
{ off : 0, fid : 2, cnt : 0} ]
...
39 changes: 38 additions & 1 deletion bolt/test/X86/pseudoprobe-decoding-inline.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,42 @@
# REQUIRES: system-linux
# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt 2>&1 | FileCheck %s
# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt --lite=0 --enable-bat 2>&1 | FileCheck %s

# PREAGG: B X:0 #foo# 1 0
# PREAGG: B X:0 #bar# 1 0
# PREAGG: B X:0 #main# 1 0
## Check pseudo-probes in regular YAML profile (non-BOLTed binary)
# RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin %t.preagg PREAGG
# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-use-pseudo-probes
# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML
## Check pseudo-probes in BAT YAML profile (BOLTed binary)
# RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG
# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-use-pseudo-probes
# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML
# CHECK-YAML: name: bar
# CHECK-YAML: - bid: 0
# CHECK-YAML: pseudo_probes: [ { guid: 0xE413754A191DB537, id: 1, type: 0 }, { guid: 0xE413754A191DB537, id: 4, type: 0 } ]
# CHECK-YAML: guid: 0xE413754A191DB537
# CHECK-YAML: pseudo_probe_desc_hash: 0x10E852DA94
#
# CHECK-YAML: name: foo
# CHECK-YAML: - bid: 0
# CHECK-YAML: pseudo_probes: [ { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ]
# CHECK-YAML: guid: 0x5CF8C24CDB18BDAC
# CHECK-YAML: pseudo_probe_desc_hash: 0x200205A19C5B4
#
# CHECK-YAML: name: main
# CHECK-YAML: - bid: 0
# CHECK-YAML: pseudo_probes: [ { guid: 0xDB956436E78DD5FA, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ]
# CHECK-YAML: guid: 0xDB956436E78DD5FA
# CHECK-YAML: pseudo_probe_desc_hash: 0x10000FFFFFFFF
#
## Check that without --profile-use-pseudo-probes option, no pseudo probes are
## generated
# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
# CHECK-NO-OPT-NOT: pseudo_probes
# CHECK-NO-OPT-NOT: guid
# CHECK-NO-OPT-NOT: pseudo_probe_desc_hash

CHECK: Report of decoding input pseudo probe binaries

Expand Down
95 changes: 95 additions & 0 deletions bolt/test/X86/three-way-split-jt.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
## This reproduces an issue where the function is split into three fragments
## and all fragments access the same jump table.

# REQUIRES: system-linux

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -o %t.out -v=1 -print-only=main.warm -print-cfg 2>&1 | FileCheck %s

# CHECK-DAG: BOLT-INFO: marking main.warm as a fragment of main
# CHECK-DAG: BOLT-INFO: marking main.cold as a fragment of main
# CHECK-DAG: BOLT-INFO: processing main.warm as a sibling of non-ignored function
# CHECK-DAG: BOLT-INFO: processing main.cold as a sibling of non-ignored function
# CHECK-DAG: BOLT-WARNING: Ignoring main.cold
# CHECK-DAG: BOLT-WARNING: Ignoring main.warm
# CHECK-DAG: BOLT-WARNING: Ignoring main
# CHECK: BOLT-WARNING: skipped 3 functions due to cold fragments

# CHECK: PIC Jump table JUMP_TABLE for function main, main.warm, main.cold
# CHECK-NEXT: 0x0000 : __ENTRY_main@0x[[#]]
# CHECK-NEXT: 0x0004 : __ENTRY_main@0x[[#]]
# CHECK-NEXT: 0x0008 : __ENTRY_main.cold@0x[[#]]
# CHECK-NEXT: 0x000c : __ENTRY_main@0x[[#]]
.globl main
.type main, %function
.p2align 2
main:
LBB0:
andl $0xf, %ecx
cmpb $0x4, %cl
## exit through ret
ja LBB3

## jump table dispatch, jumping to label indexed by val in %ecx
LBB1:
leaq JUMP_TABLE(%rip), %r8
movzbl %cl, %ecx
movslq (%r8,%rcx,4), %rax
addq %rax, %r8
jmpq *%r8

LBB2:
xorq %rax, %rax
LBB3:
addq $0x8, %rsp
ret
.size main, .-main

.globl main.warm
.type main.warm, %function
.p2align 2
main.warm:
LBB20:
andl $0xb, %ebx
cmpb $0x1, %cl
## exit through ret
ja LBB23

## jump table dispatch, jumping to label indexed by val in %ecx
LBB21:
leaq JUMP_TABLE(%rip), %r8
movzbl %cl, %ecx
movslq (%r8,%rcx,4), %rax
addq %rax, %r8
jmpq *%r8

LBB22:
xorq %rax, %rax
LBB23:
addq $0x8, %rsp
ret
.size main.warm, .-main.warm

## cold fragment is only reachable through jump table
.globl main.cold
.type main.cold, %function
main.cold:
leaq JUMP_TABLE(%rip), %r8
movzbl %cl, %ecx
movslq (%r8,%rcx,4), %rax
addq %rax, %r8
jmpq *%r8
LBB4:
callq abort
.size main.cold, .-main.cold

.rodata
## jmp table, entries must be R_X86_64_PC32 relocs
.globl JUMP_TABLE
JUMP_TABLE:
.long LBB2-JUMP_TABLE
.long LBB3-JUMP_TABLE
.long LBB4-JUMP_TABLE
.long LBB3-JUMP_TABLE
2 changes: 1 addition & 1 deletion bolt/test/perf2bolt/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import shutil

if shutil.which("perf") != None:
if shutil.which("perf") is not None:
config.available_features.add("perf")
13 changes: 13 additions & 0 deletions clang-tools-extra/clang-doc/HTMLGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "llvm/Support/JSON.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <optional>
#include <string>

Expand Down Expand Up @@ -979,6 +980,18 @@ static llvm::Error serializeIndex(ClangDocContext &CDCtx) {
"error creating index file: " +
FileErr.message());
}
llvm::SmallString<128> RootPath(CDCtx.OutDirectory);
if (llvm::sys::path::is_relative(RootPath)) {
llvm::sys::fs::make_absolute(RootPath);
}
// Replace the escaped characters with a forward slash. It shouldn't matter
// when rendering the webpage in a web browser. This helps to prevent the
// JavaScript from escaping characters incorrectly, and introducing bad paths
// in the URLs.
std::string RootPathEscaped = RootPath.str().str();
std::replace(RootPathEscaped.begin(), RootPathEscaped.end(), '\\', '/');
OS << "var RootPath = \"" << RootPathEscaped << "\";\n";

CDCtx.Idx.sort();
llvm::json::OStream J(OS, 2);
std::function<void(Index)> IndexToJSON = [&](const Index &I) {
Expand Down
44 changes: 35 additions & 9 deletions clang-tools-extra/clang-doc/Mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,28 @@
#include "clang/AST/Comment.h"
#include "clang/Index/USRGeneration.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Error.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Mutex.h"

namespace clang {
namespace doc {

static llvm::StringSet<> USRVisited;
static llvm::sys::Mutex USRVisitedGuard;

// Returns true when D is a CXXRecordDecl for which
// getTypedefNameForAnonDecl() yields a non-null declaration; returns false
// for every other kind of declaration.
template <typename T> bool isTypedefAnonRecord(const T *D) {
  const auto *Record = dyn_cast<CXXRecordDecl>(D);
  return Record && Record->getTypedefNameForAnonDecl();
}

// Starts the traversal for one translation unit: walks the AST beginning at
// the translation unit declaration obtained from Context.
void MapASTVisitor::HandleTranslationUnit(ASTContext &Context) {
TraverseDecl(Context.getTranslationUnitDecl());
}

template <typename T> bool MapASTVisitor::mapDecl(const T *D) {
template <typename T>
bool MapASTVisitor::mapDecl(const T *D, bool IsDefinition) {
// If we're looking at a decl not in user files, skip this decl.
if (D->getASTContext().getSourceManager().isInSystemHeader(D->getLocation()))
return true;
Expand All @@ -34,6 +46,16 @@ template <typename T> bool MapASTVisitor::mapDecl(const T *D) {
// If there is an error generating a USR for the decl, skip this decl.
if (index::generateUSRForDecl(D, USR))
return true;
// Prevent Visiting USR twice
{
std::lock_guard<llvm::sys::Mutex> Guard(USRVisitedGuard);
StringRef Visited = USR.str();
if (USRVisited.count(Visited) && !isTypedefAnonRecord<T>(D))
return true;
// A USR is considered visited only once its definition has been seen.
if (IsDefinition)
USRVisited.insert(Visited);
}
bool IsFileInRootDir;
llvm::SmallString<128> File =
getFile(D, D->getASTContext(), CDCtx.SourceRoot, IsFileInRootDir);
Expand All @@ -53,30 +75,34 @@ template <typename T> bool MapASTVisitor::mapDecl(const T *D) {
}

bool MapASTVisitor::VisitNamespaceDecl(const NamespaceDecl *D) {
return mapDecl(D);
return mapDecl(D, /*isDefinition=*/true);
}

bool MapASTVisitor::VisitRecordDecl(const RecordDecl *D) { return mapDecl(D); }
bool MapASTVisitor::VisitRecordDecl(const RecordDecl *D) {
return mapDecl(D, D->isThisDeclarationADefinition());
}

bool MapASTVisitor::VisitEnumDecl(const EnumDecl *D) { return mapDecl(D); }
bool MapASTVisitor::VisitEnumDecl(const EnumDecl *D) {
return mapDecl(D, D->isThisDeclarationADefinition());
}

bool MapASTVisitor::VisitCXXMethodDecl(const CXXMethodDecl *D) {
return mapDecl(D);
return mapDecl(D, D->isThisDeclarationADefinition());
}

bool MapASTVisitor::VisitFunctionDecl(const FunctionDecl *D) {
// Don't visit CXXMethodDecls twice
if (isa<CXXMethodDecl>(D))
return true;
return mapDecl(D);
return mapDecl(D, D->isThisDeclarationADefinition());
}

bool MapASTVisitor::VisitTypedefDecl(const TypedefDecl *D) {
return mapDecl(D);
return mapDecl(D, /*isDefinition=*/true);
}

bool MapASTVisitor::VisitTypeAliasDecl(const TypeAliasDecl *D) {
return mapDecl(D);
return mapDecl(D, /*isDefinition=*/true);
}

comments::FullComment *
Expand Down
2 changes: 1 addition & 1 deletion clang-tools-extra/clang-doc/Mapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class MapASTVisitor : public clang::RecursiveASTVisitor<MapASTVisitor>,
bool VisitTypeAliasDecl(const TypeAliasDecl *D);

private:
template <typename T> bool mapDecl(const T *D);
template <typename T> bool mapDecl(const T *D, bool IsDefinition);

int getLine(const NamedDecl *D, const ASTContext &Context) const;
llvm::SmallString<128> getFile(const NamedDecl *D, const ASTContext &Context,
Expand Down
51 changes: 13 additions & 38 deletions clang-tools-extra/clang-doc/assets/index.js
Original file line number Diff line number Diff line change
@@ -1,42 +1,17 @@
// Append using posix-style a file name or directory to Base
function append(Base, New) {
if (!New)
return Base;
if (Base)
Base += "/";
Base += New;
return Base;
}

// Get relative path to access FilePath from CurrentDirectory
function computeRelativePath(FilePath, CurrentDirectory) {
var Path = FilePath;
while (Path) {
if (CurrentDirectory == Path)
return FilePath.substring(Path.length + 1);
Path = Path.substring(0, Path.lastIndexOf("/"));
}

var Dir = CurrentDirectory;
var Result = "";
while (Dir) {
if (Dir == FilePath)
break;
Dir = Dir.substring(0, Dir.lastIndexOf("/"));
Result = append(Result, "..")
function genLink(Ref) {
// We treat file paths differently depending on whether we're
// serving via an HTTP server or viewing from the local filesystem.
var Path = window.location.protocol.startsWith("file") ?
`${window.location.protocol}//${window.location.host}/${Ref.Path}` :
`${window.location.protocol}//${RootPath}/${Ref.Path}`;
if (Ref.RefType === "namespace") {
Path = `${Path}/index.html`
} else if (Ref.Path === "") {
Path = `${Path}${Ref.Name}.html`;
} else {
Path = `${Path}/${Ref.Name}.html`;
}
Result = append(Result, FilePath.substring(Dir.length))
return Result;
}

function genLink(Ref, CurrentDirectory) {
var Path = computeRelativePath(Ref.Path, CurrentDirectory);
if (Ref.RefType == "namespace")
Path = append(Path, "index.html");
else
Path = append(Path, Ref.Name + ".html")

ANode = document.createElement("a");
ANode = document.createElement("a");
ANode.setAttribute("href", Path);
var TextNode = document.createTextNode(Ref.Name);
ANode.appendChild(TextNode);
Expand Down
1 change: 0 additions & 1 deletion clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,6 @@ Example usage for a project using a compile commands database:
for (auto &Group : USRToBitcode) {
Pool.async([&]() {
std::vector<std::unique_ptr<doc::Info>> Infos;

for (auto &Bitcode : Group.getValue()) {
llvm::BitstreamCursor Stream(Bitcode);
doc::ClangDocBitcodeReader Reader(Stream);
Expand Down
4 changes: 2 additions & 2 deletions clang-tools-extra/clang-tidy/add_new_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,8 +552,8 @@ def format_link_alias(doc_file):
f.write(' :header: "Name", "Offers fixes"\n\n')
f.writelines(checks)
# and the aliases
f.write("\n\n")
f.write(".. csv-table:: Aliases..\n")
f.write("\nCheck aliases\n-------------\n\n")
f.write(".. csv-table::\n")
f.write(' :header: "Name", "Redirect", "Offers fixes"\n\n')
f.writelines(checks_alias)
break
Expand Down
12 changes: 6 additions & 6 deletions clang-tools-extra/clang-tidy/bugprone/UnusedReturnValueCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,13 @@ UnusedReturnValueCheck::UnusedReturnValueCheck(llvm::StringRef Name,
"^::sigismember$;"
"^::strcasecmp$;"
"^::strsignal$;"
"^::ttyname"))),
"^::ttyname$"))),
CheckedReturnTypes(utils::options::parseStringList(
Options.get("CheckedReturnTypes", "::std::error_code$;"
"::std::error_condition$;"
"::std::errc$;"
"::std::expected$;"
"::boost::system::error_code"))),
Options.get("CheckedReturnTypes", "^::std::error_code$;"
"^::std::error_condition$;"
"^::std::errc$;"
"^::std::expected$;"
"^::boost::system::error_code$"))),
AllowCastToVoid(Options.get("AllowCastToVoid", false)) {}

UnusedReturnValueCheck::UnusedReturnValueCheck(
Expand Down
354 changes: 177 additions & 177 deletions clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp

Large diffs are not rendered by default.

17 changes: 8 additions & 9 deletions clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,12 @@ void ConstCorrectnessCheck::registerMatchers(MatchFinder *Finder) {
// shall be run.
const auto FunctionScope =
functionDecl(
hasBody(
compoundStmt(forEachDescendant(
declStmt(containsAnyDeclaration(
LocalValDecl.bind("local-value")),
unless(has(decompositionDecl())))
.bind("decl-stmt")))
.bind("scope")))
hasBody(stmt(forEachDescendant(
declStmt(containsAnyDeclaration(
LocalValDecl.bind("local-value")),
unless(has(decompositionDecl())))
.bind("decl-stmt")))
.bind("scope")))
.bind("function-decl");

Finder->addMatcher(FunctionScope, this);
Expand All @@ -109,7 +108,7 @@ void ConstCorrectnessCheck::registerMatchers(MatchFinder *Finder) {
enum class VariableCategory { Value, Reference, Pointer };

void ConstCorrectnessCheck::check(const MatchFinder::MatchResult &Result) {
const auto *LocalScope = Result.Nodes.getNodeAs<CompoundStmt>("scope");
const auto *LocalScope = Result.Nodes.getNodeAs<Stmt>("scope");
const auto *Variable = Result.Nodes.getNodeAs<VarDecl>("local-value");
const auto *Function = Result.Nodes.getNodeAs<FunctionDecl>("function-decl");

Expand Down Expand Up @@ -198,7 +197,7 @@ void ConstCorrectnessCheck::check(const MatchFinder::MatchResult &Result) {
}
}

void ConstCorrectnessCheck::registerScope(const CompoundStmt *LocalScope,
void ConstCorrectnessCheck::registerScope(const Stmt *LocalScope,
ASTContext *Context) {
auto &Analyzer = ScopesCache[LocalScope];
if (!Analyzer)
Expand Down
4 changes: 2 additions & 2 deletions clang-tools-extra/clang-tidy/misc/ConstCorrectnessCheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ class ConstCorrectnessCheck : public ClangTidyCheck {
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;

private:
void registerScope(const CompoundStmt *LocalScope, ASTContext *Context);
void registerScope(const Stmt *LocalScope, ASTContext *Context);

using MutationAnalyzer = std::unique_ptr<ExprMutationAnalyzer>;
llvm::DenseMap<const CompoundStmt *, MutationAnalyzer> ScopesCache;
llvm::DenseMap<const Stmt *, MutationAnalyzer> ScopesCache;
llvm::DenseSet<SourceLocation> TemplateDiagnosticsCache;

const bool AnalyzeValues;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,65 +119,72 @@ void UnnecessaryValueParamCheck::check(const MatchFinder::MatchResult &Result) {
}
}

const size_t Index = llvm::find(Function->parameters(), Param) -
Function->parameters().begin();
handleConstRefFix(*Function, *Param, *Result.Context);
}

// Hands the preprocessor PP to the include inserter. SM and ModuleExpanderPP
// are not used by this override.
void UnnecessaryValueParamCheck::registerPPCallbacks(
const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
Inserter.registerPreprocessor(PP);
}

// Writes this check's options into Opts: "IncludeStyle" comes from the
// include inserter's current style, and "AllowedTypes" is the AllowedTypes
// list serialized into a single string.
void UnnecessaryValueParamCheck::storeOptions(
ClangTidyOptions::OptionMap &Opts) {
Options.store(Opts, "IncludeStyle", Inserter.getStyle());
Options.store(Opts, "AllowedTypes",
utils::options::serializeStringList(AllowedTypes));
}

// Drops all cached mutation-analysis results at the end of the translation
// unit.
void UnnecessaryValueParamCheck::onEndOfTranslationUnit() {
MutationAnalyzerCache.clear();
}

void UnnecessaryValueParamCheck::handleConstRefFix(const FunctionDecl &Function,
const ParmVarDecl &Param,
ASTContext &Context) {
const size_t Index =
llvm::find(Function.parameters(), &Param) - Function.parameters().begin();
const bool IsConstQualified =
Param.getType().getCanonicalType().isConstQualified();

auto Diag =
diag(Param->getLocation(),
diag(Param.getLocation(),
"the %select{|const qualified }0parameter %1 is copied for each "
"invocation%select{ but only used as a const reference|}0; consider "
"making it a %select{const |}0reference")
<< IsConstQualified << paramNameOrIndex(Param->getName(), Index);
<< IsConstQualified << paramNameOrIndex(Param.getName(), Index);
// Do not propose fixes when:
// 1. the ParmVarDecl is in a macro, since we cannot place them correctly
// 2. the function is virtual as it might break overrides
// 3. the function is referenced outside of a call expression within the
// compilation unit as the signature change could introduce build errors.
// 4. the function is an explicit template/ specialization.
const auto *Method = llvm::dyn_cast<CXXMethodDecl>(Function);
if (Param->getBeginLoc().isMacroID() || (Method && Method->isVirtual()) ||
isReferencedOutsideOfCallExpr(*Function, *Result.Context) ||
Function->getTemplateSpecializationKind() == TSK_ExplicitSpecialization)
const auto *Method = llvm::dyn_cast<CXXMethodDecl>(&Function);
if (Param.getBeginLoc().isMacroID() || (Method && Method->isVirtual()) ||
isReferencedOutsideOfCallExpr(Function, Context) ||
Function.getTemplateSpecializationKind() == TSK_ExplicitSpecialization)
return;
for (const auto *FunctionDecl = Function; FunctionDecl != nullptr;
for (const auto *FunctionDecl = &Function; FunctionDecl != nullptr;
FunctionDecl = FunctionDecl->getPreviousDecl()) {
const auto &CurrentParam = *FunctionDecl->getParamDecl(Index);
Diag << utils::fixit::changeVarDeclToReference(CurrentParam,
*Result.Context);
Diag << utils::fixit::changeVarDeclToReference(CurrentParam, Context);
// The parameter of each declaration needs to be checked individually as to
// whether it is const or not as constness can differ between definition and
// declaration.
if (!CurrentParam.getType().getCanonicalType().isConstQualified()) {
if (std::optional<FixItHint> Fix = utils::fixit::addQualifierToVarDecl(
CurrentParam, *Result.Context, DeclSpec::TQ::TQ_const))
CurrentParam, Context, DeclSpec::TQ::TQ_const))
Diag << *Fix;
}
}
}

void UnnecessaryValueParamCheck::registerPPCallbacks(
const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
Inserter.registerPreprocessor(PP);
}

void UnnecessaryValueParamCheck::storeOptions(
ClangTidyOptions::OptionMap &Opts) {
Options.store(Opts, "IncludeStyle", Inserter.getStyle());
Options.store(Opts, "AllowedTypes",
utils::options::serializeStringList(AllowedTypes));
}

void UnnecessaryValueParamCheck::onEndOfTranslationUnit() {
MutationAnalyzerCache.clear();
}

void UnnecessaryValueParamCheck::handleMoveFix(const ParmVarDecl &Var,
void UnnecessaryValueParamCheck::handleMoveFix(const ParmVarDecl &Param,
const DeclRefExpr &CopyArgument,
const ASTContext &Context) {
ASTContext &Context) {
auto Diag = diag(CopyArgument.getBeginLoc(),
"parameter %0 is passed by value and only copied once; "
"consider moving it to avoid unnecessary copies")
<< &Var;
<< &Param;
// Do not propose fixes in macros since we cannot place them correctly.
if (CopyArgument.getBeginLoc().isMacroID())
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,16 @@ class UnnecessaryValueParamCheck : public ClangTidyCheck {
void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
void onEndOfTranslationUnit() override;

private:
void handleMoveFix(const ParmVarDecl &Var, const DeclRefExpr &CopyArgument,
const ASTContext &Context);
protected:
// Create diagnostics. These are virtual so that derived classes can change
// behaviour.
virtual void handleMoveFix(const ParmVarDecl &Param,
const DeclRefExpr &CopyArgument,
ASTContext &Context);
virtual void handleConstRefFix(const FunctionDecl &Function,
const ParmVarDecl &Param, ASTContext &Context);

private:
ExprMutationAnalyzer::Memoized MutationAnalyzerCache;
utils::IncludeInserter Inserter;
const std::vector<StringRef> AllowedTypes;
Expand Down
323 changes: 183 additions & 140 deletions clang-tools-extra/clang-tidy/tool/run-clang-tidy.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion clang-tools-extra/clang-tidy/utils/ASTUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ bool areStatementsIdentical(const Stmt *FirstStmt, const Stmt *SecondStmt,
if (FirstStmt == SecondStmt)
return true;

if (FirstStmt->getStmtClass() != FirstStmt->getStmtClass())
if (FirstStmt->getStmtClass() != SecondStmt->getStmtClass())
return false;

if (isa<Expr>(FirstStmt) && isa<Expr>(SecondStmt)) {
Expand Down
10 changes: 5 additions & 5 deletions clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,10 @@ bool isStandardPointerConvertible(QualType From, QualType To) {
if (RD->isCompleteDefinition() &&
isBaseOf(From->getPointeeType().getTypePtr(),
To->getPointeeType().getTypePtr())) {
return true;
// If B is an inaccessible or ambiguous base class of D, a program
// that necessitates this conversion is ill-formed
return isUnambiguousPublicBaseClass(From->getPointeeType().getTypePtr(),
To->getPointeeType().getTypePtr());
}
}

Expand Down Expand Up @@ -375,10 +378,7 @@ bool ExceptionAnalyzer::ExceptionInfo::filterByCatch(
isPointerOrPointerToMember(ExceptionCanTy->getTypePtr())) {
// A standard pointer conversion not involving conversions to pointers to
// private or protected or ambiguous classes ...
if (isStandardPointerConvertible(ExceptionCanTy, HandlerCanTy) &&
isUnambiguousPublicBaseClass(
ExceptionCanTy->getTypePtr()->getPointeeType().getTypePtr(),
HandlerCanTy->getTypePtr()->getPointeeType().getTypePtr())) {
if (isStandardPointerConvertible(ExceptionCanTy, HandlerCanTy)) {
TypesToDelete.push_back(ExceptionTy);
}
// A function pointer conversion ...
Expand Down
20 changes: 20 additions & 0 deletions clang-tools-extra/clangd/IncludeCleaner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,26 @@ computeIncludeCleanerFindings(ParsedAST &AST, bool AnalyzeAngledIncludes) {
Ref.RT != include_cleaner::RefType::Explicit)
return;

// Check if we have any headers with the same spelling, in edge cases
// like `#include_next "foo.h"`, the user can't ever include the
// physical foo.h, but can have a spelling that refers to it.
// We postpone this check because spelling a header for every usage is
// expensive.
std::string Spelling = include_cleaner::spellHeader(
{Providers.front(), AST.getPreprocessor().getHeaderSearchInfo(),
MainFile});
for (auto *Inc :
ConvertedIncludes.match(include_cleaner::Header{Spelling})) {
Satisfied = true;
auto HeaderID =
AST.getIncludeStructure().getID(&Inc->Resolved->getFileEntry());
assert(HeaderID.has_value() &&
"ConvertedIncludes only contains resolved includes.");
Used.insert(*HeaderID);
}
if (Satisfied)
return;

// We actually always want to map usages to their spellings, but
// spelling locations can point into preamble section. Using these
// offsets could lead into crashes in presence of stale preambles. Hence
Expand Down
22 changes: 22 additions & 0 deletions clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,28 @@ TEST(IncludeCleaner, ResourceDirIsIgnored) {
EXPECT_THAT(Findings.MissingIncludes, IsEmpty());
}

TEST(IncludeCleaner, DifferentHeaderSameSpelling) {
// `foo` is declared in foo_inner/foo.h, but there's no way to spell it
// directly. Make sure we don't generate unused/missing include findings in
// such cases.
auto TU = TestTU::withCode(R"cpp(
#include <foo.h>
void baz() {
foo();
}
)cpp");
// foo/foo.h declares nothing itself; it only forwards to the next foo.h on
// the include path via #include_next.
TU.AdditionalFiles["foo/foo.h"] = guard("#include_next <foo.h>");
TU.AdditionalFiles["foo_inner/foo.h"] = guard(R"cpp(
void foo();
)cpp");
// Both directories are added to the include path, with foo ahead of
// foo_inner.
TU.ExtraArgs.push_back("-Ifoo");
TU.ExtraArgs.push_back("-Ifoo_inner");

auto AST = TU.build();
auto Findings = computeIncludeCleanerFindings(AST);
// Neither an unused-include nor a missing-include finding is expected.
EXPECT_THAT(Findings.UnusedIncludes, IsEmpty());
EXPECT_THAT(Findings.MissingIncludes, IsEmpty());
}
} // namespace
} // namespace clangd
} // namespace clang
Loading