15 changes: 12 additions & 3 deletions bolt/lib/Core/DebugNames.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,11 @@ void DWARF5AcceleratorTable::addUnit(DWARFUnit &Unit,
auto Iter = CUOffsetsToPatch.insert({*DWOID, CUList.size()});
if (Iter.second)
CUList.push_back(BADCUOFFSET);
ForeignTUList.push_back(cast<DWARFTypeUnit>(&Unit)->getTypeHash());
const uint64_t TUHash = cast<DWARFTypeUnit>(&Unit)->getTypeHash();
if (!TUHashToIndexMap.count(TUHash)) {
TUHashToIndexMap.insert({TUHash, ForeignTUList.size()});
ForeignTUList.push_back(TUHash);
}
} else {
LocalTUList.push_back(CurrentUnitOffset);
}
Expand Down Expand Up @@ -231,8 +235,13 @@ DWARF5AcceleratorTable::addAccelTableEntry(
IsTU = Unit.isTypeUnit();
DieTag = Die.getTag();
if (IsTU) {
if (DWOID)
return ForeignTUList.size() - 1;
if (DWOID) {
const uint64_t TUHash = cast<DWARFTypeUnit>(&Unit)->getTypeHash();
auto Iter = TUHashToIndexMap.find(TUHash);
assert(Iter != TUHashToIndexMap.end() &&
"Could not find TU hash in map");
return Iter->second;
}
return LocalTUList.size() - 1;
}
return CUList.size() - 1;
Expand Down
20 changes: 0 additions & 20 deletions bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ namespace opts {
extern cl::OptionCategory BoltCategory;
extern cl::OptionCategory BoltOptCategory;

extern cl::opt<bolt::MacroFusionType> AlignMacroOpFusion;
extern cl::opt<unsigned> Verbosity;
extern cl::opt<bool> EnableBAT;
extern cl::opt<unsigned> ExecutionCountThreshold;
Expand Down Expand Up @@ -1637,25 +1636,6 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
}
}

// Print information on missed macro-fusion opportunities seen on input.
if (BC.Stats.MissedMacroFusionPairs) {
BC.outs() << format(
"BOLT-INFO: the input contains %zu (dynamic count : %zu)"
" opportunities for macro-fusion optimization",
BC.Stats.MissedMacroFusionPairs, BC.Stats.MissedMacroFusionExecCount);
switch (opts::AlignMacroOpFusion) {
case MFT_NONE:
BC.outs() << ". Use -align-macro-fusion to fix.\n";
break;
case MFT_HOT:
BC.outs() << ". Will fix instances on a hot path.\n";
break;
case MFT_ALL:
BC.outs() << " that are going to be fixed\n";
break;
}
}

// Collect and print information about suboptimal code layout on input.
if (opts::ReportBadLayout) {
std::vector<BinaryFunction *> SuboptimalFuncs;
Expand Down
3 changes: 3 additions & 0 deletions bolt/lib/Profile/BoltAddressTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
LLVM_DEBUG(dbgs() << " Cold part\n");
for (const FunctionFragment &FF :
Function.getLayout().getSplitFragments()) {
// Skip empty fragments to avoid adding zero-address entries to maps.
if (FF.empty())
continue;
ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
Map.clear();
for (const BinaryBasicBlock *const BB : FF)
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Profile/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ add_llvm_library(LLVMBOLTProfile
DISABLE_LLVM_LINK_LLVM_DYLIB

LINK_COMPONENTS
Demangle
Support
TransformUtils
)
Expand Down
208 changes: 170 additions & 38 deletions bolt/lib/Profile/YAMLProfileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Profile/ProfileYAMLMapping.h"
#include "bolt/Utils/NameResolver.h"
#include "bolt/Utils/Utils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/edit_distance.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/CommandLine.h"
#include <cstdlib>

using namespace llvm;
Expand All @@ -24,6 +27,11 @@ extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> InferStaleProfile;
extern cl::opt<bool> Lite;

// Upper bound (inclusive) on the edit distance between demangled names for a
// profile to be matched to a binary function by matchWithNameSimilarity().
// The default of 0 disables name-similarity matching entirely.
cl::opt<unsigned> NameSimilarityFunctionMatchingThreshold(
"name-similarity-function-matching-threshold",
cl::desc("Match functions using namespace and edit distance"), cl::init(0),
cl::Hidden, cl::cat(BoltOptCategory));

static llvm::cl::opt<bool>
IgnoreHash("profile-ignore-hash",
cl::desc("ignore hash while reading function profile"),
Expand Down Expand Up @@ -334,6 +342,13 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
return Error::success();
}

/// Returns true if \p Profile is considered a match for \p BF.
///
/// When hashes are ignored (opts::IgnoreHash) the decision is made on the
/// number of basic blocks alone; otherwise the profile hash must equal the
/// hash of the binary function.
bool YAMLProfileReader::profileMatches(
    const yaml::bolt::BinaryFunctionProfile &Profile, const BinaryFunction &BF) {
  const bool UseBlockCount = opts::IgnoreHash;
  return UseBlockCount ? Profile.NumBasicBlocks == BF.size()
                       : Profile.Hash == static_cast<uint64_t>(BF.getHash());
}

bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
if (opts::MatchProfileWithFunctionHash)
return true;
Expand All @@ -350,44 +365,8 @@ bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
return false;
}

Error YAMLProfileReader::readProfile(BinaryContext &BC) {
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: YAML profile with hash: ";
switch (YamlBP.Header.HashFunction) {
case HashFunction::StdHash:
outs() << "std::hash\n";
break;
case HashFunction::XXH3:
outs() << "xxh3\n";
break;
}
}
YamlProfileToFunction.resize(YamlBP.Functions.size() + 1);

auto profileMatches = [](const yaml::bolt::BinaryFunctionProfile &Profile,
BinaryFunction &BF) {
if (opts::IgnoreHash)
return Profile.NumBasicBlocks == BF.size();
return Profile.Hash == static_cast<uint64_t>(BF.getHash());
};

uint64_t MatchedWithExactName = 0;
uint64_t MatchedWithHash = 0;
uint64_t MatchedWithLTOCommonName = 0;

// Computes hash for binary functions.
if (opts::MatchProfileWithFunctionHash) {
for (auto &[_, BF] : BC.getBinaryFunctions()) {
BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
}
} else if (!opts::IgnoreHash) {
for (BinaryFunction *BF : ProfileBFs) {
if (!BF)
continue;
BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
}
}

size_t YAMLProfileReader::matchWithExactName() {
size_t MatchedWithExactName = 0;
// This first pass assigns profiles that match 100% by name and by hash.
for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
if (!BF)
Expand All @@ -402,10 +381,14 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
++MatchedWithExactName;
}
}
return MatchedWithExactName;
}

size_t YAMLProfileReader::matchWithHash(BinaryContext &BC) {
// Iterates through profiled functions to match the first binary function with
// the same exact hash. Serves to match identical, renamed functions.
// Collisions are possible where multiple functions share the same exact hash.
size_t MatchedWithHash = 0;
if (opts::MatchProfileWithFunctionHash) {
DenseMap<size_t, BinaryFunction *> StrictHashToBF;
StrictHashToBF.reserve(BC.getBinaryFunctions().size());
Expand All @@ -424,8 +407,12 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
}
}
}
return MatchedWithHash;
}

size_t YAMLProfileReader::matchWithLTOCommonName() {
// This second pass allows name ambiguity for LTO private functions.
size_t MatchedWithLTOCommonName = 0;
for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) {
if (!LTOCommonNameFunctionMap.contains(CommonName))
continue;
Expand Down Expand Up @@ -456,11 +443,154 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
++MatchedWithLTOCommonName;
}
}
return MatchedWithLTOCommonName;
}

/// Matches profiles that are still unused to binary functions that have no
/// profile yet, based on similarity of their demangled names.
///
/// A profile is paired with the binary function that (a) was assigned the same
/// namespace string, (b) has the same number of basic blocks, and (c) has the
/// smallest edit distance between demangled names, provided that distance does
/// not exceed opts::NameSimilarityFunctionMatchingThreshold. A threshold of 0
/// disables this matching stage.
///
/// \param BC binary context providing the set of all binary functions.
/// \returns the number of profiles matched by this stage.
size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
  if (opts::NameSimilarityFunctionMatchingThreshold == 0)
    return 0;

  size_t MatchedWithNameSimilarity = 0;
  ItaniumPartialDemangler Demangler;

  // Demangle and derive namespace from function name.
  auto DemangleName = [&](std::string &FunctionName) {
    StringRef RestoredName = NameResolver::restore(FunctionName);
    return demangle(RestoredName);
  };
  // NOTE(review): partialDemangle() parses a *mangled* name, yet every caller
  // below passes an already demangled name. That presumably makes the parse
  // fail and yields an empty namespace for most functions - confirm intent.
  auto DeriveNameSpace = [&](std::string &DemangledName) {
    if (Demangler.partialDemangle(DemangledName.c_str()))
      return std::string("");
    // The demangler requires a malloc-allocated buffer (it may realloc it) or
    // nullptr, so let it allocate the output itself and release it afterwards.
    size_t BufferSize = 0;
    char *NameSpace =
        Demangler.getFunctionDeclContextName(nullptr, &BufferSize);
    // nullptr is returned when the parsed name is not a function encoding.
    if (!NameSpace)
      return std::string("");
    // The output is NUL-terminated; copy it without the terminator.
    std::string Result(NameSpace);
    std::free(NameSpace);
    return Result;
  };

  // Maps namespaces to associated function block counts and gets profile
  // function names and namespaces to minimize the number of BFs to process and
  // avoid repeated name demangling/namespace derivation.
  StringMap<std::set<uint32_t>> NamespaceToProfiledBFSizes;
  std::vector<std::string> ProfileBFDemangledNames;
  ProfileBFDemangledNames.reserve(YamlBP.Functions.size());
  std::vector<std::string> ProfiledBFNamespaces;
  ProfiledBFNamespaces.reserve(YamlBP.Functions.size());

  for (auto &YamlBF : YamlBP.Functions) {
    std::string YamlBFDemangledName = DemangleName(YamlBF.Name);
    ProfileBFDemangledNames.push_back(YamlBFDemangledName);
    std::string YamlBFNamespace = DeriveNameSpace(YamlBFDemangledName);
    ProfiledBFNamespaces.push_back(YamlBFNamespace);
    NamespaceToProfiledBFSizes[YamlBFNamespace].insert(YamlBF.NumBasicBlocks);
  }

  StringMap<std::vector<BinaryFunction *>> NamespaceToBFs;

  // Maps namespaces to BFs excluding binary functions with no equal sized
  // profiled functions belonging to the same namespace.
  for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
    std::string DemangledName = BF->getDemangledName();
    std::string Namespace = DeriveNameSpace(DemangledName);

    auto NamespaceToProfiledBFSizesIt =
        NamespaceToProfiledBFSizes.find(Namespace);
    // Skip if there are no ProfileBFs with a given \p Namespace.
    if (NamespaceToProfiledBFSizesIt == NamespaceToProfiledBFSizes.end())
      continue;
    // Skip if there are no ProfileBFs in a given \p Namespace with
    // equal number of blocks.
    if (NamespaceToProfiledBFSizesIt->second.count(BF->size()) == 0)
      continue;
    // operator[] creates the (empty) bucket on first use.
    NamespaceToBFs[Namespace].push_back(BF);
  }

  // Iterates through all profiled functions and binary functions belonging to
  // the same namespace and matches based on edit distance threshold.
  assert(YamlBP.Functions.size() == ProfiledBFNamespaces.size() &&
         ProfiledBFNamespaces.size() == ProfileBFDemangledNames.size());
  for (size_t I = 0; I < YamlBP.Functions.size(); ++I) {
    yaml::bolt::BinaryFunctionProfile &YamlBF = YamlBP.Functions[I];
    std::string &YamlBFNamespace = ProfiledBFNamespaces[I];
    if (YamlBF.Used)
      continue;
    // Skip if there are no BFs in a given \p Namespace.
    auto It = NamespaceToBFs.find(YamlBFNamespace);
    if (It == NamespaceToBFs.end())
      continue;

    std::string &YamlBFDemangledName = ProfileBFDemangledNames[I];
    // Bind by reference: the candidate list is only read here, so copying it
    // for every profile would be wasted work.
    const std::vector<BinaryFunction *> &BFs = It->second;
    unsigned MinEditDistance = UINT_MAX;
    BinaryFunction *ClosestNameBF = nullptr;

    // Determines BF the closest to the profiled function, in the
    // same namespace.
    for (BinaryFunction *BF : BFs) {
      if (ProfiledFunctions.count(BF))
        continue;
      if (BF->size() != YamlBF.NumBasicBlocks)
        continue;
      std::string BFDemangledName = BF->getDemangledName();
      unsigned BFEditDistance =
          StringRef(BFDemangledName).edit_distance(YamlBFDemangledName);
      // Strict comparison keeps the first candidate on ties.
      if (BFEditDistance < MinEditDistance) {
        MinEditDistance = BFEditDistance;
        ClosestNameBF = BF;
      }
    }

    if (ClosestNameBF &&
        MinEditDistance <= opts::NameSimilarityFunctionMatchingThreshold) {
      matchProfileToFunction(YamlBF, *ClosestNameBF);
      ++MatchedWithNameSimilarity;
    }
  }

  return MatchedWithNameSimilarity;
}

Error YAMLProfileReader::readProfile(BinaryContext &BC) {
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: YAML profile with hash: ";
switch (YamlBP.Header.HashFunction) {
case HashFunction::StdHash:
outs() << "std::hash\n";
break;
case HashFunction::XXH3:
outs() << "xxh3\n";
break;
}
}
YamlProfileToFunction.resize(YamlBP.Functions.size() + 1);

// Computes hash for binary functions.
if (opts::MatchProfileWithFunctionHash) {
for (auto &[_, BF] : BC.getBinaryFunctions()) {
BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
}
} else if (!opts::IgnoreHash) {
for (BinaryFunction *BF : ProfileBFs) {
if (!BF)
continue;
BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
}
}

const size_t MatchedWithExactName = matchWithExactName();
const size_t MatchedWithHash = matchWithHash(BC);
const size_t MatchedWithLTOCommonName = matchWithLTOCommonName();
const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC);

for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF))
matchProfileToFunction(YamlBF, *BF);


for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
if (!YamlBF.Used && opts::Verbosity >= 1)
errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
Expand All @@ -473,6 +603,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
<< " functions with hash\n";
outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
<< " functions with matching LTO common names\n";
outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
<< " functions with similar names\n";
}

// Set for parseFunctionProfile().
Expand Down
132 changes: 83 additions & 49 deletions bolt/lib/Rewrite/DWARFRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,15 @@ void DWARFRewriter::updateDebugInfo() {

} else {
LocListWritersByCU[CUIndex] = std::make_unique<DebugLocWriter>();
if (std::optional<uint64_t> DWOId = CU.getDWOId()) {
assert(LegacyRangesWritersByCU.count(*DWOId) == 0 &&
"LegacyRangeLists writer for DWO unit already exists.");
auto LegacyRangesSectionWriterByCU =
std::make_unique<DebugRangesSectionWriter>();
LegacyRangesSectionWriterByCU->initSection(CU);
LegacyRangesWritersByCU[*DWOId] =
std::move(LegacyRangesSectionWriterByCU);
}
}
return LocListWritersByCU[CUIndex++].get();
};
Expand Down Expand Up @@ -693,6 +702,7 @@ void DWARFRewriter::updateDebugInfo() {
if (Unit->getVersion() >= 5) {
TempRangesSectionWriter = RangeListsWritersByCU[*DWOId].get();
} else {
TempRangesSectionWriter = LegacyRangesWritersByCU[*DWOId].get();
RangesBase = RangesSectionWriter->getSectionOffset();
setDwoRangesBase(*DWOId, *RangesBase);
}
Expand Down Expand Up @@ -1274,9 +1284,17 @@ void DWARFRewriter::updateDWARFObjectAddressRanges(
}

if (RangesBaseInfo) {
DIEBldr.replaceValue(&Die, RangesBaseInfo.getAttribute(),
RangesBaseInfo.getForm(),
DIEInteger(static_cast<uint32_t>(*RangesBase)));
if (RangesBaseInfo.getAttribute() == dwarf::DW_AT_GNU_ranges_base) {
auto RangesWriterIterator =
LegacyRangesWritersByCU.find(*Unit.getDWOId());
assert(RangesWriterIterator != LegacyRangesWritersByCU.end() &&
"RangesWriter does not exist for DWOId");
RangesWriterIterator->second->setDie(&Die);
} else {
DIEBldr.replaceValue(&Die, RangesBaseInfo.getAttribute(),
RangesBaseInfo.getForm(),
DIEInteger(static_cast<uint32_t>(*RangesBase)));
}
RangesBase = std::nullopt;
}
}
Expand All @@ -1294,20 +1312,12 @@ void DWARFRewriter::updateDWARFObjectAddressRanges(
RangesAttrInfo.getForm() == dwarf::DW_FORM_sec_offset)
NeedConverted = true;

uint64_t CurRangeBase = 0;
if (Unit.isDWOUnit()) {
if (std::optional<uint64_t> DWOId = Unit.getDWOId())
CurRangeBase = getDwoRangesBase(*DWOId);
else
errs() << "BOLT-WARNING: [internal-dwarf-error]: DWOId is not found "
"for DWO Unit.";
}
if (NeedConverted || RangesAttrInfo.getForm() == dwarf::DW_FORM_rnglistx)
DIEBldr.replaceValue(&Die, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx,
DIEInteger(DebugRangesOffset));
else
DIEBldr.replaceValue(&Die, dwarf::DW_AT_ranges, RangesAttrInfo.getForm(),
DIEInteger(DebugRangesOffset - CurRangeBase));
DIEInteger(DebugRangesOffset));

if (!RangesBase) {
if (LowPCAttrInfo &&
Expand All @@ -1324,15 +1334,21 @@ void DWARFRewriter::updateDWARFObjectAddressRanges(

// If we are at this point we are in the CU/Skeleton CU, and
// DW_AT_GNU_ranges_base or DW_AT_rnglists_base doesn't exist.
if (Unit.getVersion() <= 4)
if (Unit.getVersion() <= 4) {
DIEBldr.addValue(&Die, dwarf::DW_AT_GNU_ranges_base, dwarf::DW_FORM_data4,
DIEInteger(*RangesBase));
else if (Unit.getVersion() == 5)
DIEInteger(INT_MAX));
auto RangesWriterIterator =
LegacyRangesWritersByCU.find(*Unit.getDWOId());
assert(RangesWriterIterator != LegacyRangesWritersByCU.end() &&
"RangesWriter does not exist for DWOId");
RangesWriterIterator->second->setDie(&Die);
} else if (Unit.getVersion() == 5) {
DIEBldr.addValue(&Die, dwarf::DW_AT_rnglists_base,
dwarf::DW_FORM_sec_offset, DIEInteger(*RangesBase));
else
} else {
DIEBldr.addValue(&Die, dwarf::DW_AT_rnglists_base,
dwarf::DW_FORM_sec_offset, DIEInteger(*RangesBase));
}
return;
}

Expand Down Expand Up @@ -1611,6 +1627,30 @@ void DWARFRewriter::finalizeCompileUnits(DIEBuilder &DIEBlder,
DIEStreamer &Streamer,
CUOffsetMap &CUMap,
const std::list<DWARFUnit *> &CUs) {
for (DWARFUnit *CU : CUs) {
if (CU->getVersion() != 4)
continue;
std::optional<uint64_t> DWOId = CU->getDWOId();
if (!DWOId)
continue;
auto RangesWriterIterator = LegacyRangesWritersByCU.find(*DWOId);
assert(RangesWriterIterator != LegacyRangesWritersByCU.end() &&
"RangesWriter does not exist for DWOId");
std::unique_ptr<DebugRangesSectionWriter> &LegacyRangesWriter =
RangesWriterIterator->second;
std::optional<DIE *> Die = LegacyRangesWriter->getDie();
if (!Die || !Die.value())
continue;
DIEValue DvalGNUBase =
Die.value()->findAttribute(dwarf::DW_AT_GNU_ranges_base);
assert(DvalGNUBase && "GNU_ranges_base attribute does not exist for DWOId");
DIEBlder.replaceValue(
Die.value(), dwarf::DW_AT_GNU_ranges_base, DvalGNUBase.getForm(),
DIEInteger(LegacyRangesSectionWriter->getSectionOffset()));
std::unique_ptr<DebugBufferVector> RangesWritersContents =
LegacyRangesWriter->releaseBuffer();
LegacyRangesSectionWriter->appendToRangeBuffer(*RangesWritersContents);
}
DIEBlder.generateAbbrevs();
DIEBlder.finish();
// generate debug_info and CUMap
Expand Down Expand Up @@ -2098,7 +2138,6 @@ void DWARFRewriter::convertToRangesPatchDebugInfo(
DWARFUnit &Unit, DIEBuilder &DIEBldr, DIE &Die,
uint64_t RangesSectionOffset, DIEValue &LowPCAttrInfo,
DIEValue &HighPCAttrInfo, std::optional<uint64_t> RangesBase) {
uint32_t BaseOffset = 0;
dwarf::Form LowForm = LowPCAttrInfo.getForm();
dwarf::Attribute RangeBaseAttribute = dwarf::DW_AT_GNU_ranges_base;
dwarf::Form RangesForm = dwarf::DW_FORM_sec_offset;
Expand All @@ -2113,45 +2152,40 @@ void DWARFRewriter::convertToRangesPatchDebugInfo(
Die.getTag() == dwarf::DW_TAG_skeleton_unit;
if (!IsUnitDie)
DIEBldr.deleteValue(&Die, LowPCAttrInfo.getAttribute());
// In DWARF4 for DW_AT_low_pc in binary DW_FORM_addr is used. In the DWO
// section DW_FORM_GNU_addr_index is used. So for if we are converting
// DW_AT_low_pc/DW_AT_high_pc and see DW_FORM_GNU_addr_index. We are
// converting in DWO section, and DW_AT_ranges [DW_FORM_sec_offset] is
// relative to DW_AT_GNU_ranges_base.
if (LowForm == dwarf::DW_FORM_GNU_addr_index) {
// Ranges are relative to DW_AT_GNU_ranges_base.
uint64_t CurRangeBase = 0;
if (std::optional<uint64_t> DWOId = Unit.getDWOId()) {
CurRangeBase = getDwoRangesBase(*DWOId);
}
BaseOffset = CurRangeBase;
} else {
// In DWARF 5 we can have DW_AT_low_pc either as DW_FORM_addr, or
// DW_FORM_addrx. Former is when DW_AT_rnglists_base is present. Latter is
// when it's absent.
if (IsUnitDie) {
if (LowForm == dwarf::DW_FORM_addrx) {
const uint32_t Index = AddrWriter->getIndexFromAddress(0, Unit);
DIEBldr.replaceValue(&Die, LowPCAttrInfo.getAttribute(),
LowPCAttrInfo.getForm(), DIEInteger(Index));
} else {
DIEBldr.replaceValue(&Die, LowPCAttrInfo.getAttribute(),
LowPCAttrInfo.getForm(), DIEInteger(0));
}

// In DWARF 5 we can have DW_AT_low_pc either as DW_FORM_addr, or
// DW_FORM_addrx. Former is when DW_AT_rnglists_base is present. Latter is
// when it's absent.
if (IsUnitDie) {
if (LowForm == dwarf::DW_FORM_addrx) {
const uint32_t Index = AddrWriter->getIndexFromAddress(0, Unit);
DIEBldr.replaceValue(&Die, LowPCAttrInfo.getAttribute(),
LowPCAttrInfo.getForm(), DIEInteger(Index));
} else {
DIEBldr.replaceValue(&Die, LowPCAttrInfo.getAttribute(),
LowPCAttrInfo.getForm(), DIEInteger(0));
}
// Original CU didn't have DW_AT_*_base. We converted it's children (or
// dwo), so need to insert it into CU.
if (RangesBase)
}
// Original CU didn't have DW_AT_*_base. We converted its children (or
// dwo), so need to insert it into CU.
if (RangesBase) {
if (Unit.getVersion() >= 5) {
DIEBldr.addValue(&Die, RangeBaseAttribute, dwarf::DW_FORM_sec_offset,
DIEInteger(*RangesBase));
} else {
DIEBldr.addValue(&Die, RangeBaseAttribute, dwarf::DW_FORM_sec_offset,
DIEInteger(INT_MAX));
auto RangesWriterIterator =
LegacyRangesWritersByCU.find(*Unit.getDWOId());
assert(RangesWriterIterator != LegacyRangesWritersByCU.end() &&
"RangesWriter does not exist for DWOId");
RangesWriterIterator->second->setDie(&Die);
}
}

uint64_t RangeAttrVal = RangesSectionOffset - BaseOffset;
if (Unit.getVersion() >= 5)
RangeAttrVal = RangesSectionOffset;
// HighPC was converted into DW_AT_ranges.
// For DWARF5 we only access ranges through index.

DIEBldr.replaceValue(&Die, HighPCAttrInfo.getAttribute(), dwarf::DW_AT_ranges,
RangesForm, DIEInteger(RangeAttrVal));
RangesForm, DIEInteger(RangesSectionOffset));
}
22 changes: 0 additions & 22 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;

namespace opts {

extern cl::opt<MacroFusionType> AlignMacroOpFusion;
extern cl::list<std::string> HotTextMoveSections;
extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
Expand Down Expand Up @@ -1969,12 +1968,6 @@ void RewriteInstance::adjustCommandLineOptions() {
if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
RtLibrary->adjustCommandLineOptions(*BC);

if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
BC->outs()
<< "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
opts::AlignMacroOpFusion = MFT_NONE;
}

if (BC->isX86() && BC->MAB->allowAutoPadding()) {
if (!BC->HasRelocations) {
BC->errs()
Expand All @@ -1985,13 +1978,6 @@ void RewriteInstance::adjustCommandLineOptions() {
BC->outs()
<< "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
"may take several minutes\n";
opts::AlignMacroOpFusion = MFT_NONE;
}

if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) {
BC->outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
"mode\n";
opts::AlignMacroOpFusion = MFT_NONE;
}

if (opts::SplitEH && !BC->HasRelocations) {
Expand All @@ -2013,14 +1999,6 @@ void RewriteInstance::adjustCommandLineOptions() {
opts::StrictMode = true;
}

if (BC->isX86() && BC->HasRelocations &&
opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) {
BC->outs()
<< "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
"was specified\n";
opts::AlignMacroOpFusion = MFT_ALL;
}

if (!BC->HasRelocations &&
opts::ReorderFunctions != ReorderFunctions::RT_NONE) {
BC->errs() << "BOLT-ERROR: function reordering only works when "
Expand Down
33 changes: 27 additions & 6 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
*AArch64ExprB.getSubExpr(), Comp);
}

/// AArch64 implementation of the macro-op fusion query: unconditionally
/// returns false, i.e. no instruction sequence in \p Insts is ever reported
/// as a fusion pair.
bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
return false;
}

/// AArch64 implementation of instruction shortening: both arguments are
/// ignored and the function always returns false.
bool shortenInstruction(MCInst &, const MCSubtargetInfo &) const override {
return false;
}
Expand Down Expand Up @@ -706,8 +702,20 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
unsigned ShiftVal = AArch64_AM::getArithShiftValue(OperandExtension);
AArch64_AM::ShiftExtendType ExtendType =
AArch64_AM::getArithExtendType(OperandExtension);
if (ShiftVal != 2)
llvm_unreachable("Failed to match indirect branch! (fragment 2)");
if (ShiftVal != 2) {
// TODO: Handle the pattern where ShiftVal != 2.
// The code sequence below has no shift amount;
// the range could be 0 to 4.
// The pattern comes from libc, it occurs when the binary is static.
// adr x6, 0x219fb0 <sigall_set+0x88>
// add x6, x6, x14, lsl #2
// ldr w7, [x6]
// add x6, x6, w7, sxtw => no shift amount
// br x6
errs() << "BOLT-WARNING: "
"Failed to match indirect branch: ShiftVAL != 2 \n";
return false;
}

if (ExtendType == AArch64_AM::SXTB)
ScaleValue = 1LL;
Expand Down Expand Up @@ -752,6 +760,19 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return true;
}

if (DefJTBaseAdd->getOpcode() == AArch64::ADR) {
// TODO: Handle the pattern where there is no adrp/add pair.
// It also occurs when the binary is static.
// adr x13, 0x215a18 <_nl_value_type_LC_COLLATE+0x50>
// ldrh w13, [x13, w12, uxtw #1]
// adr x12, 0x247b30 <__gettextparse+0x5b0>
// add x13, x12, w13, sxth #2
// br x13
errs() << "BOLT-WARNING: Failed to match indirect branch: "
"nop/adr instead of adrp/add \n";
return false;
}

assert(DefJTBaseAdd->getOpcode() == AArch64::ADDXri &&
"Failed to match jump table base address pattern! (1)");

Expand Down
34 changes: 0 additions & 34 deletions bolt/lib/Target/X86/X86MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -661,40 +661,6 @@ class X86MCPlusBuilder : public MCPlusBuilder {
return (Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX;
}

/// Checks whether the first two non-prefix instructions of \p Insts form a
/// macro-op fusion pair.
///
/// Leading prefix instructions are skipped before each of the two
/// instructions. The second instruction has to be a conditional branch and
/// the first one must not use a PC-relative operand; the final verdict comes
/// from the X86 macro-fusion classification of the first opcode and the
/// branch condition code.
///
/// \returns true if the pair is fusible, false otherwise (including when
/// fewer than two non-prefix instructions are present).
bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
  const auto *const End = Insts.end();
  // Moves \p Pos forward past any prefix instructions.
  auto SkipPrefixes = [&](const MCInst *Pos) {
    while (Pos != End && isPrefix(*Pos))
      ++Pos;
    return Pos;
  };

  const MCInst *const FirstPos = SkipPrefixes(Insts.begin());
  if (FirstPos == End)
    return false;
  const MCInst *const SecondPos = SkipPrefixes(FirstPos + 1);
  if (SecondPos == End)
    return false;

  const MCInst &CmpInst = *FirstPos;
  const MCInst &BranchInst = *SecondPos;

  // Only a conditional branch can be the second half of a fused pair, and
  // the pair cannot fuse if the first instruction uses RIP-relative memory.
  if (!isConditionalBranch(BranchInst) || hasPCRelOperand(CmpInst))
    return false;

  const X86::FirstMacroFusionInstKind FirstKind =
      X86::classifyFirstOpcodeInMacroFusion(CmpInst.getOpcode());
  if (FirstKind == X86::FirstMacroFusionInstKind::Invalid)
    return false;

  const X86::CondCode BranchCC =
      static_cast<X86::CondCode>(getCondCode(BranchInst));
  const X86::SecondMacroFusionInstKind SecondKind =
      X86::classifySecondCondCodeInMacroFusion(BranchCC);
  return SecondKind != X86::SecondMacroFusionInstKind::Invalid &&
         X86::isMacroFused(FirstKind, SecondKind);
}

std::optional<X86MemOperand>
evaluateX86MemoryOperand(const MCInst &Inst) const override {
int MemOpNo = getMemoryOperandNo(Inst);
Expand Down
83 changes: 83 additions & 0 deletions bolt/test/AArch64/test-indirect-branch.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Test how BOLT handles indirect branch sequence of instructions in
// AArch64MCPlus builder.

// clang-format off

// REQUIRES: system-linux
// RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
// RUN: %clang %cflags --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
// RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg --strict\
// RUN: -v=1 2>&1 | FileCheck %s

// Pattern 1: there is no shift amount after the 'add' instruction.
//
// adr x6, 0x219fb0 <sigall_set+0x88>
// add x6, x6, x14, lsl #2
// ldr w7, [x6]
// add x6, x6, w7, sxtw => no shift amount
// br x6
//

// Pattern 2: nop/adr pair is used in place of adrp/add
//
// nop => nop/adr instead of adrp/add
// adr x13, 0x215a18 <_nl_value_type_LC_COLLATE+0x50>
// ldrh w13, [x13, w12, uxtw #1]
// adr x12, 0x247b30 <__gettextparse+0x5b0>
// add x13, x12, w13, sxth #2
// br x13

.section .text
.align 4
.globl _start
.type _start, %function
_start:
bl test1
bl test2
// mov x0, #4
// mov w8, #93
// svc #0

// Pattern 1
// CHECK: BOLT-WARNING: Failed to match indirect branch: ShiftVAL != 2
.globl test1
.type test1, %function
test1:
mov x1, #0
adr x3, datatable
add x3, x3, x1, lsl #2
ldr w2, [x3]
add x3, x3, w2, sxtw
br x3
test1_0:
ret
test1_1:
ret
test1_2:
ret

// Pattern 2
// CHECK: BOLT-WARNING: Failed to match indirect branch: nop/adr instead of adrp/add
.globl test2
.type test2, %function
test2:
// The table address is formed with nop + adr rather than adrp + add.
nop
adr x3, jump_table
// Load the 16-bit entry selected by x1 (x1 is used as the index here and is
// not set up inside test2 itself).
ldrh w3, [x3, x1, lsl #1]
// Entries are stored as (target - test2_0) >> 2, so the target is rebuilt as
// test2_0 + (sign-extended entry << 2).
adr x1, test2_0
add x3, x1, w3, sxth #2
br x3
// Labels referenced by the jump_table entries.
test2_0:
ret
test2_1:
ret

.section .rodata,"a",@progbits
datatable:
.word test1_0-datatable
.word test1_1-datatable
.word test1_2-datatable

jump_table:
.hword (test2_0-test2_0)>>2
.hword (test2_1-test2_0)>>2
6 changes: 3 additions & 3 deletions bolt/test/X86/debug-fission-single-convert.s
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@
# CHECK-DWO-DWO: 00000010
# CHECK-DWO-DWO: 00000050
# CHECK-DWO-DWO: DW_TAG_subprogram
# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
# CHECK-DWO-DWO: DW_TAG_subprogram
# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000020
# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000030
# CHECK-DWO-DWO: DW_TAG_subprogram
# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000040
# CHECK-DWO-DWO-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000050

# CHECK-ADDR-SEC: .debug_addr contents:
# CHECK-ADDR-SEC: 0x00000000: Addrs: [
Expand Down
40 changes: 21 additions & 19 deletions bolt/test/X86/dwarf4-df-dualcu.test
Original file line number Diff line number Diff line change
Expand Up @@ -37,36 +37,38 @@

; BOLT: .debug_ranges
; BOLT-NEXT: 00000000 <End of list>
; BOLT-NEXT: 00000010 [[#%.16x,ADDR:]] [[#%.16x,ADDRB:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 00000010 <End of list>
; BOLT-NEXT: 00000030 [[#%.16x,ADDR1:]] [[#%.16x,ADDR1B:]]
; BOLT-NEXT: 00000030 <End of list>
; BOLT-NEXT: 00000050 [[#%.16x,ADDR2:]] [[#%.16x,ADDR2B:]]
; BOLT-NEXT: 00000050 [[#%.16x,ADDR3:]] [[#%.16x,ADDR3B:]]
; BOLT-NEXT: 00000040 <End of list>
; BOLT-NEXT: 00000050 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 00000050 <End of list>
; BOLT-NEXT: 00000080 [[#%.16x,ADDR4:]] [[#%.16x,ADDR4B:]]
; BOLT-NEXT: 00000080 <End of list>
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR5:]] [[#%.16x,ADDR5B:]]
; BOLT-NEXT: 000000a0 <End of list>
; BOLT-NEXT: 00000070 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 00000070 <End of list>
; BOLT-NEXT: 00000090 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 00000090 <End of list>
; BOLT-NEXT: 000000b0 <End of list>
; BOLT-NEXT: 000000c0 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 000000c0 <End of list>

; BOLT: DW_TAG_compile_unit
; BOLT: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x00000016] = "main.dwo.dwo")
; BOLT-NEXT: DW_AT_GNU_dwo_id
; BOLT-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000050
; BOLT-NEXT: [0x[[#ADDR2]], 0x[[#ADDR2B]])
; BOLT-NEXT: [0x[[#ADDR3]], 0x[[#ADDR3B]]))
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-NEXT: [0x[[#ADDR1]], 0x[[#ADDRB1]])
; BOLT-NEXT: [0x[[#ADDR2]], 0x[[#ADDRB2]]))
; BOLT-NEXT: DW_AT_GNU_addr_base [DW_FORM_sec_offset] (0x00000000)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000010)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000040)
; BOLT-NEXT: Compile
; BOLT: DW_TAG_compile_unit
; BOLT: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x00000023] = "helper.dwo.dwo")
; BOLT-NEXT: DW_AT_GNU_dwo_id
; BOLT-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x000000a0
; BOLT-NEXT: [0x[[#ADDR5]], 0x[[#ADDR5B]])
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000090
; BOLT-NEXT: [0x[[#ADDR3]], 0x[[#ADDRB3]])
; BOLT-NEXT: DW_AT_GNU_addr_base [DW_FORM_sec_offset] (0x00000010)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000080)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x000000b0)

; PRE-BOLT-DWO-MAIN: version = 0x0004
; PRE-BOLT-DWO-MAIN: DW_TAG_compile_unit
Expand Down Expand Up @@ -113,13 +115,13 @@
; BOLT-DWO-MAIN-NEXT: DW_AT_decl_line
; BOLT-DWO-MAIN-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_GNU_addr_index 0x1)
; BOLT-DWO-MAIN: DW_TAG_subprogram [4]
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-DWO-MAIN-NEXT: )
; BOLT-DWO-MAIN-NEXT: DW_AT_frame_base
; BOLT-DWO-MAIN-NEXT: DW_AT_linkage_name [DW_FORM_GNU_str_index] (indexed (00000003) string = "_Z3usePiS_")
; BOLT-DWO-MAIN-NEXT: DW_AT_name [DW_FORM_GNU_str_index] (indexed (00000004) string = "use")
; BOLT-DWO-MAIN: DW_TAG_subprogram [6]
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000020
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000030
; BOLT-DWO-MAIN-NEXT: )
; BOLT-DWO-MAIN-NEXT: DW_AT_frame_base [DW_FORM_exprloc] (DW_OP_reg6 RBP)
; BOLT-DWO-MAIN-NEXT: DW_AT_name [DW_FORM_GNU_str_index] (indexed (00000005) string = "main")
Expand Down Expand Up @@ -160,4 +162,4 @@
; BOLT-DWO-HELPER-NEXT: DW_AT_decl_line
; BOLT-DWO-HELPER-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_GNU_addr_index 0x1)
; BOLT-DWO-HELPER: DW_TAG_subprogram [4]
; BOLT-DWO-HELPER-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
; BOLT-DWO-HELPER-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
80 changes: 42 additions & 38 deletions bolt/test/X86/dwarf4-df-input-lowpc-ranges-cus.test
Original file line number Diff line number Diff line change
Expand Up @@ -17,45 +17,47 @@

; BOLT: .debug_ranges
; BOLT-NEXT: 00000000 <End of list>
; BOLT-NEXT: 00000010
; BOLT-NEXT: 00000010
; BOLT-NEXT: 00000010
; BOLT-NEXT: 00000010 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR4:]] [[#%.16x,ADDRB4:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR5:]] [[#%.16x,ADDRB5:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR6:]] [[#%.16x,ADDRB6:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR7:]] [[#%.16x,ADDRB7:]]
; BOLT-NEXT: 00000010 <End of list>
; BOLT-NEXT: 00000050
; BOLT-NEXT: 00000050
; BOLT-NEXT: 00000050
; BOLT-NEXT: 00000050 <End of list>
; BOLT-NEXT: 00000090 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR4:]] [[#%.16x,ADDRB4:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR5:]] [[#%.16x,ADDRB5:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR6:]] [[#%.16x,ADDRB6:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR7:]] [[#%.16x,ADDRB7:]]
; BOLT-NEXT: 00000090 <End of list>
; BOLT-NEXT: 00000110
; BOLT-NEXT: 00000110
; BOLT-NEXT: 00000110
; BOLT-NEXT: 00000110 <End of list>
; BOLT-NEXT: 00000150
; BOLT-NEXT: 00000150
; BOLT-NEXT: 00000150
; BOLT-NEXT: 00000150 <End of list>
; BOLT-NEXT: 00000190 [[#%.16x,ADDR8:]] [[#%.16x,ADDRB8:]]
; BOLT-NEXT: 00000190 [[#%.16x,ADDR9:]] [[#%.16x,ADDRB9:]]
; BOLT-NEXT: 00000190 [[#%.16x,ADDR10:]] [[#%.16x,ADDRB10:]]
; BOLT-NEXT: 00000190 [[#%.16x,ADDR11:]] [[#%.16x,ADDRB11:]]
; BOLT-NEXT: 00000190 [[#%.16x,ADDR12:]] [[#%.16x,ADDRB12:]]
; BOLT-NEXT: 00000190 [[#%.16x,ADDR13:]] [[#%.16x,ADDRB13:]]
; BOLT-NEXT: 00000190 [[#%.16x,ADDR14:]] [[#%.16x,ADDRB14:]]
; BOLT-NEXT: 00000190 <End of list>
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 000000a0 <End of list>
; BOLT-NEXT: 000000e0 [[#%.16x,ADDR5:]] [[#%.16x,ADDRB5:]]
; BOLT-NEXT: 000000e0 [[#%.16x,ADDR6:]] [[#%.16x,ADDRB6:]]
; BOLT-NEXT: 000000e0 [[#%.16x,ADDR7:]] [[#%.16x,ADDRB7:]]
; BOLT-NEXT: 000000e0 <End of list>
; BOLT-NEXT: 00000120 [[#%.16x,ADDR8:]] [[#%.16x,ADDRB8:]]
; BOLT-NEXT: 00000120 [[#%.16x,ADDR9:]] [[#%.16x,ADDRB9:]]
; BOLT-NEXT: 00000120 [[#%.16x,ADDR10:]] [[#%.16x,ADDRB10:]]
; BOLT-NEXT: 00000120 [[#%.16x,ADDR11:]] [[#%.16x,ADDRB11:]]
; BOLT-NEXT: 00000120 [[#%.16x,ADDR12:]] [[#%.16x,ADDRB12:]]
; BOLT-NEXT: 00000120 [[#%.16x,ADDR13:]] [[#%.16x,ADDRB13:]]
; BOLT-NEXT: 00000120 [[#%.16x,ADDR14:]] [[#%.16x,ADDRB14:]]
; BOLT-NEXT: 00000120 <End of list>
; BOLT-NEXT: 000001a0 <End of list>
; BOLT-NEXT: 000001b0 [[#%.16x,ADDR8:]] [[#%.16x,ADDRB8:]]
; BOLT-NEXT: 000001b0 [[#%.16x,ADDR9:]] [[#%.16x,ADDRB9:]]
; BOLT-NEXT: 000001b0 [[#%.16x,ADDR10:]] [[#%.16x,ADDRB10:]]
; BOLT-NEXT: 000001b0 <End of list>
; BOLT-NEXT: 000001f0 [[#%.16x,ADDR12:]] [[#%.16x,ADDRB12:]]
; BOLT-NEXT: 000001f0 [[#%.16x,ADDR13:]] [[#%.16x,ADDRB13:]]
; BOLT-NEXT: 000001f0 [[#%.16x,ADDR14:]] [[#%.16x,ADDRB14:]]
; BOLT-NEXT: 000001f0 <End of list>

; BOLT: DW_TAG_compile_unit
; BOLT: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-fA-F]+}}] = "main.dwo.dwo")
; BOLT-NEXT: DW_AT_GNU_dwo_id
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000010)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000090)
; BOLT-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000090
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-NEXT: [0x[[#ADDR1]], 0x[[#ADDRB1]])
; BOLT-NEXT: [0x[[#ADDR2]], 0x[[#ADDRB2]])
; BOLT-NEXT: [0x[[#ADDR3]], 0x[[#ADDRB3]])
Expand All @@ -64,13 +66,14 @@
; BOLT-NEXT: [0x[[#ADDR6]], 0x[[#ADDRB6]])
; BOLT-NEXT: [0x[[#ADDR7]], 0x[[#ADDRB7]])
; BOLT-NEXT: DW_AT_GNU_addr_base [DW_FORM_sec_offset] (0x00000000)
; BOLT-NEXT: Compile Unit

; BOLT: DW_TAG_compile_unit
; BOLT: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-fA-F]+}}] = "mainOther.dwo.dwo")
; BOLT-NEXT: DW_AT_GNU_dwo_id
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000110)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x000001a0)
; BOLT-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000190
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000120
; BOLT-NEXT: [0x[[#ADDR8]], 0x[[#ADDRB8]])
; BOLT-NEXT: [0x[[#ADDR9]], 0x[[#ADDRB9]])
; BOLT-NEXT: [0x[[#ADDR10]], 0x[[#ADDRB10]])
Expand All @@ -79,19 +82,20 @@
; BOLT-NEXT: [0x[[#ADDR13]], 0x[[#ADDRB13]])
; BOLT-NEXT: [0x[[#ADDR14]], 0x[[#ADDRB14]])
; BOLT-NEXT: DW_AT_GNU_addr_base [DW_FORM_sec_offset] (0x00000018)
; BOLT: {{^$}}

; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000040
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000050

; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000040
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000050
37 changes: 19 additions & 18 deletions bolt/test/X86/dwarf4-df-input-lowpc-ranges.test
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,30 @@

; BOLT: .debug_ranges
; BOLT-NEXT: 00000000 <End of list>
; BOLT-NEXT: 00000010
; BOLT-NEXT: 00000010
; BOLT-NEXT: 00000010
; BOLT-NEXT: 00000010 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR4:]] [[#%.16x,ADDRB4:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR5:]] [[#%.16x,ADDRB5:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR6:]] [[#%.16x,ADDRB6:]]
; BOLT-NEXT: 00000010 [[#%.16x,ADDR7:]] [[#%.16x,ADDRB7:]]
; BOLT-NEXT: 00000010 <End of list>
; BOLT-NEXT: 00000050
; BOLT-NEXT: 00000050
; BOLT-NEXT: 00000050
; BOLT-NEXT: 00000050 <End of list>
; BOLT-NEXT: 00000090 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR4:]] [[#%.16x,ADDRB4:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR5:]] [[#%.16x,ADDRB5:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR6:]] [[#%.16x,ADDRB6:]]
; BOLT-NEXT: 00000090 [[#%.16x,ADDR7:]] [[#%.16x,ADDRB7:]]
; BOLT-NEXT: 00000090 <End of list>
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR1:]] [[#%.16x,ADDRB1:]]
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR2:]] [[#%.16x,ADDRB2:]]
; BOLT-NEXT: 000000a0 [[#%.16x,ADDR3:]] [[#%.16x,ADDRB3:]]
; BOLT-NEXT: 000000a0 <End of list>
; BOLT-NEXT: 000000e0 [[#%.16x,ADDR5:]] [[#%.16x,ADDRB5:]]
; BOLT-NEXT: 000000e0 [[#%.16x,ADDR6:]] [[#%.16x,ADDRB6:]]
; BOLT-NEXT: 000000e0 [[#%.16x,ADDR7:]] [[#%.16x,ADDRB7:]]
; BOLT-NEXT: 000000e0 <End of list>

; BOLT: DW_TAG_compile_unit
; BOLT: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-fA-F]+}}] = "main.dwo.dwo")
; BOLT-NEXT: DW_AT_GNU_dwo_id
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000010)
; BOLT-NEXT: DW_AT_GNU_ranges_base [DW_FORM_sec_offset] (0x00000090)
; BOLT-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000090
; BOLT-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-NEXT: [0x[[#ADDR1]], 0x[[#ADDRB1]])
; BOLT-NEXT: [0x[[#ADDR2]], 0x[[#ADDRB2]])
; BOLT-NEXT: [0x[[#ADDR3]], 0x[[#ADDRB3]])
Expand All @@ -48,9 +49,9 @@
; BOLT-NEXT: DW_AT_GNU_addr_base [DW_FORM_sec_offset] (0x00000000)

; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000010
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN: DW_TAG_subprogram
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000040
; BOLT-DWO-MAIN-NEXT: DW_AT_ranges [DW_FORM_sec_offset] (0x00000050
11 changes: 5 additions & 6 deletions bolt/test/X86/dwarf5-df-types-debug-names.test
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,19 @@
; BOLT: type_signature = [[TYPE1:0x[0-9a-f]*]]
; BOLT: Compile Unit
; BOLT: type_signature = [[TYPE2:0x[0-9a-f]*]]
; BOLT: type_signature = [[TYPE3:0x[0-9a-f]*]]
; BOLT: type_signature = [[TYPE1]]
; BOLT: Compile Unit
; BOLT: [[OFFSET:0x[0-9a-f]*]]: Compile Unit
; BOLT: [[OFFSET1:0x[0-9a-f]*]]: Compile Unit

; BOLT: Name Index @ 0x0 {
; BOLT-NEXT: Header {
; BOLT-NEXT: Length: 0x17E
; BOLT-NEXT: Length: 0x176
; BOLT-NEXT: Format: DWARF32
; BOLT-NEXT: Version: 5
; BOLT-NEXT: CU count: 2
; BOLT-NEXT: Local TU count: 0
; BOLT-NEXT: Foreign TU count: 4
; BOLT-NEXT: Foreign TU count: 3
; BOLT-NEXT: Bucket count: 9
; BOLT-NEXT: Name count: 9
; BOLT-NEXT: Abbreviations table size: 0x37
Expand All @@ -44,7 +44,6 @@
; BOLT-NEXT: ForeignTU[0]: [[TYPE]]
; BOLT-NEXT: ForeignTU[1]: [[TYPE1]]
; BOLT-NEXT: ForeignTU[2]: [[TYPE2]]
; BOLT-NEXT: ForeignTU[3]: [[TYPE3]]
; BOLT-NEXT: ]
; BOLT-NEXT: Abbreviations [
; BOLT-NEXT: Abbreviation [[ABBREV:0x[0-9a-f]*]] {
Expand Down Expand Up @@ -173,7 +172,7 @@
; BOLT-NEXT: Entry @ {{.+}} {
; BOLT-NEXT: Abbrev: [[ABBREV]]
; BOLT-NEXT: Tag: DW_TAG_structure_type
; BOLT-NEXT: DW_IDX_type_unit: 0x03
; BOLT-NEXT: DW_IDX_type_unit: 0x01
; BOLT-NEXT: DW_IDX_compile_unit: 0x01
; BOLT-NEXT: DW_IDX_die_offset: 0x00000021
; BOLT-NEXT: DW_IDX_parent: <parent not indexed>
Expand Down Expand Up @@ -237,7 +236,7 @@
; BOLT-NEXT: Entry @ {{.+}} {
; BOLT-NEXT: Abbrev: 0x5
; BOLT-NEXT: Tag: DW_TAG_base_type
; BOLT-NEXT: DW_IDX_type_unit: 0x03
; BOLT-NEXT: DW_IDX_type_unit: 0x01
; BOLT-NEXT: DW_IDX_compile_unit: 0x01
; BOLT-NEXT: DW_IDX_die_offset: 0x00000048
; BOLT-NEXT: DW_IDX_parent: <parent not indexed>
Expand Down
63 changes: 63 additions & 0 deletions bolt/test/X86/name-similarity-function-matching.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
## Tests function matching in YAMLProfileReader by name similarity.

# REQUIRES: system-linux
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
# RUN: --print-cfg --name-similarity-function-matching-threshold=1 --funcs=main --profile-ignore-hash=0 2>&1 | FileCheck %s

# CHECK: BOLT-INFO: matched 1 functions with similar names

#--- main.s
## Function under test. It is laid out as five labelled blocks
## (.LBB00 - .LBB04); the YAML profile in this file describes a function
## named "main2" with nblocks: 5, so the two can only be paired through
## name-similarity matching.
.globl main
.type main, @function
main:
.cfi_startproc
.LBB00:
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
testq %rax, %rax
js .LBB03
.LBB01:
jne .LBB04
.LBB02:
nop
.LBB03:
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
.LBB04:
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
## For relocations against .text
.reloc 0, R_X86_64_NONE
.cfi_endproc
.size main, .-main

#--- yaml
---
header:
profile-version: 1
binary-name: 'hashing-based-function-matching.s.tmp.exe'
binary-build-id: '<unknown>'
profile-flags: [ lbr ]
profile-origin: branch profile reader
profile-events: ''
dfs-order: false
hash-func: xxh3
functions:
- name: main2
fid: 0
hash: 0x0000000000000001
exec: 1
nblocks: 5
blocks:
- bid: 1
insns: 1
succ: [ { bid: 3, cnt: 1} ]
...
4 changes: 4 additions & 0 deletions bolt/test/X86/register-fragments-bolt-symbols.s
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,12 @@
# PREAGGWARM: B X:0 #chain.warm# 1 0
# RUN: perf2bolt %t.warm.bolt -p %t.preagg.warm --pa -o %t.warm.fdata -w %t.warm.yaml \
# RUN: -v=1 | FileCheck %s --check-prefix=CHECK-BOLT-WARM
# RUN: FileCheck %s --input-file %t.warm.fdata --check-prefix=CHECK-FDATA-WARM
# RUN: FileCheck %s --input-file %t.warm.yaml --check-prefix=CHECK-YAML-WARM

# CHECK-BOLT-WARM: marking chain.warm/1(*2) as a fragment of chain
# CHECK-FDATA-WARM: chain
# CHECK-YAML-WARM: chain

# RUN: sed -i 's|chain|chain/2|g' %t.fdata
# RUN: llvm-objcopy --localize-symbol=chain %t.main.o
Expand Down
2 changes: 2 additions & 0 deletions clang-tools-extra/clang-tidy/boost/BoostTidyModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "../ClangTidy.h"
#include "../ClangTidyModule.h"
#include "../ClangTidyModuleRegistry.h"
#include "UseRangesCheck.h"
#include "UseToStringCheck.h"
using namespace clang::ast_matchers;

Expand All @@ -18,6 +19,7 @@ namespace boost {
/// Clang-tidy module that registers the checks of the `boost-` group.
class BoostModule : public ClangTidyModule {
public:
/// Registers every Boost check under its user-visible `boost-*` name.
void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
CheckFactories.registerCheck<UseRangesCheck>("boost-use-ranges");
CheckFactories.registerCheck<UseToStringCheck>("boost-use-to-string");
}
};
Expand Down
1 change: 1 addition & 0 deletions clang-tools-extra/clang-tidy/boost/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS

add_clang_library(clangTidyBoostModule
BoostTidyModule.cpp
UseRangesCheck.cpp
UseToStringCheck.cpp

LINK_LIBS
Expand Down
371 changes: 371 additions & 0 deletions clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,371 @@
//===--- UseRangesCheck.cpp - clang-tidy ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "UseRangesCheck.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include <initializer_list>
#include <optional>
#include <string>

// FixItHint - Let the docs script know that this class does provide fixits

namespace clang::tidy::boost {

namespace {
/// Base replacer that handles the boost include path and namespace
class BoostReplacer : public UseRangesCheck::Replacer {
public:
BoostReplacer(ArrayRef<UseRangesCheck::Signature> Signatures,
bool IncludeSystem)
: Signatures(Signatures), IncludeSystem(IncludeSystem) {}

ArrayRef<UseRangesCheck::Signature> getReplacementSignatures() const final {
return Signatures;
}

virtual std::pair<StringRef, StringRef>
getBoostName(const NamedDecl &OriginalName) const = 0;

virtual std::pair<StringRef, StringRef>
getBoostHeader(const NamedDecl &OriginalName) const = 0;

std::optional<std::string>
getReplaceName(const NamedDecl &OriginalName) const final {
auto [Namespace, Function] = getBoostName(OriginalName);
return ("boost::" + Namespace + (Namespace.empty() ? "" : "::") + Function)
.str();
}

std::optional<std::string>
getHeaderInclusion(const NamedDecl &OriginalName) const final {
auto [Path, HeaderName] = getBoostHeader(OriginalName);
return ((IncludeSystem ? "<boost/" : "boost/") + Path +
(Path.empty() ? "" : "/") + HeaderName +
(IncludeSystem ? ".hpp>" : ".hpp"))
.str();
}

private:
SmallVector<UseRangesCheck::Signature> Signatures;
bool IncludeSystem;
};

/// Creates replaces where the header file lives in
/// `boost/algorithm/<FUNC_NAME>.hpp` and the function is named
/// `boost::range::<FUNC_NAME>`
class BoostRangeAlgorithmReplacer : public BoostReplacer {
public:
using BoostReplacer::BoostReplacer;

std::pair<StringRef, StringRef>
getBoostName(const NamedDecl &OriginalName) const override {
return {"range", OriginalName.getName()};
}

std::pair<StringRef, StringRef>
getBoostHeader(const NamedDecl &OriginalName) const override {
return {"range/algorithm", OriginalName.getName()};
}
};

/// Creates replaces where the header file lives in
/// `boost/algorithm/<CUSTOM_HEADER>.hpp` and the function is named
/// `boost::range::<FUNC_NAME>`
class CustomBoostAlgorithmHeaderReplacer : public BoostRangeAlgorithmReplacer {
public:
CustomBoostAlgorithmHeaderReplacer(
StringRef HeaderName, ArrayRef<UseRangesCheck::Signature> Signatures,
bool IncludeSystem)
: BoostRangeAlgorithmReplacer(Signatures, IncludeSystem),
HeaderName(HeaderName) {}

std::pair<StringRef, StringRef>
getBoostHeader(const NamedDecl & /*OriginalName*/) const override {
return {"range/algorithm", HeaderName};
}

private:
StringRef HeaderName;
};

/// Creates replaces where the header file lives in
/// `boost/algorithm/<SUB_HEADER>.hpp` and the function is named
/// `boost::algorithm::<FUNC_NAME>`
class BoostAlgorithmReplacer : public BoostReplacer {
public:
BoostAlgorithmReplacer(StringRef SubHeader,
ArrayRef<UseRangesCheck::Signature> Signatures,
bool IncludeSystem)
: BoostReplacer(Signatures, IncludeSystem),
SubHeader(("algorithm/" + SubHeader).str()) {}
std::pair<StringRef, StringRef>
getBoostName(const NamedDecl &OriginalName) const override {
return {"algorithm", OriginalName.getName()};
}

std::pair<StringRef, StringRef>
getBoostHeader(const NamedDecl &OriginalName) const override {
return {SubHeader, OriginalName.getName()};
}

private:
std::string SubHeader;
};

/// Creates replaces where the header file lives in
/// `boost/algorithm/<SUB_HEADER>/<HEADER_NAME>.hpp` and the function is named
/// `boost::algorithm::<FUNC_NAME>`
class CustomBoostAlgorithmReplacer : public BoostReplacer {
public:
CustomBoostAlgorithmReplacer(StringRef SubHeader, StringRef HeaderName,
ArrayRef<UseRangesCheck::Signature> Signatures,
bool IncludeSystem)
: BoostReplacer(Signatures, IncludeSystem),
SubHeader(("algorithm/" + SubHeader).str()), HeaderName(HeaderName) {}
std::pair<StringRef, StringRef>
getBoostName(const NamedDecl &OriginalName) const override {
return {"algorithm", OriginalName.getName()};
}

std::pair<StringRef, StringRef>
getBoostHeader(const NamedDecl & /*OriginalName*/) const override {
return {SubHeader, HeaderName};
}

private:
std::string SubHeader;
StringRef HeaderName;
};

/// A Replacer that is used for functions that just call a new overload.
/// It reports the given signatures but supplies neither a replacement name
/// nor a header to include (both accessors return std::nullopt).
class MakeOverloadReplacer : public UseRangesCheck::Replacer {
public:
explicit MakeOverloadReplacer(ArrayRef<UseRangesCheck::Signature> Signatures)
: Signatures(Signatures) {}

/// Returns the signatures this replacer was constructed with.
ArrayRef<UseRangesCheck::Signature>
getReplacementSignatures() const override {
return Signatures;
}

/// No replacement name is provided.
std::optional<std::string>
getReplaceName(const NamedDecl & /* OriginalName */) const override {
return std::nullopt;
}

/// No header inclusion is provided.
std::optional<std::string>
getHeaderInclusion(const NamedDecl & /* OriginalName */) const override {
return std::nullopt;
}

private:
// Owned copy of the signatures passed to the constructor.
SmallVector<UseRangesCheck::Signature> Signatures;
};

/// A replacer that replaces functions with an equivalent named function in the
/// root boost namespace
class FixedBoostReplace : public BoostReplacer {
public:
FixedBoostReplace(StringRef Header,
ArrayRef<UseRangesCheck::Signature> Signatures,
bool IncludeBoostSystem)
: BoostReplacer(Signatures, IncludeBoostSystem), Header(Header) {}

std::pair<StringRef, StringRef>
getBoostName(const NamedDecl &OriginalName) const override {
return {{}, OriginalName.getName()};
}

std::pair<StringRef, StringRef>
getBoostHeader(const NamedDecl & /* OriginalName */) const override {
return {{}, Header};
}

private:
StringRef Header;
};

} // namespace

/// Builds the map from fully qualified algorithm names (for example
/// `::std::sort` or `::boost::algorithm::hex`) to the replacer that describes
/// how a call to that algorithm is rewritten.
///
/// A fresh map is built and returned on every call.
utils::UseRangesCheck::ReplacerMap UseRangesCheck::getReplacerMap() const {

  ReplacerMap Results;
  static const Signature SingleSig = {{0}};
  static const Signature TwoSig = {{0}, {2}};

  // The helpers below must NOT be `static`: `AddFrom` captures the local
  // `Results` by reference. A static lambda is initialised only on the first
  // call, so every later call would write through a dangling reference to the
  // first call's `Results` and return an empty map.
  const auto AddFrom =
      [&Results](llvm::IntrusiveRefCntPtr<UseRangesCheck::Replacer> Replacer,
                 std::initializer_list<StringRef> Names, StringRef Prefix) {
        llvm::SmallString<64> Buffer;
        for (const auto &Name : Names) {
          // Keys are spelled `::[<Prefix>::]<Name>`.
          Buffer.assign({"::", Prefix, (Prefix.empty() ? "" : "::"), Name});
          Results.try_emplace(Buffer, Replacer);
        }
      };

  // Registers `Names` from namespace `std`.
  const auto AddFromStd =
      [&AddFrom](llvm::IntrusiveRefCntPtr<UseRangesCheck::Replacer> Replacer,
                 std::initializer_list<StringRef> Names) {
        AddFrom(Replacer, Names, "std");
      };

  // Registers names from `boost` or one of its nested namespaces.
  const auto AddFromBoost =
      [&AddFrom](llvm::IntrusiveRefCntPtr<UseRangesCheck::Replacer> Replacer,
                 std::initializer_list<
                     std::pair<StringRef, std::initializer_list<StringRef>>>
                     NamespaceAndNames) {
        for (auto [Namespace, Names] : NamespaceAndNames)
          AddFrom(Replacer, Names,
                  SmallString<64>{"boost", (Namespace.empty() ? "" : "::"),
                                  Namespace});
      };

  AddFromStd(llvm::makeIntrusiveRefCnt<CustomBoostAlgorithmHeaderReplacer>(
                 "set_algorithm", TwoSig, IncludeBoostSystem),
             {"includes", "set_union", "set_intersection", "set_difference",
              "set_symmetric_difference"});

  AddFromStd(llvm::makeIntrusiveRefCnt<BoostRangeAlgorithmReplacer>(
                 SingleSig, IncludeBoostSystem),
             {"unique",         "lower_bound",   "stable_sort",
              "equal_range",    "remove_if",     "sort",
              "random_shuffle", "remove_copy",   "stable_partition",
              "remove_copy_if", "count",         "copy_backward",
              "reverse_copy",   "adjacent_find", "remove",
              "upper_bound",    "binary_search", "replace_copy_if",
              "for_each",       "generate",      "count_if",
              "min_element",    "reverse",       "replace_copy",
              "fill",           "unique_copy",   "transform",
              "copy",           "replace",       "find",
              "replace_if",     "find_if",       "partition",
              "max_element"});

  AddFromStd(llvm::makeIntrusiveRefCnt<BoostRangeAlgorithmReplacer>(
                 TwoSig, IncludeBoostSystem),
             {"find_end", "merge", "partial_sort_copy", "find_first_of",
              "search", "lexicographical_compare", "equal", "mismatch"});

  AddFromStd(llvm::makeIntrusiveRefCnt<CustomBoostAlgorithmHeaderReplacer>(
                 "permutation", SingleSig, IncludeBoostSystem),
             {"next_permutation", "prev_permutation"});

  AddFromStd(llvm::makeIntrusiveRefCnt<CustomBoostAlgorithmHeaderReplacer>(
                 "heap_algorithm", SingleSig, IncludeBoostSystem),
             {"push_heap", "pop_heap", "make_heap", "sort_heap"});

  AddFromStd(llvm::makeIntrusiveRefCnt<BoostAlgorithmReplacer>(
                 "cxx11", SingleSig, IncludeBoostSystem),
             {"copy_if", "is_permutation", "is_partitioned", "find_if_not",
              "partition_copy", "any_of", "iota", "all_of", "partition_point",
              "is_sorted", "none_of"});

  AddFromStd(llvm::makeIntrusiveRefCnt<CustomBoostAlgorithmReplacer>(
                 "cxx11", "is_sorted", SingleSig, IncludeBoostSystem),
             {"is_sorted_until"});

  AddFromStd(llvm::makeIntrusiveRefCnt<FixedBoostReplace>(
                 "range/numeric", SingleSig, IncludeBoostSystem),
             {"accumulate", "partial_sum", "adjacent_difference"});

  // `reduce` is only registered when compiling as C++17 or later.
  if (getLangOpts().CPlusPlus17)
    AddFromStd(llvm::makeIntrusiveRefCnt<BoostAlgorithmReplacer>(
                   "cxx17", SingleSig, IncludeBoostSystem),
               {"reduce"});

  // Boost.Algorithm functions: no rename and no new include are supplied;
  // the replacer only describes the signatures.
  AddFromBoost(llvm::makeIntrusiveRefCnt<MakeOverloadReplacer>(SingleSig),
               {{"algorithm",
                 {"reduce",
                  "find_backward",
                  "find_not_backward",
                  "find_if_backward",
                  "find_if_not_backward",
                  "hex",
                  "hex_lower",
                  "unhex",
                  "is_partitioned_until",
                  "is_palindrome",
                  "copy_if",
                  "copy_while",
                  "copy_until",
                  "copy_if_while",
                  "copy_if_until",
                  "is_permutation",
                  "is_partitioned",
                  "one_of",
                  "one_of_equal",
                  "find_if_not",
                  "partition_copy",
                  "any_of",
                  "any_of_equal",
                  "iota",
                  "all_of",
                  "all_of_equal",
                  "partition_point",
                  "is_sorted_until",
                  "is_sorted",
                  "is_increasing",
                  "is_decreasing",
                  "is_strictly_increasing",
                  "is_strictly_decreasing",
                  "none_of",
                  "none_of_equal",
                  "clamp_range"}}});

  AddFromBoost(
      llvm::makeIntrusiveRefCnt<MakeOverloadReplacer>(TwoSig),
      {{"algorithm", {"apply_permutation", "apply_reverse_permutation"}}});

  return Results;
}

/// Constructs the check and reads the `IncludeBoostSystem` option, which
/// defaults to true when it is not configured.
UseRangesCheck::UseRangesCheck(StringRef Name, ClangTidyContext *Context)
: utils::UseRangesCheck(Name, Context),
IncludeBoostSystem(Options.get("IncludeBoostSystem", true)) {}

/// Stores the base-class options first, then this check's own
/// `IncludeBoostSystem` option.
void UseRangesCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
utils::UseRangesCheck::storeOptions(Opts);
Options.store(Opts, "IncludeBoostSystem", IncludeBoostSystem);
}
/// Emits the diagnostic for \p Call at the start of the call expression.
/// The wording says "boost" when the callee is in namespace `std` and
/// "ranged" otherwise.
DiagnosticBuilder UseRangesCheck::createDiag(const CallExpr &Call) {
  DiagnosticBuilder Builder =
      diag(Call.getBeginLoc(), "use a %0 version of this algorithm");
  const bool CalleeIsStd = Call.getDirectCallee()->isInStdNamespace();
  const char *const Flavour = CalleeIsStd ? "boost" : "ranged";
  Builder << Flavour;
  return Builder;
}
/// Returns the fully qualified names of the free begin/end function pairs
/// this check knows about, each as a {begin, end} pair.
ArrayRef<std::pair<StringRef, StringRef>>
UseRangesCheck::getFreeBeginEndMethods() const {
  using NamePair = std::pair<StringRef, StringRef>;
  static const NamePair BeginEndPairs[] = {
      {"::std::begin", "::std::end"},
      {"::std::cbegin", "::std::cend"},
      {"::boost::range_adl_barrier::begin", "::boost::range_adl_barrier::end"},
      {"::boost::range_adl_barrier::const_begin",
       "::boost::range_adl_barrier::const_end"},
  };
  return BeginEndPairs;
}
std::optional<UseRangesCheck::ReverseIteratorDescriptor>
UseRangesCheck::getReverseDescriptor() const {
static const std::pair<StringRef, StringRef> Refs[] = {
{"::std::rbegin", "::std::rend"},
{"::std::crbegin", "::std::crend"},
{"::boost::rbegin", "::boost::rend"},
{"::boost::const_rbegin", "::boost::const_rend"},
};
return ReverseIteratorDescriptor{"boost::adaptors::reverse",
IncludeBoostSystem
? "<boost/range/adaptor/reversed.hpp>"
: "boost/range/adaptor/reversed.hpp",
Refs};
}
} // namespace clang::tidy::boost
43 changes: 43 additions & 0 deletions clang-tools-extra/clang-tidy/boost/UseRangesCheck.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//===--- UseRangesCheck.h - clang-tidy --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BOOST_USERANGESCHECK_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BOOST_USERANGESCHECK_H

#include "../utils/UseRangesCheck.h"

namespace clang::tidy::boost {

/// Detects calls to standard library iterator algorithms (and to a set of
/// `boost::algorithm` functions) that could be replaced with a boost ranges
/// version instead.
///
/// For the user-facing documentation see:
/// http://clang.llvm.org/extra/clang-tidy/checks/boost/use-ranges.html
class UseRangesCheck : public utils::UseRangesCheck {
public:
/// Reads the `IncludeBoostSystem` option (default: true).
UseRangesCheck(StringRef Name, ClangTidyContext *Context);

/// Stores the base-class options and `IncludeBoostSystem`.
void storeOptions(ClangTidyOptions::OptionMap &Options) override;

/// Maps fully qualified algorithm names to their replacers.
ReplacerMap getReplacerMap() const override;

/// Emits "use a boost version of this algorithm" for callees in namespace
/// std, and "use a ranged version of this algorithm" otherwise.
DiagnosticBuilder createDiag(const CallExpr &Call) override;

/// Returns {begin, end} name pairs of the recognised free functions.
ArrayRef<std::pair<StringRef, StringRef>>
getFreeBeginEndMethods() const override;

/// Describes the `boost::adaptors::reverse` adaptor, its header, and the
/// recognised free reverse begin/end name pairs.
std::optional<ReverseIteratorDescriptor>
getReverseDescriptor() const override;

private:
// When true, Boost headers are spelled with angle brackets
// (`<boost/...>`); otherwise they are spelled without them (`boost/...`).
bool IncludeBoostSystem;
};

} // namespace clang::tidy::boost

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BOOST_USERANGESCHECK_H
3 changes: 3 additions & 0 deletions clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include "NotNullTerminatedResultCheck.h"
#include "OptionalValueConversionCheck.h"
#include "ParentVirtualCallCheck.h"
#include "PointerArithmeticOnPolymorphicObjectCheck.h"
#include "PosixReturnCheck.h"
#include "RedundantBranchConditionCheck.h"
#include "ReservedIdentifierCheck.h"
Expand Down Expand Up @@ -171,6 +172,8 @@ class BugproneModule : public ClangTidyModule {
"bugprone-multiple-statement-macro");
CheckFactories.registerCheck<OptionalValueConversionCheck>(
"bugprone-optional-value-conversion");
CheckFactories.registerCheck<PointerArithmeticOnPolymorphicObjectCheck>(
"bugprone-pointer-arithmetic-on-polymorphic-object");
CheckFactories.registerCheck<RedundantBranchConditionCheck>(
"bugprone-redundant-branch-condition");
CheckFactories.registerCheck<cppcoreguidelines::NarrowingConversionsCheck>(
Expand Down
1 change: 1 addition & 0 deletions clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ add_clang_library(clangTidyBugproneModule
NotNullTerminatedResultCheck.cpp
OptionalValueConversionCheck.cpp
ParentVirtualCallCheck.cpp
PointerArithmeticOnPolymorphicObjectCheck.cpp
PosixReturnCheck.cpp
RedundantBranchConditionCheck.cpp
ReservedIdentifierCheck.cpp
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
//===--- PointerArithmeticOnPolymorphicObjectCheck.cpp - clang-tidy--------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PointerArithmeticOnPolymorphicObjectCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"

using namespace clang::ast_matchers;

namespace clang::tidy::bugprone {

// File-local AST matchers that expose CXXRecordDecl predicates to the matcher
// expressions used by this check.
namespace {
// Matches a C++ record for which CXXRecordDecl::isAbstract() is true.
AST_MATCHER(CXXRecordDecl, isAbstract) { return Node.isAbstract(); }
// Matches a C++ record for which CXXRecordDecl::isPolymorphic() is true.
AST_MATCHER(CXXRecordDecl, isPolymorphic) { return Node.isPolymorphic(); }
} // namespace

// Constructs the check and reads the "IgnoreInheritedVirtualFunctions" option,
// which defaults to false when it is not configured.
PointerArithmeticOnPolymorphicObjectCheck::
PointerArithmeticOnPolymorphicObjectCheck(StringRef Name,
ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
IgnoreInheritedVirtualFunctions(
Options.get("IgnoreInheritedVirtualFunctions", false)) {}

// Writes the current value of "IgnoreInheritedVirtualFunctions" into Opts
// under the same key the constructor reads it from.
void PointerArithmeticOnPolymorphicObjectCheck::storeOptions(
ClangTidyOptions::OptionMap &Opts) {
Options.store(Opts, "IgnoreInheritedVirtualFunctions",
IgnoreInheritedVirtualFunctions);
}

/// Registers matchers for array subscripts, the operators `+ - += -=` and the
/// operators `++ --` whose pointer operand points to a non-final class
/// selected by the IgnoreInheritedVirtualFunctions option. The pointer
/// expression is bound as "pointer" and the pointee class as "pointee".
void PointerArithmeticOnPolymorphicObjectCheck::registerMatchers(
    MatchFinder *Finder) {
  // Any non-final class that is polymorphic.
  const auto AnyPolymorphicRecord =
      cxxRecordDecl(unless(isFinal()), isPolymorphic());

  // Only non-final classes that spell out a virtual method themselves or that
  // are abstract.
  const auto RecordDeclaringVirtual = cxxRecordDecl(
      unless(isFinal()), anyOf(hasMethod(isVirtualAsWritten()), isAbstract()));

  const auto PointeeRecord = IgnoreInheritedVirtualFunctions
                                 ? RecordDeclaringVirtual
                                 : AnyPolymorphicRecord;

  // An expression whose canonical type is a pointer to the selected record.
  const auto PointerOperand =
      expr(hasType(hasCanonicalType(pointerType(pointee(hasCanonicalType(
               hasDeclaration(PointeeRecord.bind("pointee"))))))))
          .bind("pointer");

  Finder->addMatcher(arraySubscriptExpr(hasBase(PointerOperand)), this);
  Finder->addMatcher(binaryOperator(hasAnyOperatorName("+", "-", "+=", "-="),
                                    hasEitherOperand(PointerOperand)),
                     this);
  Finder->addMatcher(unaryOperator(hasAnyOperatorName("++", "--"),
                                   hasUnaryOperand(PointerOperand)),
                     this);
}

/// Reports the matched pointer expression, naming the pointee class bound by
/// the matcher and highlighting the pointer expression's source range.
void PointerArithmeticOnPolymorphicObjectCheck::check(
    const MatchFinder::MatchResult &Result) {
  const auto &Bound = Result.Nodes;
  const auto *Pointee = Bound.getNodeAs<CXXRecordDecl>("pointee");
  const auto *Pointer = Bound.getNodeAs<Expr>("pointer");

  auto Diag = diag(
      Pointer->getBeginLoc(),
      "pointer arithmetic on polymorphic object of type %0 can result in "
      "undefined behavior if the dynamic type differs from the pointer type");
  Diag << Pointee;
  Diag << Pointer->getSourceRange();
}

} // namespace clang::tidy::bugprone
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//===--- PointerArithmeticOnPolymorphicObjectCheck.h ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_POINTERARITHMETICONPOLYMORPHICOBJECTCHECK_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_POINTERARITHMETICONPOLYMORPHICOBJECTCHECK_H

#include "../ClangTidyCheck.h"

namespace clang::tidy::bugprone {

/// Finds pointer arithmetic (array subscripts, additive operators and
/// increment/decrement) performed on pointers to non-final classes that
/// contain a virtual function.
///
/// For the user-facing documentation see:
/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.html
class PointerArithmeticOnPolymorphicObjectCheck : public ClangTidyCheck {
public:
PointerArithmeticOnPolymorphicObjectCheck(StringRef Name,
ClangTidyContext *Context);
void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
// The check only runs when the translation unit is compiled as C++.
bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
return LangOpts.CPlusPlus;
}
// Matchers are evaluated with the TK_IgnoreUnlessSpelledInSource traversal.
std::optional<TraversalKind> getCheckTraversalKind() const override {
return TK_IgnoreUnlessSpelledInSource;
}

private:
// When true, only classes that declare a virtual method themselves (or are
// abstract) are matched; when false, any polymorphic class is matched.
const bool IgnoreInheritedVirtualFunctions;
};

} // namespace clang::tidy::bugprone

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_POINTERARITHMETICONPOLYMORPHICOBJECTCHECK_H
42 changes: 33 additions & 9 deletions clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h"
#include "clang/Analysis/CFG.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"

#include "../utils/ExprSequence.h"
#include "../utils/Matchers.h"
Expand All @@ -34,7 +36,12 @@ struct UseAfterMove {
const DeclRefExpr *DeclRef;

// Is the order in which the move and the use are evaluated undefined?
bool EvaluationOrderUndefined;
bool EvaluationOrderUndefined = false;

// Does the use happen in a later loop iteration than the move?
//
// We default to false and change it to true if required in find().
bool UseHappensInLaterLoopIteration = false;
};

/// Finds uses of a variable after a move (and maintains state required by the
Expand All @@ -48,7 +55,7 @@ class UseAfterMoveFinder {
// use-after-move is found, writes information about it to 'TheUseAfterMove'.
// Returns whether a use-after-move was found.
bool find(Stmt *CodeBlock, const Expr *MovingCall,
const ValueDecl *MovedVariable, UseAfterMove *TheUseAfterMove);
const DeclRefExpr *MovedVariable, UseAfterMove *TheUseAfterMove);

private:
bool findInternal(const CFGBlock *Block, const Expr *MovingCall,
Expand Down Expand Up @@ -89,7 +96,7 @@ UseAfterMoveFinder::UseAfterMoveFinder(ASTContext *TheContext)
: Context(TheContext) {}

bool UseAfterMoveFinder::find(Stmt *CodeBlock, const Expr *MovingCall,
const ValueDecl *MovedVariable,
const DeclRefExpr *MovedVariable,
UseAfterMove *TheUseAfterMove) {
// Generate the CFG manually instead of through an AnalysisDeclContext because
// it seems the latter can't be used to generate a CFG for the body of a
Expand All @@ -110,15 +117,32 @@ bool UseAfterMoveFinder::find(Stmt *CodeBlock, const Expr *MovingCall,
BlockMap = std::make_unique<StmtToBlockMap>(TheCFG.get(), Context);
Visited.clear();

const CFGBlock *Block = BlockMap->blockContainingStmt(MovingCall);
if (!Block) {
const CFGBlock *MoveBlock = BlockMap->blockContainingStmt(MovingCall);
if (!MoveBlock) {
// This can happen if MovingCall is in a constructor initializer, which is
// not included in the CFG because the CFG is built only from the function
// body.
Block = &TheCFG->getEntry();
MoveBlock = &TheCFG->getEntry();
}

return findInternal(Block, MovingCall, MovedVariable, TheUseAfterMove);
bool Found = findInternal(MoveBlock, MovingCall, MovedVariable->getDecl(),
TheUseAfterMove);

if (Found) {
if (const CFGBlock *UseBlock =
BlockMap->blockContainingStmt(TheUseAfterMove->DeclRef)) {
// Does the use happen in a later loop iteration than the move?
// - If they are in the same CFG block, we know the use happened in a
// later iteration if we visited that block a second time.
// - Otherwise, we know the use happened in a later iteration if the
// move is reachable from the use.
CFGReverseBlockReachabilityAnalysis CFA(*TheCFG);
TheUseAfterMove->UseHappensInLaterLoopIteration =
UseBlock == MoveBlock ? Visited.contains(UseBlock)
: CFA.isReachable(UseBlock, MoveBlock);
}
}
return Found;
}

bool UseAfterMoveFinder::findInternal(const CFGBlock *Block,
Expand Down Expand Up @@ -394,7 +418,7 @@ static void emitDiagnostic(const Expr *MovingCall, const DeclRefExpr *MoveArg,
"there is no guarantee about the order in which they are evaluated",
DiagnosticIDs::Note)
<< IsMove;
} else if (UseLoc < MoveLoc || Use.DeclRef == MoveArg) {
} else if (Use.UseHappensInLaterLoopIteration) {
Check->diag(UseLoc,
"the use happens in a later loop iteration than the "
"%select{forward|move}0",
Expand Down Expand Up @@ -495,7 +519,7 @@ void UseAfterMoveCheck::check(const MatchFinder::MatchResult &Result) {
for (Stmt *CodeBlock : CodeBlocks) {
UseAfterMoveFinder Finder(Result.Context);
UseAfterMove Use;
if (Finder.find(CodeBlock, MovingCall, Arg->getDecl(), &Use))
if (Finder.find(CodeBlock, MovingCall, Arg, &Use))
emitDiagnostic(MovingCall, Arg, Use, this, Result.Context,
determineMoveType(MoveDecl));
}
Expand Down
5 changes: 5 additions & 0 deletions clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "../ClangTidyModule.h"
#include "../ClangTidyModuleRegistry.h"
#include "../bugprone/BadSignalToKillThreadCheck.h"
#include "../bugprone/PointerArithmeticOnPolymorphicObjectCheck.h"
#include "../bugprone/ReservedIdentifierCheck.h"
#include "../bugprone/SignalHandlerCheck.h"
#include "../bugprone/SignedCharMisuseCheck.h"
Expand Down Expand Up @@ -238,6 +239,10 @@ class CERTModule : public ClangTidyModule {
// CON
CheckFactories.registerCheck<bugprone::SpuriouslyWakeUpFunctionsCheck>(
"cert-con54-cpp");
// CTR
CheckFactories
.registerCheck<bugprone::PointerArithmeticOnPolymorphicObjectCheck>(
"cert-ctr56-cpp");
// DCL
CheckFactories.registerCheck<VariadicFunctionDefCheck>("cert-dcl50-cpp");
CheckFactories.registerCheck<bugprone::ReservedIdentifierCheck>(
Expand Down
1 change: 1 addition & 0 deletions clang-tools-extra/clang-tidy/modernize/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ add_clang_library(clangTidyModernizeModule
UseNoexceptCheck.cpp
UseNullptrCheck.cpp
UseOverrideCheck.cpp
UseRangesCheck.cpp
UseStartsEndsWithCheck.cpp
UseStdFormatCheck.cpp
UseStdNumbersCheck.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "UseNoexceptCheck.h"
#include "UseNullptrCheck.h"
#include "UseOverrideCheck.h"
#include "UseRangesCheck.h"
#include "UseStartsEndsWithCheck.h"
#include "UseStdFormatCheck.h"
#include "UseStdNumbersCheck.h"
Expand Down Expand Up @@ -75,6 +76,7 @@ class ModernizeModule : public ClangTidyModule {
CheckFactories.registerCheck<PassByValueCheck>("modernize-pass-by-value");
CheckFactories.registerCheck<UseDesignatedInitializersCheck>(
"modernize-use-designated-initializers");
CheckFactories.registerCheck<UseRangesCheck>("modernize-use-ranges");
CheckFactories.registerCheck<UseStartsEndsWithCheck>(
"modernize-use-starts-ends-with");
CheckFactories.registerCheck<UseStdFormatCheck>("modernize-use-std-format");
Expand Down
185 changes: 185 additions & 0 deletions clang-tools-extra/clang-tidy/modernize/UseRangesCheck.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
//===--- UseRangesCheck.cpp - clang-tidy ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "UseRangesCheck.h"
#include "clang/AST/Decl.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include <initializer_list>

// FixItHint - Let the docs script know that this class does provide fixits

namespace clang::tidy::modernize {

// Unqualified names of the std algorithms that getReplacerMap() registers with
// the single-range signature, i.e. whose leading (first, last) iterator pair
// can be replaced by one range argument.
// NOTE(review): every name here is registered with the replacer that suggests
// including <algorithm>, but the ranges versions of the uninitialized_* and
// destroy algorithms are declared in <memory> - confirm whether that include
// is sufficient for them.
static constexpr const char *SingleRangeNames[] = {
"all_of",
"any_of",
"none_of",
"for_each",
"find",
"find_if",
"find_if_not",
"adjacent_find",
"copy",
"copy_if",
"copy_backward",
"move",
"move_backward",
"fill",
"transform",
"replace",
"replace_if",
"generate",
"remove",
"remove_if",
"remove_copy",
"remove_copy_if",
"unique",
"unique_copy",
"sample",
"partition_point",
"lower_bound",
"upper_bound",
"equal_range",
"binary_search",
"push_heap",
"pop_heap",
"make_heap",
"sort_heap",
"next_permutation",
"prev_permutation",
"reverse",
"reverse_copy",
"shift_left",
"shift_right",
"is_partitioned",
"partition",
"partition_copy",
"stable_partition",
"sort",
"stable_sort",
"is_sorted",
"is_sorted_until",
"is_heap",
"is_heap_until",
"max_element",
"min_element",
"minmax_element",
"uninitialized_copy",
"uninitialized_fill",
"uninitialized_move",
"uninitialized_default_construct",
"uninitialized_value_construct",
"destroy",
};

// Unqualified names of the std algorithms that getReplacerMap() registers with
// the two-range signature, i.e. whose iterator pairs at argument positions
// 0/1 and 2/3 can each be replaced by one range argument.
static constexpr const char *TwoRangeNames[] = {
"equal",
"mismatch",
"partial_sort_copy",
"includes",
"set_union",
"set_intersection",
"set_difference",
"set_symmetric_difference",
"merge",
"lexicographical_compare",
"find_end",
"search",
"is_permutation",
};

namespace {
class StdReplacer : public utils::UseRangesCheck::Replacer {
public:
explicit StdReplacer(SmallVector<UseRangesCheck::Signature> Signatures)
: Signatures(std::move(Signatures)) {}
std::optional<std::string>
getReplaceName(const NamedDecl &OriginalName) const override {
return ("std::ranges::" + OriginalName.getName()).str();
}
ArrayRef<UseRangesCheck::Signature>
getReplacementSignatures() const override {
return Signatures;
}

private:
SmallVector<UseRangesCheck::Signature> Signatures;
};

class StdAlgorithmReplacer : public StdReplacer {
using StdReplacer::StdReplacer;
std::optional<std::string>
getHeaderInclusion(const NamedDecl & /*OriginalName*/) const override {
return "<algorithm>";
}
};

class StdNumericReplacer : public StdReplacer {
using StdReplacer::StdReplacer;
std::optional<std::string>
getHeaderInclusion(const NamedDecl & /*OriginalName*/) const override {
return "<numeric>";
}
};
} // namespace

/// Builds the map from fully qualified algorithm names ("::std::<name>") to
/// the replacer that rewrites them. All names of one table share a single
/// replacer instance; `::std::iota` is only added when compiling as C++23.
utils::UseRangesCheck::ReplacerMap UseRangesCheck::getReplacerMap() const {
  // template<typename Iter> Func(Iter first, Iter last,...).
  static const Signature OneRange = {{0}};
  // template<typename Iter1, typename Iter2>
  // Func(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2,...).
  static const Signature TwoRanges = {{0}, {2}};

  static const Signature OneRangeSignatures[] = {OneRange};
  static const Signature TwoRangeSignatures[] = {TwoRanges};

  utils::UseRangesCheck::ReplacerMap Map;
  SmallString<64> Key;

  // Registers every name of one table under a replacer shared by the table.
  const auto AddAlgorithms = [&Map, &Key](const ArrayRef<Signature> Sigs,
                                          ArrayRef<const char *> Names) {
    auto Shared = llvm::makeIntrusiveRefCnt<StdAlgorithmReplacer>(
        SmallVector<UseRangesCheck::Signature>{Sigs});
    for (const char *Name : Names) {
      Key.assign({"::std::", Name});
      Map.try_emplace(Key, Shared);
    }
  };
  AddAlgorithms(OneRangeSignatures, SingleRangeNames);
  AddAlgorithms(TwoRangeSignatures, TwoRangeNames);

  if (getLangOpts().CPlusPlus23) {
    Map.try_emplace(
        "::std::iota",
        llvm::makeIntrusiveRefCnt<StdNumericReplacer>(
            SmallVector<UseRangesCheck::Signature>{
                std::begin(OneRangeSignatures), std::end(OneRangeSignatures)}));
  }
  return Map;
}

// The check is only enabled when the C++20 language option is set.
bool UseRangesCheck::isLanguageVersionSupported(
const LangOptions &LangOpts) const {
return LangOpts.CPlusPlus20;
}
// Returns the (begin, end) free-function pairs recognised by this check:
// std::begin/std::end and std::cbegin/std::cend. The returned view refers to a
// function-local static array.
ArrayRef<std::pair<StringRef, StringRef>>
UseRangesCheck::getFreeBeginEndMethods() const {
static const std::pair<StringRef, StringRef> Refs[] = {
{"::std::begin", "::std::end"}, {"::std::cbegin", "::std::cend"}};
return Refs;
}
// Describes reverse-iterator handling for the std flavour of the check:
// std::rbegin/std::rend and std::crbegin/std::crend pairs are paired with the
// adaptor name "std::views::reverse" and the header "<ranges>".
std::optional<UseRangesCheck::ReverseIteratorDescriptor>
UseRangesCheck::getReverseDescriptor() const {
static const std::pair<StringRef, StringRef> Refs[] = {
{"::std::rbegin", "::std::rend"}, {"::std::crbegin", "::std::crend"}};
return ReverseIteratorDescriptor{"std::views::reverse", "<ranges>", Refs};
}
} // namespace clang::tidy::modernize
38 changes: 38 additions & 0 deletions clang-tools-extra/clang-tidy/modernize/UseRangesCheck.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//===--- UseRangesCheck.h - clang-tidy --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USERANGESCHECK_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USERANGESCHECK_H

#include "../utils/UseRangesCheck.h"

namespace clang::tidy::modernize {

/// Detects calls to standard library iterator algorithms that could be
/// replaced with a ranges version instead.
///
/// This class specialises the shared utils::UseRangesCheck base by overriding
/// its customisation points.
///
/// For the user-facing documentation see:
/// http://clang.llvm.org/extra/clang-tidy/checks/modernize/use-ranges.html
class UseRangesCheck : public utils::UseRangesCheck {
public:
// Reuse the constructors of the shared base check.
using utils::UseRangesCheck::UseRangesCheck;

// Supplies the map from "::std::<algorithm>" names to their replacers.
ReplacerMap getReplacerMap() const override;

// Lists the free (begin, end) function pairs treated as a range.
ArrayRef<std::pair<StringRef, StringRef>>
getFreeBeginEndMethods() const override;

// Describes how (rbegin, rend) pairs are rewritten.
std::optional<ReverseIteratorDescriptor>
getReverseDescriptor() const override;

// Restricts the check to C++20 and later.
bool isLanguageVersionSupported(const LangOptions &LangOpts) const override;
};

} // namespace clang::tidy::modernize

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_USERANGESCHECK_H
1 change: 1 addition & 0 deletions clang-tools-extra/clang-tidy/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ add_clang_library(clangTidyUtils
RenamerClangTidyCheck.cpp
TransformerClangTidyCheck.cpp
TypeTraits.cpp
UseRangesCheck.cpp
UsingInserter.cpp

LINK_LIBS
Expand Down
68 changes: 62 additions & 6 deletions clang-tools-extra/clang-tidy/utils/ExprSequence.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,18 @@ bool isDescendantOrEqual(const Stmt *Descendant, const Stmt *Ancestor,
ASTContext *Context) {
if (Descendant == Ancestor)
return true;
for (const Stmt *Parent : getParentStmts(Descendant, Context)) {
if (isDescendantOrEqual(Parent, Ancestor, Context))
return true;
}
return llvm::any_of(getParentStmts(Descendant, Context),
[Ancestor, Context](const Stmt *Parent) {
return isDescendantOrEqual(Parent, Ancestor, Context);
});
}

return false;
bool isDescendantOfArgs(const Stmt *Descendant, const CallExpr *Call,
ASTContext *Context) {
return llvm::any_of(Call->arguments(),
[Descendant, Context](const Expr *Arg) {
return isDescendantOrEqual(Descendant, Arg, Context);
});
}

llvm::SmallVector<const InitListExpr *>
Expand Down Expand Up @@ -95,9 +101,59 @@ bool ExprSequence::inSequence(const Stmt *Before, const Stmt *After) const {
return true;
}

SmallVector<const Stmt *, 1> BeforeParents = getParentStmts(Before, Context);

// Since C++17, the callee of a call expression is guaranteed to be sequenced
// before all of the arguments.
// We handle this as a special case rather than using the general
// `getSequenceSuccessor` logic above because the callee expression doesn't
// have an unambiguous successor; the order in which arguments are evaluated
// is indeterminate.
for (const Stmt *Parent : BeforeParents) {
// Special case: If the callee is a `MemberExpr` with a `DeclRefExpr` as its
// base, we consider it to be sequenced _after_ the arguments. This is
// because the variable referenced in the base will only actually be
// accessed when the call happens, i.e. once all of the arguments have been
// evaluated. This has no basis in the C++ standard, but it reflects actual
// behavior that is relevant to a use-after-move scenario:
//
// ```
// a.bar(consumeA(std::move(a)));
// ```
//
// In this example, we end up accessing `a` after it has been moved from,
// even though nominally the callee `a.bar` is evaluated before the argument
// `consumeA(std::move(a))`. Note that this is not specific to C++17, so
// we implement this logic unconditionally.
if (const auto *Call = dyn_cast<CXXMemberCallExpr>(Parent)) {
if (is_contained(Call->arguments(), Before) &&
isa<DeclRefExpr>(
Call->getImplicitObjectArgument()->IgnoreParenImpCasts()) &&
isDescendantOrEqual(After, Call->getImplicitObjectArgument(),
Context))
return true;

// We need this additional early exit so that we don't fall through to the
// more general logic below.
if (const auto *Member = dyn_cast<MemberExpr>(Before);
Member && Call->getCallee() == Member &&
isa<DeclRefExpr>(Member->getBase()->IgnoreParenImpCasts()) &&
isDescendantOfArgs(After, Call, Context))
return false;
}

if (!Context->getLangOpts().CPlusPlus17)
continue;

if (const auto *Call = dyn_cast<CallExpr>(Parent);
Call && Call->getCallee() == Before &&
isDescendantOfArgs(After, Call, Context))
return true;
}

// If 'After' is a parent of 'Before' or is sequenced after one of these
// parents, we know that it is sequenced after 'Before'.
for (const Stmt *Parent : getParentStmts(Before, Context)) {
for (const Stmt *Parent : BeforeParents) {
if (Parent == After || inSequence(Parent, After))
return true;
}
Expand Down
306 changes: 306 additions & 0 deletions clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,306 @@
//===--- UseRangesCheck.cpp - clang-tidy ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "UseRangesCheck.h"
#include "Matchers.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Expr.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/ASTMatchers/ASTMatchersInternal.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <optional>
#include <string>

using namespace clang::ast_matchers;

// Prefix of the ID under which a matched call is bound; the signature prefix
// produced by getFullPrefix() is appended to it.
static constexpr const char BoundCall[] = "CallExpr";
// ID under which the callee's function declaration is bound.
static constexpr const char FuncDecl[] = "FuncDecl";
// Prefix of the ID under which a range argument is bound; the index of the
// begin argument (plus 'R' for the reverse form) is appended to it.
static constexpr const char ArgName[] = "ArgName";

namespace clang::tidy::utils {

/// Two index descriptions are equal when their begin position, end position
/// and replaced-argument selector all agree.
static bool operator==(const UseRangesCheck::Indexes &L,
                       const UseRangesCheck::Indexes &R) {
  return L.BeginArg == R.BeginArg && L.EndArg == R.EndArg &&
         L.ReplaceArg == R.ReplaceArg;
}

/// Encodes a signature as text: for every entry, "<begin>:<end>:" followed by
/// '0' when the first argument of the pair is the one replaced and '1'
/// otherwise. The entries are concatenated without a separator.
static std::string getFullPrefix(ArrayRef<UseRangesCheck::Indexes> Signature) {
  std::string Prefix;
  for (const UseRangesCheck::Indexes &Pair : Signature) {
    Prefix += std::to_string(Pair.BeginArg);
    Prefix += ':';
    Prefix += std::to_string(Pair.EndArg);
    Prefix += ':';
    Prefix += (Pair.ReplaceArg == Pair.First) ? '0' : '1';
  }
  return Prefix;
}

// Hashes one index description by combining its three fields.
static llvm::hash_code hash_value(const UseRangesCheck::Indexes &Indexes) {
return llvm::hash_combine(Indexes.BeginArg, Indexes.EndArg,
Indexes.ReplaceArg);
}

// Hashes a whole signature by combining the hashes of its entries in order.
static llvm::hash_code hash_value(const UseRangesCheck::Signature &Sig) {
return llvm::hash_combine_range(Sig.begin(), Sig.end());
}

namespace {

// Matches an expression for which Expr::HasSideEffects() reports true in the
// current AST context.
AST_MATCHER(Expr, hasSideEffects) {
return Node.HasSideEffects(Finder->getASTContext());
}
} // namespace

/// Builds a matcher for an iterator-producing expression in either of two
/// spellings: a zero-argument member call to one of \p MethodNames on an
/// object matching \p ArgumentMatcher, or a one-argument call to one of
/// \p FreeNames whose argument matches \p ArgumentMatcher.
static auto
makeExprMatcher(ast_matchers::internal::Matcher<Expr> ArgumentMatcher,
                ArrayRef<StringRef> MethodNames,
                ArrayRef<StringRef> FreeNames) {
  // e.g. `x.begin()`
  const auto MemberForm = cxxMemberCallExpr(
      argumentCountIs(0), callee(cxxMethodDecl(hasAnyName(MethodNames))),
      on(ArgumentMatcher));
  // e.g. `std::begin(x)`
  const auto FreeForm =
      callExpr(argumentCountIs(1), hasArgument(0, ArgumentMatcher),
               hasDeclaration(functionDecl(hasAnyName(FreeNames))));
  return expr(anyOf(MemberForm, FreeForm));
}

// Builds the matcher for one (begin, end) argument pair of a call.
//
// The begin argument must be a begin-like call on a side-effect-free
// expression, which is bound under ArgName + BeginArg; the end argument must
// be the matching end-like call on a statement identical to that bound node.
// When a reverse descriptor is supplied, an rbegin/rend pair is accepted as an
// alternative and its range expression is bound under the same ID with an 'R'
// suffix. The call itself is bound under BoundCall + State.
static ast_matchers::internal::Matcher<CallExpr>
makeMatcherPair(StringRef State, const UseRangesCheck::Indexes &Indexes,
ArrayRef<StringRef> BeginFreeNames,
ArrayRef<StringRef> EndFreeNames,
const std::optional<UseRangesCheck::ReverseIteratorDescriptor>
&ReverseDescriptor) {
std::string ArgBound = (ArgName + llvm::Twine(Indexes.BeginArg)).str();
SmallString<64> ID = {BoundCall, State};
// Forward form: begin()/cbegin() paired with end()/cend(), as members or as
// one of the supplied free functions.
ast_matchers::internal::Matcher<CallExpr> ArgumentMatcher = allOf(
hasArgument(Indexes.BeginArg,
makeExprMatcher(expr(unless(hasSideEffects())).bind(ArgBound),
{"begin", "cbegin"}, BeginFreeNames)),
hasArgument(Indexes.EndArg,
makeExprMatcher(
expr(matchers::isStatementIdenticalToBoundNode(ArgBound)),
{"end", "cend"}, EndFreeNames)));
if (ReverseDescriptor) {
// Reverse form uses a distinct binding ID so check() can tell which form
// matched.
ArgBound.push_back('R');
SmallVector<StringRef> RBegin{
llvm::make_first_range(ReverseDescriptor->FreeReverseNames)};
SmallVector<StringRef> REnd{
llvm::make_second_range(ReverseDescriptor->FreeReverseNames)};
ArgumentMatcher = anyOf(
ArgumentMatcher,
allOf(hasArgument(
Indexes.BeginArg,
makeExprMatcher(expr(unless(hasSideEffects())).bind(ArgBound),
{"rbegin", "crbegin"}, RBegin)),
hasArgument(
Indexes.EndArg,
makeExprMatcher(
expr(matchers::isStatementIdenticalToBoundNode(ArgBound)),
{"rend", "crend"}, REnd))));
}
// The call must have enough arguments for both positions to exist.
return callExpr(argumentCountAtLeast(
std::max(Indexes.BeginArg, Indexes.EndArg) + 1),
ArgumentMatcher)
.bind(ID);
}

// Registers one matcher per distinct signature list found in the replacer
// map. Each matcher covers every function name that shares that signature
// list and accepts a call if any of the signatures matches its arguments.
void UseRangesCheck::registerMatchers(MatchFinder *Finder) {
// Cache the results of the customisation points for later use in check().
Replaces = getReplacerMap();
ReverseDescriptor = getReverseDescriptor();
auto BeginEndNames = getFreeBeginEndMethods();
llvm::SmallVector<StringRef, 4> BeginNames{
llvm::make_first_range(BeginEndNames)};
llvm::SmallVector<StringRef, 4> EndNames{
llvm::make_second_range(BeginEndNames)};
// Signature lists that already have a matcher registered.
llvm::DenseSet<ArrayRef<Signature>> Seen;
for (auto I = Replaces.begin(), E = Replaces.end(); I != E; ++I) {
const ArrayRef<Signature> &Signatures =
I->getValue()->getReplacementSignatures();
if (!Seen.insert(Signatures).second)
continue;
assert(!Signatures.empty() &&
llvm::all_of(Signatures, [](auto Index) { return !Index.empty(); }));
// Gather this entry plus every later entry with an equal signature list.
std::vector<StringRef> Names(1, I->getKey());
for (auto J = std::next(I); J != E; ++J)
if (J->getValue()->getReplacementSignatures() == Signatures)
Names.push_back(J->getKey());

std::vector<ast_matchers::internal::DynTypedMatcher> TotalMatchers;
// As we match on the first matched signature, we need to sort the
// signatures in order of length(longest to shortest). This way any
// signature that is a subset of another signature will be matched after the
// other.
SmallVector<Signature> SigVec(Signatures);
llvm::sort(SigVec, [](auto &L, auto &R) { return R.size() < L.size(); });
for (const auto &Signature : SigVec) {
// All argument pairs of one signature must match (VO_AllOf).
std::vector<ast_matchers::internal::DynTypedMatcher> Matchers;
for (const auto &ArgPair : Signature)
Matchers.push_back(makeMatcherPair(getFullPrefix(Signature), ArgPair,
BeginNames, EndNames,
ReverseDescriptor));
TotalMatchers.push_back(
ast_matchers::internal::DynTypedMatcher::constructVariadic(
ast_matchers::internal::DynTypedMatcher::VO_AllOf,
ASTNodeKind::getFromNodeKind<CallExpr>(), std::move(Matchers)));
}
// The callee is bound under FuncDecl; any one signature may match
// (VO_AnyOf).
Finder->addMatcher(
callExpr(
callee(functionDecl(hasAnyName(std::move(Names))).bind(FuncDecl)),
ast_matchers::internal::DynTypedMatcher::constructVariadic(
ast_matchers::internal::DynTypedMatcher::VO_AnyOf,
ASTNodeKind::getFromNodeKind<CallExpr>(),
std::move(TotalMatchers))
.convertTo<CallExpr>()),
this);
}
}

/// Adds fix-its to \p Diag that delete the arguments of \p Call at the
/// positions listed in \p Indexes, each together with one adjacent separator
/// so that the remaining argument list stays well formed.
///
/// \param Diag    Diagnostic the removal fix-its are attached to.
/// \param Call    Call whose arguments are removed.
/// \param Indexes Zero-based argument positions to remove, in any order.
/// \param Ctx     Supplies the source manager and language options used to
///                locate token ends.
static void removeFunctionArgs(DiagnosticBuilder &Diag, const CallExpr &Call,
                               ArrayRef<unsigned> Indexes,
                               const ASTContext &Ctx) {
  // Nothing to remove. This also avoids touching bit 0 of an empty bit vector
  // when the call has no arguments.
  if (Indexes.empty())
    return;

  llvm::SmallVector<unsigned> Sorted(Indexes);
  llvm::sort(Sorted);
  // Keep track of commas removed: bit I is set once the separator in front of
  // argument I is no longer available for removal.
  llvm::SmallBitVector Commas(Call.getNumArgs());
  // The first comma is actually the '(' which we can't remove
  Commas[0] = true;
  for (unsigned Index : Sorted) {
    const Expr *Arg = Call.getArg(Index);
    if (Commas[Index]) {
      // The separator before this argument is gone, so try the one after it.
      // Index is always a valid argument position, hence the last argument is
      // detected with Index + 1; comparing Index itself against the size could
      // never be true and let Commas[Index + 1] run past the end.
      if (Index + 1 >= Commas.size()) {
        // Last argument: there is no following comma, remove only the
        // argument itself.
        Diag << FixItHint::CreateRemoval(Arg->getSourceRange());
      } else {
        // Remove the next comma
        Commas[Index + 1] = true;
        Diag << FixItHint::CreateRemoval(CharSourceRange::getTokenRange(
            {Arg->getBeginLoc(),
             Lexer::getLocForEndOfToken(
                 Arg->getEndLoc(), 0, Ctx.getSourceManager(), Ctx.getLangOpts())
                 .getLocWithOffset(1)}));
      }
    } else {
      // Remove the argument together with the character directly in front of
      // it (presumably the space after the preceding comma - the comma itself
      // is not tracked here; confirm against the produced fix-its).
      Diag << FixItHint::CreateRemoval(CharSourceRange::getTokenRange(
          Arg->getBeginLoc().getLocWithOffset(-1), Arg->getEndLoc()));
      Commas[Index] = true;
    }
  }
}

// Emits the diagnostic and fix-its for one matched call.
//
// The replacer is looked up by the callee's fully qualified name. The first
// signature whose call binding is present decides the rewrite: the callee is
// renamed, the required header is inserted, one argument of each iterator pair
// is replaced by the range expression and the other one is removed.
void UseRangesCheck::check(const MatchFinder::MatchResult &Result) {
const auto *Function = Result.Nodes.getNodeAs<FunctionDecl>(FuncDecl);
// Keys in the replacer map carry a leading "::".
std::string Qualified = "::" + Function->getQualifiedNameAsString();
auto Iter = Replaces.find(Qualified);
assert(Iter != Replaces.end());
SmallString<64> Buffer;
for (const Signature &Sig : Iter->getValue()->getReplacementSignatures()) {
// A call is bound under BoundCall + signature prefix only if that signature
// matched.
Buffer.assign({BoundCall, getFullPrefix(Sig)});
const auto *Call = Result.Nodes.getNodeAs<CallExpr>(Buffer);
if (!Call)
continue;
auto Diag = createDiag(*Call);
if (auto ReplaceName = Iter->getValue()->getReplaceName(*Function))
Diag << FixItHint::CreateReplacement(Call->getCallee()->getSourceRange(),
*ReplaceName);
if (auto Include = Iter->getValue()->getHeaderInclusion(*Function))
Diag << Inserter.createIncludeInsertion(
Result.SourceManager->getFileID(Call->getBeginLoc()), *Include);
// Positions of the iterator arguments that become redundant.
llvm::SmallVector<unsigned, 3> ToRemove;
for (const auto &[First, Second, Replace] : Sig) {
auto ArgNode = ArgName + std::to_string(First);
if (const auto *ArgExpr = Result.Nodes.getNodeAs<Expr>(ArgNode)) {
// Forward pair: substitute the source text of the range expression.
Diag << FixItHint::CreateReplacement(
Call->getArg(Replace == Indexes::Second ? Second : First)
->getSourceRange(),
Lexer::getSourceText(
CharSourceRange::getTokenRange(ArgExpr->getSourceRange()),
Result.Context->getSourceManager(),
Result.Context->getLangOpts()));
} else {
// Reverse pair: the range expression was bound with an 'R' suffix and is
// wrapped in the reverse adaptor.
assert(ReverseDescriptor && "Couldn't find forward argument");
ArgNode.push_back('R');
ArgExpr = Result.Nodes.getNodeAs<Expr>(ArgNode);
assert(ArgExpr && "Couldn't find forward or reverse argument");
if (ReverseDescriptor->ReverseHeader)
Diag << Inserter.createIncludeInsertion(
Result.SourceManager->getFileID(Call->getBeginLoc()),
*ReverseDescriptor->ReverseHeader);
Diag << FixItHint::CreateReplacement(
Call->getArg(Replace == Indexes::Second ? Second : First)
->getSourceRange(),
SmallString<128>{
ReverseDescriptor->ReverseAdaptorName, "(",
Lexer::getSourceText(
CharSourceRange::getTokenRange(ArgExpr->getSourceRange()),
Result.Context->getSourceManager(),
Result.Context->getLangOpts()),
")"});
}
// The argument of the pair that was not replaced gets removed.
ToRemove.push_back(Replace == Indexes::Second ? First : Second);
}
removeFunctionArgs(Diag, *Call, ToRemove, *Result.Context);
return;
}
llvm_unreachable("No valid signature found");
}

// Default language requirement of the shared check: the C++11 language option
// must be set.
bool UseRangesCheck::isLanguageVersionSupported(
const LangOptions &LangOpts) const {
return LangOpts.CPlusPlus11;
}

// Constructs the check and its include inserter. The include style comes from
// the local-or-global "IncludeStyle" option and defaults to IS_LLVM.
UseRangesCheck::UseRangesCheck(StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
Inserter(Options.getLocalOrGlobal("IncludeStyle",
utils::IncludeSorter::IS_LLVM),
areDiagsSelfContained()) {}

// Hooks the include inserter up to the preprocessor PP; the other two
// parameters are unused.
void UseRangesCheck::registerPPCallbacks(const SourceManager &,
Preprocessor *PP, Preprocessor *) {
Inserter.registerPreprocessor(PP);
}

// Writes the include inserter's style into Opts under "IncludeStyle".
void UseRangesCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
Options.store(Opts, "IncludeStyle", Inserter.getStyle());
}

// Default implementation: no header needs to be included for the replacement.
std::optional<std::string>
UseRangesCheck::Replacer::getHeaderInclusion(const NamedDecl &) const {
return std::nullopt;
}

// Default diagnostic for a matched call, reported at the start of the call.
DiagnosticBuilder UseRangesCheck::createDiag(const CallExpr &Call) {
return diag(Call.getBeginLoc(), "use a ranges version of this algorithm");
}

// Default implementation: no reverse descriptor, so makeMatcherPair() builds
// only the forward begin/end form.
std::optional<UseRangesCheck::ReverseIteratorDescriptor>
UseRangesCheck::getReverseDescriptor() const {
return std::nullopt;
}

// Default implementation: no free (begin, end) function pairs are recognised.
ArrayRef<std::pair<StringRef, StringRef>>
UseRangesCheck::getFreeBeginEndMethods() const {
return {};
}

// Matchers of this check are evaluated with the
// TK_IgnoreUnlessSpelledInSource traversal.
std::optional<TraversalKind> UseRangesCheck::getCheckTraversalKind() const {
return TK_IgnoreUnlessSpelledInSource;
}
} // namespace clang::tidy::utils
Loading