140 changes: 106 additions & 34 deletions bolt/include/bolt/Profile/BoltAddressTranslation.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,39 +115,9 @@ class BoltAddressTranslation {
/// Save function and basic block hashes used for metadata dump.
void saveMetadata(BinaryContext &BC);

/// Returns BB hash by function output address (after BOLT) and basic block
/// input offset.
size_t getBBHash(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const;

/// Returns BF hash by function output address (after BOLT).
size_t getBFHash(uint64_t OutputAddress) const;

/// Returns true when \p Address is a key in the translation maps, i.e. a
/// translation table entry was recorded for it.
bool isBATFunction(uint64_t Address) const {
  return Maps.find(Address) != Maps.end();
}

/// Returns BB index by function output address (after BOLT) and basic block
/// input offset.
unsigned getBBIndex(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const;

using BBHashMap = std::map<uint32_t, std::pair<unsigned, size_t>>;
/// Return the per-function table that maps a basic block input offset to its
/// (block index, hash) pair. \p OutputAddress must already be present in
/// FuncHashes.
const BBHashMap &getBBHashMap(uint64_t OutputAddress) const {
  const auto &FuncEntry = FuncHashes.at(OutputAddress);
  return FuncEntry.second;
}

/// Returns the block index stored in \p BBMap for the block at
/// \p BBInputOffset. The offset must be present in the map.
static unsigned getBBIndex(const BBHashMap &BBMap, uint32_t BBInputOffset) {
  const auto &IndexAndHash = BBMap.at(BBInputOffset);
  return IndexAndHash.first;
}

/// Returns the block hash stored in \p BBMap for the block at
/// \p BBInputOffset. The offset must be present in the map.
static size_t getBBHash(const BBHashMap &BBMap, uint32_t BBInputOffset) {
  const auto &IndexAndHash = BBMap.at(BBInputOffset);
  return IndexAndHash.second;
}

/// Returns the value recorded in NumBasicBlocksMap for the function at
/// \p OutputAddress. The address must be present in the map.
size_t getNumBasicBlocks(uint64_t OutputAddress) const {
  const size_t NumBlocks = NumBasicBlocksMap.at(OutputAddress);
  return NumBlocks;
}

/// Returns branch offsets grouped by containing basic block in a given
/// function.
std::unordered_map<uint32_t, std::vector<uint32_t>>
Expand All @@ -159,7 +129,7 @@ class BoltAddressTranslation {
/// emitted for the start of the BB. More entries may be emitted to cover
/// the location of calls or any instruction that may change control flow.
void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB,
uint64_t FuncAddress);
uint64_t FuncInputAddress, uint64_t FuncOutputAddress);

/// Write the serialized address translation table for a function.
template <bool Cold>
Expand All @@ -182,9 +152,6 @@ class BoltAddressTranslation {

std::map<uint64_t, MapTy> Maps;

/// Map basic block input offset to a basic block index and hash pair.
std::unordered_map<uint64_t, std::pair<size_t, BBHashMap>> FuncHashes;

/// Map a function to its basic blocks count
std::unordered_map<uint64_t, size_t> NumBasicBlocksMap;

Expand All @@ -200,6 +167,111 @@ class BoltAddressTranslation {
/// Identifies the address of a control-flow changing instruction in a
/// translation map entry
const static uint32_t BRANCHENTRY = 0x1;

public:
/// Lookup table keyed by basic block input offset; every entry stores the
/// block's index and its hash.
class BBHashMapTy {
  /// Index/hash pair kept for a single basic block.
  class EntryTy {
    unsigned Index;
    size_t Hash;

  public:
    EntryTy(unsigned BlockIndex, size_t BlockHash)
        : Index(BlockIndex), Hash(BlockHash) {}

    unsigned getBBIndex() const { return Index; }
    size_t getBBHash() const { return Hash; }
  };

  std::unordered_map<uint32_t, EntryTy> Map;

  /// Fetch the entry for \p BBInputOffset. The offset is expected to be
  /// present; this is only checked by an assertion.
  const EntryTy &getEntry(uint32_t BBInputOffset) const {
    const auto Found = Map.find(BBInputOffset);
    assert(Found != Map.end());
    return Found->second;
  }

public:
  /// True if an entry was added for \p InputOffset.
  bool isInputBlock(uint32_t InputOffset) const {
    return Map.find(InputOffset) != Map.end();
  }

  /// Index of the block starting at \p BBInputOffset (must be present).
  unsigned getBBIndex(uint32_t BBInputOffset) const {
    const EntryTy &Entry = getEntry(BBInputOffset);
    return Entry.getBBIndex();
  }

  /// Hash of the block starting at \p BBInputOffset (must be present).
  size_t getBBHash(uint32_t BBInputOffset) const {
    const EntryTy &Entry = getEntry(BBInputOffset);
    return Entry.getBBHash();
  }

  /// Record \p BBIndex and \p BBHash for \p BBInputOffset. An entry that
  /// already exists for the same offset is left untouched.
  void addEntry(uint32_t BBInputOffset, unsigned BBIndex, size_t BBHash) {
    Map.try_emplace(BBInputOffset, BBIndex, BBHash);
  }

  /// Number of entries stored in the table.
  size_t getNumBasicBlocks() const { return Map.size(); }
};

/// Map function output address to its hash and basic blocks hash map.
class FuncHashesTy {
  /// Function hash together with the hash map of the function's blocks.
  class EntryTy {
    size_t Hash;
    BBHashMapTy BBHashMap;

  public:
    // Single-argument constructor is explicit so a bare hash value never
    // converts to an entry implicitly.
    explicit EntryTy(size_t Hash) : Hash(Hash) {}

    size_t getBFHash() const { return Hash; }
    const BBHashMapTy &getBBHashMap() const { return BBHashMap; }
  };

  std::unordered_map<uint64_t, EntryTy> Map;

  /// Fetch the entry for \p FuncOutputAddress. The address is expected to be
  /// present; this is only checked by an assertion.
  const EntryTy &getEntry(uint64_t FuncOutputAddress) const {
    auto It = Map.find(FuncOutputAddress);
    assert(It != Map.end());
    return It->second;
  }

public:
  /// Hash of the function keyed by \p FuncOutputAddress (must be present).
  size_t getBFHash(uint64_t FuncOutputAddress) const {
    return getEntry(FuncOutputAddress).getBFHash();
  }

  /// Block hash map of the function keyed by \p FuncOutputAddress (must be
  /// present).
  const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
    return getEntry(FuncOutputAddress).getBBHashMap();
  }

  /// Add an entry with hash \p BFHash and an empty block hash map. An entry
  /// that already exists for the same address is left untouched.
  void addEntry(uint64_t FuncOutputAddress, size_t BFHash) {
    // Construct the entry in place instead of moving a temporary.
    Map.try_emplace(FuncOutputAddress, BFHash);
  }

  /// Number of functions with a recorded hash.
  size_t getNumFunctions() const { return Map.size(); }

  /// Total number of block entries summed over all functions.
  size_t getNumBasicBlocks() const {
    size_t NumBasicBlocks = 0;
    for (const auto &KV : Map)
      NumBasicBlocks += KV.second.getBBHashMap().getNumBasicBlocks();
    return NumBasicBlocks;
  }
};

/// Looks up the function hash stored in FuncHashes under
/// \p FuncOutputAddress (function output address, after BOLT).
size_t getBFHash(uint64_t FuncOutputAddress) const {
  const size_t FunctionHash = FuncHashes.getBFHash(FuncOutputAddress);
  return FunctionHash;
}

/// Looks up the basic block hash map stored in FuncHashes under
/// \p FuncOutputAddress (function output address, after BOLT).
const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
  const BBHashMapTy &BlockHashes = FuncHashes.getBBHashMap(FuncOutputAddress);
  return BlockHashes;
}

/// Mutable overload: forwards to the const getBBHashMap and strips constness
/// from the result so callers can add entries to the returned map.
BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) {
  const BoltAddressTranslation &ConstSelf = *this;
  const BBHashMapTy &BlockHashes = ConstSelf.getBBHashMap(FuncOutputAddress);
  return const_cast<BBHashMapTy &>(BlockHashes);
}

/// Returns the basic block count recorded in NumBasicBlocksMap for the
/// function at \p OutputAddress. The address must be present in the map.
size_t getNumBasicBlocks(uint64_t OutputAddress) const {
  const size_t NumBlocks = NumBasicBlocksMap.at(OutputAddress);
  return NumBlocks;
}

private:
FuncHashesTy FuncHashes;
};
} // namespace bolt

Expand Down
6 changes: 3 additions & 3 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3547,7 +3547,7 @@ MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) {
if (!isMultiEntry())
return nullptr;

uint64_t NumEntries = 0;
uint64_t NumEntries = 1;
if (hasCFG()) {
for (BinaryBasicBlock *BB : BasicBlocks) {
MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
Expand Down Expand Up @@ -3580,7 +3580,7 @@ uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
return 0;

// Check all secondary entries available as either basic blocks or labels.
uint64_t NumEntries = 0;
uint64_t NumEntries = 1;
for (const BinaryBasicBlock *BB : BasicBlocks) {
MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
if (!EntrySymbol)
Expand All @@ -3589,7 +3589,7 @@ uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
return NumEntries;
++NumEntries;
}
NumEntries = 0;
NumEntries = 1;
for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
if (!EntrySymbol)
Expand Down
6 changes: 5 additions & 1 deletion bolt/lib/Core/DIEBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,10 @@ void DIEBuilder::cloneDieReferenceAttribute(
NewRefDie = DieInfo.Die;

if (AttrSpec.Form == dwarf::DW_FORM_ref_addr) {
// Adding referenced DIE to DebugNames to be used when entries are created
// that contain cross cu references.
if (DebugNamesTable.canGenerateEntryWithCrossCUReference(U, Die, AttrSpec))
DebugNamesTable.addCrossCUDie(DieInfo.Die);
// no matter forward reference or backward reference, we are supposed
// to calculate them in `finish` due to the possible modification of
// the DIE.
Expand All @@ -554,7 +558,7 @@ void DIEBuilder::cloneDieReferenceAttribute(
std::make_pair(CurDieInfo, AddrReferenceInfo(&DieInfo, AttrSpec)));

Die.addValue(getState().DIEAlloc, AttrSpec.Attr, dwarf::DW_FORM_ref_addr,
DIEInteger(0xDEADBEEF));
DIEInteger(DieInfo.Die->getOffset()));
return;
}

Expand Down
109 changes: 68 additions & 41 deletions bolt/lib/Core/DebugNames.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,55 @@ static bool shouldIncludeVariable(const DWARFUnit &Unit, const DIE &Die) {
return false;
}

bool static canProcess(const DWARFUnit &Unit, const DIE &Die,
std::string &NameToUse, const bool TagsOnly) {
switch (Die.getTag()) {
case dwarf::DW_TAG_base_type:
case dwarf::DW_TAG_class_type:
case dwarf::DW_TAG_enumeration_type:
case dwarf::DW_TAG_imported_declaration:
case dwarf::DW_TAG_pointer_type:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_typedef:
case dwarf::DW_TAG_unspecified_type:
if (TagsOnly || Die.findAttribute(dwarf::Attribute::DW_AT_name))
return true;
return false;
case dwarf::DW_TAG_namespace:
// According to DWARF5 spec namespaces without DW_AT_name needs to have
// "(anonymous namespace)"
if (!Die.findAttribute(dwarf::Attribute::DW_AT_name))
NameToUse = "(anonymous namespace)";
return true;
case dwarf::DW_TAG_inlined_subroutine:
case dwarf::DW_TAG_label:
case dwarf::DW_TAG_subprogram:
if (TagsOnly || Die.findAttribute(dwarf::Attribute::DW_AT_low_pc) ||
Die.findAttribute(dwarf::Attribute::DW_AT_high_pc) ||
Die.findAttribute(dwarf::Attribute::DW_AT_ranges) ||
Die.findAttribute(dwarf::Attribute::DW_AT_entry_pc))
return true;
return false;
case dwarf::DW_TAG_variable:
return TagsOnly || shouldIncludeVariable(Unit, Die);
default:
break;
}
return false;
}

/// Returns true when the table is created, the tag of \p Die passes the
/// tag-only canProcess check, and \p AttrSpec is a DW_AT_abstract_origin or
/// DW_AT_specification attribute encoded as DW_FORM_ref_addr.
bool DWARF5AcceleratorTable::canGenerateEntryWithCrossCUReference(
    const DWARFUnit &Unit, const DIE &Die,
    const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec) {
  if (!isCreated())
    return false;

  // canProcess needs an output string; its value is not used here.
  std::string IgnoredName;
  if (!canProcess(Unit, Die, IgnoredName, /*TagsOnly=*/true))
    return false;

  const bool IsOriginOrSpecification =
      AttrSpec.Attr == dwarf::Attribute::DW_AT_abstract_origin ||
      AttrSpec.Attr == dwarf::Attribute::DW_AT_specification;
  if (!IsOriginOrSpecification)
    return false;

  return AttrSpec.Form == dwarf::DW_FORM_ref_addr;
}
/// Returns name offset in String Offset section.
static uint64_t getNameOffset(BinaryContext &BC, DWARFUnit &Unit,
const uint64_t Index) {
Expand Down Expand Up @@ -175,41 +224,6 @@ DWARF5AcceleratorTable::addAccelTableEntry(
if (Unit.getVersion() < 5 || !NeedToCreate)
return std::nullopt;
std::string NameToUse = "";
auto canProcess = [&](const DIE &Die) -> bool {
switch (Die.getTag()) {
case dwarf::DW_TAG_base_type:
case dwarf::DW_TAG_class_type:
case dwarf::DW_TAG_enumeration_type:
case dwarf::DW_TAG_imported_declaration:
case dwarf::DW_TAG_pointer_type:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_typedef:
case dwarf::DW_TAG_unspecified_type:
if (Die.findAttribute(dwarf::Attribute::DW_AT_name))
return true;
return false;
case dwarf::DW_TAG_namespace:
// According to the DWARF5 spec, namespaces without DW_AT_name need to have
// the name "(anonymous namespace)"
if (!Die.findAttribute(dwarf::Attribute::DW_AT_name))
NameToUse = "(anonymous namespace)";
return true;
case dwarf::DW_TAG_inlined_subroutine:
case dwarf::DW_TAG_label:
case dwarf::DW_TAG_subprogram:
if (Die.findAttribute(dwarf::Attribute::DW_AT_low_pc) ||
Die.findAttribute(dwarf::Attribute::DW_AT_high_pc) ||
Die.findAttribute(dwarf::Attribute::DW_AT_ranges) ||
Die.findAttribute(dwarf::Attribute::DW_AT_entry_pc))
return true;
return false;
case dwarf::DW_TAG_variable:
return shouldIncludeVariable(Unit, Die);
default:
break;
}
return false;
};

auto getUnitID = [&](const DWARFUnit &Unit, bool &IsTU,
uint32_t &DieTag) -> uint32_t {
Expand All @@ -223,7 +237,7 @@ DWARF5AcceleratorTable::addAccelTableEntry(
return CUList.size() - 1;
};

if (!canProcess(Die))
if (!canProcess(Unit, Die, NameToUse, false))
return std::nullopt;

// Adds a Unit to either CU, LocalTU or ForeignTU list the first time we
Expand Down Expand Up @@ -318,10 +332,24 @@ DWARF5AcceleratorTable::addAccelTableEntry(
const DIEValue Value = Die.findAttribute(Attr);
if (!Value)
return std::nullopt;
const DIEEntry &DIEENtry = Value.getDIEEntry();
DIE &EntryDie = DIEENtry.getEntry();
addEntry(EntryDie.findAttribute(dwarf::Attribute::DW_AT_linkage_name));
return addEntry(EntryDie.findAttribute(dwarf::Attribute::DW_AT_name));
const DIE *EntryDie = nullptr;
if (Value.getForm() == dwarf::DW_FORM_ref_addr) {
auto Iter = CrossCUDies.find(Value.getDIEInteger().getValue());
if (Iter == CrossCUDies.end()) {
BC.errs() << "BOLT-WARNING: [internal-dwarf-warning]: Could not find "
"referenced DIE in CrossCUDies for "
<< Twine::utohexstr(Value.getDIEInteger().getValue())
<< ".\n";
return std::nullopt;
}
EntryDie = Iter->second;
} else {
const DIEEntry &DIEENtry = Value.getDIEEntry();
EntryDie = &DIEENtry.getEntry();
}

addEntry(EntryDie->findAttribute(dwarf::Attribute::DW_AT_linkage_name));
return addEntry(EntryDie->findAttribute(dwarf::Attribute::DW_AT_name));
};

if (std::optional<BOLTDWARF5AccelTableData *> Entry =
Expand All @@ -332,7 +360,6 @@ DWARF5AcceleratorTable::addAccelTableEntry(
return *Entry;

return addEntry(Die.findAttribute(dwarf::Attribute::DW_AT_name));
;
}

/// Algorithm from llvm implementation.
Expand Down
97 changes: 46 additions & 51 deletions bolt/lib/Profile/BoltAddressTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,10 @@ const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";

void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
const BinaryBasicBlock &BB,
uint64_t FuncAddress) {
uint64_t HotFuncAddress = ColdPartSource.count(FuncAddress)
? ColdPartSource[FuncAddress]
: FuncAddress;
uint64_t FuncInputAddress,
uint64_t FuncOutputAddress) {
const uint64_t BBOutputOffset =
BB.getOutputAddressRange().first - FuncAddress;
BB.getOutputAddressRange().first - FuncOutputAddress;
const uint32_t BBInputOffset = BB.getInputOffset();

// Every output BB must track back to an input BB for profile collection
Expand All @@ -42,11 +40,14 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
LLVM_DEBUG(dbgs() << "BB " << BB.getName() << "\n");
LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(BBOutputOffset)
<< " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
getBBHash(HotFuncAddress, BBInputOffset)));
(void)HotFuncAddress;
LLVM_DEBUG(dbgs() << formatv(" Index: {0}\n",
getBBIndex(HotFuncAddress, BBInputOffset)));
// NB: in `writeEntriesForBB` we use the input address because hashes are
// saved early in `saveMetadata` before output addresses are assigned.
const BBHashMapTy &BBHashMap = getBBHashMap(FuncInputAddress);
(void)BBHashMap;
LLVM_DEBUG(
dbgs() << formatv(" Hash: {0:x}\n", BBHashMap.getBBHash(BBInputOffset)));
LLVM_DEBUG(
dbgs() << formatv(" Index: {0}\n", BBHashMap.getBBIndex(BBInputOffset)));
// In case of conflicts (same Key mapping to different Vals), the last
// update takes precedence. Of course it is not ideal to have conflicts and
// those happen when we have an empty BB that either contained only
Expand All @@ -63,7 +64,7 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
const auto OutputAddress = IOAddressMap.lookup(InputAddress);
assert(OutputAddress && "Unknown instruction address");
const auto OutputOffset = *OutputAddress - FuncAddress;
const auto OutputOffset = *OutputAddress - FuncOutputAddress;

// Is this the first instruction in the BB? No need to duplicate the entry.
if (OutputOffset == BBOutputOffset)
Expand Down Expand Up @@ -106,7 +107,7 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
MapTy Map;
for (const BinaryBasicBlock *const BB :
Function.getLayout().getMainFragment())
writeEntriesForBB(Map, *BB, Function.getOutputAddress());
writeEntriesForBB(Map, *BB, InputAddress, OutputAddress);
Maps.emplace(Function.getOutputAddress(), std::move(Map));
ReverseMap.emplace(OutputAddress, InputAddress);

Expand All @@ -120,7 +121,7 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
Map.clear();
for (const BinaryBasicBlock *const BB : FF)
writeEntriesForBB(Map, *BB, FF.getAddress());
writeEntriesForBB(Map, *BB, InputAddress, FF.getAddress());

Maps.emplace(FF.getAddress(), std::move(Map));
}
Expand All @@ -132,11 +133,9 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
writeMaps</*Cold=*/true>(Maps, PrevAddress, OS);

BC.outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
const uint64_t NumBBHashes = std::accumulate(
FuncHashes.begin(), FuncHashes.end(), 0ull,
[](size_t Acc, const auto &B) { return Acc + B.second.second.size(); });
BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.size() << " function and "
<< NumBBHashes << " basic block hashes\n";
BC.outs() << "BOLT-INFO: Wrote " << FuncHashes.getNumFunctions()
<< " function and " << FuncHashes.getNumBasicBlocks()
<< " basic block hashes\n";
}

APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
Expand Down Expand Up @@ -183,11 +182,10 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
// Only process cold fragments in cold mode, and vice versa.
if (Cold != ColdPartSource.count(Address))
continue;
// NB: here we use the input address because hashes are saved early (in
// `saveMetadata`) before output addresses are assigned.
// NB: in `writeMaps` we use the input address because hashes are saved
// early in `saveMetadata` before output addresses are assigned.
const uint64_t HotInputAddress =
ReverseMap[Cold ? ColdPartSource[Address] : Address];
std::pair<size_t, BBHashMap> &FuncHashPair = FuncHashes[HotInputAddress];
MapTy &Map = MapEntry.second;
const uint32_t NumEntries = Map.size();
LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
Expand All @@ -196,7 +194,7 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
PrevAddress = Address;
const uint32_t NumSecondaryEntryPoints =
SecondaryEntryPointsMap.count(Address)
? SecondaryEntryPointsMap.at(Address).size()
? SecondaryEntryPointsMap[Address].size()
: 0;
if (Cold) {
size_t HotIndex =
Expand All @@ -205,10 +203,11 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
PrevIndex = HotIndex;
} else {
// Function hash
LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", FuncHashPair.first));
OS.write(reinterpret_cast<char *>(&FuncHashPair.first), 8);
size_t BFHash = getBFHash(HotInputAddress);
LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", BFHash));
OS.write(reinterpret_cast<char *>(&BFHash), 8);
// Number of basic blocks
size_t NumBasicBlocks = FuncHashPair.second.size();
size_t NumBasicBlocks = getBBHashMap(HotInputAddress).getNumBasicBlocks();
LLVM_DEBUG(dbgs() << "Basic blocks: " << NumBasicBlocks << '\n');
encodeULEB128(NumBasicBlocks, OS);
// Secondary entry points
Expand Down Expand Up @@ -236,6 +235,7 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
});
}
}
const BBHashMapTy &BBHashMap = getBBHashMap(HotInputAddress);
size_t Index = 0;
uint64_t InOffset = 0;
size_t PrevBBIndex = 0;
Expand All @@ -248,9 +248,9 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
encodeSLEB128(KeyVal.second - InOffset, OS);
InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded
if ((InOffset & BRANCHENTRY) == 0) {
unsigned BBIndex;
size_t BBHash;
std::tie(BBIndex, BBHash) = FuncHashPair.second[InOffset >> 1];
const bool IsBlock = BBHashMap.isInputBlock(InOffset >> 1);
unsigned BBIndex = IsBlock ? BBHashMap.getBBIndex(InOffset >> 1) : 0;
size_t BBHash = IsBlock ? BBHashMap.getBBHash(InOffset >> 1) : 0;
OS.write(reinterpret_cast<char *>(&BBHash), 8);
// Basic block index in the input binary
encodeULEB128(BBIndex - PrevBBIndex, OS);
Expand All @@ -263,7 +263,7 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
if (!Cold && NumSecondaryEntryPoints) {
LLVM_DEBUG(dbgs() << "Secondary entry points: ");
// Secondary entry point offsets, delta-encoded
for (uint32_t Offset : SecondaryEntryPointsMap.at(Address)) {
for (uint32_t Offset : SecondaryEntryPointsMap[Address]) {
encodeULEB128(Offset - PrevOffset, OS);
LLVM_DEBUG(dbgs() << formatv("{0:x} ", Offset));
PrevOffset = Offset;
Expand Down Expand Up @@ -322,7 +322,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
HotFuncs.push_back(Address);
// Function hash
const size_t FuncHash = DE.getU64(&Offset, &Err);
FuncHashes[Address].first = FuncHash;
FuncHashes.addEntry(Address, FuncHash);
LLVM_DEBUG(dbgs() << formatv("{0:x}: hash {1:x}\n", Address, FuncHash));
// Number of basic blocks
const size_t NumBasicBlocks = DE.getULEB128(&Offset, &Err);
Expand Down Expand Up @@ -388,8 +388,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
BBIndexDelta = DE.getULEB128(&Offset, &Err);
BBIndex += BBIndexDelta;
// Map basic block hash to hot fragment by input offset
FuncHashes[HotAddress].second.emplace(InputOffset >> 1,
std::pair(BBIndex, BBHash));
getBBHashMap(HotAddress).addEntry(InputOffset >> 1, BBIndex, BBHash);
}
LLVM_DEBUG({
dbgs() << formatv(
Expand Down Expand Up @@ -431,6 +430,8 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
OS << formatv(", hash: {0:x}", getBFHash(Address));
OS << "\n";
OS << "BB mappings:\n";
const BBHashMapTy &BBHashMap =
getBBHashMap(HotAddress ? HotAddress : Address);
for (const auto &Entry : MapEntry.second) {
const bool IsBranch = Entry.second & BRANCHENTRY;
const uint32_t Val = Entry.second >> 1; // dropping BRANCHENTRY bit
Expand All @@ -439,10 +440,16 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
if (IsBranch)
OS << " (branch)";
else
OS << formatv(" hash: {0:x}",
getBBHash(HotAddress ? HotAddress : Address, Val));
OS << formatv(" hash: {0:x}", BBHashMap.getBBHash(Val));
OS << "\n";
}
if (SecondaryEntryPointsMap.count(Address)) {
const std::vector<uint32_t> &SecondaryEntryPoints =
SecondaryEntryPointsMap[Address];
OS << SecondaryEntryPoints.size() << " secondary entry points:\n";
for (uint32_t EntryPointOffset : SecondaryEntryPoints)
OS << formatv("{0:x}\n", EntryPointOffset);
}
OS << "\n";
}
const size_t NumColdParts = ColdPartSource.size();
Expand Down Expand Up @@ -561,28 +568,15 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
if (BF.isIgnored() || (!BC.HasRelocations && !BF.isSimple()))
continue;
// Prepare function and block hashes
FuncHashes[BF.getAddress()].first = BF.computeHash();
FuncHashes.addEntry(BF.getAddress(), BF.computeHash());
BF.computeBlockHashes();
BBHashMapTy &BBHashMap = getBBHashMap(BF.getAddress());
// Set BF/BB metadata
for (const BinaryBasicBlock &BB : BF)
FuncHashes[BF.getAddress()].second.emplace(
BB.getInputOffset(), std::pair(BB.getIndex(), BB.getHash()));
BBHashMap.addEntry(BB.getInputOffset(), BB.getIndex(), BB.getHash());
}
}

size_t BoltAddressTranslation::getBBHash(uint64_t FuncOutputAddress,
uint32_t BBInputOffset) const {
return getBBHash(getBBHashMap(FuncOutputAddress), BBInputOffset);
}

/// Returns the function hash stored in FuncHashes under \p OutputAddress.
/// The address must be present in the map.
size_t BoltAddressTranslation::getBFHash(uint64_t OutputAddress) const {
  const auto &FuncEntry = FuncHashes.at(OutputAddress);
  return FuncEntry.first;
}

unsigned BoltAddressTranslation::getBBIndex(uint64_t FuncOutputAddress,
uint32_t BBInputOffset) const {
return getBBIndex(getBBHashMap(FuncOutputAddress), BBInputOffset);
}

std::unordered_map<uint32_t, std::vector<uint32_t>>
BoltAddressTranslation::getBFBranches(uint64_t OutputAddress) const {
std::unordered_map<uint32_t, std::vector<uint32_t>> Branches;
Expand All @@ -602,5 +596,6 @@ BoltAddressTranslation::getBFBranches(uint64_t OutputAddress) const {
}
return Branches;
}

} // namespace bolt
} // namespace llvm
39 changes: 19 additions & 20 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
#include "llvm/Support/Regex.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <map>
#include <optional>
#include <unordered_map>
Expand Down Expand Up @@ -2356,21 +2355,17 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
YamlBF.Hash = BAT->getBFHash(FuncAddress);
YamlBF.ExecCount = BF->getKnownExecutionCount();
YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
const auto &BlockMap = BAT->getBBHashMap(FuncAddress);

auto addBBProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB,
uint64_t Offset) {
if (!Branches.IntraIndex.contains(Offset))
return;
for (const auto &[SuccOffset, SuccIdx] :
Branches.IntraIndex.at(Offset)) {
const llvm::bolt::BranchInfo &BI = Branches.Data.at(SuccIdx);
yaml::bolt::SuccessorInfo SI;
SI.Index = BAT->getBBIndex(BlockMap, SuccOffset);
SI.Count = BI.Branches;
SI.Mispreds = BI.Mispreds;
YamlBB.Successors.emplace_back(SI);
}
const BoltAddressTranslation::BBHashMapTy &BlockMap =
BAT->getBBHashMap(FuncAddress);

auto addSuccProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB,
uint64_t SuccOffset, unsigned SuccDataIdx) {
const llvm::bolt::BranchInfo &BI = Branches.Data.at(SuccDataIdx);
yaml::bolt::SuccessorInfo SI;
SI.Index = BlockMap.getBBIndex(SuccOffset);
SI.Count = BI.Branches;
SI.Mispreds = BI.Mispreds;
YamlBB.Successors.emplace_back(SI);
};

std::unordered_map<uint32_t, std::vector<uint32_t>> BFBranches =
Expand Down Expand Up @@ -2430,11 +2425,15 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
}
};

for (const auto &[Offset, Val] : BlockMap) {
for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) {
yaml::bolt::BinaryBasicBlockProfile YamlBB;
std::tie(YamlBB.Index, YamlBB.Hash) = Val;
addBBProfile(YamlBB, Offset);
addCallsProfile(YamlBB, Offset);
if (!BlockMap.isInputBlock(FromOffset))
continue;
YamlBB.Index = BlockMap.getBBIndex(FromOffset);
YamlBB.Hash = BlockMap.getBBHash(FromOffset);
for (const auto &[SuccOffset, SuccDataIdx] : SuccKV)
addSuccProfile(YamlBB, SuccOffset, SuccDataIdx);
addCallsProfile(YamlBB, FromOffset);
if (YamlBB.ExecCount || !YamlBB.Successors.empty() ||
!YamlBB.CallSites.empty())
YamlBF.Blocks.emplace_back(YamlBB);
Expand Down
16 changes: 11 additions & 5 deletions bolt/lib/Profile/YAMLProfileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,21 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) {
if (!ICSP)
continue;
for (const IndirectCallProfile &CSP : ICSP.get()) {
StringRef TargetName = "";
const BinaryFunction *Callee = setCSIDestination(BC, CSI, CSP.Symbol);
if (Callee)
TargetName = Callee->getOneName();
CSI.Count = CSP.Count;
CSI.Mispreds = CSP.Mispreds;
if (CSI.Count && Callee)
CSTargets.emplace_back(Callee->getOneName(), CSI);
CSTargets.emplace_back(TargetName, CSI);
}
} else { // direct call or a tail call
StringRef TargetName = "";
const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(Instr);
const BinaryFunction *Callee = setCSIDestination(BC, CSI, CalleeSymbol);
const BinaryFunction *const Callee =
setCSIDestination(BC, CSI, CalleeSymbol);
if (Callee)
TargetName = Callee->getOneName();

auto getAnnotationWithDefault = [&](const MCInst &Inst, StringRef Ann) {
return BC.MIB->getAnnotationWithDefault(Instr, Ann, 0ull);
Expand All @@ -117,8 +123,8 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) {
CSI.Count = getAnnotationWithDefault(Instr, "Count");
}

if (CSI.Count && Callee)
CSTargets.emplace_back(Callee->getOneName(), CSI);
if (CSI.Count)
CSTargets.emplace_back(TargetName, CSI);
}
// Sort targets in a similar way to getBranchData, see Location::operator<
llvm::sort(CSTargets, [](const auto &RHS, const auto &LHS) {
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Rewrite/BinaryPassManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ static cl::opt<bool> JTFootprintReductionFlag(
"instructions at jump sites"),
cl::cat(BoltOptCategory));

static cl::opt<bool>
cl::opt<bool>
KeepNops("keep-nops",
cl::desc("keep no-op instructions. By default they are removed."),
cl::Hidden, cl::cat(BoltOptCategory));
Expand Down
43 changes: 33 additions & 10 deletions bolt/lib/Rewrite/DWARFRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,12 +375,11 @@ static cl::opt<bool> AlwaysConvertToRanges(
extern cl::opt<std::string> CompDirOverride;
} // namespace opts

static bool getLowAndHighPC(const DIE &Die, const DWARFUnit &DU,
uint64_t &LowPC, uint64_t &HighPC,
uint64_t &SectionIndex) {
/// If DW_AT_low_pc exists sets LowPC and returns true.
static bool getLowPC(const DIE &Die, const DWARFUnit &DU, uint64_t &LowPC,
uint64_t &SectionIndex) {
DIEValue DvalLowPc = Die.findAttribute(dwarf::DW_AT_low_pc);
DIEValue DvalHighPc = Die.findAttribute(dwarf::DW_AT_high_pc);
if (!DvalLowPc || !DvalHighPc)
if (!DvalLowPc)
return false;

dwarf::Form Form = DvalLowPc.getForm();
Expand All @@ -403,14 +402,39 @@ static bool getLowAndHighPC(const DIE &Die, const DWARFUnit &DU,
LowPC = LowPcValue;
SectionIndex = 0;
}
return true;
}

/// If DW_AT_high_pc exists sets HighPC and returns true.
static bool getHighPC(const DIE &Die, const uint64_t LowPC, uint64_t &HighPC) {
DIEValue DvalHighPc = Die.findAttribute(dwarf::DW_AT_high_pc);
if (!DvalHighPc)
return false;
if (DvalHighPc.getForm() == dwarf::DW_FORM_addr)
HighPC = DvalHighPc.getDIEInteger().getValue();
else
HighPC = LowPC + DvalHighPc.getDIEInteger().getValue();

return true;
}

/// If DW_AT_low_pc and DW_AT_high_pc exist sets LowPC and HighPC and returns
/// true.
static bool getLowAndHighPC(const DIE &Die, const DWARFUnit &DU,
uint64_t &LowPC, uint64_t &HighPC,
uint64_t &SectionIndex) {
uint64_t TempLowPC = LowPC;
uint64_t TempHighPC = HighPC;
uint64_t TempSectionIndex = SectionIndex;
if (getLowPC(Die, DU, TempLowPC, TempSectionIndex) &&
getHighPC(Die, TempLowPC, TempHighPC)) {
LowPC = TempLowPC;
HighPC = TempHighPC;
SectionIndex = TempSectionIndex;
return true;
}
return false;
}

static Expected<llvm::DWARFAddressRangesVector>
getDIEAddressRanges(const DIE &Die, DWARFUnit &DU) {
uint64_t LowPC, HighPC, Index;
Expand Down Expand Up @@ -1248,10 +1272,9 @@ void DWARFRewriter::updateUnitDebugInfo(
}
}
} else if (LowPCAttrInfo) {
const std::optional<uint64_t> Result =
LowPCAttrInfo.getDIEInteger().getValue();
if (Result.has_value()) {
const uint64_t Address = Result.value();
uint64_t Address = 0;
uint64_t SectionIndex = 0;
if (getLowPC(*Die, Unit, Address, SectionIndex)) {
uint64_t NewAddress = 0;
if (const BinaryFunction *Function =
BC.getBinaryFunctionContainingAddress(Address)) {
Expand Down
49 changes: 48 additions & 1 deletion bolt/lib/Rewrite/LinuxKernelRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,11 +252,17 @@ class LinuxKernelRewriter final : public MetadataRewriter {

/// Paravirtual instruction patch sites.
Error readParaInstructions();
Error rewriteParaInstructions();

Error readBugTable();

/// Read alternative instruction info from .altinstructions.
/// Do not process functions containing instructions annotated with
/// \p Annotation.
void skipFunctionsWithAnnotation(StringRef Annotation) const;

/// Handle alternative instruction info from .altinstructions.
Error readAltInstructions();
Error rewriteAltInstructions();

/// Read .pci_fixup
Error readPCIFixupTable();
Expand Down Expand Up @@ -318,6 +324,12 @@ class LinuxKernelRewriter final : public MetadataRewriter {
if (Error E = rewriteExceptionTable())
return E;

if (Error E = rewriteAltInstructions())
return E;

if (Error E = rewriteParaInstructions())
return E;

if (Error E = rewriteORCTables())
return E;

Expand Down Expand Up @@ -1126,6 +1138,31 @@ Error LinuxKernelRewriter::readParaInstructions() {
return Error::success();
}

/// Marks as non-simple every function that passes BC.shouldEmit() and
/// contains at least one instruction carrying \p Annotation.
void LinuxKernelRewriter::skipFunctionsWithAnnotation(
    StringRef Annotation) const {
  // True if any instruction in any basic block of the function is annotated.
  const auto ContainsAnnotatedInst = [&](const BinaryFunction &Function) {
    for (const BinaryBasicBlock &BB : Function)
      for (const MCInst &Inst : BB)
        if (BC.MIB->hasAnnotation(Inst, Annotation))
          return true;
    return false;
  };

  for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
    if (!BC.shouldEmit(BF))
      continue;
    if (ContainsAnnotatedInst(BF))
      BF.setSimple(false);
  }
}

Error LinuxKernelRewriter::rewriteParaInstructions() {
  // Rewrite support for paravirtual instructions is not complete yet, so
  // functions containing them are excluded from the output for now.
  const StringRef ParaSiteAnnotation = "ParaSite";
  skipFunctionsWithAnnotation(ParaSiteAnnotation);
  return Error::success();
}

/// Process __bug_table section.
/// This section contains information useful for kernel debugging.
/// Each entry in the section is a struct bug_entry that contains a pointer to
Expand Down Expand Up @@ -1305,6 +1342,14 @@ Error LinuxKernelRewriter::readAltInstructions() {
return Error::success();
}

/// Rewrite step for alternative instructions. Currently it only calls
/// skipFunctionsWithAnnotation("AltInst") and always returns success.
Error LinuxKernelRewriter::rewriteAltInstructions() {
// Disable output of functions with alt instructions before the rewrite
// support is complete. Such instructions are identified by the "AltInst"
// annotation.
skipFunctionsWithAnnotation("AltInst");

return Error::success();
}

/// When the Linux kernel needs to handle an error associated with a given PCI
/// device, it uses a table stored in .pci_fixup section to locate a fixup code
/// specific to the vendor and the problematic device. The section contains a
Expand Down Expand Up @@ -1679,6 +1724,8 @@ Error LinuxKernelRewriter::updateStaticKeysJumpTablePostEmit() {
<< "\n\tTargetAddress: 0x" << Twine::utohexstr(TargetAddress)
<< "\n\tKeyAddress: 0x" << Twine::utohexstr(KeyAddress) << '\n';
});
(void)TargetAddress;
(void)KeyAddress;

BinaryFunction *BF =
BC.getBinaryFunctionContainingAddress(JumpAddress,
Expand Down
4 changes: 4 additions & 0 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ extern cl::list<std::string> HotTextMoveSections;
extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
extern cl::opt<JumpTableSupportLevel> JumpTables;
extern cl::opt<bool> KeepNops;
extern cl::list<std::string> ReorderData;
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
extern cl::opt<bool> TimeBuild;
Expand Down Expand Up @@ -2031,6 +2032,9 @@ void RewriteInstance::adjustCommandLineOptions() {

if (opts::Lite)
BC->outs() << "BOLT-INFO: enabling lite mode\n";

if (BC->IsLinuxKernel && !opts::KeepNops.getNumOccurrences())
opts::KeepNops = true;
}

namespace {
Expand Down
4 changes: 2 additions & 2 deletions bolt/test/X86/bolt-address-translation-yaml.test
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ RUN: | FileCheck --check-prefix CHECK-BOLT-YAML %s

WRITE-BAT-CHECK: BOLT-INFO: Wrote 5 BAT maps
WRITE-BAT-CHECK: BOLT-INFO: Wrote 4 function and 22 basic block hashes
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 380
WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 384

READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT
READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries
Expand Down Expand Up @@ -61,4 +61,4 @@ YAML-BAT-CHECK-NEXT: hash: 0xD70DC695320E0010
YAML-BAT-CHECK-NEXT: succ: {{.*}} { bid: 2, cnt: [[#]] }

CHECK-BOLT-YAML: pre-processing profile using YAML profile reader
CHECK-BOLT-YAML-NEXT: 1 out of 16 functions in the binary (6.2%) have non-empty execution profile
CHECK-BOLT-YAML-NEXT: 5 out of 16 functions in the binary (31.2%) have non-empty execution profile
263 changes: 263 additions & 0 deletions bolt/test/X86/dwarf4-label-low-pc.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@

# REQUIRES: system-linux

# RUN: llvm-mc -dwarf-version=4 -filetype=obj -triple x86_64-unknown-linux %s -o %tmain.o
# RUN: %clang %cflags -dwarf-4 %tmain.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.exe | FileCheck --check-prefix=PRECHECK %s
# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt > %t.txt
# RUN: llvm-objdump -d %t.bolt >> %t.txt
# RUN: cat %t.txt | FileCheck --check-prefix=POSTCHECK %s

## This test checks that we correctly handle DW_AT_low_pc [DW_FORM_addr] that is part of DW_TAG_label.

# PRECHECK: version = 0x0004
# PRECHECK: DW_TAG_label
# PRECHECK-NEXT: DW_AT_name
# PRECHECK-NEXT: DW_AT_decl_file
# PRECHECK-NEXT: DW_AT_decl_line
# PRECHECK-NEXT:DW_AT_low_pc [DW_FORM_addr]
# PRECHECK: DW_TAG_label
# PRECHECK-NEXT: DW_AT_name
# PRECHECK-NEXT: DW_AT_decl_file
# PRECHECK-NEXT: DW_AT_decl_line
# PRECHECK-NEXT:DW_AT_low_pc [DW_FORM_addr]

# POSTCHECK: version = 0x0004
# POSTCHECK: DW_TAG_label
# POSTCHECK-NEXT: DW_AT_name
# POSTCHECK-NEXT: DW_AT_decl_file
# POSTCHECK-NEXT: DW_AT_decl_line
# POSTCHECK-NEXT:DW_AT_low_pc [DW_FORM_addr] (0x[[ADDR:[1-9a-f]*]]
# POSTCHECK: DW_TAG_label
# POSTCHECK-NEXT: DW_AT_name
# POSTCHECK-NEXT: DW_AT_decl_file
# POSTCHECK-NEXT: DW_AT_decl_line
# POSTCHECK-NEXT:DW_AT_low_pc [DW_FORM_addr] (0x[[ADDR2:[1-9a-f]*]]

# POSTCHECK: [[ADDR]]: 8b 45 f8
# POSTCHECK: [[ADDR2]]: 8b 45 f8

## clang++ main.cpp -g2 -gdwarf-4 -S
## int main() {
## int a = 4;
## if (a == 5)
## goto LABEL1;
## else
## goto LABEL2;
## LABEL1:a++;
## LABEL2:a--;
## return 0;
## }

.text
.file "main.cpp"
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
main: # @main
.Lfunc_begin0:
.file 1 "/home" "main.cpp"
.loc 1 1 0 # main.cpp:1:0
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movl $0, -4(%rbp)
.Ltmp0:
.loc 1 2 7 prologue_end # main.cpp:2:7
movl $4, -8(%rbp)
.Ltmp1:
.loc 1 3 9 # main.cpp:3:9
cmpl $5, -8(%rbp)
.Ltmp2:
.loc 1 3 7 is_stmt 0 # main.cpp:3:7
jne .LBB0_2
# %bb.1: # %if.then
.Ltmp3:
.loc 1 4 5 is_stmt 1 # main.cpp:4:5
jmp .LBB0_3
.LBB0_2: # %if.else
.loc 1 6 5 # main.cpp:6:5
jmp .LBB0_4
.Ltmp4:
.LBB0_3: # %LABEL1
#DEBUG_LABEL: main:LABEL1
.loc 1 7 11 # main.cpp:7:11
movl -8(%rbp), %eax
addl $1, %eax
movl %eax, -8(%rbp)
.LBB0_4: # %LABEL2
.Ltmp5:
#DEBUG_LABEL: main:LABEL2
.loc 1 8 11 # main.cpp:8:11
movl -8(%rbp), %eax
addl $-1, %eax
movl %eax, -8(%rbp)
.loc 1 9 3 # main.cpp:9:3
xorl %eax, %eax
.loc 1 9 3 epilogue_begin is_stmt 0 # main.cpp:9:3
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Ltmp6:
.Lfunc_end0:
.size main, .Lfunc_end0-main
.cfi_endproc
# -- End function
.section .debug_abbrev,"",@progbits
.byte 1 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 1 # DW_CHILDREN_yes
.byte 37 # DW_AT_producer
.byte 14 # DW_FORM_strp
.byte 19 # DW_AT_language
.byte 5 # DW_FORM_data2
.byte 3 # DW_AT_name
.byte 14 # DW_FORM_strp
.byte 16 # DW_AT_stmt_list
.byte 23 # DW_FORM_sec_offset
.byte 27 # DW_AT_comp_dir
.byte 14 # DW_FORM_strp
.byte 17 # DW_AT_low_pc
.byte 1 # DW_FORM_addr
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 2 # Abbreviation Code
.byte 46 # DW_TAG_subprogram
.byte 1 # DW_CHILDREN_yes
.byte 17 # DW_AT_low_pc
.byte 1 # DW_FORM_addr
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 64 # DW_AT_frame_base
.byte 24 # DW_FORM_exprloc
.byte 3 # DW_AT_name
.byte 14 # DW_FORM_strp
.byte 58 # DW_AT_decl_file
.byte 11 # DW_FORM_data1
.byte 59 # DW_AT_decl_line
.byte 11 # DW_FORM_data1
.byte 73 # DW_AT_type
.byte 19 # DW_FORM_ref4
.byte 63 # DW_AT_external
.byte 25 # DW_FORM_flag_present
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 3 # Abbreviation Code
.byte 52 # DW_TAG_variable
.byte 0 # DW_CHILDREN_no
.byte 2 # DW_AT_location
.byte 24 # DW_FORM_exprloc
.byte 3 # DW_AT_name
.byte 14 # DW_FORM_strp
.byte 58 # DW_AT_decl_file
.byte 11 # DW_FORM_data1
.byte 59 # DW_AT_decl_line
.byte 11 # DW_FORM_data1
.byte 73 # DW_AT_type
.byte 19 # DW_FORM_ref4
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 4 # Abbreviation Code
.byte 10 # DW_TAG_label
.byte 0 # DW_CHILDREN_no
.byte 3 # DW_AT_name
.byte 14 # DW_FORM_strp
.byte 58 # DW_AT_decl_file
.byte 11 # DW_FORM_data1
.byte 59 # DW_AT_decl_line
.byte 11 # DW_FORM_data1
.byte 17 # DW_AT_low_pc
.byte 1 # DW_FORM_addr
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 5 # Abbreviation Code
.byte 36 # DW_TAG_base_type
.byte 0 # DW_CHILDREN_no
.byte 3 # DW_AT_name
.byte 14 # DW_FORM_strp
.byte 62 # DW_AT_encoding
.byte 11 # DW_FORM_data1
.byte 11 # DW_AT_byte_size
.byte 11 # DW_FORM_data1
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 0 # EOM(3)
.section .debug_info,"",@progbits
.Lcu_begin0:
.long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
.Ldebug_info_start0:
.short 4 # DWARF version number
.long .debug_abbrev # Offset Into Abbrev. Section
.byte 8 # Address Size (in bytes)
.byte 1 # Abbrev [1] 0xb:0x6d DW_TAG_compile_unit
.long .Linfo_string0 # DW_AT_producer
.short 33 # DW_AT_language
.long .Linfo_string1 # DW_AT_name
.long .Lline_table_start0 # DW_AT_stmt_list
.long .Linfo_string2 # DW_AT_comp_dir
.quad .Lfunc_begin0 # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.byte 2 # Abbrev [2] 0x2a:0x46 DW_TAG_subprogram
.quad .Lfunc_begin0 # DW_AT_low_pc
.long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.byte 1 # DW_AT_frame_base
.byte 86
.long .Linfo_string3 # DW_AT_name
.byte 1 # DW_AT_decl_file
.byte 1 # DW_AT_decl_line
.long 112 # DW_AT_type
# DW_AT_external
.byte 3 # Abbrev [3] 0x43:0xe DW_TAG_variable
.byte 2 # DW_AT_location
.byte 145
.byte 120
.long .Linfo_string5 # DW_AT_name
.byte 1 # DW_AT_decl_file
.byte 2 # DW_AT_decl_line
.long 112 # DW_AT_type
.byte 4 # Abbrev [4] 0x51:0xf DW_TAG_label
.long .Linfo_string6 # DW_AT_name
.byte 1 # DW_AT_decl_file
.byte 7 # DW_AT_decl_line
.quad .Ltmp4 # DW_AT_low_pc
.byte 4 # Abbrev [4] 0x60:0xf DW_TAG_label
.long .Linfo_string7 # DW_AT_name
.byte 1 # DW_AT_decl_file
.byte 8 # DW_AT_decl_line
.quad .Ltmp5 # DW_AT_low_pc
.byte 0 # End Of Children Mark
.byte 5 # Abbrev [5] 0x70:0x7 DW_TAG_base_type
.long .Linfo_string4 # DW_AT_name
.byte 5 # DW_AT_encoding
.byte 4 # DW_AT_byte_size
.byte 0 # End Of Children Mark
.Ldebug_info_end0:
.section .debug_str,"MS",@progbits,1
.Linfo_string0:
.asciz "clang version 19.0.0git" # string offset=0
.Linfo_string1:
.asciz "main.cpp" # string offset=24
.Linfo_string2:
.asciz "/home" # string offset=33
.Linfo_string3:
.asciz "main" # string offset=71
.Linfo_string4:
.asciz "int" # string offset=76
.Linfo_string5:
.asciz "a" # string offset=80
.Linfo_string6:
.asciz "LABEL1" # string offset=82
.Linfo_string7:
.asciz "LABEL2" # string offset=89
.ident "clang version 19.0.0git"
.section ".note.GNU-stack","",@progbits
.addrsig
.section .debug_line,"",@progbits
.Lline_table_start0:
712 changes: 712 additions & 0 deletions bolt/test/X86/dwarf5-debug-names-cross-cu.s

Large diffs are not rendered by default.

36 changes: 20 additions & 16 deletions bolt/test/X86/dwarf5-label-low-pc.s
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@

# RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt > %t.txt
# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t.txt
# RUN: llvm-objdump -d %t.bolt >> %t.txt
# RUN: cat %t.txt | FileCheck --check-prefix=POSTCHECK %s

# This test checks that we correctly handle DW_AT_low_pc [DW_FORM_addrx] that is part of DW_TAG_label.
## This test checks that we correctly handle DW_AT_low_pc [DW_FORM_addrx] that is part of DW_TAG_label.

# PRECHECK: version = 0x0005
# PRECHECK: DW_TAG_label
Expand All @@ -28,34 +29,37 @@
# POSTCHECK: Addrs: [
# POSTCHECK-NEXT: 0x
# POSTCHECK-NEXT: 0x
# POSTCHECK-NEXT: 0x[[#%.16x,ADDR:]]
# POSTCHECK-NEXT: 0x[[#%.16x,ADDR2:]]
# POSTCHECK-NEXT: 0x[[ADDR:[1-9a-f]*]]
# POSTCHECK-NEXT: 0x[[ADDR2:[1-9a-f]*]]

# POSTCHECK: version = 0x0005
# POSTCHECK: DW_TAG_label
# POSTCHECK-NEXT: DW_AT_name
# POSTCHECK-NEXT: DW_AT_decl_file
# POSTCHECK-NEXT: DW_AT_decl_line
# POSTCHECK-NEXT:DW_AT_low_pc [DW_FORM_addrx] (indexed (00000002)
# POSTCHECK-SAME: 0x[[#ADDR]]
# POSTCHECK-SAME: 0x[[ADDR]]
# POSTCHECK: DW_TAG_label
# POSTCHECK-NEXT: DW_AT_name
# POSTCHECK-NEXT: DW_AT_decl_file
# POSTCHECK-NEXT: DW_AT_decl_line
# POSTCHECK-NEXT:DW_AT_low_pc [DW_FORM_addrx] (indexed (00000003)
# POSTCHECK-SAME: 0x[[#ADDR2]]
# POSTCHECK-SAME: 0x[[ADDR2]]

# clang++ main.cpp -g -S
# int main() {
# int a = 4;
# if (a == 5)
# goto LABEL1;
# else
# goto LABEL2;
# LABEL1:a++;
# LABEL2:a--;
# return 0;
# }
# POSTCHECK: [[ADDR]]: 8b 45 f8
# POSTCHECK: [[ADDR2]]: 8b 45 f8

## clang++ main.cpp -g -S
## int main() {
## int a = 4;
## if (a == 5)
## goto LABEL1;
## else
## goto LABEL2;
## LABEL1:a++;
## LABEL2:a--;
## return 0;
## }

.text
.file "main.cpp"
Expand Down
15 changes: 7 additions & 8 deletions bolt/test/X86/linux-alt-instruction.s
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,30 @@
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
# RUN: llvm-bolt %t.exe --print-normalized --keep-nops -o %t.out \
# RUN: --alt-inst-feature-size=2 | FileCheck %s
# RUN: llvm-bolt %t.exe --print-normalized --alt-inst-feature-size=2 -o %t.out \
# RUN: | FileCheck %s

## Older kernels used to have padlen field in alt_instr. Check compatibility.

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown --defsym PADLEN=1 \
# RUN: %s -o %t.o
# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
# RUN: llvm-bolt %t.exe --print-normalized --keep-nops --alt-inst-has-padlen \
# RUN: -o %t.out | FileCheck %s
# RUN: llvm-bolt %t.exe --print-normalized --alt-inst-has-padlen -o %t.out \
# RUN: | FileCheck %s

## Check with a larger size of "feature" field in alt_instr.

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
# RUN: --defsym FEATURE_SIZE_4=1 %s -o %t.o
# RUN: %clang %cflags -nostdlib %t.o -o %t.exe \
# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr,--no-pie
# RUN: llvm-bolt %t.exe --print-normalized --keep-nops \
# RUN: --alt-inst-feature-size=4 -o %t.out | FileCheck %s
# RUN: llvm-bolt %t.exe --print-normalized --alt-inst-feature-size=4 -o %t.out \
# RUN: | FileCheck %s

## Check that out-of-bounds read is handled properly.

# RUN: not llvm-bolt %t.exe --print-normalized --keep-nops \
# RUN: --alt-inst-feature-size=2 -o %t.out
# RUN: not llvm-bolt %t.exe --print-normalized --alt-inst-feature-size=2 -o %t.out

# CHECK: BOLT-INFO: Linux kernel binary detected
# CHECK: BOLT-INFO: parsed 2 alternative instruction entries
Expand Down
4 changes: 2 additions & 2 deletions bolt/test/X86/linux-orc.s
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
## Verify ORC bindings to instructions.

# RUN: llvm-bolt %t.exe --print-normalized --dump-orc --print-orc -o %t.out \
# RUN: --bolt-info=0 |& FileCheck %s
# RUN: --keep-nops=0 --bolt-info=0 |& FileCheck %s


## Verify ORC bindings after rewrite.
Expand All @@ -37,7 +37,7 @@

## Verify ORC binding after rewrite when some of the functions are skipped.

# RUN: llvm-bolt %t.exe -o %t.out --skip-funcs=bar --bolt-info=0
# RUN: llvm-bolt %t.exe -o %t.out --skip-funcs=bar --bolt-info=0 --keep-nops=0
# RUN: llvm-bolt %t.out -o %t.out.1 --print-normalized --print-orc \
# RUN: |& FileCheck %s

Expand Down
2 changes: 1 addition & 1 deletion bolt/test/X86/linux-parainstructions.s
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

## Verify paravirtual bindings to instructions.

# RUN: llvm-bolt %t.exe --print-normalized -o %t.out | FileCheck %s
# RUN: llvm-bolt %t.exe --print-normalized -o %t.out --keep-nops=0 | FileCheck %s

# CHECK: BOLT-INFO: Linux kernel binary detected
# CHECK: BOLT-INFO: parsed 2 paravirtual patch sites
Expand Down
74 changes: 74 additions & 0 deletions bolt/test/X86/yaml-secondary-entry-discriminator.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# This reproduces a bug with BOLT setting incorrect discriminator for
# secondary entry points in YAML profile.

# REQUIRES: system-linux
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
# RUN: link_fdata %s %t.o %t.fdata
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
# RUN: llvm-bolt %t.exe -o %t.out --data %t.fdata -w %t.yaml --print-profile \
# RUN: --print-only=main | FileCheck %s --check-prefix=CHECK-CFG
# RUN: FileCheck %s -input-file %t.yaml
# CHECK: - name: main
# CHECK-NEXT: fid: 2
# CHECK-NEXT: hash: 0xADF270D550151185
# CHECK-NEXT: exec: 0
# CHECK-NEXT: nblocks: 4
# CHECK-NEXT: blocks:
# CHECK: - bid: 1
# CHECK-NEXT: insns: 1
# CHECK-NEXT: hash: 0x36A303CBA4360014
# CHECK-NEXT: calls: [ { off: 0x0, fid: 1, disc: 1, cnt: 1 } ]
# CHECK: - bid: 2
# CHECK-NEXT: insns: 5
# CHECK-NEXT: hash: 0x8B2F5747CD0019
# CHECK-NEXT: calls: [ { off: 0x0, fid: 1, disc: 1, cnt: 1, mis: 1 } ]

# Make sure that the profile is attached correctly
# RUN: llvm-bolt %t.exe -o %t.out --data %t.yaml --print-profile \
# RUN: --print-only=main | FileCheck %s --check-prefix=CHECK-CFG

# CHECK-CFG: Binary Function "main" after attaching profile {
# CHECK-CFG: callq secondary_entry # Offset: [[#]] # Count: 1
# CHECK-CFG: callq *%rax # Offset: [[#]] # CallProfile: 1 (1 misses) :
# CHECK-CFG-NEXT: { secondary_entry: 1 (1 misses) }

.globl func
.type func, @function
func:
# FDATA: 0 [unknown] 0 1 func 0 1 0
.cfi_startproc
pushq %rbp
movq %rsp, %rbp
.globl secondary_entry
secondary_entry:
popq %rbp
retq
nopl (%rax)
.cfi_endproc
.size func, .-func

.globl main
.type main, @function
main:
.cfi_startproc
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
movl $0, -4(%rbp)
testq %rax, %rax
jne Lindcall
Lcall:
call secondary_entry
# FDATA: 1 main #Lcall# 1 secondary_entry 0 1 1
Lindcall:
callq *%rax
# FDATA: 1 main #Lindcall# 1 secondary_entry 0 1 1
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
# For relocations against .text
call exit
.cfi_endproc
.size main, .-main
2 changes: 1 addition & 1 deletion clang-tools-extra/clang-tidy/ClangTidy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ class ErrorReporter {
if (!tooling::applyAllReplacements(Replacements.get(), Rewrite)) {
llvm::errs() << "Can't apply replacements for file " << File << "\n";
}
AnyNotWritten &= Rewrite.overwriteChangedFiles();
AnyNotWritten |= Rewrite.overwriteChangedFiles();
}

if (AnyNotWritten) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ void IncDecInConditionsCheck::registerMatchers(MatchFinder *Finder) {
anyOf(binaryOperator(anyOf(isComparisonOperator(), isLogicalOperator())),
cxxOperatorCallExpr(isComparisonOperator())));

auto IsInUnevaluatedContext =
expr(anyOf(hasAncestor(expr(matchers::hasUnevaluatedContext())),
hasAncestor(typeLoc())));

Finder->addMatcher(
expr(
OperatorMatcher, unless(isExpansionInSystemHeader()),
Expand All @@ -42,12 +46,14 @@ void IncDecInConditionsCheck::registerMatchers(MatchFinder *Finder) {
cxxOperatorCallExpr(
isPrePostOperator(),
hasUnaryOperand(expr().bind("operand")))),
unless(IsInUnevaluatedContext),
hasAncestor(
expr(equalsBoundNode("parent"),
hasDescendant(
expr(unless(equalsBoundNode("operand")),
matchers::isStatementIdenticalToBoundNode(
"operand"))
"operand"),
unless(IsInUnevaluatedContext))
.bind("second")))))
.bind("operator"))),
this);
Expand Down
41 changes: 33 additions & 8 deletions clang-tools-extra/clang-tidy/google/IntegerTypesCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,34 @@ namespace {
AST_MATCHER(FunctionDecl, isUserDefineLiteral) {
return Node.getLiteralIdentifier() != nullptr;
}

/// Matches a TypeLoc whose begin location is valid and is not a macro
/// location (SourceLocation::isMacroID()).
AST_MATCHER(TypeLoc, isValidAndNotInMacro) {
  const SourceLocation Begin = Node.getBeginLoc();
  // An invalid location can never match.
  if (Begin.isInvalid())
    return false;
  return !Begin.isMacroID();
}

/// Matches a TypeLoc that, after looking through a qualified wrapper, is a
/// builtin type of kind Short, Long, LongLong, UShort, ULong or ULongLong.
AST_MATCHER(TypeLoc, isBuiltinType) {
  // Look through qualification, if any.
  TypeLoc Unqualified = Node;
  if (const auto Qualified = Node.getAs<QualifiedTypeLoc>())
    Unqualified = Qualified.getUnqualifiedLoc();

  // Anything that is not a builtin type is rejected outright.
  const auto Builtin = Unqualified.getAs<BuiltinTypeLoc>();
  if (!Builtin)
    return false;

  const BuiltinType::Kind Kind = Builtin.getTypePtr()->getKind();
  const bool IsSignedCandidate = Kind == BuiltinType::Short ||
                                 Kind == BuiltinType::Long ||
                                 Kind == BuiltinType::LongLong;
  const bool IsUnsignedCandidate = Kind == BuiltinType::UShort ||
                                   Kind == BuiltinType::ULong ||
                                   Kind == BuiltinType::ULongLong;
  return IsSignedCandidate || IsUnsignedCandidate;
}

} // namespace

namespace tidy::google::runtime {
Expand All @@ -63,11 +91,11 @@ void IntegerTypesCheck::registerMatchers(MatchFinder *Finder) {
// "Where possible, avoid passing arguments of types specified by
// bitwidth typedefs to printf-based APIs."
Finder->addMatcher(
typeLoc(loc(isInteger()),
unless(anyOf(hasAncestor(callExpr(
callee(functionDecl(hasAttr(attr::Format))))),
hasParent(parmVarDecl(hasAncestor(
functionDecl(isUserDefineLiteral())))))))
typeLoc(loc(isInteger()), isValidAndNotInMacro(), isBuiltinType(),
unless(hasAncestor(
callExpr(callee(functionDecl(hasAttr(attr::Format)))))),
unless(hasParent(parmVarDecl(
hasAncestor(functionDecl(isUserDefineLiteral()))))))
.bind("tl"),
this);
IdentTable = std::make_unique<IdentifierTable>(getLangOpts());
Expand All @@ -77,9 +105,6 @@ void IntegerTypesCheck::check(const MatchFinder::MatchResult &Result) {
auto TL = *Result.Nodes.getNodeAs<TypeLoc>("tl");
SourceLocation Loc = TL.getBeginLoc();

if (Loc.isInvalid() || Loc.isMacroID())
return;

// Look through qualification.
if (auto QualLoc = TL.getAs<QualifiedTypeLoc>())
TL = QualLoc.getUnqualifiedLoc();
Expand Down
3 changes: 3 additions & 0 deletions clang-tools-extra/clang-tidy/google/IntegerTypesCheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ class IntegerTypesCheck : public ClangTidyCheck {
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
/// Selects TK_IgnoreUnlessSpelledInSource as the traversal kind reported by
/// this check.
std::optional<TraversalKind> getCheckTraversalKind() const override {
return TK_IgnoreUnlessSpelledInSource;
}

private:
const StringRef UnsignedTypePrefix;
Expand Down
24 changes: 21 additions & 3 deletions clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "UseUsingCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclGroup.h"
#include "clang/Lex/Lexer.h"

using namespace clang::ast_matchers;
Expand All @@ -24,6 +25,7 @@ static constexpr llvm::StringLiteral ExternCDeclName = "extern-c-decl";
static constexpr llvm::StringLiteral ParentDeclName = "parent-decl";
static constexpr llvm::StringLiteral TagDeclName = "tag-decl";
static constexpr llvm::StringLiteral TypedefName = "typedef";
static constexpr llvm::StringLiteral DeclStmtName = "decl-stmt";

UseUsingCheck::UseUsingCheck(StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
Expand All @@ -41,7 +43,8 @@ void UseUsingCheck::registerMatchers(MatchFinder *Finder) {
unless(isInstantiated()),
optionally(hasAncestor(
linkageSpecDecl(isExternCLinkage()).bind(ExternCDeclName))),
hasParent(decl().bind(ParentDeclName)))
anyOf(hasParent(decl().bind(ParentDeclName)),
hasParent(declStmt().bind(DeclStmtName))))
.bind(TypedefName),
this);

Expand All @@ -51,17 +54,32 @@ void UseUsingCheck::registerMatchers(MatchFinder *Finder) {
tagDecl(
anyOf(allOf(unless(anyOf(isImplicit(),
classTemplateSpecializationDecl())),
hasParent(decl().bind(ParentDeclName))),
anyOf(hasParent(decl().bind(ParentDeclName)),
hasParent(declStmt().bind(DeclStmtName)))),
// We want the parent of the ClassTemplateDecl, not the parent
// of the specialization.
classTemplateSpecializationDecl(hasAncestor(classTemplateDecl(
hasParent(decl().bind(ParentDeclName)))))))
anyOf(hasParent(decl().bind(ParentDeclName)),
hasParent(declStmt().bind(DeclStmtName))))))))
.bind(TagDeclName),
this);
}

void UseUsingCheck::check(const MatchFinder::MatchResult &Result) {
const auto *ParentDecl = Result.Nodes.getNodeAs<Decl>(ParentDeclName);

if (!ParentDecl) {
const auto *ParentDeclStmt = Result.Nodes.getNodeAs<DeclStmt>(DeclStmtName);
if (ParentDeclStmt) {
if (ParentDeclStmt->isSingleDecl())
ParentDecl = ParentDeclStmt->getSingleDecl();
else
ParentDecl =
ParentDeclStmt->getDeclGroup().getDeclGroup()
[ParentDeclStmt->getDeclGroup().getDeclGroup().size() - 1];
}
}

if (!ParentDecl)
return;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ class StaticDefinitionInAnonymousNamespaceCheck : public ClangTidyCheck {
: ClangTidyCheck(Name, Context) {}
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
/// Reports the check as supported only when \p LangOpts has CPlusPlus set.
bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
return LangOpts.CPlusPlus;
}
/// Selects TK_IgnoreUnlessSpelledInSource as the traversal kind reported by
/// this check.
std::optional<TraversalKind> getCheckTraversalKind() const override {
return TK_IgnoreUnlessSpelledInSource;
}
};

} // namespace clang::tidy::readability
Expand Down
2 changes: 1 addition & 1 deletion clang-tools-extra/clangd/ClangdLSPServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1390,7 +1390,7 @@ void ClangdLSPServer::onClangdInlayHints(const InlayHintsParams &Params,
// Extension doesn't have paddingLeft/Right so adjust the label
// accordingly.
{"label",
((Hint.paddingLeft ? " " : "") + llvm::StringRef(Hint.label) +
((Hint.paddingLeft ? " " : "") + llvm::StringRef(Hint.joinLabels()) +
(Hint.paddingRight ? " " : ""))
.str()},
});
Expand Down
5 changes: 3 additions & 2 deletions clang-tools-extra/clangd/InlayHints.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -977,8 +977,9 @@ class InlayHintVisitor : public RecursiveASTVisitor<InlayHintVisitor> {
return;
bool PadLeft = Prefix.consume_front(" ");
bool PadRight = Suffix.consume_back(" ");
Results.push_back(InlayHint{LSPPos, (Prefix + Label + Suffix).str(), Kind,
PadLeft, PadRight, LSPRange});
Results.push_back(InlayHint{LSPPos,
/*label=*/{(Prefix + Label + Suffix).str()},
Kind, PadLeft, PadRight, LSPRange});
}

// Get the range of the main file that *exactly* corresponds to R.
Expand Down
31 changes: 31 additions & 0 deletions clang-tools-extra/clangd/Protocol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1501,6 +1501,10 @@ bool operator<(const InlayHint &A, const InlayHint &B) {
return std::tie(A.position, A.range, A.kind, A.label) <
std::tie(B.position, B.range, B.kind, B.label);
}
std::string InlayHint::joinLabels() const {
return llvm::join(llvm::map_range(label, [](auto &L) { return L.value; }),
"");
}

llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, InlayHintKind Kind) {
auto ToString = [](InlayHintKind K) {
Expand All @@ -1519,6 +1523,33 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, InlayHintKind Kind) {
return OS << ToString(Kind);
}

/// Serialize a label part to a JSON object. "value" is always written;
/// "tooltip", "location" and "command" are written only when set.
llvm::json::Value toJSON(const InlayHintLabelPart &L) {
  llvm::json::Object Part{{"value", L.value}};

  // Optional members are omitted entirely when they hold no value.
  if (L.tooltip.has_value())
    Part["tooltip"] = *L.tooltip;
  if (L.location.has_value())
    Part["location"] = *L.location;
  if (L.command.has_value())
    Part["command"] = *L.command;

  return Part;
}

/// Two label parts are equal when both `value` and `location` are equal.
/// `tooltip` and `command` do not take part in the comparison.
bool operator==(const InlayHintLabelPart &LHS, const InlayHintLabelPart &RHS) {
  if (!(LHS.value == RHS.value))
    return false;
  return LHS.location == RHS.location;
}

/// Lexicographic ordering on (`value`, `location`). `tooltip` and `command`
/// do not take part in the comparison.
bool operator<(const InlayHintLabelPart &LHS, const InlayHintLabelPart &RHS) {
  // The first key decides unless both values are equivalent.
  if (LHS.value < RHS.value)
    return true;
  if (RHS.value < LHS.value)
    return false;
  // Tie on value: fall back to the location.
  return LHS.location < RHS.location;
}

/// Print the part's value, followed by " (<location>)" when a location is
/// set.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                              const InlayHintLabelPart &L) {
  OS << L.value;
  if (!L.location.has_value())
    return OS;
  return OS << " (" << *L.location << ")";
}

static const char *toString(OffsetEncoding OE) {
switch (OE) {
case OffsetEncoding::UTF8:
Expand Down
47 changes: 46 additions & 1 deletion clang-tools-extra/clangd/Protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -1689,6 +1689,48 @@ enum class InlayHintKind {
};
llvm::json::Value toJSON(const InlayHintKind &);

/// An inlay hint label part allows for interactive and composite labels
/// of inlay hints.
struct InlayHintLabelPart {

/// Creates a part with an empty value and no tooltip, location or command.
InlayHintLabelPart() = default;

/// Creates a part from its text and, optionally, a source location.
/// The tooltip and command are left unset.
InlayHintLabelPart(std::string value,
std::optional<Location> location = std::nullopt)
: value(std::move(value)), location(std::move(location)) {}

/// The value of this label part.
std::string value;

/// The tooltip text when you hover over this label part. Depending on
/// the client capability `inlayHint.resolveSupport`, clients might resolve
/// this property late using the resolve request.
std::optional<MarkupContent> tooltip;

/// An optional source code location that represents this
/// label part.
///
/// The editor will use this location for the hover and for code navigation
/// features: This part will become a clickable link that resolves to the
/// definition of the symbol at the given location (not necessarily the
/// location itself), it shows the hover that shows at the given location,
/// and it shows a context menu with further code navigation commands.
///
/// Depending on the client capability `inlayHint.resolveSupport` clients
/// might resolve this property late using the resolve request.
std::optional<Location> location;

/// An optional command for this label part.
///
/// Depending on the client capability `inlayHint.resolveSupport` clients
/// might resolve this property late using the resolve request.
std::optional<Command> command;
};
llvm::json::Value toJSON(const InlayHintLabelPart &);
bool operator==(const InlayHintLabelPart &, const InlayHintLabelPart &);
bool operator<(const InlayHintLabelPart &, const InlayHintLabelPart &);
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const InlayHintLabelPart &);

/// Inlay hint information.
struct InlayHint {
/// The position of this hint.
Expand All @@ -1698,7 +1740,7 @@ struct InlayHint {
/// InlayHintLabelPart label parts.
///
/// *Note* that neither the string nor the label part can be empty.
std::string label;
std::vector<InlayHintLabelPart> label;

/// The kind of this hint. Can be omitted in which case the client should fall
/// back to a reasonable default.
Expand All @@ -1724,6 +1766,9 @@ struct InlayHint {
/// The range allows clients more flexibility of when/how to display the hint.
/// This is an (unserialized) clangd extension.
Range range;

/// Join the label[].value together.
std::string joinLabels() const;
};
llvm::json::Value toJSON(const InlayHint &);
bool operator==(const InlayHint &, const InlayHint &);
Expand Down
6 changes: 5 additions & 1 deletion clang-tools-extra/clangd/test/inlayHints.test
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,11 @@
# CHECK-NEXT: "result": [
# CHECK-NEXT: {
# CHECK-NEXT: "kind": 2,
# CHECK-NEXT: "label": "bar:",
# CHECK-NEXT: "label": [
# CHECK-NEXT: {
# CHECK-NEXT: "value": "bar:"
# CHECK-NEXT: }
# CHECK-NEXT: ],
# CHECK-NEXT: "paddingLeft": false,
# CHECK-NEXT: "paddingRight": true,
# CHECK-NEXT: "position": {
Expand Down
8 changes: 7 additions & 1 deletion clang-tools-extra/clangd/tool/Check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,13 @@ class Checker {
auto Hints = inlayHints(*AST, LineRange);

for (const auto &Hint : Hints) {
vlog(" {0} {1} {2}", Hint.kind, Hint.position, Hint.label);
vlog(" {0} {1} [{2}]", Hint.kind, Hint.position, [&] {
return llvm::join(llvm::map_range(Hint.label,
[&](auto &L) {
return llvm::formatv("{{{0}}", L);
}),
", ");
}());
}
}

Expand Down
9 changes: 5 additions & 4 deletions clang-tools-extra/clangd/unittests/InlayHintTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ namespace clangd {

llvm::raw_ostream &operator<<(llvm::raw_ostream &Stream,
const InlayHint &Hint) {
return Stream << Hint.label << "@" << Hint.range;
return Stream << Hint.joinLabels() << "@" << Hint.range;
}

namespace {
Expand Down Expand Up @@ -57,10 +57,11 @@ struct ExpectedHint {

MATCHER_P2(HintMatcher, Expected, Code, llvm::to_string(Expected)) {
llvm::StringRef ExpectedView(Expected.Label);
if (arg.label != ExpectedView.trim(" ") ||
std::string ResultLabel = arg.joinLabels();
if (ResultLabel != ExpectedView.trim(" ") ||
arg.paddingLeft != ExpectedView.starts_with(" ") ||
arg.paddingRight != ExpectedView.ends_with(" ")) {
*result_listener << "label is '" << arg.label << "'";
*result_listener << "label is '" << ResultLabel << "'";
return false;
}
if (arg.range != Code.range(Expected.RangeName)) {
Expand All @@ -72,7 +73,7 @@ MATCHER_P2(HintMatcher, Expected, Code, llvm::to_string(Expected)) {
return true;
}

MATCHER_P(labelIs, Label, "") { return arg.label == Label; }
MATCHER_P(labelIs, Label, "") { return arg.joinLabels() == Label; }

Config noHintsConfig() {
Config C;
Expand Down
35 changes: 25 additions & 10 deletions clang-tools-extra/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,10 @@ Changes in existing checks
<clang-tidy/checks/bugprone/assert-side-effect>` check by detecting side
effect from calling a method with non-const reference parameters.

- Improved :doc:`bugprone-inc-dec-in-conditions
<clang-tidy/checks/bugprone/inc-dec-in-conditions>` check to ignore code
within unevaluated contexts, such as ``decltype``.

- Improved :doc:`bugprone-non-zero-enum-to-bool-conversion
<clang-tidy/checks/bugprone/non-zero-enum-to-bool-conversion>` check by
eliminating false positives resulting from direct usage of bitwise operators
Expand Down Expand Up @@ -176,13 +180,13 @@ Changes in existing checks
<clang-tidy/checks/cppcoreguidelines/owning-memory>` check to properly handle
return type in lambdas and in nested functions.

- Cleaned up :doc:`cppcoreguidelines-prefer-member-initializer
<clang-tidy/checks/cppcoreguidelines/prefer-member-initializer>`
- Improved :doc:`cppcoreguidelines-prefer-member-initializer
<clang-tidy/checks/cppcoreguidelines/prefer-member-initializer>` check
by removing enforcement of rule `C.48
<https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#c48-prefer-in-class-initializers-to-member-initializers-in-constructors-for-constant-initializers>`_,
which was deprecated since :program:`clang-tidy` 17. This rule is now covered
by :doc:`cppcoreguidelines-use-default-member-init
<clang-tidy/checks/cppcoreguidelines/use-default-member-init>` and fixes
<clang-tidy/checks/cppcoreguidelines/use-default-member-init>`. Fixed
incorrect hints when using list-initialization.

- Improved :doc:`google-build-namespaces
Expand All @@ -197,6 +201,9 @@ Changes in existing checks
<clang-tidy/checks/google/global-names-in-headers>` check by replacing the local
option `HeaderFileExtensions` by the global option of the same name.

- Improved :doc:`google-runtime-int <clang-tidy/checks/google/runtime-int>`
check performance through optimizations.

- Improved :doc:`llvm-header-guard
<clang-tidy/checks/llvm/header-guard>` check by replacing the local
option `HeaderFileExtensions` by the global option of the same name.
Expand Down Expand Up @@ -229,12 +236,20 @@ Changes in existing checks
<clang-tidy/checks/modernize/use-override>` check to also remove any trailing
whitespace when deleting the ``virtual`` keyword.

- Improved :doc:`modernize-use-using <clang-tidy/checks/modernize/use-using>`
check by adding support for detection of typedefs declared on function level.

- Improved :doc:`performance-unnecessary-copy-initialization
<clang-tidy/checks/performance/unnecessary-copy-initialization>` check by
detecting more cases of constant access. In particular, pointers can be
analyzed, so the check now handles the common patterns
`const auto e = (*vector_ptr)[i]` and `const auto e = vector_ptr->at(i);`.

- Improved :doc:`readability-identifier-naming
<clang-tidy/checks/readability/identifier-naming>` check in `GetConfigPerFile`
mode by resolving symbolic links to header files. Fixed handling of Hungarian
Prefix when configured to `LowerCase`.

- Improved :doc:`readability-implicit-bool-conversion
<clang-tidy/checks/readability/implicit-bool-conversion>` check to provide
valid fix suggestions for ``static_cast`` without a preceding space and
Expand All @@ -244,10 +259,10 @@ Changes in existing checks
<clang-tidy/checks/readability/redundant-inline-specifier>` check to properly
emit warnings for static data member with an in-class initializer.

- Improved :doc:`readability-identifier-naming
<clang-tidy/checks/readability/identifier-naming>` check in `GetConfigPerFile`
mode by resolving symbolic links to header files. Fixed handling of Hungarian
Prefix when configured to `LowerCase`.
- Improved :doc:`readability-static-definition-in-anonymous-namespace
<clang-tidy/checks/readability/static-definition-in-anonymous-namespace>`
check by resolving fix-it overlaps in template code by disregarding implicit
instances.

Removed checks
^^^^^^^^^^^^^^
Expand All @@ -258,9 +273,9 @@ Removed checks
Miscellaneous
^^^^^^^^^^^^^

- Fixed incorrect formatting in ``clang-apply-replacements`` when no ``--format``
option is specified. Now ``clang-apply-replacements`` applies formatting only with
the option.
- Fixed incorrect formatting in :program:`clang-apply-replacements` when no
``--format`` option is specified. Now :program:`clang-apply-replacements`
applies formatting only with the option.

Improvements to include-fixer
-----------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,13 @@ bool doubleCheck(Container<int> x) {
// CHECK-MESSAGES: :[[@LINE-1]]:11: warning: decrementing and referencing a variable in a complex condition can cause unintended side-effects due to C++'s order of evaluation, consider moving the modification outside of the condition to avoid misunderstandings [bugprone-inc-dec-in-conditions]
// CHECK-MESSAGES: :[[@LINE-2]]:31: warning: incrementing and referencing a variable in a complex condition can cause unintended side-effects due to C++'s order of evaluation, consider moving the modification outside of the condition to avoid misunderstandings [bugprone-inc-dec-in-conditions]
}

// Cases named after PR85838: an increment and a reference to the same
// variable where one of the two appears only inside decltype, i.e. in an
// unevaluated operand that is never executed. No diagnostic expectations
// follow these conditions, so presumably neither is meant to be reported.
namespace PR85838 {
void test()
{
auto foo = 0;
auto bar = 0;
// foo is incremented for real; its second mention is only inside decltype.
if (++foo < static_cast<decltype(foo)>(bar)) {}
// The increment itself sits inside decltype; outside it foo is only read.
if (static_cast<decltype(++foo)>(bar) < foo) {}
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %check_clang_tidy %s modernize-use-using %t -- -- -I %S/Inputs/use-using/
// RUN: %check_clang_tidy %s modernize-use-using %t -- -- -fno-delayed-template-parsing -I %S/Inputs/use-using/

typedef int Type;
// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: use 'using' instead of 'typedef' [modernize-use-using]
Expand Down Expand Up @@ -342,3 +342,44 @@ typedef int InExternCPP;
// CHECK-FIXES: using InExternCPP = int;

}

// Cases named after ISSUE_72179: typedefs declared at function level (plain
// functions, a function template, a member function of a class template, and
// a lambda body). Each declaration is followed by the warning and the
// rewritten form expected for it.
namespace ISSUE_72179
{
// Alias of a builtin type declared inside a plain function.
void foo()
{
typedef int a;
// CHECK-MESSAGES: :[[@LINE-1]]:5: warning: use 'using' instead of 'typedef' [modernize-use-using]
// CHECK-FIXES: using a = int;

}

// Alias of an unnamed struct that itself contains an anonymous union.
void foo2()
{
typedef struct { int a; union { int b; }; } c;
// CHECK-MESSAGES: :[[@LINE-1]]:5: warning: use 'using' instead of 'typedef' [modernize-use-using]
// CHECK-FIXES: using c = struct { int a; union { int b; }; };
}

// Alias of the template parameter inside a function template.
template <typename T>
void foo3()
{
typedef T b;
// CHECK-MESSAGES: :[[@LINE-1]]:5: warning: use 'using' instead of 'typedef' [modernize-use-using]
// CHECK-FIXES: using b = T;
}

// Alias of the enclosing class template, declared in one of its member
// functions.
template <typename T>
class MyClass
{
void foo()
{
typedef MyClass c;
// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use 'using' instead of 'typedef' [modernize-use-using]
// CHECK-FIXES: using c = MyClass;
}
};

// Alias declared inside a lambda body; the expected fix rewrites it in place
// on the same line.
const auto foo4 = [](int a){typedef int d;};
// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: use 'using' instead of 'typedef' [modernize-use-using]
// CHECK-FIXES: const auto foo4 = [](int a){using d = int;};
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,17 @@ static int c = 1;
} // namespace deep_inner
} // namespace inner

// Function template declared static; the enclosing namespace is closed
// further down with a "// namespace" marker. The expectations below give the
// warning for this declaration and the fixed line with the keyword removed.
template<typename T>
static void printTemplate(T&&) {}
// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: 'printTemplate' is a static definition in anonymous namespace; static is redundant here [readability-static-definition-in-anonymous-namespace]
// CHECK-FIXES: {{^}}void printTemplate(T&&) {}

// Calls printTemplate with an int, an unsigned int and a string literal, so
// the template is implicitly instantiated for three different argument types.
void testTemplate() {
printTemplate(5);
printTemplate(5U);
printTemplate("some string");
}

} // namespace

namespace N {
Expand Down
30 changes: 9 additions & 21 deletions clang/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,16 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
set(CLANG_BUILT_STANDALONE TRUE)
endif()

# Make sure that our source directory is on the current cmake module path so that
# we can include cmake files from this directory.
list(INSERT CMAKE_MODULE_PATH 0
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules"
"${LLVM_COMMON_CMAKE_UTILS}/Modules"
)

# Must go below project(..)
include(GNUInstallDirs)
include(GetDarwinLinkerVersion)

if(CLANG_BUILT_STANDALONE)
set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ standard to conform to")
Expand Down Expand Up @@ -140,13 +148,6 @@ if(CLANG_BUILT_STANDALONE)
endif() # LLVM_INCLUDE_TESTS
endif() # standalone

# Make sure that our source directory is on the current cmake module path so that
# we can include cmake files from this directory.
list(INSERT CMAKE_MODULE_PATH 0
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules"
"${LLVM_COMMON_CMAKE_UTILS}/Modules"
)

# This allows disabling clang's XML dependency even if LLVM finds libxml2.
# By default, clang depends on libxml2 if LLVM does.
option(CLANG_ENABLE_LIBXML2 "Whether libclang may depend on libxml2"
Expand Down Expand Up @@ -346,20 +347,7 @@ endif ()
# Determine HOST_LINK_VERSION on Darwin.
set(HOST_LINK_VERSION)
if (APPLE AND NOT CMAKE_LINKER MATCHES ".*lld.*")
set(LD_V_OUTPUT)
execute_process(
COMMAND sh -c "${CMAKE_LINKER} -v 2>&1 | head -1"
RESULT_VARIABLE HAD_ERROR
OUTPUT_VARIABLE LD_V_OUTPUT
)
if (HAD_ERROR)
message(FATAL_ERROR "${CMAKE_LINKER} failed with status ${HAD_ERROR}")
endif()
if ("${LD_V_OUTPUT}" MATCHES ".*ld64-([0-9.]+).*")
string(REGEX REPLACE ".*ld64-([0-9.]+).*" "\\1" HOST_LINK_VERSION ${LD_V_OUTPUT})
elseif ("${LD_V_OUTPUT}" MATCHES "[^0-9]*([0-9.]+).*")
string(REGEX REPLACE "[^0-9]*([0-9.]+).*" "\\1" HOST_LINK_VERSION ${LD_V_OUTPUT})
endif()
get_darwin_linker_version(HOST_LINK_VERSION)
message(STATUS "Host linker version: ${HOST_LINK_VERSION}")
endif()

Expand Down
2 changes: 1 addition & 1 deletion clang/cmake/caches/HLSL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")

# Include the DirectX target for DXIL code generation, eventually we'll include
# SPIR-V here too.
set(LLVM_EXPERIMENTAL_TARGETS_TO_BUILD DirectX CACHE STRING "")
set(LLVM_EXPERIMENTAL_TARGETS_TO_BUILD "DirectX;SPIRV" CACHE STRING "")

# HLSL support is currently limited to clang, eventually it will expand to
# clang-tools-extra too.
Expand Down
145 changes: 145 additions & 0 deletions clang/docs/ClangFormatStyleOptions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -955,6 +955,151 @@ the configuration (without a prefix: ``Auto``).
}


.. _AlignConsecutiveTableGenBreakingDAGArgColons:

**AlignConsecutiveTableGenBreakingDAGArgColons** (``AlignConsecutiveStyle``) :versionbadge:`clang-format 19` :ref:`¶ <AlignConsecutiveTableGenBreakingDAGArgColons>`
Style of aligning consecutive TableGen DAGArg operator colons.
If enabled, align the colons inside DAGArgs that contain line breaks.
This works only when TableGenBreakInsideDAGArg is BreakElements or
BreakAll and the DAGArg is not excluded by the effect of
TableGenBreakingDAGArgOperators.

.. code-block:: c++

let dagarg = (ins
a :$src1,
aa :$src2,
aaa:$src3
)

Nested configuration flags:

Alignment options.

They can also be read as a whole for compatibility. The choices are:
- None
- Consecutive
- AcrossEmptyLines
- AcrossComments
- AcrossEmptyLinesAndComments

For example, to align across empty lines and not across comments, either
of these work.

.. code-block:: c++

AlignConsecutiveTableGenBreakingDAGArgColons: AcrossEmptyLines

AlignConsecutiveTableGenBreakingDAGArgColons:
Enabled: true
AcrossEmptyLines: true
AcrossComments: false

* ``bool Enabled`` Whether aligning is enabled.

.. code-block:: c++

#define SHORT_NAME 42
#define LONGER_NAME 0x007f
#define EVEN_LONGER_NAME (2)
#define foo(x) (x * x)
#define bar(y, z) (y + z)

int a = 1;
int somelongname = 2;
double c = 3;

int aaaa : 1;
int b : 12;
int ccc : 8;

int aaaa = 12;
float b = 23;
std::string ccc;

* ``bool AcrossEmptyLines`` Whether to align across empty lines.

.. code-block:: c++

true:
int a = 1;
int somelongname = 2;
double c = 3;

int d = 3;

false:
int a = 1;
int somelongname = 2;
double c = 3;

int d = 3;

* ``bool AcrossComments`` Whether to align across comments.

.. code-block:: c++

true:
int d = 3;
/* A comment. */
double e = 4;
false:
int d = 3;
/* A comment. */
double e = 4;
* ``bool AlignCompound`` Only for ``AlignConsecutiveAssignments``. Whether compound assignments
like ``+=`` are aligned along with ``=``.

.. code-block:: c++

true:
a &= 2;
bbb = 2;

false:
a &= 2;
bbb = 2;

* ``bool AlignFunctionPointers`` Only for ``AlignConsecutiveDeclarations``. Whether function pointers are
aligned.

.. code-block:: c++

true:
unsigned i;
int &r;
int *p;
int (*f)();
false:
unsigned i;
int &r;
int *p;
int (*f)();
* ``bool PadOperators`` Only for ``AlignConsecutiveAssignments``. Whether short assignment
operators are left-padded to the same length as long ones in order to
put all assignment operators to the right of the left hand side.

.. code-block:: c++

true:
a >>= 2;
bbb = 2;

a = 2;
bbb >>= 2;

false:
a >>= 2;
bbb = 2;

a = 2;
bbb >>= 2;


.. _AlignConsecutiveTableGenCondOperatorColons:

**AlignConsecutiveTableGenCondOperatorColons** (``AlignConsecutiveStyle``) :versionbadge:`clang-format 19` :ref:`¶ <AlignConsecutiveTableGenCondOperatorColons>`
Expand Down
2 changes: 2 additions & 0 deletions clang/docs/LanguageExtensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5420,10 +5420,12 @@ The following builtin intrinsics can be used in constant expressions:
* ``__builtin_clzl``
* ``__builtin_clzll``
* ``__builtin_clzs``
* ``__builtin_clzg``
* ``__builtin_ctz``
* ``__builtin_ctzl``
* ``__builtin_ctzll``
* ``__builtin_ctzs``
* ``__builtin_ctzg``
* ``__builtin_ffs``
* ``__builtin_ffsl``
* ``__builtin_ffsll``
Expand Down
27 changes: 27 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ ABI Changes in This Version
inline member function that contains a static local variable with a dynamic
initializer is declared with ``__declspec(dllimport)``. (#GH83616).

- Fixed Microsoft name mangling of lifetime extended temporary objects. This
change corrects missing back reference registrations that could result in
incorrect back reference indexes and surprising demangled name results. Since
MSVC uses a different mangling for these objects, compatibility is not affected.
(#GH85423).

AST Dumping Potentially Breaking Changes
----------------------------------------

Expand Down Expand Up @@ -182,6 +188,11 @@ Non-comprehensive list of changes in this release

- Lambda expressions are now accepted in C++03 mode as an extension.

- Added ``__builtin_clzg`` and ``__builtin_ctzg`` as type-generic alternatives
to ``__builtin_clz{,s,l,ll}`` and ``__builtin_ctz{,s,l,ll}`` respectively,
with support for any unsigned integer type. Like the previous builtins, these
new builtins are constexpr and may be used in constant expressions.

New Compiler Flags
------------------

Expand Down Expand Up @@ -289,6 +300,13 @@ Improvements to Clang's diagnostics
- Clang now correctly diagnoses no arguments to a variadic macro parameter as a C23/C++20 extension.
Fixes #GH84495.

- Clang no longer emits a ``-Wexit-time-destructors`` warning on static variables explicitly
annotated with the ``clang::always_destroy`` attribute.
Fixes #GH68686, #GH86486

- ``-Wmicrosoft``, ``-Wgnu``, or ``-pedantic`` is now required to diagnose C99
flexible array members in a union or alone in a struct. Fixes #GH84565.

Improvements to Clang's time-trace
----------------------------------

Expand Down Expand Up @@ -437,6 +455,9 @@ Bug Fixes to C++ Support
- Clang's __builtin_bit_cast will now produce a constant value for records with empty bases. See:
(#GH82383)
- Fix a crash when instantiating a lambda that captures ``this`` outside of its context. Fixes (#GH85343).
- Fix an issue where a namespace alias could be defined using a qualified name (all name components
following the first `::` were ignored).
- Fix an out-of-bounds crash when checking the validity of template partial specializations. (part of #GH86757).

Bug Fixes to AST Handling
^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down Expand Up @@ -520,6 +541,7 @@ RISC-V Support
^^^^^^^^^^^^^^

- ``__attribute__((rvv_vector_bits(N)))`` is now supported for RVV vbool*_t types.
- Profile names in ``-march`` option are now supported.

CUDA/HIP Language Changes
^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down Expand Up @@ -576,6 +598,7 @@ Static Analyzer
- Fixed crashing on loops if the loop variable was declared in switch blocks
but not under any case blocks if ``unroll-loops=true`` analyzer config is
set. (#GH68819)
- Support C++23 static operator calls. (#GH84972)

New features
^^^^^^^^^^^^
Expand Down Expand Up @@ -607,6 +630,10 @@ Sanitizers
manually disable potentially noisy signed integer overflow checks with
``-fno-sanitize=signed-integer-overflow``

- ``-fsanitize=cfi -fsanitize-cfi-cross-dso`` (cross-DSO CFI instrumentation)
now generates the ``__cfi_check`` function with proper target-specific
attributes, for example allowing unwind table generation.

Python Binding Changes
----------------------

Expand Down
60 changes: 47 additions & 13 deletions clang/docs/UsersManual.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2441,20 +2441,39 @@ usual build cycle when using sample profilers for optimization:

1. Build the code with source line table information. You can use all the
usual build flags that you always build your application with. The only
requirement is that you add ``-gline-tables-only`` or ``-g`` to the
command line. This is important for the profiler to be able to map
instructions back to source line locations.
requirement is that DWARF debug info including source line information is
generated. This DWARF information is important for the profiler to be able
to map instructions back to source line locations.

On Linux, ``-g`` or just ``-gline-tables-only`` is sufficient:

.. code-block:: console
$ clang++ -O2 -gline-tables-only code.cc -o code
While MSVC-style targets default to CodeView debug information, DWARF debug
information is required to generate source-level LLVM profiles. Use
``-gdwarf`` to include DWARF debug information:

.. code-block:: console
$ clang-cl -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld -link -debug:dwarf
2. Run the executable under a sampling profiler. The specific profiler
you use does not really matter, as long as its output can be converted
into the format that the LLVM optimizer understands. Currently, there
exists a conversion tool for the Linux Perf profiler
(https://perf.wiki.kernel.org/), so these examples assume that you
are using Linux Perf to profile your code.
into the format that the LLVM optimizer understands.

Two such profilers are the Linux Perf profiler
(https://perf.wiki.kernel.org/) and Intel's Sampling Enabling Product (SEP),
available as part of `Intel VTune
<https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html>`_.
While Perf is Linux-specific, SEP can be used on Linux, Windows, and FreeBSD.

The LLVM tool ``llvm-profgen`` can convert output of either Perf or SEP. An
external project, `AutoFDO <https://github.com/google/autofdo>`_, also
provides a ``create_llvm_prof`` tool which supports Linux Perf output.

When using Perf:

.. code-block:: console
Expand All @@ -2465,11 +2484,19 @@ usual build cycle when using sample profilers for optimization:
it provides better call information, which improves the accuracy of
the profile data.

3. Convert the collected profile data to LLVM's sample profile format.
This is currently supported via the AutoFDO converter ``create_llvm_prof``.
It is available at https://github.com/google/autofdo. Once built and
installed, you can convert the ``perf.data`` file to LLVM using
the command:
When using SEP:

.. code-block:: console
$ sep -start -out code.tb7 -ec BR_INST_RETIRED.NEAR_TAKEN:precise=yes:pdir -lbr no_filter:usr -perf-script brstack -app ./code
This produces a ``code.perf.data.script`` output which can be used with
``llvm-profgen``'s ``--perfscript`` input option.

3. Convert the collected profile data to LLVM's sample profile format. This is
currently supported via the `AutoFDO <https://github.com/google/autofdo>`_
converter ``create_llvm_prof``. Once built and installed, you can convert
the ``perf.data`` file to LLVM using the command:

.. code-block:: console
Expand All @@ -2485,7 +2512,14 @@ usual build cycle when using sample profilers for optimization:

.. code-block:: console
$ llvm-profgen --binary=./code --output=code.prof--perfdata=perf.data
$ llvm-profgen --binary=./code --output=code.prof --perfdata=perf.data
When using SEP the output is in the textual format corresponding to
``llvm-profgen --perfscript``. For example:

.. code-block:: console
$ llvm-profgen --binary=./code --output=code.prof --perfscript=code.perf.data.script
4. Build the code again using the collected profile. This step feeds
Expand Down
152 changes: 126 additions & 26 deletions clang/docs/analyzer/checkers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,51 @@ cplusplus
C++ Checkers.
.. _cplusplus-ArrayDelete:
cplusplus.ArrayDelete (C++)
"""""""""""""""""""""""""""
Reports destructions of arrays of polymorphic objects that are destructed as
their base class. If the dynamic type of the array is different from its static
type, calling `delete[]` is undefined.
This checker corresponds to the SEI CERT rule `EXP51-CPP: Do not delete an array through a pointer of the incorrect type <https://wiki.sei.cmu.edu/confluence/display/cplusplus/EXP51-CPP.+Do+not+delete+an+array+through+a+pointer+of+the+incorrect+type>`_.
.. code-block:: cpp
class Base {
public:
virtual ~Base() {}
};
class Derived : public Base {};
Base *create() {
Base *x = new Derived[10]; // note: Casting from 'Derived' to 'Base' here
return x;
}
void foo() {
Base *x = create();
delete[] x; // warn: Deleting an array of 'Derived' objects as their base class 'Base' is undefined
}
**Limitations**
The checker does not emit note tags when casting to and from reference types,
even though the pointer values are tracked across references.
.. code-block:: cpp
void foo() {
Derived *d = new Derived[10];
Derived &dref = *d;
Base &bref = static_cast<Base&>(dref); // no note
Base *b = &bref;
delete[] b; // warn: Deleting an array of 'Derived' objects as their base class 'Base' is undefined
}
.. _cplusplus-InnerPointer:
cplusplus.InnerPointer (C++)
Expand Down Expand Up @@ -804,10 +849,89 @@ Check for performance anti-patterns when using Grand Central Dispatch.
.. _optin-performance-Padding:
optin.performance.Padding
"""""""""""""""""""""""""
optin.performance.Padding (C, C++, ObjC)
""""""""""""""""""""""""""""""""""""""""
Check for excessively padded structs.
This checker detects structs with excessive padding, which can lead to wasted
memory and thus decreased performance by reducing the effectiveness of the
processor cache. Padding bytes are added by compilers to align data accesses
as some processors require data to be aligned to certain boundaries. On others,
unaligned data accesses are possible, but impose significantly larger latencies.
To avoid padding bytes, the fields of a struct should be ordered by decreasing
alignment. Usually, it's easier to think of the ``sizeof`` of the fields, and
ordering the fields by ``sizeof`` would usually also lead to the same optimal
layout.
In rare cases, one can use the ``#pragma pack(1)`` directive to enforce a packed
layout too, but it can significantly increase the access times, so reordering the
fields is usually a better solution.
.. code-block:: cpp
// warn: Excessive padding in 'struct NonOptimal' (35 padding bytes, where 3 is optimal)
struct NonOptimal {
char c1;
// 7 bytes of padding
std::int64_t big1; // 8 bytes
char c2;
// 7 bytes of padding
std::int64_t big2; // 8 bytes
char c3;
// 7 bytes of padding
std::int64_t big3; // 8 bytes
char c4;
// 7 bytes of padding
std::int64_t big4; // 8 bytes
char c5;
// 7 bytes of padding
};
static_assert(sizeof(NonOptimal) == 4*8+5+5*7);
// no-warning: The fields are nicely aligned to have the minimal amount of padding bytes.
struct Optimal {
std::int64_t big1; // 8 bytes
std::int64_t big2; // 8 bytes
std::int64_t big3; // 8 bytes
std::int64_t big4; // 8 bytes
char c1;
char c2;
char c3;
char c4;
char c5;
// 3 bytes of padding
};
static_assert(sizeof(Optimal) == 4*8+5+3);
// no-warning: Bit packing representation is also accepted by this checker, but
// it can significantly increase access times, so prefer reordering the fields.
#pragma pack(1)
struct BitPacked {
char c1;
std::int64_t big1; // 8 bytes
char c2;
std::int64_t big2; // 8 bytes
char c3;
std::int64_t big3; // 8 bytes
char c4;
std::int64_t big4; // 8 bytes
char c5;
};
static_assert(sizeof(BitPacked) == 4*8+5);
The ``AllowedPad`` option can be used to specify a threshold for the number of
padding bytes raising the warning. If the number of padding bytes of the struct
and the optimal number of padding bytes differ by more than the threshold value,
a warning will be raised.
By default, the ``AllowedPad`` threshold is 24 bytes.
To override this threshold to e.g. 4 bytes, use the
``-analyzer-config optin.performance.Padding:AllowedPad=4`` option.
.. _optin-portability-UnixAPI:
optin.portability.UnixAPI
Expand Down Expand Up @@ -2139,30 +2263,6 @@ Either the comparison is useless or there is division by zero.
alpha.cplusplus
^^^^^^^^^^^^^^^
.. _alpha-cplusplus-ArrayDelete:
alpha.cplusplus.ArrayDelete (C++)
"""""""""""""""""""""""""""""""""
Reports destructions of arrays of polymorphic objects that are destructed as their base class.
This checker corresponds to the CERT rule `EXP51-CPP: Do not delete an array through a pointer of the incorrect type <https://wiki.sei.cmu.edu/confluence/display/cplusplus/EXP51-CPP.+Do+not+delete+an+array+through+a+pointer+of+the+incorrect+type>`_.
.. code-block:: cpp
class Base {
virtual ~Base() {}
};
class Derived : public Base {}
Base *create() {
Base *x = new Derived[10]; // note: Casting from 'Derived' to 'Base' here
return x;
}
void foo() {
Base *x = create();
delete[] x; // warn: Deleting an array of 'Derived' objects as their base class 'Base' is undefined
}
.. _alpha-cplusplus-DeleteWithNonVirtualDtor:
alpha.cplusplus.DeleteWithNonVirtualDtor (C++)
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang-c/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -2991,6 +2991,7 @@ enum CXCallingConv {
CXCallingConv_AArch64SVEPCS = 18,
CXCallingConv_M68kRTD = 19,
CXCallingConv_PreserveNone = 20,
CXCallingConv_RISCVVectorCall = 21,

CXCallingConv_Invalid = 100,
CXCallingConv_Unexposed = 200
Expand Down
2 changes: 1 addition & 1 deletion clang/include/clang/AST/DeclContextInternals.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ class StoredDeclsList {
Data.setPointer(Head);
}

/// Return an array of all the decls that this list represents.
/// Return the list of all the decls.
DeclContext::lookup_result getLookupResult() const {
return DeclContext::lookup_result(Data.getPointer());
}
Expand Down
Loading