8 changes: 0 additions & 8 deletions bolt/lib/Core/BinaryData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,6 @@ bool BinaryData::hasName(StringRef Name) const {
return false;
}

bool BinaryData::hasNameRegex(StringRef NameRegex) const {
Regex MatchName(NameRegex);
for (const MCSymbol *Symbol : Symbols)
if (MatchName.match(Symbol->getName()))
return true;
return false;
}

bool BinaryData::nameStartsWith(StringRef Prefix) const {
for (const MCSymbol *Symbol : Symbols)
if (Symbol->getName().starts_with(Prefix))
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Core/BinaryEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,

// Emit sized NOPs via MCAsmBackend::writeNopData() interface on x86.
// This is a workaround for invalid NOPs handling by asm/disasm layer.
if (BC.MIB->isNoop(Instr) && BC.isX86()) {
if (BC.isX86() && BC.MIB->isNoop(Instr)) {
if (std::optional<uint32_t> Size = BC.MIB->getSize(Instr)) {
SmallString<15> Code;
raw_svector_ostream VecOS(Code);
Expand Down
215 changes: 156 additions & 59 deletions bolt/lib/Core/Relocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -774,60 +774,95 @@ static bool isPCRelativeRISCV(uint64_t Type) {
}

bool Relocation::isSupported(uint64_t Type) {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
return false;
case Triple::aarch64:
return isSupportedAArch64(Type);
if (Arch == Triple::riscv64)
case Triple::riscv64:
return isSupportedRISCV(Type);
return isSupportedX86(Type);
case Triple::x86_64:
return isSupportedX86(Type);
}
}

size_t Relocation::getSizeForType(uint64_t Type) {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return getSizeForTypeAArch64(Type);
if (Arch == Triple::riscv64)
case Triple::riscv64:
return getSizeForTypeRISCV(Type);
return getSizeForTypeX86(Type);
case Triple::x86_64:
return getSizeForTypeX86(Type);
}
}

bool Relocation::skipRelocationType(uint64_t Type) {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return skipRelocationTypeAArch64(Type);
if (Arch == Triple::riscv64)
case Triple::riscv64:
return skipRelocationTypeRISCV(Type);
return skipRelocationTypeX86(Type);
case Triple::x86_64:
return skipRelocationTypeX86(Type);
}
}

bool Relocation::skipRelocationProcess(uint64_t &Type, uint64_t Contents) {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return skipRelocationProcessAArch64(Type, Contents);
if (Arch == Triple::riscv64)
skipRelocationProcessRISCV(Type, Contents);
return skipRelocationProcessX86(Type, Contents);
case Triple::riscv64:
return skipRelocationProcessRISCV(Type, Contents);
case Triple::x86_64:
return skipRelocationProcessX86(Type, Contents);
}
}

uint64_t Relocation::encodeValue(uint64_t Type, uint64_t Value, uint64_t PC) {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return encodeValueAArch64(Type, Value, PC);
if (Arch == Triple::riscv64)
case Triple::riscv64:
return encodeValueRISCV(Type, Value, PC);
return encodeValueX86(Type, Value, PC);
case Triple::x86_64:
return encodeValueX86(Type, Value, PC);
}
}

uint64_t Relocation::extractValue(uint64_t Type, uint64_t Contents,
uint64_t PC) {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return extractValueAArch64(Type, Contents, PC);
if (Arch == Triple::riscv64)
case Triple::riscv64:
return extractValueRISCV(Type, Contents, PC);
return extractValueX86(Type, Contents, PC);
case Triple::x86_64:
return extractValueX86(Type, Contents, PC);
}
}

bool Relocation::isGOT(uint64_t Type) {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return isGOTAArch64(Type);
if (Arch == Triple::riscv64)
case Triple::riscv64:
return isGOTRISCV(Type);
return isGOTX86(Type);
case Triple::x86_64:
return isGOTX86(Type);
}
}

bool Relocation::isX86GOTPCRELX(uint64_t Type) {
Expand All @@ -845,27 +880,42 @@ bool Relocation::isX86GOTPC64(uint64_t Type) {
bool Relocation::isNone(uint64_t Type) { return Type == getNone(); }

bool Relocation::isRelative(uint64_t Type) {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return Type == ELF::R_AARCH64_RELATIVE;
if (Arch == Triple::riscv64)
case Triple::riscv64:
return Type == ELF::R_RISCV_RELATIVE;
return Type == ELF::R_X86_64_RELATIVE;
case Triple::x86_64:
return Type == ELF::R_X86_64_RELATIVE;
}
}

bool Relocation::isIRelative(uint64_t Type) {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return Type == ELF::R_AARCH64_IRELATIVE;
if (Arch == Triple::riscv64)
case Triple::riscv64:
llvm_unreachable("not implemented");
return Type == ELF::R_X86_64_IRELATIVE;
case Triple::x86_64:
return Type == ELF::R_X86_64_IRELATIVE;
}
}

bool Relocation::isTLS(uint64_t Type) {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return isTLSAArch64(Type);
if (Arch == Triple::riscv64)
case Triple::riscv64:
return isTLSRISCV(Type);
return isTLSX86(Type);
case Triple::x86_64:
return isTLSX86(Type);
}
}

bool Relocation::isInstructionReference(uint64_t Type) {
Expand All @@ -882,49 +932,81 @@ bool Relocation::isInstructionReference(uint64_t Type) {
}

uint64_t Relocation::getNone() {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return ELF::R_AARCH64_NONE;
if (Arch == Triple::riscv64)
case Triple::riscv64:
return ELF::R_RISCV_NONE;
return ELF::R_X86_64_NONE;
case Triple::x86_64:
return ELF::R_X86_64_NONE;
}
}

uint64_t Relocation::getPC32() {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return ELF::R_AARCH64_PREL32;
if (Arch == Triple::riscv64)
case Triple::riscv64:
return ELF::R_RISCV_32_PCREL;
return ELF::R_X86_64_PC32;
case Triple::x86_64:
return ELF::R_X86_64_PC32;
}
}

uint64_t Relocation::getPC64() {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return ELF::R_AARCH64_PREL64;
if (Arch == Triple::riscv64)
case Triple::riscv64:
llvm_unreachable("not implemented");
return ELF::R_X86_64_PC64;
case Triple::x86_64:
return ELF::R_X86_64_PC64;
}
}

bool Relocation::isPCRelative(uint64_t Type) {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return isPCRelativeAArch64(Type);
if (Arch == Triple::riscv64)
case Triple::riscv64:
return isPCRelativeRISCV(Type);
return isPCRelativeX86(Type);
case Triple::x86_64:
return isPCRelativeX86(Type);
}
}

uint64_t Relocation::getAbs64() {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return ELF::R_AARCH64_ABS64;
if (Arch == Triple::riscv64)
case Triple::riscv64:
return ELF::R_RISCV_64;
return ELF::R_X86_64_64;
case Triple::x86_64:
return ELF::R_X86_64_64;
}
}

uint64_t Relocation::getRelative() {
if (Arch == Triple::aarch64)
switch (Arch) {
default:
llvm_unreachable("Unsupported architecture");
case Triple::aarch64:
return ELF::R_AARCH64_RELATIVE;
return ELF::R_X86_64_RELATIVE;
case Triple::riscv64:
llvm_unreachable("not implemented");
case Triple::x86_64:
return ELF::R_X86_64_RELATIVE;
}
}

size_t Relocation::emit(MCStreamer *Streamer) const {
Expand Down Expand Up @@ -982,32 +1064,47 @@ MCBinaryExpr::Opcode Relocation::getComposeOpcodeFor(uint64_t Type) {
}
}

#define ELF_RELOC(name, value) #name,

void Relocation::print(raw_ostream &OS) const {
static const char *X86RelocNames[] = {
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
};
static const char *AArch64RelocNames[] = {
switch (Arch) {
default:
OS << "RType:" << Twine::utohexstr(Type);
break;

case Triple::aarch64:
static const char *const AArch64RelocNames[] = {
#define ELF_RELOC(name, value) #name,
#include "llvm/BinaryFormat/ELFRelocs/AArch64.def"
};
if (Arch == Triple::aarch64)
#undef ELF_RELOC
};
assert(Type < ArrayRef(AArch64RelocNames).size());
OS << AArch64RelocNames[Type];
else if (Arch == Triple::riscv64) {
break;

case Triple::riscv64:
// RISC-V relocations are not sequentially numbered so we cannot use an
// array
switch (Type) {
default:
llvm_unreachable("illegal RISC-V relocation");
#undef ELF_RELOC
#define ELF_RELOC(name, value) \
case value: \
OS << #name; \
break;
#include "llvm/BinaryFormat/ELFRelocs/RISCV.def"
#undef ELF_RELOC
}
} else
break;

case Triple::x86_64:
static const char *const X86RelocNames[] = {
#define ELF_RELOC(name, value) #name,
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
};
assert(Type < ArrayRef(X86RelocNames).size());
OS << X86RelocNames[Type];
break;
}
OS << ", 0x" << Twine::utohexstr(Offset);
if (Symbol) {
OS << ", " << Symbol->getName();
Expand Down
161 changes: 96 additions & 65 deletions bolt/lib/Profile/BoltAddressTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
LLVM_DEBUG(dbgs() << " Address reference: 0x"
<< Twine::utohexstr(Function.getOutputAddress()) << "\n");
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n", getBFHash(OutputAddress)));
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n", getBFHash(InputAddress)));
LLVM_DEBUG(dbgs() << " Secondary Entry Points: " << NumSecondaryEntryPoints
<< '\n');

Expand Down Expand Up @@ -153,12 +153,13 @@ APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
return BitMask;
}

size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map) const {
size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map,
uint32_t Skew) const {
size_t EqualOffsets = 0;
for (const std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
const uint32_t OutputOffset = KeyVal.first;
const uint32_t InputOffset = KeyVal.second >> 1;
if (OutputOffset == InputOffset)
if (OutputOffset == InputOffset - Skew)
++EqualOffsets;
else
break;
Expand Down Expand Up @@ -196,18 +197,24 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
SecondaryEntryPointsMap.count(Address)
? SecondaryEntryPointsMap[Address].size()
: 0;
uint32_t Skew = 0;
if (Cold) {
size_t HotIndex =
std::distance(ColdPartSource.begin(), ColdPartSource.find(Address));
auto HotEntryIt = Maps.find(ColdPartSource[Address]);
assert(HotEntryIt != Maps.end());
size_t HotIndex = std::distance(Maps.begin(), HotEntryIt);
encodeULEB128(HotIndex - PrevIndex, OS);
PrevIndex = HotIndex;
// Skew of all input offsets for cold fragments is simply the first input
// offset.
Skew = Map.begin()->second >> 1;
encodeULEB128(Skew, OS);
} else {
// Function hash
size_t BFHash = getBFHash(HotInputAddress);
LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", BFHash));
OS.write(reinterpret_cast<char *>(&BFHash), 8);
// Number of basic blocks
size_t NumBasicBlocks = getBBHashMap(HotInputAddress).getNumBasicBlocks();
size_t NumBasicBlocks = NumBasicBlocksMap[HotInputAddress];
LLVM_DEBUG(dbgs() << "Basic blocks: " << NumBasicBlocks << '\n');
encodeULEB128(NumBasicBlocks, OS);
// Secondary entry points
Expand All @@ -216,24 +223,21 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
<< '\n');
}
encodeULEB128(NumEntries, OS);
// For hot fragments only: encode the number of equal offsets
// (output = input) in the beginning of the function. Only encode one offset
// in these cases.
const size_t EqualElems = Cold ? 0 : getNumEqualOffsets(Map);
if (!Cold) {
encodeULEB128(EqualElems, OS);
if (EqualElems) {
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
APInt BranchEntries = calculateBranchEntriesBitMask(Map, EqualElems);
OS.write(reinterpret_cast<const char *>(BranchEntries.getRawData()),
BranchEntriesBytes);
LLVM_DEBUG({
dbgs() << "BranchEntries: ";
SmallString<8> BitMaskStr;
BranchEntries.toString(BitMaskStr, 2, false);
dbgs() << BitMaskStr << '\n';
});
}
// Encode the number of equal offsets (output = input - skew) in the
// beginning of the function. Only encode one offset in these cases.
const size_t EqualElems = getNumEqualOffsets(Map, Skew);
encodeULEB128(EqualElems, OS);
if (EqualElems) {
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
APInt BranchEntries = calculateBranchEntriesBitMask(Map, EqualElems);
OS.write(reinterpret_cast<const char *>(BranchEntries.getRawData()),
BranchEntriesBytes);
LLVM_DEBUG({
dbgs() << "BranchEntries: ";
SmallString<8> BitMaskStr;
BranchEntries.toString(BitMaskStr, 2, false);
dbgs() << BitMaskStr << '\n';
});
}
const BBHashMapTy &BBHashMap = getBBHashMap(HotInputAddress);
size_t Index = 0;
Expand Down Expand Up @@ -314,10 +318,12 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
uint64_t HotAddress = Cold ? 0 : Address;
PrevAddress = Address;
uint32_t SecondaryEntryPoints = 0;
uint64_t ColdInputSkew = 0;
if (Cold) {
HotIndex += DE.getULEB128(&Offset, &Err);
HotAddress = HotFuncs[HotIndex];
ColdPartSource.emplace(Address, HotAddress);
ColdInputSkew = DE.getULEB128(&Offset, &Err);
} else {
HotFuncs.push_back(Address);
// Function hash
Expand All @@ -338,28 +344,25 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
getULEB128Size(SecondaryEntryPoints)));
}
const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
// Equal offsets, hot fragments only.
size_t EqualElems = 0;
// Equal offsets.
const size_t EqualElems = DE.getULEB128(&Offset, &Err);
APInt BEBitMask;
if (!Cold) {
EqualElems = DE.getULEB128(&Offset, &Err);
LLVM_DEBUG(dbgs() << formatv("Equal offsets: {0}, {1} bytes\n",
EqualElems, getULEB128Size(EqualElems)));
if (EqualElems) {
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
BEBitMask = APInt(alignTo(EqualElems, 8), 0);
LoadIntFromMemory(
BEBitMask,
reinterpret_cast<const uint8_t *>(
DE.getBytes(&Offset, BranchEntriesBytes, &Err).data()),
BranchEntriesBytes);
LLVM_DEBUG({
dbgs() << "BEBitMask: ";
SmallString<8> BitMaskStr;
BEBitMask.toString(BitMaskStr, 2, false);
dbgs() << BitMaskStr << ", " << BranchEntriesBytes << " bytes\n";
});
}
LLVM_DEBUG(dbgs() << formatv("Equal offsets: {0}, {1} bytes\n", EqualElems,
getULEB128Size(EqualElems)));
if (EqualElems) {
const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
BEBitMask = APInt(alignTo(EqualElems, 8), 0);
LoadIntFromMemory(
BEBitMask,
reinterpret_cast<const uint8_t *>(
DE.getBytes(&Offset, BranchEntriesBytes, &Err).data()),
BranchEntriesBytes);
LLVM_DEBUG({
dbgs() << "BEBitMask: ";
SmallString<8> BitMaskStr;
BEBitMask.toString(BitMaskStr, 2, false);
dbgs() << BitMaskStr << ", " << BranchEntriesBytes << " bytes\n";
});
}
MapTy Map;

Expand All @@ -374,7 +377,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
PrevAddress = OutputAddress;
int64_t InputDelta = 0;
if (J < EqualElems) {
InputOffset = (OutputOffset << 1) | BEBitMask[J];
InputOffset = ((OutputOffset + ColdInputSkew) << 1) | BEBitMask[J];
} else {
InputDelta = DE.getSLEB128(&Offset, &Err);
InputOffset += InputDelta;
Expand Down Expand Up @@ -425,8 +428,9 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
for (const auto &MapEntry : Maps) {
const uint64_t Address = MapEntry.first;
const uint64_t HotAddress = fetchParentAddress(Address);
const bool IsHotFunction = HotAddress == 0;
OS << "Function Address: 0x" << Twine::utohexstr(Address);
if (HotAddress == 0)
if (IsHotFunction)
OS << formatv(", hash: {0:x}", getBFHash(Address));
OS << "\n";
OS << "BB mappings:\n";
Expand All @@ -443,6 +447,8 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
OS << formatv(" hash: {0:x}", BBHashMap.getBBHash(Val));
OS << "\n";
}
if (IsHotFunction)
OS << "NumBlocks: " << NumBasicBlocksMap[Address] << '\n';
if (SecondaryEntryPointsMap.count(Address)) {
const std::vector<uint32_t> &SecondaryEntryPoints =
SecondaryEntryPointsMap[Address];
Expand Down Expand Up @@ -574,27 +580,52 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) {
// Set BF/BB metadata
for (const BinaryBasicBlock &BB : BF)
BBHashMap.addEntry(BB.getInputOffset(), BB.getIndex(), BB.getHash());
NumBasicBlocksMap.emplace(BF.getAddress(), BF.size());
}
}

std::unordered_map<uint32_t, std::vector<uint32_t>>
BoltAddressTranslation::getBFBranches(uint64_t OutputAddress) const {
std::unordered_map<uint32_t, std::vector<uint32_t>> Branches;
auto FuncIt = Maps.find(OutputAddress);
assert(FuncIt != Maps.end());
std::vector<uint32_t> InputOffsets;
for (const auto &KV : FuncIt->second)
InputOffsets.emplace_back(KV.second);
// Sort with LSB BRANCHENTRY bit.
llvm::sort(InputOffsets);
uint32_t BBOffset{0};
for (uint32_t InOffset : InputOffsets) {
if (InOffset & BRANCHENTRY)
Branches[BBOffset].push_back(InOffset >> 1);
else
BBOffset = InOffset >> 1;
}
return Branches;
unsigned
BoltAddressTranslation::getSecondaryEntryPointId(uint64_t Address,
uint32_t Offset) const {
auto FunctionIt = SecondaryEntryPointsMap.find(Address);
if (FunctionIt == SecondaryEntryPointsMap.end())
return 0;
const std::vector<uint32_t> &Offsets = FunctionIt->second;
auto OffsetIt = std::find(Offsets.begin(), Offsets.end(), Offset);
if (OffsetIt == Offsets.end())
return 0;
// Adding one here because main entry point is not stored in BAT, and
// enumeration for secondary entry points starts with 1.
return OffsetIt - Offsets.begin() + 1;
}

std::pair<const BinaryFunction *, unsigned>
BoltAddressTranslation::translateSymbol(const BinaryContext &BC,
const MCSymbol &Symbol,
uint32_t Offset) const {
// The symbol could be a secondary entry in a cold fragment.
uint64_t SymbolValue = cantFail(errorOrToExpected(BC.getSymbolValue(Symbol)));

const BinaryFunction *Callee = BC.getFunctionForSymbol(&Symbol);
assert(Callee);

// Containing function, not necessarily the same as symbol value.
const uint64_t CalleeAddress = Callee->getAddress();
const uint32_t OutputOffset = SymbolValue - CalleeAddress;

const uint64_t ParentAddress = fetchParentAddress(CalleeAddress);
const uint64_t HotAddress = ParentAddress ? ParentAddress : CalleeAddress;

const BinaryFunction *ParentBF = BC.getBinaryFunctionAtAddress(HotAddress);

const uint32_t InputOffset =
translate(CalleeAddress, OutputOffset, /*IsBranchSrc*/ false) + Offset;

unsigned SecondaryEntryId{0};
if (InputOffset)
SecondaryEntryId = getSecondaryEntryPointId(HotAddress, InputOffset);

return std::pair(ParentBF, SecondaryEntryId);
}

} // namespace bolt
Expand Down
229 changes: 97 additions & 132 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -604,8 +604,6 @@ Error DataAggregator::readProfile(BinaryContext &BC) {
// BAT YAML is handled by DataAggregator since normal YAML output requires
// CFG which is not available in BAT mode.
if (usesBAT()) {
// Postprocess split function profile for BAT
fixupBATProfile(BC);
if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML)
if (std::error_code EC = writeBATYAML(BC, opts::OutputFilename))
report_error("cannot create output data file", EC);
Expand Down Expand Up @@ -664,18 +662,19 @@ DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
/*UseMaxSize=*/true);
}

StringRef DataAggregator::getLocationName(BinaryFunction &Func,
uint64_t Count) {
BinaryFunction *
DataAggregator::getBATParentFunction(const BinaryFunction &Func) const {
if (BAT)
if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress()))
return getBinaryFunctionContainingAddress(HotAddr);
return nullptr;
}

StringRef DataAggregator::getLocationName(const BinaryFunction &Func) const {
if (!BAT)
return Func.getOneName();

const BinaryFunction *OrigFunc = &Func;
if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
NumColdSamples += Count;
BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
if (HotFunc)
OrigFunc = HotFunc;
}
// If it is a local function, prefer the name containing the file name where
// the local function was declared
for (StringRef AlternativeName : OrigFunc->getNames()) {
Expand All @@ -690,12 +689,17 @@ StringRef DataAggregator::getLocationName(BinaryFunction &Func,
return OrigFunc->getOneName();
}

bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
uint64_t Count) {
BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
if (ParentFunc)
NumColdSamples += Count;

auto I = NamesToSamples.find(Func.getOneName());
if (I == NamesToSamples.end()) {
bool Success;
StringRef LocName = getLocationName(Func, Count);
StringRef LocName = getLocationName(Func);
std::tie(I, Success) = NamesToSamples.insert(
std::make_pair(Func.getOneName(),
FuncSampleData(LocName, FuncSampleData::ContainerTy())));
Expand All @@ -715,22 +719,12 @@ bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
FuncBranchData *AggrData = getBranchData(Func);
if (!AggrData) {
AggrData = &NamesToBranches[Func.getOneName()];
AggrData->Name = getLocationName(Func, Count);
AggrData->Name = getLocationName(Func);
setBranchData(Func, AggrData);
}

From -= Func.getAddress();
To -= Func.getAddress();
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
<< formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
if (BAT) {
From = BAT->translate(Func.getAddress(), From, /*IsBranchSrc=*/true);
To = BAT->translate(Func.getAddress(), To, /*IsBranchSrc=*/false);
LLVM_DEBUG(
dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
<< formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
}

AggrData->bumpBranchCount(From, To, Count, Mispreds);
return true;
}
Expand All @@ -744,30 +738,24 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
StringRef SrcFunc;
StringRef DstFunc;
if (FromFunc) {
SrcFunc = getLocationName(*FromFunc, Count);
SrcFunc = getLocationName(*FromFunc);
FromAggrData = getBranchData(*FromFunc);
if (!FromAggrData) {
FromAggrData = &NamesToBranches[FromFunc->getOneName()];
FromAggrData->Name = SrcFunc;
setBranchData(*FromFunc, FromAggrData);
}
From -= FromFunc->getAddress();
if (BAT)
From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);

recordExit(*FromFunc, From, Mispreds, Count);
}
if (ToFunc) {
DstFunc = getLocationName(*ToFunc, 0);
DstFunc = getLocationName(*ToFunc);
ToAggrData = getBranchData(*ToFunc);
if (!ToAggrData) {
ToAggrData = &NamesToBranches[ToFunc->getOneName()];
ToAggrData->Name = DstFunc;
setBranchData(*ToFunc, ToAggrData);
}
To -= ToFunc->getAddress();
if (BAT)
To = BAT->translate(ToFunc->getAddress(), To, /*IsBranchSrc=*/false);

recordEntry(*ToFunc, To, Mispreds, Count);
}
Expand All @@ -783,15 +771,32 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,

bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
uint64_t Mispreds) {
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * {
if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) {
Addr -= Func->getAddress();

if (BAT)
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);

if (BinaryFunction *ParentFunc = getBATParentFunction(*Func)) {
Func = ParentFunc;
if (IsFrom)
NumColdSamples += Count;
}

return Func;
}
return nullptr;
};

BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
if (!FromFunc && !ToFunc)
return false;

// Treat recursive control transfers as inter-branches.
if (FromFunc == ToFunc && (To != ToFunc->getAddress())) {
recordBranch(*FromFunc, From - FromFunc->getAddress(),
To - FromFunc->getAddress(), Count, Mispreds);
if (FromFunc == ToFunc && To != 0) {
recordBranch(*FromFunc, From, To, Count, Mispreds);
return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
}

Expand Down Expand Up @@ -842,9 +847,14 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
<< FromFunc->getPrintName() << ":"
<< Twine::utohexstr(First.To) << " to "
<< Twine::utohexstr(Second.From) << ".\n");
for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
Pair.second + FromFunc->getAddress(), Count, false);
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
for (auto [From, To] : *FTs) {
if (BAT) {
From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
}
doIntraBranch(ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, false);
}

return true;
}
Expand Down Expand Up @@ -2273,29 +2283,6 @@ DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
return std::error_code();
}

void DataAggregator::fixupBATProfile(BinaryContext &BC) {
for (auto &[FuncName, Branches] : NamesToBranches) {
BinaryData *BD = BC.getBinaryDataByName(FuncName);
assert(BD);
uint64_t FuncAddress = BD->getAddress();
if (!BAT->isBATFunction(FuncAddress))
continue;
// Filter out cold fragments
if (!BD->getSectionName().equals(BC.getMainCodeSectionName()))
continue;
// Convert inter-branches between hot and cold fragments into
// intra-branches.
for (auto &[OffsetFrom, CallToMap] : Branches.InterIndex) {
for (auto &[CallToLoc, CallToIdx] : CallToMap) {
if (CallToLoc.Name != FuncName)
continue;
Branches.IntraIndex[OffsetFrom][CallToLoc.Offset] = CallToIdx;
Branches.InterIndex[OffsetFrom].erase(CallToLoc);
}
}
}
}

std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
StringRef OutputFilename) const {
std::error_code EC;
Expand Down Expand Up @@ -2333,7 +2320,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
if (BAT->isBATFunction(Function.getAddress()))
continue;
BP.Functions.emplace_back(
YAMLProfileWriter::convert(Function, /*UseDFS=*/false));
YAMLProfileWriter::convert(Function, /*UseDFS=*/false, BAT));
}

for (const auto &KV : NamesToBranches) {
Expand All @@ -2345,9 +2332,6 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
uint64_t FuncAddress = BD->getAddress();
if (!BAT->isBATFunction(FuncAddress))
continue;
// Filter out cold fragments
if (!BD->getSectionName().equals(BC.getMainCodeSectionName()))
continue;
BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncAddress);
assert(BF);
YamlBF.Name = FuncName.str();
Expand All @@ -2357,87 +2341,68 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
const BoltAddressTranslation::BBHashMapTy &BlockMap =
BAT->getBBHashMap(FuncAddress);
YamlBF.Blocks.resize(YamlBF.NumBasicBlocks);

auto addSuccProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB,
uint64_t SuccOffset, unsigned SuccDataIdx) {
for (auto &&[Idx, YamlBB] : llvm::enumerate(YamlBF.Blocks))
YamlBB.Index = Idx;

for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI)
YamlBF.Blocks[BI->second.getBBIndex()].Hash = BI->second.getBBHash();

auto getSuccessorInfo = [&](uint32_t SuccOffset, unsigned SuccDataIdx) {
const llvm::bolt::BranchInfo &BI = Branches.Data.at(SuccDataIdx);
yaml::bolt::SuccessorInfo SI;
SI.Index = BlockMap.getBBIndex(SuccOffset);
SI.Count = BI.Branches;
SI.Mispreds = BI.Mispreds;
YamlBB.Successors.emplace_back(SI);
return SI;
};

std::unordered_map<uint32_t, std::vector<uint32_t>> BFBranches =
BAT->getBFBranches(FuncAddress);

auto addCallsProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB,
uint64_t Offset) {
// Iterate over BRANCHENTRY records in the current block
for (uint32_t BranchOffset : BFBranches[Offset]) {
if (!Branches.InterIndex.contains(BranchOffset))
continue;
for (const auto &[CallToLoc, CallToIdx] :
Branches.InterIndex.at(BranchOffset)) {
const llvm::bolt::BranchInfo &BI = Branches.Data.at(CallToIdx);
yaml::bolt::CallSiteInfo YamlCSI;
YamlCSI.DestId = 0; // designated for unknown functions
YamlCSI.EntryDiscriminator = 0;
YamlCSI.Count = BI.Branches;
YamlCSI.Mispreds = BI.Mispreds;
YamlCSI.Offset = BranchOffset - Offset;
BinaryData *CallTargetBD = BC.getBinaryDataByName(CallToLoc.Name);
if (!CallTargetBD) {
YamlBB.CallSites.emplace_back(YamlCSI);
continue;
}
uint64_t CallTargetAddress = CallTargetBD->getAddress();
BinaryFunction *CallTargetBF =
BC.getBinaryFunctionAtAddress(CallTargetAddress);
if (!CallTargetBF) {
YamlBB.CallSites.emplace_back(YamlCSI);
continue;
}
// Calls between hot and cold fragments must be handled in
// fixupBATProfile.
assert(CallTargetBF != BF && "invalid CallTargetBF");
YamlCSI.DestId = CallTargetBF->getFunctionNumber();
if (CallToLoc.Offset) {
if (BAT->isBATFunction(CallTargetAddress)) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Unsupported secondary "
"entry point in BAT function "
<< CallToLoc.Name << '\n');
} else if (const BinaryBasicBlock *CallTargetBB =
CallTargetBF->getBasicBlockAtOffset(
CallToLoc.Offset)) {
// Only record true call information, ignoring returns (normally
// won't have a target basic block) and jumps to the landing
// pads (not an entry point).
if (CallTargetBB->isEntryPoint()) {
YamlCSI.EntryDiscriminator =
CallTargetBF->getEntryIDForSymbol(
CallTargetBB->getLabel());
}
}
}
YamlBB.CallSites.emplace_back(YamlCSI);
}
}
auto getCallSiteInfo = [&](Location CallToLoc, unsigned CallToIdx,
uint32_t Offset) {
const llvm::bolt::BranchInfo &BI = Branches.Data.at(CallToIdx);
yaml::bolt::CallSiteInfo CSI;
CSI.DestId = 0; // designated for unknown functions
CSI.EntryDiscriminator = 0;
CSI.Count = BI.Branches;
CSI.Mispreds = BI.Mispreds;
CSI.Offset = Offset;
if (BinaryData *BD = BC.getBinaryDataByName(CallToLoc.Name))
YAMLProfileWriter::setCSIDestination(BC, CSI, BD->getSymbol(), BAT,
CallToLoc.Offset);
return CSI;
};

for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) {
yaml::bolt::BinaryBasicBlockProfile YamlBB;
if (!BlockMap.isInputBlock(FromOffset))
continue;
YamlBB.Index = BlockMap.getBBIndex(FromOffset);
YamlBB.Hash = BlockMap.getBBHash(FromOffset);
const unsigned Index = BlockMap.getBBIndex(FromOffset);
yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[Index];
for (const auto &[SuccOffset, SuccDataIdx] : SuccKV)
addSuccProfile(YamlBB, SuccOffset, SuccDataIdx);
addCallsProfile(YamlBB, FromOffset);
if (YamlBB.ExecCount || !YamlBB.Successors.empty() ||
!YamlBB.CallSites.empty())
YamlBF.Blocks.emplace_back(YamlBB);
if (BlockMap.isInputBlock(SuccOffset))
YamlBB.Successors.emplace_back(
getSuccessorInfo(SuccOffset, SuccDataIdx));
}
for (const auto &[FromOffset, CallTo] : Branches.InterIndex) {
auto BlockIt = BlockMap.upper_bound(FromOffset);
--BlockIt;
const unsigned BlockOffset = BlockIt->first;
const unsigned BlockIndex = BlockIt->second.getBBIndex();
yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex];
const uint32_t Offset = FromOffset - BlockOffset;
for (const auto &[CallToLoc, CallToIdx] : CallTo)
YamlBB.CallSites.emplace_back(
getCallSiteInfo(CallToLoc, CallToIdx, Offset));
llvm::sort(YamlBB.CallSites, [](yaml::bolt::CallSiteInfo &A,
yaml::bolt::CallSiteInfo &B) {
return A.Offset < B.Offset;
});
}
// Drop blocks without a hash, won't be useful for stale matching.
llvm::erase_if(YamlBF.Blocks,
[](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
return YamlBB.Hash == (yaml::Hex64)0;
});
BP.Functions.emplace_back(YamlBF);
}
}
Expand Down
23 changes: 14 additions & 9 deletions bolt/lib/Profile/YAMLProfileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "bolt/Profile/YAMLProfileWriter.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Profile/ProfileReaderBase.h"
#include "bolt/Rewrite/RewriteInstance.h"
#include "llvm/Support/CommandLine.h"
Expand All @@ -25,17 +26,19 @@ extern llvm::cl::opt<bool> ProfileUseDFS;
namespace llvm {
namespace bolt {

/// Set CallSiteInfo destination fields from \p Symbol and return a target
/// BinaryFunction for that symbol.
static const BinaryFunction *setCSIDestination(const BinaryContext &BC,
yaml::bolt::CallSiteInfo &CSI,
const MCSymbol *Symbol) {
const BinaryFunction *YAMLProfileWriter::setCSIDestination(
const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
uint32_t Offset) {
CSI.DestId = 0; // designated for unknown functions
CSI.EntryDiscriminator = 0;

if (Symbol) {
uint64_t EntryID = 0;
if (const BinaryFunction *const Callee =
if (const BinaryFunction *Callee =
BC.getFunctionForSymbol(Symbol, &EntryID)) {
if (BAT && BAT->isBATFunction(Callee->getAddress()))
std::tie(Callee, EntryID) = BAT->translateSymbol(BC, *Symbol, Offset);
CSI.DestId = Callee->getFunctionNumber();
CSI.EntryDiscriminator = EntryID;
return Callee;
Expand All @@ -45,7 +48,8 @@ static const BinaryFunction *setCSIDestination(const BinaryContext &BC,
}

yaml::bolt::BinaryFunctionProfile
YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) {
YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const BoltAddressTranslation *BAT) {
yaml::bolt::BinaryFunctionProfile YamlBF;
const BinaryContext &BC = BF.getBinaryContext();

Expand Down Expand Up @@ -98,7 +102,8 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) {
continue;
for (const IndirectCallProfile &CSP : ICSP.get()) {
StringRef TargetName = "";
const BinaryFunction *Callee = setCSIDestination(BC, CSI, CSP.Symbol);
const BinaryFunction *Callee =
setCSIDestination(BC, CSI, CSP.Symbol, BAT);
if (Callee)
TargetName = Callee->getOneName();
CSI.Count = CSP.Count;
Expand All @@ -109,7 +114,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) {
StringRef TargetName = "";
const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(Instr);
const BinaryFunction *const Callee =
setCSIDestination(BC, CSI, CalleeSymbol);
setCSIDestination(BC, CSI, CalleeSymbol, BAT);
if (Callee)
TargetName = Callee->getOneName();

Expand Down
5 changes: 3 additions & 2 deletions bolt/lib/Rewrite/BinaryPassManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,8 +377,9 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {

Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized));

Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
opts::StripRepRet);
if (BC.isX86())
Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
opts::StripRepRet);

Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
opts::ICF);
Expand Down
50 changes: 24 additions & 26 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ Error RewriteInstance::discoverStorage() {
if (Error E = SectionNameOrErr.takeError())
return E;
StringRef SectionName = SectionNameOrErr.get();
if (SectionName == ".text") {
if (SectionName == BC->getMainCodeSectionName()) {
BC->OldTextSectionAddress = Section.getAddress();
BC->OldTextSectionSize = Section.getSize();

Expand Down Expand Up @@ -1670,7 +1670,9 @@ void RewriteInstance::disassemblePLT() {
return disassemblePLTSectionAArch64(Section);
if (BC->isRISCV())
return disassemblePLTSectionRISCV(Section);
return disassemblePLTSectionX86(Section, EntrySize);
if (BC->isX86())
return disassemblePLTSectionX86(Section, EntrySize);
llvm_unreachable("Unmplemented PLT");
};

for (BinarySection &Section : BC->allocatableSections()) {
Expand Down Expand Up @@ -1864,7 +1866,8 @@ Error RewriteInstance::readSpecialSections() {
"Use -update-debug-sections to keep it.\n";
}

HasTextRelocations = (bool)BC->getUniqueSectionByName(".rela.text");
HasTextRelocations = (bool)BC->getUniqueSectionByName(
".rela" + std::string(BC->getMainCodeSectionName()));
HasSymbolTable = (bool)BC->getUniqueSectionByName(".symtab");
EHFrameSection = BC->getUniqueSectionByName(".eh_frame");
BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id");
Expand Down Expand Up @@ -2305,9 +2308,13 @@ void RewriteInstance::processRelocations() {
return;

for (const SectionRef &Section : InputFile->sections()) {
if (cantFail(Section.getRelocatedSection()) != InputFile->section_end() &&
!BinarySection(*BC, Section).isAllocatable())
readRelocations(Section);
section_iterator SecIter = cantFail(Section.getRelocatedSection());
if (SecIter == InputFile->section_end())
continue;
if (BinarySection(*BC, Section).isAllocatable())
continue;

readRelocations(Section);
}

if (NumFailedRelocations)
Expand Down Expand Up @@ -2600,7 +2607,7 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
const bool IsToCode = ReferencedSection && ReferencedSection->isText();

// Special handling of PC-relative relocations.
if (!IsAArch64 && !BC->isRISCV() && Relocation::isPCRelative(RType)) {
if (BC->isX86() && Relocation::isPCRelative(RType)) {
if (!IsFromCode && IsToCode) {
// PC-relative relocations from data to code are tricky since the
// original information is typically lost after linking, even with
Expand Down Expand Up @@ -2854,15 +2861,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
BC->isRISCV())
ForceRelocation = true;

if (IsFromCode) {
if (IsFromCode)
ContainingBF->addRelocation(Rel.getOffset(), ReferencedSymbol, RType,
Addend, ExtractedValue);
} else if (IsToCode || ForceRelocation) {
else if (IsToCode || ForceRelocation)
BC->addRelocation(Rel.getOffset(), ReferencedSymbol, RType, Addend,
ExtractedValue);
} else {
else
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation from data to data\n");
}
}

void RewriteInstance::selectFunctionsToProcess() {
Expand Down Expand Up @@ -3441,7 +3447,8 @@ void RewriteInstance::emitAndLink() {
ErrorOr<BinarySection &> TextSection =
BC->getUniqueSectionByName(BC->getMainCodeSectionName());
if (BC->HasRelocations && TextSection)
BC->renameSection(*TextSection, getOrgSecPrefix() + ".text");
BC->renameSection(*TextSection,
getOrgSecPrefix() + BC->getMainCodeSectionName());

//////////////////////////////////////////////////////////////////////////////
// Assign addresses to new sections.
Expand Down Expand Up @@ -4298,18 +4305,17 @@ RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File,
for (auto &SectionKV : OutputSections) {
ELFShdrTy &Section = SectionKV.second;

// Ignore TLS sections as they don't take any space in the file.
// Ignore NOBITS sections as they don't take any space in the file.
if (Section.sh_type == ELF::SHT_NOBITS)
continue;

// Note that address continuity is not guaranteed as sections could be
// placed in different loadable segments.
if (PrevSection &&
PrevSection->sh_offset + PrevSection->sh_size > Section.sh_offset) {
if (opts::Verbosity > 1) {
if (opts::Verbosity > 1)
BC->outs() << "BOLT-INFO: adjusting size for section "
<< PrevBinSec->getOutputName() << '\n';
}
PrevSection->sh_size = Section.sh_offset - PrevSection->sh_offset;
}

Expand Down Expand Up @@ -4417,6 +4423,7 @@ void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
raw_fd_ostream &OS = Out->os();
const ELFFile<ELFT> &Obj = File->getELFFile();

// Mapping from old section indices to new ones
std::vector<uint32_t> NewSectionIndex;
std::vector<ELFShdrTy> OutputSections =
getOutputSections(File, NewSectionIndex);
Expand All @@ -4434,10 +4441,8 @@ void RewriteInstance::patchELFSectionHeaderTable(ELFObjectFile<ELFT> *File) {
// Write all section header entries while patching section references.
for (ELFShdrTy &Section : OutputSections) {
Section.sh_link = NewSectionIndex[Section.sh_link];
if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA) {
if (Section.sh_info)
Section.sh_info = NewSectionIndex[Section.sh_info];
}
if (Section.sh_type == ELF::SHT_REL || Section.sh_type == ELF::SHT_RELA)
Section.sh_info = NewSectionIndex[Section.sh_info];
OS.write(reinterpret_cast<const char *>(&Section), sizeof(Section));
}

Expand Down Expand Up @@ -5762,10 +5767,3 @@ bool RewriteInstance::isDebugSection(StringRef SectionName) {

return false;
}

bool RewriteInstance::isKSymtabSection(StringRef SectionName) {
if (SectionName.starts_with("__ksymtab"))
return true;

return false;
}
7 changes: 6 additions & 1 deletion bolt/test/X86/bolt-address-translation-yaml.test
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,14 @@ YAML-BAT-CHECK-NEXT: - bid: 0
YAML-BAT-CHECK-NEXT: insns: 26
YAML-BAT-CHECK-NEXT: hash: 0xA900AE79CFD40000
YAML-BAT-CHECK-NEXT: succ: [ { bid: 3, cnt: 0 }, { bid: 1, cnt: 0 } ]
# Calls from no-BAT to BAT function
YAML-BAT-CHECK: - bid: 28
YAML-BAT-CHECK-NEXT: insns: 13
YAML-BAT-CHECK-NEXT: hash: 0xB2F04C1F25F00400
YAML-BAT-CHECK-NEXT: calls: [ { off: 0x21, fid: [[#SOLVECUBIC:]], cnt: 25 }, { off: 0x2D, fid: [[#]], cnt: 9 } ]
# Function covered by BAT with calls
YAML-BAT-CHECK: - name: SolveCubic
YAML-BAT-CHECK-NEXT: fid: [[#]]
YAML-BAT-CHECK-NEXT: fid: [[#SOLVECUBIC]]
YAML-BAT-CHECK-NEXT: hash: 0x6AF7E61EA3966722
YAML-BAT-CHECK-NEXT: exec: 25
YAML-BAT-CHECK-NEXT: nblocks: 15
Expand Down
2 changes: 1 addition & 1 deletion bolt/test/X86/bolt-address-translation.test
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
# CHECK: BOLT: 3 out of 7 functions were overwritten.
# CHECK: BOLT-INFO: Wrote 6 BAT maps
# CHECK: BOLT-INFO: Wrote 3 function and 58 basic block hashes
# CHECK: BOLT-INFO: BAT section size (bytes): 924
# CHECK: BOLT-INFO: BAT section size (bytes): 928
#
# usqrt mappings (hot part). We match against any key (left side containing
# the bolted binary offsets) because BOLT may change where it puts instructions
Expand Down
23 changes: 22 additions & 1 deletion bolt/test/X86/patch-entries.test
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,25 @@ REQUIRES: system-linux

RUN: %clang %cflags -no-pie -g %p/Inputs/patch-entries.c -fuse-ld=lld -o %t.exe \
RUN: -Wl,-q -I%p/../Inputs
RUN: llvm-bolt -relocs %t.exe -o %t.out --update-debug-sections --force-patch
RUN: llvm-bolt -relocs %t.exe -o %t.out --update-debug-sections --force-patch \
RUN: --enable-bat

# Check that patched functions can be disassembled (override FDE from the
# original function)
# PREAGG: B X:0 #foo.org.0# 1 0
RUN: link_fdata %s %t.out %t.preagg PREAGG
RUN: perf2bolt %t.out -p %t.preagg --pa -o %t.yaml --profile-format=yaml \
RUN: -print-disasm -print-only=foo.org.0/1 2>&1 | FileCheck %s
CHECK-NOT: BOLT-WARNING: sizes differ for function foo.org.0/1
CHECK: Binary Function "foo.org.0/1(*2)" after disassembly {

# Check the expected eh_frame contents
RUN: llvm-nm --print-size %t.out > %t.foo
RUN: llvm-objdump %t.out --dwarf=frames >> %t.foo
RUN: FileCheck %s --input-file %t.foo --check-prefix=CHECK-FOO
CHECK-FOO: 0000000000[[#%x,FOO:]] [[#%x,OPTSIZE:]] t foo
CHECK-FOO: 0000000000[[#%x,ORG:]] [[#%x,ORGSIZE:]] t foo.org.0
# patched FDE comes first
CHECK-FOO: FDE {{.*}} pc=00[[#%x,ORG]]...00[[#%x,ORG+ORGSIZE]]
# original FDE comes second
CHECK-FOO: FDE {{.*}} pc=00[[#%x,ORG]]...00[[#%x,ORG+OPTSIZE]]
85 changes: 78 additions & 7 deletions bolt/test/X86/yaml-secondary-entry-discriminator.s
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This reproduces a bug with BOLT setting incorrect discriminator for
# secondary entry points in YAML profile.
## This reproduces a bug with BOLT setting incorrect discriminator for
## secondary entry points in YAML profile.

# REQUIRES: system-linux
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
Expand All @@ -11,20 +11,20 @@
# RUN: FileCheck %s -input-file %t.yaml
# CHECK: - name: main
# CHECK-NEXT: fid: 2
# CHECK-NEXT: hash: 0xADF270D550151185
# CHECK-NEXT: hash: {{.*}}
# CHECK-NEXT: exec: 0
# CHECK-NEXT: nblocks: 4
# CHECK-NEXT: blocks:
# CHECK: - bid: 1
# CHECK-NEXT: insns: 1
# CHECK-NEXT: hash: 0x36A303CBA4360014
# CHECK-NEXT: hash: {{.*}}
# CHECK-NEXT: calls: [ { off: 0x0, fid: 1, disc: 1, cnt: 1 } ]
# CHECK: - bid: 2
# CHECK-NEXT: insns: 5
# CHECK-NEXT: hash: 0x8B2F5747CD0019
# CHECK-NEXT: hash: {{.*}}
# CHECK-NEXT: calls: [ { off: 0x0, fid: 1, disc: 1, cnt: 1, mis: 1 } ]

# Make sure that the profile is attached correctly
## Make sure that the profile is attached correctly
# RUN: llvm-bolt %t.exe -o %t.out --data %t.yaml --print-profile \
# RUN: --print-only=main | FileCheck %s --check-prefix=CHECK-CFG

Expand All @@ -33,15 +33,80 @@
# CHECK-CFG: callq *%rax # Offset: [[#]] # CallProfile: 1 (1 misses) :
# CHECK-CFG-NEXT: { secondary_entry: 1 (1 misses) }

## YAML BAT test of calling BAT secondary entry from non-BAT function
## Now force-split func and skip main (making it call secondary entries)
# RUN: llvm-bolt %t.exe -o %t.bat --data %t.fdata --funcs=func \
# RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat

## Prepare pre-aggregated profile using %t.bat
# RUN: link_fdata %s %t.bat %t.preagg PREAGG
## Strip labels used for pre-aggregated profile
# RUN: llvm-strip -NLcall -NLindcall %t.bat

## Convert pre-aggregated profile using BAT
# RUN: perf2bolt %t.bat -p %t.preagg --pa -o %t.bat.fdata -w %t.bat.yaml

## Convert BAT fdata into YAML
# RUN: llvm-bolt %t.exe -data %t.bat.fdata -w %t.bat.fdata-yaml -o /dev/null

## Check fdata YAML - make sure that a direct call has discriminator field
# RUN: FileCheck %s --input-file %t.bat.fdata-yaml -check-prefix CHECK-BAT-YAML

## Check BAT YAML - make sure that a direct call has discriminator field
# RUN: FileCheck %s --input-file %t.bat.yaml --check-prefix CHECK-BAT-YAML

## YAML BAT test of calling BAT secondary entry from BAT function
# RUN: llvm-bolt %t.exe -o %t.bat2 --data %t.fdata --funcs=main,func \
# RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat

## Prepare pre-aggregated profile using %t.bat
# RUN: link_fdata %s %t.bat2 %t.preagg2 PREAGG2

## Strip labels used for pre-aggregated profile
# RUN: llvm-strip -NLcall -NLindcall %t.bat2

## Convert pre-aggregated profile using BAT
# RUN: perf2bolt %t.bat2 -p %t.preagg2 --pa -o %t.bat2.fdata -w %t.bat2.yaml

## Convert BAT fdata into YAML
# RUN: llvm-bolt %t.exe -data %t.bat2.fdata -w %t.bat2.fdata-yaml -o /dev/null

## Check fdata YAML - make sure that a direct call has discriminator field
# RUN: FileCheck %s --input-file %t.bat2.fdata-yaml -check-prefix CHECK-BAT-YAML

## Check BAT YAML - make sure that a direct call has discriminator field
# RUN: FileCheck %s --input-file %t.bat2.yaml --check-prefix CHECK-BAT-YAML

# CHECK-BAT-YAML: - name: main
# CHECK-BAT-YAML-NEXT: fid: [[#]]
# CHECK-BAT-YAML-NEXT: hash: 0xADF270D550151185
# CHECK-BAT-YAML-NEXT: exec: 0
# CHECK-BAT-YAML-NEXT: nblocks: 4
# CHECK-BAT-YAML-NEXT: blocks:
# CHECK-BAT-YAML: - bid: 1
# CHECK-BAT-YAML-NEXT: insns: [[#]]
# CHECK-BAT-YAML-NEXT: hash: 0x36A303CBA4360018
# CHECK-BAT-YAML-NEXT: calls: [ { off: 0x0, fid: [[#]], disc: 1, cnt: 1

.globl func
.type func, @function
func:
# FDATA: 0 [unknown] 0 1 func 0 1 0
# PREAGG: B X:0 #func# 1 1
# PREAGG2: B X:0 #func# 1 1
.cfi_startproc
pushq %rbp
movq %rsp, %rbp
## Placeholder code to make splitting profitable
.rept 5
testq %rax, %rax
.endr
.globl secondary_entry
secondary_entry:
## Placeholder code to make splitting profitable
.rept 5
testq %rax, %rax
.endr
popq %rbp
retq
nopl (%rax)
Expand All @@ -58,17 +123,23 @@ main:
movl $0, -4(%rbp)
testq %rax, %rax
jne Lindcall
.globl Lcall
Lcall:
call secondary_entry
# FDATA: 1 main #Lcall# 1 secondary_entry 0 1 1
# PREAGG: B #Lcall# #secondary_entry# 1 1
# PREAGG2: B #main.cold.0# #func.cold.0# 1 1
.globl Lindcall
Lindcall:
callq *%rax
# FDATA: 1 main #Lindcall# 1 secondary_entry 0 1 1
# PREAGG: B #Lindcall# #secondary_entry# 1 1
# PREAGG2: B #main.cold.1# #func.cold.0# 1 1
xorl %eax, %eax
addq $16, %rsp
popq %rbp
retq
# For relocations against .text
## For relocations against .text
call exit
.cfi_endproc
.size main, .-main
164 changes: 164 additions & 0 deletions bolt/test/runtime/X86/jt-confusion.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
# REQUIRES: system-linux

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: %clang %cflags -no-pie -nostartfiles -nostdlib -lc %t.o -o %t.exe -Wl,-q

# RUN: llvm-bolt %t.exe -o %t.exe.bolt --relocs=1 --lite=0

# RUN: %t.exe.bolt

## Check that BOLT's jump table detection diffrentiates between
## __builtin_unreachable() targets and function pointers.

## The test case was built from the following two source files and
## modiffied for standalone build. main became _start, etc.
## $ $(CC) a.c -O1 -S -o a.s
## $ $(CC) b.c -O0 -S -o b.s

## a.c:

## typedef int (*fptr)(int);
## void check_fptr(fptr, int);
##
## int foo(int a) {
## check_fptr(foo, 0);
## switch (a) {
## default:
## __builtin_unreachable();
## case 0:
## return 3;
## case 1:
## return 5;
## case 2:
## return 7;
## case 3:
## return 11;
## case 4:
## return 13;
## case 5:
## return 17;
## }
## return 0;
## }
##
## int main(int argc) {
## check_fptr(main, 1);
## return foo(argc);
## }
##
## const fptr funcs[2] = {foo, main};

## b.c.:

## typedef int (*fptr)(int);
## extern const fptr funcs[2];
##
## #define assert(C) { if (!(C)) (*(unsigned long long *)0) = 0; }
## void check_fptr(fptr f, int i) {
## assert(f == funcs[i]);
## }


.text
.globl foo
.type foo, @function
foo:
.LFB0:
.cfi_startproc
pushq %rbx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
movl %edi, %ebx
movl $0, %esi
movl $foo, %edi
call check_fptr
movl %ebx, %ebx
jmp *.L4(,%rbx,8)
.L8:
movl $5, %eax
jmp .L1
.L7:
movl $7, %eax
jmp .L1
.L6:
movl $11, %eax
jmp .L1
.L5:
movl $13, %eax
jmp .L1
.L3:
movl $17, %eax
jmp .L1
.L10:
movl $3, %eax
.L1:
popq %rbx
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE0:
.size foo, .-foo
.globl _start
.type _start, @function
_start:
.LFB1:
.cfi_startproc
pushq %rbx
.cfi_def_cfa_offset 16
.cfi_offset 3, -16
movl %edi, %ebx
movl $1, %esi
movl $_start, %edi
call check_fptr
movl $1, %edi
call foo
popq %rbx
.cfi_def_cfa_offset 8
callq exit@PLT
.cfi_endproc
.LFE1:
.size _start, .-_start
.globl check_fptr
.type check_fptr, @function
check_fptr:
.LFB2:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
movq %rdi, -8(%rbp)
movl %esi, -12(%rbp)
movl -12(%rbp), %eax
cltq
movq funcs(,%rax,8), %rax
cmpq %rax, -8(%rbp)
je .L33
movl $0, %eax
movq $0, (%rax)
.L33:
nop
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc

.section .rodata
.align 8
.align 4
.L4:
.quad .L10
.quad .L8
.quad .L7
.quad .L6
.quad .L5
.quad .L3

.globl funcs
.type funcs, @object
.size funcs, 16
funcs:
.quad foo
.quad _start
12 changes: 6 additions & 6 deletions clang-tools-extra/clang-tidy/add_new_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def adapt_module(module_path, module, check_name, check_name_camel):
f.write(check_decl)
else:
match = re.search(
'registerCheck<(.*)> *\( *(?:"([^"]*)")?', line
r'registerCheck<(.*)> *\( *(?:"([^"]*)")?', line
)
prev_line = None
if match:
Expand Down Expand Up @@ -383,7 +383,7 @@ def filename_from_module(module_name, check_name):
if stmt_start_pos == -1:
return ""
stmt = code[stmt_start_pos + 1 : stmt_end_pos]
matches = re.search('registerCheck<([^>:]*)>\(\s*"([^"]*)"\s*\)', stmt)
matches = re.search(r'registerCheck<([^>:]*)>\(\s*"([^"]*)"\s*\)', stmt)
if matches and matches[2] == full_check_name:
class_name = matches[1]
if "::" in class_name:
Expand All @@ -401,8 +401,8 @@ def filename_from_module(module_name, check_name):
# Examine code looking for a c'tor definition to get the base class name.
def get_base_class(code, check_file):
check_class_name = os.path.splitext(os.path.basename(check_file))[0]
ctor_pattern = check_class_name + "\([^:]*\)\s*:\s*([A-Z][A-Za-z0-9]*Check)\("
matches = re.search("\s+" + check_class_name + "::" + ctor_pattern, code)
ctor_pattern = check_class_name + r"\([^:]*\)\s*:\s*([A-Z][A-Za-z0-9]*Check)\("
matches = re.search(r"\s+" + check_class_name + "::" + ctor_pattern, code)

# The constructor might be inline in the header.
if not matches:
Expand Down Expand Up @@ -476,7 +476,7 @@ def process_doc(doc_file):
# Orphan page, don't list it.
return "", ""

match = re.search(".*:http-equiv=refresh: \d+;URL=(.*).html(.*)", content)
match = re.search(r".*:http-equiv=refresh: \d+;URL=(.*).html(.*)", content)
# Is it a redirect?
return check_name, match

Expand Down Expand Up @@ -505,7 +505,7 @@ def format_link_alias(doc_file):
ref_begin = ""
ref_end = "_"
else:
redirect_parts = re.search("^\.\./([^/]*)/([^/]*)$", match.group(1))
redirect_parts = re.search(r"^\.\./([^/]*)/([^/]*)$", match.group(1))
title = redirect_parts[1] + "-" + redirect_parts[2]
target = redirect_parts[1] + "/" + redirect_parts[2]
autofix = has_auto_fix(title)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ void ForwardingReferenceOverloadCheck::registerMatchers(MatchFinder *Finder) {

DeclarationMatcher FindOverload =
cxxConstructorDecl(
hasParameter(0, ForwardingRefParm),
hasParameter(0, ForwardingRefParm), unless(isDeleted()),
unless(hasAnyParameter(
// No warning: enable_if as constructor parameter.
parmVarDecl(hasType(isEnableIf())))),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
#include "MissingStdForwardCheck.h"
#include "../utils/Matchers.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ExprConcepts.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Basic/IdentifierTable.h"

using namespace clang::ast_matchers;

Expand Down Expand Up @@ -79,6 +79,11 @@ AST_MATCHER_P(LambdaExpr, hasCaptureDefaultKind, LambdaCaptureDefault, Kind) {
return Node.getCaptureDefault() == Kind;
}

AST_MATCHER(VarDecl, hasIdentifier) {
const IdentifierInfo *ID = Node.getIdentifier();
return ID != NULL && !ID->isPlaceholder();
}

} // namespace

void MissingStdForwardCheck::registerMatchers(MatchFinder *Finder) {
Expand Down Expand Up @@ -125,12 +130,14 @@ void MissingStdForwardCheck::registerMatchers(MatchFinder *Finder) {
hasAncestor(expr(hasUnevaluatedContext())))));

Finder->addMatcher(
parmVarDecl(parmVarDecl().bind("param"), isTemplateTypeParameter(),
hasAncestor(functionDecl().bind("func")),
hasAncestor(functionDecl(
isDefinition(), equalsBoundNode("func"), ToParam,
unless(anyOf(isDeleted(), hasDescendant(std::move(
ForwardCallMatcher))))))),
parmVarDecl(
parmVarDecl().bind("param"), hasIdentifier(),
unless(hasAttr(attr::Kind::Unused)), isTemplateTypeParameter(),
hasAncestor(functionDecl().bind("func")),
hasAncestor(functionDecl(
isDefinition(), equalsBoundNode("func"), ToParam,
unless(anyOf(isDeleted(),
hasDescendant(std::move(ForwardCallMatcher))))))),
this);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ void ProTypeMemberInitCheck::checkMissingMemberInitializer(
if (!F->hasInClassInitializer() &&
utils::type_traits::isTriviallyDefaultConstructible(F->getType(),
Context) &&
!isEmpty(Context, F->getType()) && !F->isUnnamedBitfield() &&
!isEmpty(Context, F->getType()) && !F->isUnnamedBitField() &&
!AnyMemberHasInitPerUnion)
FieldsToInit.insert(F);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ IgnoredRemoveResultCheck::IgnoredRemoveResultCheck(llvm::StringRef Name,
ClangTidyContext *Context)
: UnusedReturnValueCheck(Name, Context,
{
"::std::remove",
"::std::remove_if",
"::std::unique",
"::std::remove$",
"::std::remove_if$",
"::std::unique$",
}) {
// The constructor for ClangTidyCheck needs to have been called
// before we can access options via Options.get().
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,8 @@ namespace clang::tidy::linuxkernel {
/// linux/err.h. Also checks to see if code uses the results from functions that
/// directly return a value from one of these error functions.
///
/// This is important in the Linux kernel because ERR_PTR, PTR_ERR, IS_ERR,
/// IS_ERR_OR_NULL, ERR_CAST, and PTR_ERR_OR_ZERO return values must be checked,
/// since positive pointers and negative error codes are being used in the same
/// context. These functions are marked with
/// __attribute__((warn_unused_result)), but some kernel versions do not have
/// this warning enabled for clang.
///
/// For the user-facing documentation see:
/// http://clang.llvm.org/extra/clang-tidy/checks/linuxkernel/must-use-errs.html
/// http://clang.llvm.org/extra/clang-tidy/checks/linuxkernel/must-check-errs.html
class MustCheckErrsCheck : public ClangTidyCheck {
public:
MustCheckErrsCheck(StringRef Name, ClangTidyContext *Context)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ getAllNamedFields(const CXXRecordDecl *Record) {
std::set<const FieldDecl *> Result;
for (const auto *Field : Record->fields()) {
// Static data members are not in this range.
if (Field->isUnnamedBitfield())
if (Field->isUnnamedBitField())
continue;
Result.insert(Field);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include <array>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <initializer_list>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ void UnnecessaryValueParamCheck::check(const MatchFinder::MatchResult &Result) {

TraversalKindScope RAII(*Result.Context, TK_AsIs);

FunctionParmMutationAnalyzer &Analyzer =
MutationAnalyzers.try_emplace(Function, *Function, *Result.Context)
.first->second;
if (Analyzer.isMutated(Param))
FunctionParmMutationAnalyzer *Analyzer =
FunctionParmMutationAnalyzer::getFunctionParmMutationAnalyzer(
*Function, *Result.Context, MutationAnalyzerCache);
if (Analyzer->isMutated(Param))
return;

const bool IsConstQualified =
Expand Down Expand Up @@ -169,7 +169,7 @@ void UnnecessaryValueParamCheck::storeOptions(
}

void UnnecessaryValueParamCheck::onEndOfTranslationUnit() {
MutationAnalyzers.clear();
MutationAnalyzerCache.clear();
}

void UnnecessaryValueParamCheck::handleMoveFix(const ParmVarDecl &Var,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ class UnnecessaryValueParamCheck : public ClangTidyCheck {
void handleMoveFix(const ParmVarDecl &Var, const DeclRefExpr &CopyArgument,
const ASTContext &Context);

llvm::DenseMap<const FunctionDecl *, FunctionParmMutationAnalyzer>
MutationAnalyzers;
ExprMutationAnalyzer::Memoized MutationAnalyzerCache;
utils::IncludeInserter Inserter;
const std::vector<StringRef> AllowedTypes;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,18 @@
//===----------------------------------------------------------------------===//

#include "AvoidReturnWithVoidValueCheck.h"
#include "clang/AST/Stmt.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "../utils/BracesAroundStatement.h"
#include "../utils/LexerUtils.h"

using namespace clang::ast_matchers;

namespace clang::tidy::readability {

static constexpr auto IgnoreMacrosName = "IgnoreMacros";
static constexpr auto IgnoreMacrosDefault = true;
static constexpr char IgnoreMacrosName[] = "IgnoreMacros";
static const bool IgnoreMacrosDefault = true;

static constexpr auto StrictModeName = "StrictMode";
static constexpr auto StrictModeDefault = true;
static constexpr char StrictModeName[] = "StrictMode";
static const bool StrictModeDefault = true;

AvoidReturnWithVoidValueCheck::AvoidReturnWithVoidValueCheck(
StringRef Name, ClangTidyContext *Context)
Expand All @@ -32,7 +31,10 @@ void AvoidReturnWithVoidValueCheck::registerMatchers(MatchFinder *Finder) {
Finder->addMatcher(
returnStmt(
hasReturnValue(allOf(hasType(voidType()), unless(initListExpr()))),
optionally(hasParent(compoundStmt().bind("compound_parent"))))
optionally(hasParent(
compoundStmt(
optionally(hasParent(functionDecl().bind("function_parent"))))
.bind("compound_parent"))))
.bind("void_return"),
this);
}
Expand All @@ -42,10 +44,30 @@ void AvoidReturnWithVoidValueCheck::check(
const auto *VoidReturn = Result.Nodes.getNodeAs<ReturnStmt>("void_return");
if (IgnoreMacros && VoidReturn->getBeginLoc().isMacroID())
return;
if (!StrictMode && !Result.Nodes.getNodeAs<CompoundStmt>("compound_parent"))
const auto *SurroundingBlock =
Result.Nodes.getNodeAs<CompoundStmt>("compound_parent");
if (!StrictMode && !SurroundingBlock)
return;
diag(VoidReturn->getBeginLoc(), "return statement within a void function "
"should not have a specified return value");
DiagnosticBuilder Diag = diag(VoidReturn->getBeginLoc(),
"return statement within a void function "
"should not have a specified return value");
const SourceLocation SemicolonPos = utils::lexer::findNextTerminator(
VoidReturn->getEndLoc(), *Result.SourceManager, getLangOpts());
if (SemicolonPos.isInvalid())
return;
if (!SurroundingBlock) {
const auto BraceInsertionHints = utils::getBraceInsertionsHints(
VoidReturn, getLangOpts(), *Result.SourceManager,
VoidReturn->getBeginLoc());
if (BraceInsertionHints)
Diag << BraceInsertionHints.openingBraceFixIt()
<< BraceInsertionHints.closingBraceFixIt();
}
Diag << FixItHint::CreateRemoval(VoidReturn->getReturnLoc());
if (!Result.Nodes.getNodeAs<FunctionDecl>("function_parent") ||
SurroundingBlock->body_back() != VoidReturn)
Diag << FixItHint::CreateInsertion(SemicolonPos.getLocWithOffset(1),
" return;", true);
}

void AvoidReturnWithVoidValueCheck::storeOptions(
Expand Down
157 changes: 25 additions & 132 deletions clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "BracesAroundStatementsCheck.h"
#include "../utils/BracesAroundStatement.h"
#include "../utils/LexerUtils.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchers.h"
Expand All @@ -17,12 +18,10 @@ using namespace clang::ast_matchers;
namespace clang::tidy::readability {

static tok::TokenKind getTokenKind(SourceLocation Loc, const SourceManager &SM,
const ASTContext *Context) {
const LangOptions &LangOpts) {
Token Tok;
SourceLocation Beginning =
Lexer::GetBeginningOfToken(Loc, SM, Context->getLangOpts());
const bool Invalid =
Lexer::getRawToken(Beginning, Tok, SM, Context->getLangOpts());
SourceLocation Beginning = Lexer::GetBeginningOfToken(Loc, SM, LangOpts);
const bool Invalid = Lexer::getRawToken(Beginning, Tok, SM, LangOpts);
assert(!Invalid && "Expected a valid token.");

if (Invalid)
Expand All @@ -33,64 +32,21 @@ static tok::TokenKind getTokenKind(SourceLocation Loc, const SourceManager &SM,

static SourceLocation
forwardSkipWhitespaceAndComments(SourceLocation Loc, const SourceManager &SM,
const ASTContext *Context) {
const LangOptions &LangOpts) {
assert(Loc.isValid());
for (;;) {
while (isWhitespace(*SM.getCharacterData(Loc)))
Loc = Loc.getLocWithOffset(1);

tok::TokenKind TokKind = getTokenKind(Loc, SM, Context);
tok::TokenKind TokKind = getTokenKind(Loc, SM, LangOpts);
if (TokKind != tok::comment)
return Loc;

// Fast-forward current token.
Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, Context->getLangOpts());
Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);
}
}

static SourceLocation findEndLocation(const Stmt &S, const SourceManager &SM,
const ASTContext *Context) {
SourceLocation Loc =
utils::lexer::getUnifiedEndLoc(S, SM, Context->getLangOpts());
if (!Loc.isValid())
return Loc;

// Start searching right after S.
Loc = Loc.getLocWithOffset(1);

for (;;) {
assert(Loc.isValid());
while (isHorizontalWhitespace(*SM.getCharacterData(Loc))) {
Loc = Loc.getLocWithOffset(1);
}

if (isVerticalWhitespace(*SM.getCharacterData(Loc))) {
// EOL, insert brace before.
break;
}
tok::TokenKind TokKind = getTokenKind(Loc, SM, Context);
if (TokKind != tok::comment) {
// Non-comment token, insert brace before.
break;
}

SourceLocation TokEndLoc =
Lexer::getLocForEndOfToken(Loc, 0, SM, Context->getLangOpts());
SourceRange TokRange(Loc, TokEndLoc);
StringRef Comment = Lexer::getSourceText(
CharSourceRange::getTokenRange(TokRange), SM, Context->getLangOpts());
if (Comment.starts_with("/*") && Comment.contains('\n')) {
// Multi-line block comment, insert brace before.
break;
}
// else: Trailing comment, insert brace after the newline.

// Fast-forward current token.
Loc = TokEndLoc;
}
return Loc;
}

BracesAroundStatementsCheck::BracesAroundStatementsCheck(
StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
Expand Down Expand Up @@ -124,7 +80,7 @@ void BracesAroundStatementsCheck::check(
} else if (const auto *S = Result.Nodes.getNodeAs<DoStmt>("do")) {
checkStmt(Result, S->getBody(), S->getDoLoc(), S->getWhileLoc());
} else if (const auto *S = Result.Nodes.getNodeAs<WhileStmt>("while")) {
SourceLocation StartLoc = findRParenLoc(S, SM, Context);
SourceLocation StartLoc = findRParenLoc(S, SM, Context->getLangOpts());
if (StartLoc.isInvalid())
return;
checkStmt(Result, S->getBody(), StartLoc);
Expand All @@ -133,7 +89,7 @@ void BracesAroundStatementsCheck::check(
if (S->isConsteval())
return;

SourceLocation StartLoc = findRParenLoc(S, SM, Context);
SourceLocation StartLoc = findRParenLoc(S, SM, Context->getLangOpts());
if (StartLoc.isInvalid())
return;
if (ForceBracesStmts.erase(S))
Expand All @@ -156,7 +112,7 @@ template <typename IfOrWhileStmt>
SourceLocation
BracesAroundStatementsCheck::findRParenLoc(const IfOrWhileStmt *S,
const SourceManager &SM,
const ASTContext *Context) {
const LangOptions &LangOpts) {
// Skip macros.
if (S->getBeginLoc().isMacroID())
return {};
Expand All @@ -170,14 +126,14 @@ BracesAroundStatementsCheck::findRParenLoc(const IfOrWhileStmt *S,
}

SourceLocation PastCondEndLoc =
Lexer::getLocForEndOfToken(CondEndLoc, 0, SM, Context->getLangOpts());
Lexer::getLocForEndOfToken(CondEndLoc, 0, SM, LangOpts);
if (PastCondEndLoc.isInvalid())
return {};
SourceLocation RParenLoc =
forwardSkipWhitespaceAndComments(PastCondEndLoc, SM, Context);
forwardSkipWhitespaceAndComments(PastCondEndLoc, SM, LangOpts);
if (RParenLoc.isInvalid())
return {};
tok::TokenKind TokKind = getTokenKind(RParenLoc, SM, Context);
tok::TokenKind TokKind = getTokenKind(RParenLoc, SM, LangOpts);
if (TokKind != tok::r_paren)
return {};
return RParenLoc;
Expand All @@ -188,86 +144,23 @@ BracesAroundStatementsCheck::findRParenLoc(const IfOrWhileStmt *S,
bool BracesAroundStatementsCheck::checkStmt(
const MatchFinder::MatchResult &Result, const Stmt *S,
SourceLocation StartLoc, SourceLocation EndLocHint) {

while (const auto *AS = dyn_cast<AttributedStmt>(S))
S = AS->getSubStmt();

const SourceManager &SM = *Result.SourceManager;
const ASTContext *Context = Result.Context;

// 1) If there's a corresponding "else" or "while", the check inserts "} "
// right before that token.
// 2) If there's a multi-line block comment starting on the same line after
// the location we're inserting the closing brace at, or there's a non-comment
// token, the check inserts "\n}" right before that token.
// 3) Otherwise the check finds the end of line (possibly after some block or
// line comments) and inserts "\n}" right before that EOL.
if (!S || isa<CompoundStmt>(S)) {
// Already inside braces.
return false;
}

// When TreeTransform, Stmt in constexpr IfStmt will be transform to NullStmt.
// This NullStmt can be detected according to beginning token.
const SourceLocation StmtBeginLoc = S->getBeginLoc();
if (isa<NullStmt>(S) && StmtBeginLoc.isValid() &&
getTokenKind(StmtBeginLoc, SM, Context) == tok::l_brace)
return false;

if (StartLoc.isInvalid())
return false;

// Convert StartLoc to file location, if it's on the same macro expansion
// level as the start of the statement. We also need file locations for
// Lexer::getLocForEndOfToken working properly.
StartLoc = Lexer::makeFileCharRange(
CharSourceRange::getCharRange(StartLoc, S->getBeginLoc()), SM,
Context->getLangOpts())
.getBegin();
if (StartLoc.isInvalid())
return false;
StartLoc =
Lexer::getLocForEndOfToken(StartLoc, 0, SM, Context->getLangOpts());

// StartLoc points at the location of the opening brace to be inserted.
SourceLocation EndLoc;
std::string ClosingInsertion;
if (EndLocHint.isValid()) {
EndLoc = EndLocHint;
ClosingInsertion = "} ";
} else {
EndLoc = findEndLocation(*S, SM, Context);
ClosingInsertion = "\n}";
}

assert(StartLoc.isValid());

// Don't require braces for statements spanning less than certain number of
// lines.
if (ShortStatementLines && !ForceBracesStmts.erase(S)) {
unsigned StartLine = SM.getSpellingLineNumber(StartLoc);
unsigned EndLine = SM.getSpellingLineNumber(EndLoc);
if (EndLine - StartLine < ShortStatementLines)
const auto BraceInsertionHints = utils::getBraceInsertionsHints(
S, Result.Context->getLangOpts(), *Result.SourceManager, StartLoc,
EndLocHint);
if (BraceInsertionHints) {
if (ShortStatementLines && !ForceBracesStmts.erase(S) &&
BraceInsertionHints.resultingCompoundLineExtent(*Result.SourceManager) <
ShortStatementLines)
return false;
auto Diag = diag(BraceInsertionHints.DiagnosticPos,
"statement should be inside braces");
if (BraceInsertionHints.offersFixIts())
Diag << BraceInsertionHints.openingBraceFixIt()
<< BraceInsertionHints.closingBraceFixIt();
}

auto Diag = diag(StartLoc, "statement should be inside braces");

// Change only if StartLoc and EndLoc are on the same macro expansion level.
// This will also catch invalid EndLoc.
// Example: LLVM_DEBUG( for(...) do_something() );
// In this case fix-it cannot be provided as the semicolon which is not
// visible here is part of the macro. Adding braces here would require adding
// another semicolon.
if (Lexer::makeFileCharRange(
CharSourceRange::getTokenRange(SourceRange(
SM.getSpellingLoc(StartLoc), SM.getSpellingLoc(EndLoc))),
SM, Context->getLangOpts())
.isInvalid())
return false;

Diag << FixItHint::CreateInsertion(StartLoc, " {")
<< FixItHint::CreateInsertion(EndLoc, ClosingInsertion);
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class BracesAroundStatementsCheck : public ClangTidyCheck {
SourceLocation EndLocHint = SourceLocation());
template <typename IfOrWhileStmt>
SourceLocation findRParenLoc(const IfOrWhileStmt *S, const SourceManager &SM,
const ASTContext *Context);
const LangOptions &LangOpts);
std::optional<TraversalKind> getCheckTraversalKind() const override {
return TK_IgnoreUnlessSpelledInSource;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ void DuplicateIncludeCallbacks::InclusionDirective(
bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
StringRef SearchPath, StringRef RelativePath, const Module *SuggestedModule,
bool ModuleImported, SrcMgr::CharacteristicKind FileType) {
// Skip includes behind macros
if (FilenameRange.getBegin().isMacroID() ||
FilenameRange.getEnd().isMacroID())
return;
if (llvm::is_contained(Files.back(), FileName)) {
// We want to delete the entire line, so make sure that [Start,End] covers
// everything.
Expand Down
168 changes: 168 additions & 0 deletions clang-tools-extra/clang-tidy/utils/BracesAroundStatement.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
//===--- BracesAroundStatement.cpp - clang-tidy -------- ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file provides utilities to put braces around a statement.
///
//===----------------------------------------------------------------------===//

#include "BracesAroundStatement.h"
#include "../utils/LexerUtils.h"
#include "LexerUtils.h"
#include "clang/AST/ASTContext.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Lex/Lexer.h"

namespace clang::tidy::utils {

BraceInsertionHints::operator bool() const { return DiagnosticPos.isValid(); }

bool BraceInsertionHints::offersFixIts() const {
return OpeningBracePos.isValid() && ClosingBracePos.isValid();
}

unsigned BraceInsertionHints::resultingCompoundLineExtent(
const SourceManager &SourceMgr) const {
return SourceMgr.getSpellingLineNumber(ClosingBracePos) -
SourceMgr.getSpellingLineNumber(OpeningBracePos);
}

FixItHint BraceInsertionHints::openingBraceFixIt() const {
return OpeningBracePos.isValid()
? FixItHint::CreateInsertion(OpeningBracePos, " {")
: FixItHint();
}

FixItHint BraceInsertionHints::closingBraceFixIt() const {
return ClosingBracePos.isValid()
? FixItHint::CreateInsertion(ClosingBracePos, ClosingBrace)
: FixItHint();
}

static tok::TokenKind getTokenKind(SourceLocation Loc, const SourceManager &SM,
const LangOptions &LangOpts) {
Token Tok;
SourceLocation Beginning = Lexer::GetBeginningOfToken(Loc, SM, LangOpts);
const bool Invalid = Lexer::getRawToken(Beginning, Tok, SM, LangOpts);
assert(!Invalid && "Expected a valid token.");

if (Invalid)
return tok::NUM_TOKENS;

return Tok.getKind();
}

static SourceLocation findEndLocation(const Stmt &S, const SourceManager &SM,
const LangOptions &LangOpts) {
SourceLocation Loc = lexer::getUnifiedEndLoc(S, SM, LangOpts);
if (!Loc.isValid())
return Loc;

// Start searching right after S.
Loc = Loc.getLocWithOffset(1);

for (;;) {
assert(Loc.isValid());
while (isHorizontalWhitespace(*SM.getCharacterData(Loc))) {
Loc = Loc.getLocWithOffset(1);
}

if (isVerticalWhitespace(*SM.getCharacterData(Loc))) {
// EOL, insert brace before.
break;
}
tok::TokenKind TokKind = getTokenKind(Loc, SM, LangOpts);
if (TokKind != tok::comment) {
// Non-comment token, insert brace before.
break;
}

SourceLocation TokEndLoc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);
SourceRange TokRange(Loc, TokEndLoc);
StringRef Comment = Lexer::getSourceText(
CharSourceRange::getTokenRange(TokRange), SM, LangOpts);
if (Comment.starts_with("/*") && Comment.contains('\n')) {
// Multi-line block comment, insert brace before.
break;
}
// else: Trailing comment, insert brace after the newline.

// Fast-forward current token.
Loc = TokEndLoc;
}
return Loc;
}

BraceInsertionHints getBraceInsertionsHints(const Stmt *const S,
const LangOptions &LangOpts,
const SourceManager &SM,
SourceLocation StartLoc,
SourceLocation EndLocHint) {
// 1) If there's a corresponding "else" or "while", the check inserts "} "
// right before that token.
// 2) If there's a multi-line block comment starting on the same line after
// the location we're inserting the closing brace at, or there's a non-comment
// token, the check inserts "\n}" right before that token.
// 3) Otherwise the check finds the end of line (possibly after some block or
// line comments) and inserts "\n}" right before that EOL.
if (!S || isa<CompoundStmt>(S)) {
// Already inside braces.
return {};
}

// When TreeTransform, Stmt in constexpr IfStmt will be transform to NullStmt.
// This NullStmt can be detected according to beginning token.
const SourceLocation StmtBeginLoc = S->getBeginLoc();
if (isa<NullStmt>(S) && StmtBeginLoc.isValid() &&
getTokenKind(StmtBeginLoc, SM, LangOpts) == tok::l_brace)
return {};

if (StartLoc.isInvalid())
return {};

// Convert StartLoc to file location, if it's on the same macro expansion
// level as the start of the statement. We also need file locations for
// Lexer::getLocForEndOfToken working properly.
StartLoc = Lexer::makeFileCharRange(
CharSourceRange::getCharRange(StartLoc, S->getBeginLoc()), SM,
LangOpts)
.getBegin();
if (StartLoc.isInvalid())
return {};
StartLoc = Lexer::getLocForEndOfToken(StartLoc, 0, SM, LangOpts);

// StartLoc points at the location of the opening brace to be inserted.
SourceLocation EndLoc;
std::string ClosingInsertion;
if (EndLocHint.isValid()) {
EndLoc = EndLocHint;
ClosingInsertion = "} ";
} else {
EndLoc = findEndLocation(*S, SM, LangOpts);
ClosingInsertion = "\n}";
}

assert(StartLoc.isValid());

// Change only if StartLoc and EndLoc are on the same macro expansion level.
// This will also catch invalid EndLoc.
// Example: LLVM_DEBUG( for(...) do_something() );
// In this case fix-it cannot be provided as the semicolon which is not
// visible here is part of the macro. Adding braces here would require adding
// another semicolon.
if (Lexer::makeFileCharRange(
CharSourceRange::getTokenRange(SourceRange(
SM.getSpellingLoc(StartLoc), SM.getSpellingLoc(EndLoc))),
SM, LangOpts)
.isInvalid())
return {StartLoc};
return {StartLoc, EndLoc, ClosingInsertion};
}

} // namespace clang::tidy::utils
75 changes: 75 additions & 0 deletions clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
//===--- BracesAroundStatement.h - clang-tidy ------- -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file provides utilities to put braces around a statement.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/Stmt.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"

namespace clang::tidy::utils {

/// A provider of fix-it hints to insert opening and closing braces. An instance
/// of this type is the result of calling `getBraceInsertionsHints` below.
struct BraceInsertionHints {
/// The position of a potential diagnostic. It coincides with the position of
/// the opening brace to insert, but can also just be the place to show a
/// diagnostic in case braces cannot be inserted automatically.
SourceLocation DiagnosticPos;

/// Constructor for a no-hint.
BraceInsertionHints() = default;

/// Constructor for a valid hint that cannot insert braces automatically.
BraceInsertionHints(SourceLocation DiagnosticPos)
: DiagnosticPos(DiagnosticPos) {}

/// Constructor for a hint offering fix-its for brace insertion. Both
/// positions must be valid.
BraceInsertionHints(SourceLocation OpeningBracePos,
SourceLocation ClosingBracePos, std::string ClosingBrace)
: DiagnosticPos(OpeningBracePos), OpeningBracePos(OpeningBracePos),
ClosingBracePos(ClosingBracePos), ClosingBrace(ClosingBrace) {
assert(offersFixIts());
}

/// Indicates whether the hint provides at least the position of a diagnostic.
operator bool() const;

/// Indicates whether the hint provides fix-its to insert braces.
bool offersFixIts() const;

/// The number of lines between the inserted opening brace and its closing
/// counterpart.
unsigned resultingCompoundLineExtent(const SourceManager &SourceMgr) const;

/// Fix-it to insert an opening brace.
FixItHint openingBraceFixIt() const;

/// Fix-it to insert a closing brace.
FixItHint closingBraceFixIt() const;

private:
SourceLocation OpeningBracePos;
SourceLocation ClosingBracePos;
std::string ClosingBrace;
};

/// Create fix-it hints for braces that wrap the given statement when applied.
/// The algorithm computing them respects comment before and after the statement
/// and adds line breaks before the braces accordingly.
BraceInsertionHints
getBraceInsertionsHints(const Stmt *const S, const LangOptions &LangOpts,
const SourceManager &SM, SourceLocation StartLoc,
SourceLocation EndLocHint = SourceLocation());

} // namespace clang::tidy::utils
1 change: 1 addition & 0 deletions clang-tools-extra/clang-tidy/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
add_clang_library(clangTidyUtils
Aliasing.cpp
ASTUtils.cpp
BracesAroundStatement.cpp
DeclRefExprUtils.cpp
DesignatedInitializers.cpp
ExceptionAnalyzer.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ ExceptionSpecAnalyzer::analyzeRecord(const CXXRecordDecl *RecordDecl,
}

for (const auto *FDecl : RecordDecl->fields())
if (!FDecl->isInvalidDecl() && !FDecl->isUnnamedBitfield()) {
if (!FDecl->isInvalidDecl() && !FDecl->isUnnamedBitField()) {
State Result = analyzeFieldDecl(FDecl, Kind);
if (Result == State::Throwing || Result == State::Unknown)
return Result;
Expand Down
68 changes: 45 additions & 23 deletions clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,12 @@ struct DenseMapInfo<clang::tidy::RenamerClangTidyCheck::NamingCheckId> {
using NamingCheckId = clang::tidy::RenamerClangTidyCheck::NamingCheckId;

static inline NamingCheckId getEmptyKey() {
return {DenseMapInfo<clang::SourceLocation>::getEmptyKey(),
"EMPTY"};
return {DenseMapInfo<clang::SourceLocation>::getEmptyKey(), "EMPTY"};
}

static inline NamingCheckId getTombstoneKey() {
return {DenseMapInfo<clang::SourceLocation>::getTombstoneKey(),
"TOMBSTONE"};
"TOMBSTONE"};
}

static unsigned getHashValue(NamingCheckId Val) {
Expand Down Expand Up @@ -170,14 +169,14 @@ class RenamerClangTidyCheckPPCallbacks : public PPCallbacks {
return;
if (SM.isWrittenInCommandLineFile(MacroNameTok.getLocation()))
return;
Check->checkMacro(SM, MacroNameTok, Info);
Check->checkMacro(MacroNameTok, Info, SM);
}

/// MacroExpands calls expandMacro for macros in the main file
void MacroExpands(const Token &MacroNameTok, const MacroDefinition &MD,
SourceRange /*Range*/,
const MacroArgs * /*Args*/) override {
Check->expandMacro(MacroNameTok, MD.getMacroInfo());
Check->expandMacro(MacroNameTok, MD.getMacroInfo(), SM);
}

private:
Expand All @@ -188,7 +187,7 @@ class RenamerClangTidyCheckPPCallbacks : public PPCallbacks {
class RenamerClangTidyVisitor
: public RecursiveASTVisitor<RenamerClangTidyVisitor> {
public:
RenamerClangTidyVisitor(RenamerClangTidyCheck *Check, const SourceManager *SM,
RenamerClangTidyVisitor(RenamerClangTidyCheck *Check, const SourceManager &SM,
bool AggressiveDependentMemberLookup)
: Check(Check), SM(SM),
AggressiveDependentMemberLookup(AggressiveDependentMemberLookup) {}
Expand Down Expand Up @@ -259,7 +258,7 @@ class RenamerClangTidyVisitor
// Fix overridden methods
if (const auto *Method = dyn_cast<CXXMethodDecl>(Decl)) {
if (const CXXMethodDecl *Overridden = getOverrideMethod(Method)) {
Check->addUsage(Overridden, Method->getLocation());
Check->addUsage(Overridden, Method->getLocation(), SM);
return true; // Don't try to add the actual decl as a Failure.
}
}
Expand All @@ -269,7 +268,7 @@ class RenamerClangTidyVisitor
if (isa<ClassTemplateSpecializationDecl>(Decl))
return true;

Check->checkNamedDecl(Decl, *SM);
Check->checkNamedDecl(Decl, SM);
return true;
}

Expand Down Expand Up @@ -367,9 +366,26 @@ class RenamerClangTidyVisitor
return true;
}

bool VisitDesignatedInitExpr(DesignatedInitExpr *Expr) {
for (const DesignatedInitExpr::Designator &D : Expr->designators()) {
if (!D.isFieldDesignator())
continue;
const FieldDecl *FD = D.getFieldDecl();
if (!FD)
continue;
const IdentifierInfo *II = FD->getIdentifier();
if (!II)
continue;
SourceRange FixLocation{D.getFieldLoc(), D.getFieldLoc()};
Check->addUsage(FD, FixLocation, SM);
}

return true;
}

private:
RenamerClangTidyCheck *Check;
const SourceManager *SM;
const SourceManager &SM;
const bool AggressiveDependentMemberLookup;
};

Expand Down Expand Up @@ -399,7 +415,7 @@ void RenamerClangTidyCheck::registerPPCallbacks(

void RenamerClangTidyCheck::addUsage(
const RenamerClangTidyCheck::NamingCheckId &Decl, SourceRange Range,
const SourceManager *SourceMgr) {
const SourceManager &SourceMgr) {
// Do nothing if the provided range is invalid.
if (Range.isInvalid())
return;
Expand All @@ -409,8 +425,7 @@ void RenamerClangTidyCheck::addUsage(
// spelling location to different source locations, and we only want to fix
// the token once, before it is expanded by the macro.
SourceLocation FixLocation = Range.getBegin();
if (SourceMgr)
FixLocation = SourceMgr->getSpellingLoc(FixLocation);
FixLocation = SourceMgr.getSpellingLoc(FixLocation);
if (FixLocation.isInvalid())
return;

Expand All @@ -424,15 +439,15 @@ void RenamerClangTidyCheck::addUsage(
if (!Failure.shouldFix())
return;

if (SourceMgr && SourceMgr->isWrittenInScratchSpace(FixLocation))
if (SourceMgr.isWrittenInScratchSpace(FixLocation))
Failure.FixStatus = RenamerClangTidyCheck::ShouldFixStatus::InsideMacro;

if (!utils::rangeCanBeFixed(Range, SourceMgr))
if (!utils::rangeCanBeFixed(Range, &SourceMgr))
Failure.FixStatus = RenamerClangTidyCheck::ShouldFixStatus::InsideMacro;
}

void RenamerClangTidyCheck::addUsage(const NamedDecl *Decl, SourceRange Range,
const SourceManager *SourceMgr) {
const SourceManager &SourceMgr) {
// Don't keep track for non-identifier names.
auto *II = Decl->getIdentifier();
if (!II)
Expand Down Expand Up @@ -473,18 +488,24 @@ void RenamerClangTidyCheck::checkNamedDecl(const NamedDecl *Decl,
}

Failure.Info = std::move(Info);
addUsage(Decl, Range);
addUsage(Decl, Range, SourceMgr);
}

void RenamerClangTidyCheck::check(const MatchFinder::MatchResult &Result) {
RenamerClangTidyVisitor Visitor(this, Result.SourceManager,
if (!Result.SourceManager) {
// In principle SourceManager is not null but going only by the definition
// of MatchResult it must be handled. Cannot rename anything without a
// SourceManager.
return;
}
RenamerClangTidyVisitor Visitor(this, *Result.SourceManager,
AggressiveDependentMemberLookup);
Visitor.TraverseAST(*Result.Context);
}

void RenamerClangTidyCheck::checkMacro(const SourceManager &SourceMgr,
const Token &MacroNameTok,
const MacroInfo *MI) {
void RenamerClangTidyCheck::checkMacro(const Token &MacroNameTok,
const MacroInfo *MI,
const SourceManager &SourceMgr) {
std::optional<FailureInfo> MaybeFailure =
getMacroFailureInfo(MacroNameTok, SourceMgr);
if (!MaybeFailure)
Expand All @@ -499,11 +520,12 @@ void RenamerClangTidyCheck::checkMacro(const SourceManager &SourceMgr,
Failure.FixStatus = ShouldFixStatus::FixInvalidIdentifier;

Failure.Info = std::move(Info);
addUsage(ID, Range);
addUsage(ID, Range, SourceMgr);
}

void RenamerClangTidyCheck::expandMacro(const Token &MacroNameTok,
const MacroInfo *MI) {
const MacroInfo *MI,
const SourceManager &SourceMgr) {
StringRef Name = MacroNameTok.getIdentifierInfo()->getName();
NamingCheckId ID(MI->getDefinitionLoc(), Name);

Expand All @@ -512,7 +534,7 @@ void RenamerClangTidyCheck::expandMacro(const Token &MacroNameTok,
return;

SourceRange Range(MacroNameTok.getLocation(), MacroNameTok.getEndLoc());
addUsage(ID, Range);
addUsage(ID, Range, SourceMgr);
}

static std::string
Expand Down
11 changes: 6 additions & 5 deletions clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,18 +108,19 @@ class RenamerClangTidyCheck : public ClangTidyCheck {
llvm::DenseMap<NamingCheckId, NamingCheckFailure>;

/// Check Macros for style violations.
void checkMacro(const SourceManager &SourceMgr, const Token &MacroNameTok,
const MacroInfo *MI);
void checkMacro(const Token &MacroNameTok, const MacroInfo *MI,
const SourceManager &SourceMgr);

/// Add a usage of a macro if it already has a violation.
void expandMacro(const Token &MacroNameTok, const MacroInfo *MI);
void expandMacro(const Token &MacroNameTok, const MacroInfo *MI,
const SourceManager &SourceMgr);

void addUsage(const RenamerClangTidyCheck::NamingCheckId &Decl,
SourceRange Range, const SourceManager *SourceMgr = nullptr);
SourceRange Range, const SourceManager &SourceMgr);

/// Convenience method when the usage to be added is a NamedDecl.
void addUsage(const NamedDecl *Decl, SourceRange Range,
const SourceManager *SourceMgr = nullptr);
const SourceManager &SourceMgr);

void checkNamedDecl(const NamedDecl *Decl, const SourceManager &SourceMgr);

Expand Down
8 changes: 7 additions & 1 deletion clang-tools-extra/clangd/ClangdServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "refactor/Rename.h"
#include "refactor/Tweak.h"
#include "support/Cancellation.h"
#include "support/Context.h"
#include "support/Logger.h"
#include "support/MemoryTree.h"
#include "support/ThreadsafeFS.h"
Expand Down Expand Up @@ -112,7 +113,12 @@ struct UpdateIndexCallbacks : public ParsingCallbacks {
// Index outlives TUScheduler (declared first)
FIndex(FIndex),
// shared_ptr extends lifetime
Stdlib(Stdlib)]() mutable {
Stdlib(Stdlib),
// We have some FS implementations that rely on information in
// the context.
Ctx(Context::current().clone())]() mutable {
// Make sure we install the context into current thread.
WithContext C(std::move(Ctx));
clang::noteBottomOfStack();
IndexFileIn IF;
IF.Symbols = indexStandardLibrary(std::move(CI), Loc, *TFS);
Expand Down
16 changes: 13 additions & 3 deletions clang-tools-extra/clangd/CodeComplete.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,11 @@ const CodeCompleteOptions::CodeCompletionRankingModel

namespace {

CompletionItemKind toCompletionItemKind(index::SymbolKind Kind) {
// Note: changes to this function should also be reflected in the
// CodeCompletionResult overload where appropriate.
CompletionItemKind
toCompletionItemKind(index::SymbolKind Kind,
const llvm::StringRef *Signature = nullptr) {
using SK = index::SymbolKind;
switch (Kind) {
case SK::Unknown:
Expand All @@ -99,7 +103,10 @@ CompletionItemKind toCompletionItemKind(index::SymbolKind Kind) {
case SK::NamespaceAlias:
return CompletionItemKind::Module;
case SK::Macro:
return CompletionItemKind::Text;
// Use macro signature (if provided) to tell apart function-like and
// object-like macros.
return Signature && Signature->contains('(') ? CompletionItemKind::Function
: CompletionItemKind::Constant;
case SK::Enum:
return CompletionItemKind::Enum;
case SK::Struct:
Expand Down Expand Up @@ -150,6 +157,8 @@ CompletionItemKind toCompletionItemKind(index::SymbolKind Kind) {
llvm_unreachable("Unhandled clang::index::SymbolKind.");
}

// Note: changes to this function should also be reflected in the
// index::SymbolKind overload where appropriate.
CompletionItemKind toCompletionItemKind(const CodeCompletionResult &Res,
CodeCompletionContext::Kind CtxKind) {
if (Res.Declaration)
Expand Down Expand Up @@ -379,7 +388,8 @@ struct CodeCompletionBuilder {
if (Completion.Scope.empty())
Completion.Scope = std::string(C.IndexResult->Scope);
if (Completion.Kind == CompletionItemKind::Missing)
Completion.Kind = toCompletionItemKind(C.IndexResult->SymInfo.Kind);
Completion.Kind = toCompletionItemKind(C.IndexResult->SymInfo.Kind,
&C.IndexResult->Signature);
if (Completion.Name.empty())
Completion.Name = std::string(C.IndexResult->Name);
if (Completion.FilterText.empty())
Expand Down
6 changes: 4 additions & 2 deletions clang-tools-extra/clangd/CompileCommands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,8 @@ llvm::ArrayRef<ArgStripper::Rule> ArgStripper::rulesFor(llvm::StringRef Arg) {
static constexpr llvm::ArrayRef<llvm::StringLiteral> NAME( \
NAME##_init, std::size(NAME##_init) - 1);
#define OPTION(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \
FLAGS, VISIBILITY, PARAM, HELP, METAVAR, VALUES) \
FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \
METAVAR, VALUES) \
Prefixes[DriverID::OPT_##ID] = PREFIX;
#include "clang/Driver/Options.inc"
#undef OPTION
Expand All @@ -478,7 +479,8 @@ llvm::ArrayRef<ArgStripper::Rule> ArgStripper::rulesFor(llvm::StringRef Arg) {
const void *AliasArgs;
} AliasTable[] = {
#define OPTION(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \
FLAGS, VISIBILITY, PARAM, HELP, METAVAR, VALUES) \
FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \
METAVAR, VALUES) \
{DriverID::OPT_##ID, DriverID::OPT_##ALIAS, ALIASARGS},
#include "clang/Driver/Options.inc"
#undef OPTION
Expand Down
9 changes: 0 additions & 9 deletions clang-tools-extra/clangd/IncludeCleaner.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,6 @@ issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code,
const ThreadsafeFS &TFS,
HeaderFilter IgnoreHeader = {});

/// Affects whether standard library includes should be considered for
/// removal. This is off by default for now due to implementation limitations:
/// - macros are not tracked
/// - symbol names without a unique associated header are not tracked
/// - references to std-namespaced C types are not properly tracked:
/// instead of std::size_t -> <cstddef> we see ::size_t -> <stddef.h>
/// FIXME: remove this hack once the implementation is good enough.
void setIncludeCleanerAnalyzesStdlib(bool B);

/// Converts the clangd include representation to include-cleaner
/// include representation.
include_cleaner::Includes convertIncludes(const ParsedAST &);
Expand Down
Loading