diff --git a/bolt/docs/BAT.md b/bolt/docs/BAT.md index 186b0e5ea89d3..436593478a398 100644 --- a/bolt/docs/BAT.md +++ b/bolt/docs/BAT.md @@ -90,11 +90,12 @@ current function. ### Address translation table Delta encoding means that only the difference with the previous corresponding entry is encoded. Input offsets implicitly start at zero. -| Entry | Encoding | Description | -| ------ | ------| ----------- | -| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary | -| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit | -| `BBHash` | Optional, 8b | Basic block entries only: basic block hash in input binary | +| Entry | Encoding | Description | Branch/BB | +| ------ | ------| ----------- | ------ | +| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary | Both | +| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit | Both | +| `BBHash` | Optional, 8b | Basic block hash in input binary | BB | +| `BBIdx` | Optional, Delta, ULEB128 | Basic block index in input binary | BB | `BRANCHENTRY` bit denotes whether a given offset pair is a control flow source (branch or call instruction). If not set, it signifies a control flow target diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h index 1f53f6d344ad7..eda2b318f0d0a 100644 --- a/bolt/include/bolt/Profile/BoltAddressTranslation.h +++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h @@ -122,6 +122,10 @@ class BoltAddressTranslation { /// Returns BF hash by function output address (after BOLT). size_t getBFHash(uint64_t OutputAddress) const; + /// Returns BB index by function output address (after BOLT) and basic block + /// input offset. + unsigned getBBIndex(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const; + /// True if a given \p Address is a function with translation table entry. bool isBATFunction(uint64_t Address) const { return Maps.count(Address); } @@ -154,7 +158,8 @@ class BoltAddressTranslation { std::map Maps; - using BBHashMap = std::unordered_map; + /// Map basic block input offset to a basic block index and hash pair. + using BBHashMap = std::unordered_map>; std::unordered_map> FuncHashes; /// Links outlined cold bocks to their original function diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp index 1d61a1b735b40..8fe976cc00e53 100644 --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -45,6 +45,8 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map, LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n", getBBHash(HotFuncAddress, BBInputOffset))); (void)HotFuncAddress; + LLVM_DEBUG(dbgs() << formatv(" Index: {0}\n", + getBBIndex(HotFuncAddress, BBInputOffset))); // In case of conflicts (same Key mapping to different Vals), the last // update takes precedence. Of course it is not ideal to have conflicts and // those happen when we have an empty BB that either contained only @@ -217,6 +219,7 @@ void BoltAddressTranslation::writeMaps(std::map &Maps, } size_t Index = 0; uint64_t InOffset = 0; + size_t PrevBBIndex = 0; // Output and Input addresses and delta-encoded for (std::pair &KeyVal : Map) { const uint64_t OutputAddress = KeyVal.first + Address; @@ -226,11 +229,15 @@ void BoltAddressTranslation::writeMaps(std::map &Maps, encodeSLEB128(KeyVal.second - InOffset, OS); InOffset = KeyVal.second; // Keeping InOffset as if BRANCHENTRY is encoded if ((InOffset & BRANCHENTRY) == 0) { - // Basic block hash - size_t BBHash = FuncHashPair.second[InOffset >> 1]; + unsigned BBIndex; + size_t BBHash; + std::tie(BBIndex, BBHash) = FuncHashPair.second[InOffset >> 1]; OS.write(reinterpret_cast(&BBHash), 8); - LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x}\n", KeyVal.first, - InOffset >> 1, BBHash)); + // Basic block index in the input binary + encodeULEB128(BBIndex - PrevBBIndex, OS); + PrevBBIndex = BBIndex; + LLVM_DEBUG(dbgs() << formatv("{0:x} -> {1:x} {2:x} {3}\n", KeyVal.first, + InOffset >> 1, BBHash, BBIndex)); } } } @@ -316,6 +323,7 @@ void BoltAddressTranslation::parseMaps(std::vector &HotFuncs, LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x" << Twine::utohexstr(Address) << "\n"); uint64_t InputOffset = 0; + size_t BBIndex = 0; for (uint32_t J = 0; J < NumEntries; ++J) { const uint64_t OutputDelta = DE.getULEB128(&Offset, &Err); const uint64_t OutputAddress = PrevAddress + OutputDelta; @@ -330,19 +338,25 @@ void BoltAddressTranslation::parseMaps(std::vector &HotFuncs, } Map.insert(std::pair(OutputOffset, InputOffset)); size_t BBHash = 0; + size_t BBIndexDelta = 0; const bool IsBranchEntry = InputOffset & BRANCHENTRY; if (!IsBranchEntry) { BBHash = DE.getU64(&Offset, &Err); + BBIndexDelta = DE.getULEB128(&Offset, &Err); + BBIndex += BBIndexDelta; // Map basic block hash to hot fragment by input offset - FuncHashes[HotAddress].second.emplace(InputOffset >> 1, BBHash); + FuncHashes[HotAddress].second.emplace(InputOffset >> 1, + std::pair(BBIndex, BBHash)); } LLVM_DEBUG({ dbgs() << formatv( "{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}", OutputOffset, InputOffset, OutputDelta, getULEB128Size(OutputDelta), InputDelta, (J < EqualElems) ? 0 : getSLEB128Size(InputDelta), OutputAddress); - if (BBHash) - dbgs() << formatv(" {0:x}", BBHash); + if (!IsBranchEntry) { + dbgs() << formatv(" {0:x} {1}/{2}b", BBHash, BBIndex, + getULEB128Size(BBIndexDelta)); + } dbgs() << '\n'; }); } @@ -494,14 +508,19 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) { FuncHashes[BF.getAddress()].first = BF.computeHash(); BF.computeBlockHashes(); for (const BinaryBasicBlock &BB : BF) - FuncHashes[BF.getAddress()].second.emplace(BB.getInputOffset(), - BB.getHash()); + FuncHashes[BF.getAddress()].second.emplace( + BB.getInputOffset(), std::pair(BB.getIndex(), BB.getHash())); } } +unsigned BoltAddressTranslation::getBBIndex(uint64_t FuncOutputAddress, + uint32_t BBInputOffset) const { + return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).first; +} + size_t BoltAddressTranslation::getBBHash(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const { - return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset); + return FuncHashes.at(FuncOutputAddress).second.at(BBInputOffset).second; } size_t BoltAddressTranslation::getBFHash(uint64_t OutputAddress) const { diff --git a/bolt/test/X86/bolt-address-translation-yaml.test b/bolt/test/X86/bolt-address-translation-yaml.test index 25ff4e7fbfcc5..4516a662697ac 100644 --- a/bolt/test/X86/bolt-address-translation-yaml.test +++ b/bolt/test/X86/bolt-address-translation-yaml.test @@ -18,7 +18,7 @@ RUN: | FileCheck --check-prefix CHECK-BOLT-YAML %s WRITE-BAT-CHECK: BOLT-INFO: Wrote 5 BAT maps WRITE-BAT-CHECK: BOLT-INFO: Wrote 4 function and 22 basic block hashes -WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 344 +WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 376 READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries diff --git a/bolt/test/X86/bolt-address-translation.test b/bolt/test/X86/bolt-address-translation.test index 4277b4e0d0fef..5c1db89e3c6b2 100644 --- a/bolt/test/X86/bolt-address-translation.test +++ b/bolt/test/X86/bolt-address-translation.test @@ -37,7 +37,7 @@ # CHECK: BOLT: 3 out of 7 functions were overwritten. # CHECK: BOLT-INFO: Wrote 6 BAT maps # CHECK: BOLT-INFO: Wrote 3 function and 58 basic block hashes -# CHECK: BOLT-INFO: BAT section size (bytes): 816 +# CHECK: BOLT-INFO: BAT section size (bytes): 920 # # usqrt mappings (hot part). We match against any key (left side containing # the bolted binary offsets) because BOLT may change where it puts instructions diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 86a287db72a4e..bc9cc8ce6cf5a 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5863,8 +5863,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } else if (Triple.getArch() == llvm::Triple::x86_64) { Ok = llvm::is_contained({"small", "kernel", "medium", "large", "tiny"}, CM); - } else if (Triple.isNVPTX() || Triple.isAMDGPU() || Triple.isSPIRV()) { - // NVPTX/AMDGPU/SPIRV does not care about the code model and will accept + } else if (Triple.isNVPTX() || Triple.isAMDGPU()) { + // NVPTX/AMDGPU does not care about the code model and will accept // whatever works for the host. Ok = true; } else if (Triple.isSPARC64()) { diff --git a/clang/test/Driver/unsupported-option-gpu.c b/clang/test/Driver/unsupported-option-gpu.c index 5618b2cba72e1..f23cb71ebfb08 100644 --- a/clang/test/Driver/unsupported-option-gpu.c +++ b/clang/test/Driver/unsupported-option-gpu.c @@ -2,5 +2,4 @@ // DEFINE: %{check} = %clang -### --target=x86_64-linux-gnu -c -mcmodel=medium // RUN: %{check} -x cuda %s --cuda-path=%S/Inputs/CUDA/usr/local/cuda --offload-arch=sm_60 --no-cuda-version-check -fbasic-block-sections=all -// RUN: %{check} -x hip %s --offload=spirv64 -nogpulib -nogpuinc // RUN: %{check} -x hip %s --rocm-path=%S/Inputs/rocm -nogpulib -nogpuinc diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 919a14b8bcf08..36248925d65ad 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -612,7 +612,7 @@ static void replaceCommonSymbols() { if (!osec) osec = ConcatOutputSection::getOrCreateForInput(isec); isec->parent = osec; - addInputSection(isec); + inputSections.push_back(isec); // FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip // and pass them on here. @@ -1220,18 +1220,53 @@ static void createFiles(const InputArgList &args) { static void gatherInputSections() { TimeTraceScope timeScope("Gathering input sections"); + int inputOrder = 0; for (const InputFile *file : inputFiles) { for (const Section *section : file->sections) { // Compact unwind entries require special handling elsewhere. (In // contrast, EH frames are handled like regular ConcatInputSections.) if (section->name == section_names::compactUnwind) continue; - for (const Subsection &subsection : section->subsections) - addInputSection(subsection.isec); + ConcatOutputSection *osec = nullptr; + for (const Subsection &subsection : section->subsections) { + if (auto *isec = dyn_cast(subsection.isec)) { + if (isec->isCoalescedWeak()) + continue; + if (config->emitInitOffsets && + sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) { + in.initOffsets->addInput(isec); + continue; + } + isec->outSecOff = inputOrder++; + if (!osec) + osec = ConcatOutputSection::getOrCreateForInput(isec); + isec->parent = osec; + inputSections.push_back(isec); + } else if (auto *isec = + dyn_cast(subsection.isec)) { + if (isec->getName() == section_names::objcMethname) { + if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder) + in.objcMethnameSection->inputOrder = inputOrder++; + in.objcMethnameSection->addInput(isec); + } else { + if (in.cStringSection->inputOrder == UnspecifiedInputOrder) + in.cStringSection->inputOrder = inputOrder++; + in.cStringSection->addInput(isec); + } + } else if (auto *isec = + dyn_cast(subsection.isec)) { + if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder) + in.wordLiteralSection->inputOrder = inputOrder++; + in.wordLiteralSection->addInput(isec); + } else { + llvm_unreachable("unexpected input section kind"); + } + } } if (!file->objCImageInfo.empty()) in.objCImageInfo->addFile(file); } + assert(inputOrder <= UnspecifiedInputOrder); } static void foldIdenticalLiterals() { @@ -1387,7 +1422,6 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, concatOutputSections.clear(); inputFiles.clear(); inputSections.clear(); - inputSectionsOrder = 0; loadedArchives.clear(); loadedObjectFrameworks.clear(); missingAutolinkWarnings.clear(); diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 22930d52dd1db..8f5affb1dc21d 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -37,44 +37,6 @@ static_assert(sizeof(void *) != 8 || "instances of it"); std::vector macho::inputSections; -int macho::inputSectionsOrder = 0; - -// Call this function to add a new InputSection and have it routed to the -// appropriate container. Depending on its type and current config, it will -// either be added to 'inputSections' vector or to a synthetic section. -void lld::macho::addInputSection(InputSection *inputSection) { - if (auto *isec = dyn_cast(inputSection)) { - if (isec->isCoalescedWeak()) - return; - if (config->emitInitOffsets && - sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) { - in.initOffsets->addInput(isec); - return; - } - isec->outSecOff = inputSectionsOrder++; - auto *osec = ConcatOutputSection::getOrCreateForInput(isec); - isec->parent = osec; - inputSections.push_back(isec); - } else if (auto *isec = dyn_cast(inputSection)) { - if (isec->getName() == section_names::objcMethname) { - if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder) - in.objcMethnameSection->inputOrder = inputSectionsOrder++; - in.objcMethnameSection->addInput(isec); - } else { - if (in.cStringSection->inputOrder == UnspecifiedInputOrder) - in.cStringSection->inputOrder = inputSectionsOrder++; - in.cStringSection->addInput(isec); - } - } else if (auto *isec = dyn_cast(inputSection)) { - if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder) - in.wordLiteralSection->inputOrder = inputSectionsOrder++; - in.wordLiteralSection->addInput(isec); - } else { - llvm_unreachable("unexpected input section kind"); - } - - assert(inputSectionsOrder <= UnspecifiedInputOrder); -} uint64_t InputSection::getFileSize() const { return isZeroFill(getFlags()) ? 0 : getSize(); diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index 694bdf734907b..b25f0638f4c6c 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -302,8 +302,6 @@ bool isEhFrameSection(const InputSection *); bool isGccExceptTabSection(const InputSection *); extern std::vector inputSections; -// This is used as a counter for specyfing input order for input sections -extern int inputSectionsOrder; namespace section_names { @@ -371,7 +369,6 @@ constexpr const char addrSig[] = "__llvm_addrsig"; } // namespace section_names -void addInputSection(InputSection *inputSection); } // namespace macho std::string toString(const macho::InputSection *); diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp index 5902b82d30f55..40df2243b26f0 100644 --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ -790,7 +790,7 @@ void ObjcCategoryMerger::emitAndLinkProtocolList( infoCategoryWriter.catPtrListInfo.align); listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; listSec->live = true; - addInputSection(listSec); + allInputSections.push_back(listSec); listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; @@ -848,7 +848,7 @@ void ObjcCategoryMerger::emitAndLinkPointerList( infoCategoryWriter.catPtrListInfo.align); listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; listSec->live = true; - addInputSection(listSec); + allInputSections.push_back(listSec); listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; @@ -889,7 +889,7 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCateogryName, bodyData, infoCategoryWriter.catListInfo.align); newCatList->parent = infoCategoryWriter.catListInfo.outputSection; newCatList->live = true; - addInputSection(newCatList); + allInputSections.push_back(newCatList); newCatList->parent = infoCategoryWriter.catListInfo.outputSection; @@ -927,7 +927,7 @@ Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name, bodyData, infoCategoryWriter.catBodyInfo.align); newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection; newBodySec->live = true; - addInputSection(newBodySec); + allInputSections.push_back(newBodySec); std::string symName = objc::symbol_names::category + baseClassName + "_$_(" + name + ")"; @@ -1132,7 +1132,7 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories( infoCategoryWriter.catListInfo.align); listSec->parent = infoCategoryWriter.catListInfo.outputSection; listSec->live = true; - addInputSection(listSec); + allInputSections.push_back(listSec); std::string slotSymName = "<__objc_catlist slot for category "; slotSymName += nonErasedCatBody->getName(); @@ -1221,11 +1221,9 @@ void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); } StringRef ObjcCategoryMerger::newStringData(const char *str) { uint32_t len = strlen(str); - uint32_t bufSize = len + 1; - auto &data = newSectionData(bufSize); + auto &data = newSectionData(len + 1); char *strData = reinterpret_cast(data.data()); - // Copy the string chars and null-terminator - memcpy(strData, str, bufSize); + strncpy(strData, str, len); return StringRef(strData, len); } diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 1b3694528de1d..7ee3261ce3075 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -793,7 +793,7 @@ void StubHelperSection::setUp() { in.imageLoaderCache->parent = ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache); - addInputSection(in.imageLoaderCache); + inputSections.push_back(in.imageLoaderCache); // Since this isn't in the symbol table or in any input file, the noDeadStrip // argument doesn't matter. dyldPrivate = @@ -855,7 +855,7 @@ ConcatInputSection *ObjCSelRefsSection::makeSelRef(StringRef methname) { /*addend=*/static_cast(methnameOffset), /*referent=*/in.objcMethnameSection->isec}); objcSelref->parent = ConcatOutputSection::getOrCreateForInput(objcSelref); - addInputSection(objcSelref); + inputSections.push_back(objcSelref); objcSelref->isFinal = true; methnameToSelref[CachedHashStringRef(methname)] = objcSelref; return objcSelref;