From 49c5f229c8f4829551462bc1ad335c752d0fc643 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Sat, 8 Nov 2025 19:54:21 +0000 Subject: [PATCH 01/24] feat(AsmPrinter): Add support for emitting prefetch target symbols --- .../CodeGen/BasicBlockSectionsProfileReader.h | 45 ++++++++++++++++- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 24 +++++++++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 50 ++++++++++++++++++- .../BasicBlockSectionsProfileReader.cpp | 44 ++++++++++++++++ llvm/lib/CodeGen/MachineBasicBlock.cpp | 13 +++++ 5 files changed, 174 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index ee1f28377f7e4..5b230db30aec4 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -42,6 +42,17 @@ struct BBClusterInfo { unsigned PositionInCluster; }; +struct BBPosition { + UniqueBBID BBID; + unsigned BBOffset; +}; + +struct PrefetchHint { + BBPosition SitePosition; + StringRef TargetFunctionName; + BBPosition TargetPosition; +}; + // This represents the raw input profile for one function. struct FunctionPathAndClusterInfo { // BB Cluster information specified by `UniqueBBID`s. @@ -50,9 +61,11 @@ struct FunctionPathAndClusterInfo { // the edge a -> b (a is not cloned). The index of the path in this vector // determines the `UniqueBBID::CloneID` of the cloned blocks in that path. SmallVector> ClonePaths; + SmallVector PrefetchHints; + DenseSet PrefetchTargets; // Node counts for each basic block. DenseMap NodeCounts; - // Edge counts for each edge, stored as a nested map. + // Edge counts for each edge. DenseMap> EdgeCounts; // Hash for each basic block. 
The Hashes are stored for every original block // (not cloned blocks), hence the map key being unsigned instead of @@ -60,6 +73,27 @@ struct FunctionPathAndClusterInfo { DenseMap BBHashes; }; +// Provides DenseMapInfo BBPosition. +template <> struct DenseMapInfo { + static inline BBPosition getEmptyKey() { + return {DenseMapInfo::getEmptyKey(), + DenseMapInfo::getEmptyKey()}; + } + static inline BBPosition getTombstoneKey() { + return BBPosition{DenseMapInfo::getTombstoneKey(), + DenseMapInfo::getTombstoneKey()}; + } + static unsigned getHashValue(const BBPosition &Val) { + std::pair PairVal = std::make_pair( + DenseMapInfo::getHashValue(Val.BBID), Val.BBOffset); + return DenseMapInfo>::getHashValue(PairVal); + } + static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) { + return DenseMapInfo::isEqual(LHS.BBID, RHS.BBID) && + DenseMapInfo::isEqual(LHS.BBOffset, RHS.BBOffset); + } +}; + class BasicBlockSectionsProfileReader { public: friend class BasicBlockSectionsProfileReaderWrapperPass; @@ -86,6 +120,11 @@ class BasicBlockSectionsProfileReader { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &SinkBBID) const; + SmallVector + getPrefetchHintsForFunction(StringRef FuncName) const; + + DenseSet getPrefetchTargetsForFunction(StringRef FuncName) const; + private: StringRef getAliasName(StringRef FuncName) const { auto R = FuncAliasMap.find(FuncName); @@ -194,6 +233,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &DestBBID) const; + SmallVector + getPrefetchHintsForFunction(StringRef FuncName) const; + + DenseSet getPrefetchTargetsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for the matching functions. 
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index fcf7bab09fcff..e6c6bc26ae9e6 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -100,6 +100,12 @@ template <> struct DenseMapInfo { } }; +struct PrefetchTarget { + StringRef TargetFunction; + UniqueBBID TargetBBID; + unsigned TargetBBOffset; +}; + template <> struct ilist_traits { private: friend class MachineBasicBlock; // Set by the owning MachineBasicBlock. @@ -213,6 +219,8 @@ class MachineBasicBlock /// basic block sections and basic block labels. std::optional BBID; + SmallVector PrefetchTargets; + /// With basic block sections, this stores the Section ID of the basic block. MBBSectionID SectionID{0}; @@ -229,6 +237,8 @@ class MachineBasicBlock /// is only computed once and is cached. mutable MCSymbol *CachedMCSymbol = nullptr; + mutable SmallVector CallInstSymbols; + /// Cached MCSymbol for this block (used if IsEHContTarget). mutable MCSymbol *CachedEHContMCSymbol = nullptr; @@ -710,6 +720,14 @@ class MachineBasicBlock std::optional getBBID() const { return BBID; } + const SmallVector &getPrefetchTargets() const { + return PrefetchTargets; + } + + void setPrefetchTargets(const SmallVector &V) { + PrefetchTargets = V; + } + /// Returns the section ID of this basic block. MBBSectionID getSectionID() const { return SectionID; } @@ -1275,6 +1293,12 @@ class MachineBasicBlock /// Return the MCSymbol for this basic block. LLVM_ABI MCSymbol *getSymbol() const; + MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const; + + const SmallVector& getCallInstSymbols() const { + return CallInstSymbols; + } + /// Return the Windows EH Continuation Symbol for this basic block. 
LLVM_ABI MCSymbol *getEHContSymbol() const; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3aa245b7f3f1e..a204bba5789a8 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -18,6 +18,7 @@ #include "WasmException.h" #include "WinCFGuard.h" #include "WinException.h" +#include "llvm/Support/SMLoc.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/BitmaskEnum.h" @@ -178,6 +179,11 @@ static cl::opt EmitJumpTableSizesSection( cl::desc("Emit a section containing jump table addresses and sizes"), cl::Hidden, cl::init(false)); +static cl::opt InsertNoopsForPrefetch( + "insert-noops-for-prefetch", + cl::desc("Whether to insert noops instead of prefetches."), cl::init(false), + cl::Hidden); + // This isn't turned on by default, since several of the scheduling models are // not completely accurate, and we don't want to be misleading. static cl::opt PrintLatency( @@ -1982,10 +1988,34 @@ void AsmPrinter::emitFunctionBody() { FunctionCallGraphInfo FuncCGInfo; const auto &CallSitesInfoMap = MF->getCallSitesInfo(); for (auto &MBB : *MF) { + int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0; // Print a label for the basic block. 
emitBasicBlockStart(MBB); DenseMap MnemonicCounts; + unsigned NumCallsInBlock = 0; for (auto &MI : MBB) { + if (NextPrefetchTargetIndex != -1 && + NumCallsInBlock >= MBB.getPrefetchTargets()[NextPrefetchTargetIndex]) { + + MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( + Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + + Twine("_") + + utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex])); + if (MF->getFunction().isWeakForLinker()) { + OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Weak); + errs() << "Emitting weak symbol: " << PrefetchTargetSymbol->getName() << "\n"; + } else { + OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global); + errs() << "Emitting global symbol: " << PrefetchTargetSymbol->getName() << "\n"; + } + // OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern); + // errs() << "Emitting symbol: " << PrefetchTargetSymbol->getName() << "\n"; + OutStreamer->emitLabel(PrefetchTargetSymbol); + ++NextPrefetchTargetIndex; + if (NextPrefetchTargetIndex >= + static_cast(MBB.getPrefetchTargets().size())) + NextPrefetchTargetIndex = -1; + } // Print the assembly for the instruction. if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && !MI.isDebugInstr()) { @@ -2099,7 +2129,7 @@ void AsmPrinter::emitFunctionBody() { break; } default: - emitInstruction(&MI); + emitInstruction(&MI); auto CountInstruction = [&](const MachineInstr &MI) { // Skip Meta instructions inside bundles. 
@@ -2136,6 +2166,24 @@ void AsmPrinter::emitFunctionBody() { for (auto &Handler : Handlers) Handler->endInstruction(); } + while (NextPrefetchTargetIndex != -1) { + MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( + Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + + Twine("_") + + utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex])); + if (MF->getFunction().hasWeakLinkage()) { + OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_WeakDefinition); + } else { + OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global); + } + OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern); + OutStreamer->emitLabel(PrefetchTargetSymbol); + ++NextPrefetchTargetIndex; + if (NextPrefetchTargetIndex >= + static_cast(MBB.getPrefetchTargets().size())) + NextPrefetchTargetIndex = -1; + } + // We must emit temporary symbol for the end of this basic block, if either // we have BBLabels enabled or if this basic blocks marks the end of a diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index c234c0f1b0b34..de146e172c174 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -93,6 +93,19 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount( return EdgeIt->second; } +SmallVector +BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( + StringRef FuncName) const { + return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints; +} + +DenseSet +BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( + StringRef FuncName) const { + return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)) + .PrefetchTargets; +} + // Reads the version 1 basic block sections profile. 
Profile for each function // is encoded as follows: // m @@ -308,6 +321,25 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { } continue; } + case 't': { // Prefetch target specifier. + // Skip the profile when we the profile iterator (FI) refers to the + // past-the-end element. + if (FI == ProgramPathAndClusterInfo.end()) + continue; + assert(Values.size() == 1); + SmallVector PrefetchTargetStr; + Values[0].split(PrefetchTargetStr, '@'); + assert(PrefetchTargetStr.size() == 2); + auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); + if (!TargetBBID) + return TargetBBID.takeError(); + unsigned long long TargetBBOffset; + if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetBBOffset)) + return createProfileParseError(Twine("unsigned integer expected: '") + + PrefetchTargetStr[1]); + FI->second.PrefetchTargets.insert(BBPosition{*TargetBBID, static_cast(TargetBBOffset)}); + continue; + } default: return createProfileParseError(Twine("invalid specifier: '") + Twine(Specifier) + "'"); @@ -514,6 +546,18 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount( return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID); } +SmallVector +BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction( + StringRef FuncName) const { + return BBSPR.getPrefetchHintsForFunction(FuncName); +} + +DenseSet +BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction( + StringRef FuncName) const { + return BBSPR.getPrefetchTargetsForFunction(FuncName); +} + BasicBlockSectionsProfileReader & BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() { return BBSPR; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index ba0b025167307..19b218a2879dd 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -90,6 +90,19 @@ MCSymbol *MachineBasicBlock::getSymbol() const { return CachedMCSymbol; } +MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned 
CallInstNumber) const { + if (CallInstSymbols.size() <= CallInstNumber) { + const MachineFunction *MF = getParent(); + MCContext &Ctx = MF->getContext(); + CallInstSymbols.resize(CallInstNumber + 1); + CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol( + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" + + Twine(CallInstNumber), + /*AlwaysEmit=*/true); + } + return CallInstSymbols[CallInstNumber]; +} + MCSymbol *MachineBasicBlock::getEHContSymbol() const { if (!CachedEHContMCSymbol) { const MachineFunction *MF = getParent(); From b25adef3703b8bb3813609f8282ebf8a53b6686d Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Tue, 11 Nov 2025 21:30:47 +0000 Subject: [PATCH 02/24] feat: Add prefetch-profile.txt for testing --- build-release/prefetch-profile.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 build-release/prefetch-profile.txt diff --git a/build-release/prefetch-profile.txt b/build-release/prefetch-profile.txt new file mode 100644 index 0000000000000..294f57a46920b --- /dev/null +++ b/build-release/prefetch-profile.txt @@ -0,0 +1,3 @@ +v1 +f f +t 0@1 From bbfb7ba90a5192bb78549a5cd202368872dfd09a Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 12 Nov 2025 18:58:18 +0000 Subject: [PATCH 03/24] Everything else. 
--- build-release/prefetch-profile.txt | 3 -- .../CodeGen/BasicBlockSectionsProfileReader.h | 29 +++---------------- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 12 +++++++- .../BasicBlockSectionsProfileReader.cpp | 10 +++---- 4 files changed, 20 insertions(+), 34 deletions(-) delete mode 100644 build-release/prefetch-profile.txt diff --git a/build-release/prefetch-profile.txt b/build-release/prefetch-profile.txt deleted file mode 100644 index 294f57a46920b..0000000000000 --- a/build-release/prefetch-profile.txt +++ /dev/null @@ -1,3 +0,0 @@ -v1 -f f -t 0@1 diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 5b230db30aec4..fbf9b89754cd7 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -44,7 +44,7 @@ struct BBClusterInfo { struct BBPosition { UniqueBBID BBID; - unsigned BBOffset; + unsigned CallsiteIndex; }; struct PrefetchHint { @@ -62,7 +62,7 @@ struct FunctionPathAndClusterInfo { // determines the `UniqueBBID::CloneID` of the cloned blocks in that path. SmallVector> ClonePaths; SmallVector PrefetchHints; - DenseSet PrefetchTargets; + SmallVector PrefetchTargets; // Node counts for each basic block. DenseMap NodeCounts; // Edge counts for each edge. @@ -73,27 +73,6 @@ struct FunctionPathAndClusterInfo { DenseMap BBHashes; }; -// Provides DenseMapInfo BBPosition. 
-template <> struct DenseMapInfo { - static inline BBPosition getEmptyKey() { - return {DenseMapInfo::getEmptyKey(), - DenseMapInfo::getEmptyKey()}; - } - static inline BBPosition getTombstoneKey() { - return BBPosition{DenseMapInfo::getTombstoneKey(), - DenseMapInfo::getTombstoneKey()}; - } - static unsigned getHashValue(const BBPosition &Val) { - std::pair PairVal = std::make_pair( - DenseMapInfo::getHashValue(Val.BBID), Val.BBOffset); - return DenseMapInfo>::getHashValue(PairVal); - } - static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) { - return DenseMapInfo::isEqual(LHS.BBID, RHS.BBID) && - DenseMapInfo::isEqual(LHS.BBOffset, RHS.BBOffset); - } -}; - class BasicBlockSectionsProfileReader { public: friend class BasicBlockSectionsProfileReaderWrapperPass; @@ -123,7 +102,7 @@ class BasicBlockSectionsProfileReader { SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; - DenseSet getPrefetchTargetsForFunction(StringRef FuncName) const; + DenseMap> getPrefetchTargetsForFunction(StringRef FuncName) const; private: StringRef getAliasName(StringRef FuncName) const { @@ -236,7 +215,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; - DenseSet getPrefetchTargetsForFunction(StringRef FuncName) const; + DenseMap> getPrefetchTargetsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for the matching functions. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index a204bba5789a8..90445fedd5db3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -485,6 +485,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); if (EmitBBHash) AU.addRequired(); + AU.addUsedIfAvailable(); } bool AsmPrinter::doInitialization(Module &M) { @@ -1987,7 +1988,16 @@ void AsmPrinter::emitFunctionBody() { FunctionCallGraphInfo FuncCGInfo; const auto &CallSitesInfoMap = MF->getCallSitesInfo(); - for (auto &MBB : *MF) { + DenseMap> FunctionPrefetchTargets; + if (auto *BBSPRPass = + getAnalysisIfAvailable()) { + FunctionPrefetchTargets = BBSPRPass->getBBSPR().getPrefetchTargetsForFunction(MF->getName()); +} + + for (auto &MBB : *MF) { + + SmallVector BBPrefetchTargets; + = FunctionPrefetchTargets.lookup(MBB.g); int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0; // Print a label for the basic block. 
emitBasicBlockStart(MBB); diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index de146e172c174..c4784a6039c09 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -99,7 +99,7 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints; } -DenseSet +SmallVector BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( StringRef FuncName) const { return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)) @@ -333,11 +333,11 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); - unsigned long long TargetBBOffset; - if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetBBOffset)) + unsigned long long TargetCallsiteIndex; + if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex)) return createProfileParseError(Twine("unsigned integer expected: '") + PrefetchTargetStr[1]); - FI->second.PrefetchTargets.insert(BBPosition{*TargetBBID, static_cast(TargetBBOffset)}); + FI->second.PrefetchTargets.push_back(BBPosition{*TargetBBID, static_cast(TargetCallsiteIndex)}); continue; } default: @@ -552,7 +552,7 @@ BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction( return BBSPR.getPrefetchHintsForFunction(FuncName); } -DenseSet +SmallVector BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction( StringRef FuncName) const { return BBSPR.getPrefetchTargetsForFunction(FuncName); From 3e6b04f94548b6a61219eae6b32ba5a46ac1461b Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 12 Nov 2025 18:58:27 +0000 Subject: [PATCH 04/24] Add test. 
--- llvm/test/CodeGen/X86/prefetch-symbols.ll | 42 +++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 llvm/test/CodeGen/X86/prefetch-symbols.ll diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/prefetch-symbols.ll new file mode 100644 index 0000000000000..979db7942ff2c --- /dev/null +++ b/llvm/test/CodeGen/X86/prefetch-symbols.ll @@ -0,0 +1,42 @@ +;; Check that specifying the function in the basic block sections profile +;; without any other directives is a noop. +;; +;; Specify the bb sections profile: +; RUN: echo 'v1' > %t +; RUN: echo 'f _Z3foob' >> %t +; RUN: echo 't 0@0' >> %t +;; +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck + +define i32 @_Z3foob(i1 zeroext %0) nounwind { + %2 = alloca i32, align 4 + %3 = alloca i8, align 1 + %4 = zext i1 %0 to i8 + store i8 %4, ptr %3, align 1 + %5 = load i8, ptr %3, align 1 + %6 = trunc i8 %5 to i1 + %7 = zext i1 %6 to i32 + %8 = icmp sgt i32 %7, 0 + br i1 %8, label %9, label %11 + +9: ; preds = %1 + %10 = call i32 @_Z3barv() + store i32 %10, ptr %2, align 4 + br label %13 + +11: ; preds = %1 + %12 = call i32 @_Z3bazv() + store i32 %12, ptr %2, align 4 + br label %13 + +13: ; preds = %11, %9 + %14 = load i32, ptr %2, align 4 + ret i32 %14 +} + +declare i32 @_Z3barv() #1 +declare i32 @_Z3bazv() #1 + + +; CHECK: _Z3foob +; CHECK: llvm_prefetch_target From 996736018c889a680e2ff2d262d4496f398c9d1a Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 00:10:17 +0000 Subject: [PATCH 05/24] Fix everything --- .../CodeGen/BasicBlockSectionsProfileReader.h | 4 +- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 24 +---- llvm/include/llvm/CodeGen/Passes.h | 2 + llvm/include/llvm/InitializePasses.h | 1 + llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 73 ++++---------- llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/InsertCodePrefetch.cpp | 96 +++++++++++++++++++ llvm/lib/CodeGen/MachineBasicBlock.cpp | 13 --- 
llvm/lib/CodeGen/TargetPassConfig.cpp | 1 + 9 files changed, 127 insertions(+), 88 deletions(-) create mode 100644 llvm/lib/CodeGen/InsertCodePrefetch.cpp diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index fbf9b89754cd7..1fd904d64ab9d 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -102,7 +102,7 @@ class BasicBlockSectionsProfileReader { SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; - DenseMap> getPrefetchTargetsForFunction(StringRef FuncName) const; + SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; private: StringRef getAliasName(StringRef FuncName) const { @@ -215,7 +215,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; - DenseMap> getPrefetchTargetsForFunction(StringRef FuncName) const; + SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for the matching functions. diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index e6c6bc26ae9e6..4be008bbf4bf1 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -100,12 +100,6 @@ template <> struct DenseMapInfo { } }; -struct PrefetchTarget { - StringRef TargetFunction; - UniqueBBID TargetBBID; - unsigned TargetBBOffset; -}; - template <> struct ilist_traits { private: friend class MachineBasicBlock; // Set by the owning MachineBasicBlock. @@ -219,8 +213,6 @@ class MachineBasicBlock /// basic block sections and basic block labels. std::optional BBID; - SmallVector PrefetchTargets; - /// With basic block sections, this stores the Section ID of the basic block. 
MBBSectionID SectionID{0}; @@ -237,7 +229,7 @@ class MachineBasicBlock /// is only computed once and is cached. mutable MCSymbol *CachedMCSymbol = nullptr; - mutable SmallVector CallInstSymbols; + SmallVector PrefetchTargetIndexes; /// Cached MCSymbol for this block (used if IsEHContTarget). mutable MCSymbol *CachedEHContMCSymbol = nullptr; @@ -720,12 +712,12 @@ class MachineBasicBlock std::optional getBBID() const { return BBID; } - const SmallVector &getPrefetchTargets() const { - return PrefetchTargets; + const SmallVector &getPrefetchTargetIndexes() const { + return PrefetchTargetIndexes; } - void setPrefetchTargets(const SmallVector &V) { - PrefetchTargets = V; + void setPrefetchTargetIndexes(const SmallVector &V) { + PrefetchTargetIndexes = V; } /// Returns the section ID of this basic block. @@ -1293,12 +1285,6 @@ class MachineBasicBlock /// Return the MCSymbol for this basic block. LLVM_ABI MCSymbol *getSymbol() const; - MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const; - - const SmallVector& getCallInstSymbols() const { - return CallInstSymbols; - } - /// Return the Windows EH Continuation Symbol for this basic block. LLVM_ABI MCSymbol *getEHContSymbol() const; diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index a8525554b142e..f148d050a5772 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -69,6 +69,8 @@ LLVM_ABI MachineFunctionPass *createBasicBlockSectionsPass(); LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass(); +LLVM_ABI MachineFunctionPass *createInsertCodePrefetchPass(); + /// createMachineBlockHashInfoPass - This pass computes basic block hashes. 
LLVM_ABI MachineFunctionPass *createMachineBlockHashInfoPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 10a4d8525a9e8..35d5ab14dc226 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -56,6 +56,7 @@ LLVM_ABI void initializeAssignmentTrackingAnalysisPass(PassRegistry &); LLVM_ABI void initializeAssumptionCacheTrackerPass(PassRegistry &); LLVM_ABI void initializeAtomicExpandLegacyPass(PassRegistry &); LLVM_ABI void initializeBasicBlockPathCloningPass(PassRegistry &); +LLVM_ABI void initializeInsertCodePrefetchPass(PassRegistry &); LLVM_ABI void initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &); LLVM_ABI void initializeBasicBlockSectionsPass(PassRegistry &); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 90445fedd5db3..933fe6f7d177f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -179,11 +179,6 @@ static cl::opt EmitJumpTableSizesSection( cl::desc("Emit a section containing jump table addresses and sizes"), cl::Hidden, cl::init(false)); -static cl::opt InsertNoopsForPrefetch( - "insert-noops-for-prefetch", - cl::desc("Whether to insert noops instead of prefetches."), cl::init(false), - cl::Hidden); - // This isn't turned on by default, since several of the scheduling models are // not completely accurate, and we don't want to be misleading. 
static cl::opt PrintLatency( @@ -485,7 +480,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); if (EmitBBHash) AU.addRequired(); - AU.addUsedIfAvailable(); } bool AsmPrinter::doInitialization(Module &M) { @@ -1988,44 +1982,29 @@ void AsmPrinter::emitFunctionBody() { FunctionCallGraphInfo FuncCGInfo; const auto &CallSitesInfoMap = MF->getCallSitesInfo(); - DenseMap> FunctionPrefetchTargets; - if (auto *BBSPRPass = - getAnalysisIfAvailable()) { - FunctionPrefetchTargets = BBSPRPass->getBBSPR().getPrefetchTargetsForFunction(MF->getName()); -} for (auto &MBB : *MF) { - - SmallVector BBPrefetchTargets; - = FunctionPrefetchTargets.lookup(MBB.g); - int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0; // Print a label for the basic block. emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - unsigned NumCallsInBlock = 0; - for (auto &MI : MBB) { - if (NextPrefetchTargetIndex != -1 && - NumCallsInBlock >= MBB.getPrefetchTargets()[NextPrefetchTargetIndex]) { - MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( + SmallVector PrefetchTargets = MBB.getPrefetchTargetIndexes(); + auto PrefetchTargetIt = PrefetchTargets.begin(); + unsigned NumCalls = 0; + auto EmitPrefetchTargetSymbolIfNeeded = [&]() { + if (PrefetchTargetIt == PrefetchTargets.end() || NumCalls < *PrefetchTargetIt) + return; + MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + Twine("_") + - utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex])); - if (MF->getFunction().isWeakForLinker()) { - OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Weak); - errs() << "Emitting weak symbol: " << PrefetchTargetSymbol->getName() << "\n"; - } else { - OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global); - errs() << "Emitting global symbol: " << PrefetchTargetSymbol->getName() << "\n"; - } - // 
OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern); - // errs() << "Emitting symbol: " << PrefetchTargetSymbol->getName() << "\n"; + utostr(*PrefetchTargetIt)); + OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global); OutStreamer->emitLabel(PrefetchTargetSymbol); - ++NextPrefetchTargetIndex; - if (NextPrefetchTargetIndex >= - static_cast(MBB.getPrefetchTargets().size())) - NextPrefetchTargetIndex = -1; - } + ++PrefetchTargetIt; + }; + + for (auto &MI : MBB) { + EmitPrefetchTargetSymbolIfNeeded(); // Print the assembly for the instruction. if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && !MI.isDebugInstr()) { @@ -2163,8 +2142,11 @@ void AsmPrinter::emitFunctionBody() { break; } - if (MI.isCall() && MF->getTarget().Options.BBAddrMap) + if (MI.isCall()) { + if (MF->getTarget().Options.BBAddrMap) OutStreamer->emitLabel(createCallsiteEndSymbol(MBB)); + ++NumCalls; + } if (TM.Options.EmitCallGraphSection && MI.isCall()) handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI); @@ -2176,24 +2158,7 @@ void AsmPrinter::emitFunctionBody() { for (auto &Handler : Handlers) Handler->endInstruction(); } - while (NextPrefetchTargetIndex != -1) { - MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( - Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + - Twine("_") + - utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex])); - if (MF->getFunction().hasWeakLinkage()) { - OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_WeakDefinition); - } else { - OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global); - } - OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern); - OutStreamer->emitLabel(PrefetchTargetSymbol); - ++NextPrefetchTargetIndex; - if (NextPrefetchTargetIndex >= - static_cast(MBB.getPrefetchTargets().size())) - NextPrefetchTargetIndex = -1; - } - + 
EmitPrefetchTargetSymbolIfNeeded(); // We must emit temporary symbol for the end of this basic block, if either // we have BBLabels enabled or if this basic blocks marks the end of a diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 1cf0b4964760b..fcf28247179ca 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -79,6 +79,7 @@ add_llvm_component_library(LLVMCodeGen IndirectBrExpandPass.cpp InitUndef.cpp InlineSpiller.cpp + InsertCodePrefetch.cpp InterferenceCache.cpp InterleavedAccessPass.cpp InterleavedLoadCombinePass.cpp diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp new file mode 100644 index 0000000000000..7cb52302ac7db --- /dev/null +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -0,0 +1,96 @@ +//===-- InsertCodePrefetch.cpp ---=========-----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Prefetch insertion pass implementation. +//===----------------------------------------------------------------------===// +/// Prefetch insertion pass. 
+//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/BasicBlockSectionUtils.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; +#define DEBUG_TYPE "prefetchinsertion" + +namespace { +class InsertCodePrefetch : public MachineFunctionPass { +public: + static char ID; + + InsertCodePrefetch() : MachineFunctionPass(ID) { + initializeInsertCodePrefetchPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "X86 Cide Prefetch Inserter Pass"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Identify basic blocks that need separate sections and prepare to emit them + /// accordingly. 
+ bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Implementation +//===----------------------------------------------------------------------===// + +char InsertCodePrefetch::ID = 0; +INITIALIZE_PASS_BEGIN( + InsertCodePrefetch, DEBUG_TYPE, + "Reads prefetch", true, + false) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) +INITIALIZE_PASS_END( + InsertCodePrefetch, DEBUG_TYPE, + "Reads prefetch", true, + false) + +bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { + assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List && + "BB Sections list not enabled!"); + if (hasInstrProfHashMismatch(MF)) + return false; + SmallVector PrefetchTargets = + getAnalysis() + .getPrefetchTargetsForFunction(MF.getName()); + DenseMap> PrefetchTargetsByBBID; + for (const auto &Target: PrefetchTargets) + PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex); + for (auto &MBB: MF) { + auto R = PrefetchTargetsByBBID.find(*MBB.getBBID()); + if (R == PrefetchTargetsByBBID.end()) continue; + MBB.setPrefetchTargetIndexes(R->second); + } + + return false; +} + +void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineFunctionPass *llvm::createInsertCodePrefetchPass() { + return new InsertCodePrefetch(); +} diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 19b218a2879dd..ba0b025167307 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -90,19 +90,6 @@ MCSymbol *MachineBasicBlock::getSymbol() const { return CachedMCSymbol; } -MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const { - if (CallInstSymbols.size() <= CallInstNumber) { - const MachineFunction *MF = 
getParent(); - MCContext &Ctx = MF->getContext(); - CallInstSymbols.resize(CallInstNumber + 1); - CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol( - "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" + - Twine(CallInstNumber), - /*AlwaysEmit=*/true); - } - return CallInstSymbols[CallInstNumber]; -} - MCSymbol *MachineBasicBlock::getEHContSymbol() const { if (!CachedEHContMCSymbol) { const MachineFunction *MF = getParent(); diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index ceae0d29eea90..5334c5596d018 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1291,6 +1291,7 @@ void TargetPassConfig::addMachinePasses() { addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( TM->getBBSectionsFuncListBuf())); addPass(llvm::createBasicBlockPathCloningPass()); + addPass(llvm::createInsertCodePrefetchPass()); } addPass(llvm::createBasicBlockSectionsPass()); } From a08b65a6c93963ad84ae98820973fae245637ea2 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 00:10:36 +0000 Subject: [PATCH 06/24] clang-format. 
--- .../CodeGen/BasicBlockSectionsProfileReader.h | 6 +++-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 25 +++++++++++-------- .../BasicBlockSectionsProfileReader.cpp | 3 ++- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 23 ++++++++--------- 4 files changed, 31 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 1fd904d64ab9d..2b8ee578cd917 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -102,7 +102,8 @@ class BasicBlockSectionsProfileReader { SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; - SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; + SmallVector + getPrefetchTargetsForFunction(StringRef FuncName) const; private: StringRef getAliasName(StringRef FuncName) const { @@ -215,7 +216,8 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { SmallVector getPrefetchHintsForFunction(StringRef FuncName) const; - SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; + SmallVector + getPrefetchTargetsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and // then reads the profile for the matching functions. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 933fe6f7d177f..4368cd4d256c9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -18,7 +18,6 @@ #include "WasmException.h" #include "WinCFGuard.h" #include "WinException.h" -#include "llvm/Support/SMLoc.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/BitmaskEnum.h" @@ -120,6 +119,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/VCSRevision.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" @@ -1983,7 +1983,7 @@ void AsmPrinter::emitFunctionBody() { FunctionCallGraphInfo FuncCGInfo; const auto &CallSitesInfoMap = MF->getCallSitesInfo(); - for (auto &MBB : *MF) { + for (auto &MBB : *MF) { // Print a label for the basic block. emitBasicBlockStart(MBB); DenseMap MnemonicCounts; @@ -1992,15 +1992,18 @@ void AsmPrinter::emitFunctionBody() { auto PrefetchTargetIt = PrefetchTargets.begin(); unsigned NumCalls = 0; auto EmitPrefetchTargetSymbolIfNeeded = [&]() { - if (PrefetchTargetIt == PrefetchTargets.end() || NumCalls < *PrefetchTargetIt) + if (PrefetchTargetIt == PrefetchTargets.end() || + NumCalls < *PrefetchTargetIt) return; MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( - Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + - Twine("_") + - utostr(*PrefetchTargetIt)); - OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? 
MCSA_Weak : MCSA_Global); - OutStreamer->emitLabel(PrefetchTargetSymbol); - ++PrefetchTargetIt; + Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + + utostr(MBB.getBBID()->BaseID) + Twine("_") + + utostr(*PrefetchTargetIt)); + OutStreamer->emitSymbolAttribute( + PrefetchTargetSymbol, + MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global); + OutStreamer->emitLabel(PrefetchTargetSymbol); + ++PrefetchTargetIt; }; for (auto &MI : MBB) { @@ -2118,7 +2121,7 @@ void AsmPrinter::emitFunctionBody() { break; } default: - emitInstruction(&MI); + emitInstruction(&MI); auto CountInstruction = [&](const MachineInstr &MI) { // Skip Meta instructions inside bundles. @@ -2144,7 +2147,7 @@ void AsmPrinter::emitFunctionBody() { if (MI.isCall()) { if (MF->getTarget().Options.BBAddrMap) - OutStreamer->emitLabel(createCallsiteEndSymbol(MBB)); + OutStreamer->emitLabel(createCallsiteEndSymbol(MBB)); ++NumCalls; } diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index c4784a6039c09..9b54dd6803cf6 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -337,7 +337,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex)) return createProfileParseError(Twine("unsigned integer expected: '") + PrefetchTargetStr[1]); - FI->second.PrefetchTargets.push_back(BBPosition{*TargetBBID, static_cast(TargetCallsiteIndex)}); + FI->second.PrefetchTargets.push_back( + BBPosition{*TargetBBID, static_cast(TargetCallsiteIndex)}); continue; } default: diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 7cb52302ac7db..91cb6e599215d 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -39,7 +39,9 @@ class InsertCodePrefetch : public MachineFunctionPass { 
initializeInsertCodePrefetchPass(*PassRegistry::getPassRegistry()); } - StringRef getPassName() const override { return "X86 Cide Prefetch Inserter Pass"; } + StringRef getPassName() const override { + return "X86 Cide Prefetch Inserter Pass"; + } void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -55,15 +57,11 @@ class InsertCodePrefetch : public MachineFunctionPass { //===----------------------------------------------------------------------===// char InsertCodePrefetch::ID = 0; -INITIALIZE_PASS_BEGIN( - InsertCodePrefetch, DEBUG_TYPE, - "Reads prefetch", true, - false) +INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true, + false) INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) -INITIALIZE_PASS_END( - InsertCodePrefetch, DEBUG_TYPE, - "Reads prefetch", true, - false) +INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true, + false) bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List && @@ -74,11 +72,12 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { getAnalysis() .getPrefetchTargetsForFunction(MF.getName()); DenseMap> PrefetchTargetsByBBID; - for (const auto &Target: PrefetchTargets) + for (const auto &Target : PrefetchTargets) PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex); - for (auto &MBB: MF) { + for (auto &MBB : MF) { auto R = PrefetchTargetsByBBID.find(*MBB.getBBID()); - if (R == PrefetchTargetsByBBID.end()) continue; + if (R == PrefetchTargetsByBBID.end()) + continue; MBB.setPrefetchTargetIndexes(R->second); } From d988a3c374d6f3212ccb4081a9321279d54ad92b Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 03:46:13 +0000 Subject: [PATCH 07/24] Fix the prefetch test. 
--- llvm/test/CodeGen/X86/prefetch-symbols.ll | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/prefetch-symbols.ll index 979db7942ff2c..3eb91dfdabd27 100644 --- a/llvm/test/CodeGen/X86/prefetch-symbols.ll +++ b/llvm/test/CodeGen/X86/prefetch-symbols.ll @@ -1,12 +1,14 @@ -;; Check that specifying the function in the basic block sections profile -;; without any other directives is a noop. +;; Check prefetch directives in basic block section profiles. ;; ;; Specify the bb sections profile: ; RUN: echo 'v1' > %t ; RUN: echo 'f _Z3foob' >> %t ; RUN: echo 't 0@0' >> %t +; RUN: echo 't 1@0' >> %t +; RUN: echo 't 1@1' >> %t +; RUN: echo 't 2@1' >> %t ;; -; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck +; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s define i32 @_Z3foob(i1 zeroext %0) nounwind { %2 = alloca i32, align 4 @@ -18,16 +20,27 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind { %7 = zext i1 %6 to i32 %8 = icmp sgt i32 %7, 0 br i1 %8, label %9, label %11 +; CHECK: _Z3foob: +; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_0_0 +; CHECK-NEXT: __llvm_prefetch_target__Z3foob_0_0: 9: ; preds = %1 %10 = call i32 @_Z3barv() store i32 %10, ptr %2, align 4 br label %13 +; CHECK: .globl __llvm_prefetch_target__Z3foob_1_0 +; CHECK-NEXT: __llvm_prefetch_target__Z3foob_1_0: +; CHECK-NEXT: callq _Z3barv@PLT +; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_1_1 +; CHECK-NEXT: __llvm_prefetch_target__Z3foob_1_1: 11: ; preds = %1 %12 = call i32 @_Z3bazv() store i32 %12, ptr %2, align 4 br label %13 +; CHECK: callq _Z3bazv@PLT +; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_2_1 +; CHECK-NEXT: __llvm_prefetch_target__Z3foob_2_1: 13: ; preds = %11, %9 %14 = load i32, ptr %2, align 4 @@ -36,7 +49,3 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind { declare 
i32 @_Z3barv() #1 declare i32 @_Z3bazv() #1 - - -; CHECK: _Z3foob -; CHECK: llvm_prefetch_target From 40084459b2edf0ba4d46f2c958856c33bca19d54 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 03:48:53 +0000 Subject: [PATCH 08/24] Rename the test. --- ...{prefetch-symbols.ll => basic-block-sections-code-prefetch.ll} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llvm/test/CodeGen/X86/{prefetch-symbols.ll => basic-block-sections-code-prefetch.ll} (100%) diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll similarity index 100% rename from llvm/test/CodeGen/X86/prefetch-symbols.ll rename to llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll From e3b501f198f0640b7c43d73c81df74185532098e Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 06:59:43 +0000 Subject: [PATCH 09/24] Remove unrelated changes. --- .../llvm/CodeGen/BasicBlockSectionsProfileReader.h | 12 ------------ llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 12 ------------ 2 files changed, 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 2b8ee578cd917..801588509d340 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -47,12 +47,6 @@ struct BBPosition { unsigned CallsiteIndex; }; -struct PrefetchHint { - BBPosition SitePosition; - StringRef TargetFunctionName; - BBPosition TargetPosition; -}; - // This represents the raw input profile for one function. struct FunctionPathAndClusterInfo { // BB Cluster information specified by `UniqueBBID`s. @@ -61,7 +55,6 @@ struct FunctionPathAndClusterInfo { // the edge a -> b (a is not cloned). The index of the path in this vector // determines the `UniqueBBID::CloneID` of the cloned blocks in that path. 
SmallVector> ClonePaths; - SmallVector PrefetchHints; SmallVector PrefetchTargets; // Node counts for each basic block. DenseMap NodeCounts; @@ -99,9 +92,6 @@ class BasicBlockSectionsProfileReader { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &SinkBBID) const; - SmallVector - getPrefetchHintsForFunction(StringRef FuncName) const; - SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; @@ -213,8 +203,6 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &DestBBID) const; - SmallVector - getPrefetchHintsForFunction(StringRef FuncName) const; SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 9b54dd6803cf6..5b12c85f7eeef 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -93,12 +93,6 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount( return EdgeIt->second; } -SmallVector -BasicBlockSectionsProfileReader::getPrefetchHintsForFunction( - StringRef FuncName) const { - return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints; -} - SmallVector BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( StringRef FuncName) const { @@ -547,12 +541,6 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount( return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID); } -SmallVector -BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction( - StringRef FuncName) const { - return BBSPR.getPrefetchHintsForFunction(FuncName); -} - SmallVector BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction( StringRef FuncName) const { From 715f1b8504521ac749324db8d70c7a98d8ddb2b3 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 
13 Nov 2025 18:58:09 +0000 Subject: [PATCH 10/24] Add some comments. --- .../CodeGen/BasicBlockSectionsProfileReader.h | 19 ++++++++++---- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 -- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 26 +++++++++---------- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 801588509d340..784bf8dd8f2a9 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -42,9 +42,14 @@ struct BBClusterInfo { unsigned PositionInCluster; }; -struct BBPosition { +// Assuming a block is split into subblocks across its callsites, this struct +// uniquely identifies the subblock in block `BBID` which starts from right after call number `SubblockIndex` (or +// the beginning of the block if `SubblockIndex` is zero) to the call number +// `SubblockIndex+1` (or the end of the block if there are are `SubblockIndex` +// calls in the basic block). +struct SubblockID { UniqueBBID BBID; - unsigned CallsiteIndex; + unsigned SubblockIndex; }; // This represents the raw input profile for one function. @@ -55,7 +60,9 @@ struct FunctionPathAndClusterInfo { // the edge a -> b (a is not cloned). The index of the path in this vector // determines the `UniqueBBID::CloneID` of the cloned blocks in that path. SmallVector> ClonePaths; - SmallVector PrefetchTargets; + // Code prefetch targets, specified by the subblock ID of which beginning must + // be targetted for prefetching. + SmallVector PrefetchTargets; // Node counts for each basic block. DenseMap NodeCounts; // Edge counts for each edge. 
@@ -92,7 +99,9 @@ class BasicBlockSectionsProfileReader { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &SinkBBID) const; - SmallVector + // Returns the prefetch targets (identified by their containing subblocks) for + // function `FuncName`. + SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; private: @@ -204,7 +213,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &DestBBID) const; - SmallVector + SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 4368cd4d256c9..2d6f6687fe456 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -119,7 +119,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" -#include "llvm/Support/SMLoc.h" #include "llvm/Support/VCSRevision.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" @@ -1982,7 +1981,6 @@ void AsmPrinter::emitFunctionBody() { FunctionCallGraphInfo FuncCGInfo; const auto &CallSitesInfoMap = MF->getCallSitesInfo(); - for (auto &MBB : *MF) { // Print a label for the basic block. emitBasicBlockStart(MBB); diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 91cb6e599215d..df3d63098390b 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -1,4 +1,4 @@ -//===-- InsertCodePrefetch.cpp ---=========-----------------------------===// +//===-- InsertCodePrefetch.cpp ---=========--------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. @@ -7,9 +7,14 @@ //===----------------------------------------------------------------------===// // /// \file -/// Prefetch insertion pass implementation. +/// Code Prefetch Insertion Pass. //===----------------------------------------------------------------------===// -/// Prefetch insertion pass. +/// This pass inserts code prefetch instructions according to the prefetch +/// directives in the basic block section profile. The target of a prefetch can +/// be the beginning of any dynamic basic block, that is the beginning of a +/// machine basic block, or immediately after a callsite. A global symbol will +/// be emitted at the position of the target so it can be addressed from the +/// prefetch instruction. //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" @@ -20,15 +25,11 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; -#define DEBUG_TYPE "prefetchinsertion" +#define DEBUG_TYPE "insert-code-prefetch" namespace { class InsertCodePrefetch : public MachineFunctionPass { @@ -40,13 +41,12 @@ class InsertCodePrefetch : public MachineFunctionPass { } StringRef getPassName() const override { - return "X86 Cide Prefetch Inserter Pass"; + return "Code Prefetch Inserter Pass"; } void getAnalysisUsage(AnalysisUsage &AU) const override; - /// Identify basic blocks that need separate sections and prepare to emit them - /// accordingly. + // Sets prefetch targets based on the bb section profile. 
bool runOnMachineFunction(MachineFunction &MF) override; }; @@ -57,10 +57,10 @@ class InsertCodePrefetch : public MachineFunctionPass { //===----------------------------------------------------------------------===// char InsertCodePrefetch::ID = 0; -INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true, +INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true, false) INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) -INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true, +INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true, false) bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { From a1e1e00d73a6fc8c7038a10cf0577823a74a66d6 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 18:58:22 +0000 Subject: [PATCH 11/24] clang-format. --- .../llvm/CodeGen/BasicBlockSectionsProfileReader.h | 8 ++++---- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 784bf8dd8f2a9..88f3e8b620bce 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -43,10 +43,10 @@ struct BBClusterInfo { }; // Assuming a block is split into subblocks across its callsites, this struct -// uniquely identifies the subblock in block `BBID` which starts from right after call number `SubblockIndex` (or -// the beginning of the block if `SubblockIndex` is zero) to the call number -// `SubblockIndex+1` (or the end of the block if there are are `SubblockIndex` -// calls in the basic block). 
+// uniquely identifies the subblock in block `BBID` which starts from right +// after call number `SubblockIndex` (or the beginning of the block if +// `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of +// the block if there are are `SubblockIndex` calls in the basic block). struct SubblockID { UniqueBBID BBID; unsigned SubblockIndex; diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index df3d63098390b..29afb46a317a8 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -57,11 +57,11 @@ class InsertCodePrefetch : public MachineFunctionPass { //===----------------------------------------------------------------------===// char InsertCodePrefetch::ID = 0; -INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true, - false) +INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", + true, false) INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) -INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true, - false) +INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", + true, false) bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List && From 717e6fee27b902a14f11d1acb373188a8e17445e Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 19:16:54 +0000 Subject: [PATCH 12/24] Add comments and rename functions. 
--- .../CodeGen/BasicBlockSectionsProfileReader.h | 2 +- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 15 ++++++++++----- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 13 +++++++++---- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 11 ++++++----- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 88f3e8b620bce..c2bc7559b9fb4 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -46,7 +46,7 @@ struct BBClusterInfo { // uniquely identifies the subblock in block `BBID` which starts from right // after call number `SubblockIndex` (or the beginning of the block if // `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of -// the block if there are are `SubblockIndex` calls in the basic block). +// the block if `SubblockIndex` is the last call in the block). struct SubblockID { UniqueBBID BBID; unsigned SubblockIndex; diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 4be008bbf4bf1..20427954d22e4 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -229,7 +229,12 @@ class MachineBasicBlock /// is only computed once and is cached. mutable MCSymbol *CachedMCSymbol = nullptr; - SmallVector PrefetchTargetIndexes; + /// Contains the subblock indices in this block that are targets of code prefetching. + /// The subblock indexed `i` specifies that region after the `i`th call (or the + /// beginning of the block if `i==0`) to before the`i+1`th callsite (or the + /// end of the block). The prefetch target is always the beginning of the + /// subblock. + SmallVector PrefetchTargetSubblockIndexes; /// Cached MCSymbol for this block (used if IsEHContTarget). 
mutable MCSymbol *CachedEHContMCSymbol = nullptr; @@ -712,12 +717,12 @@ class MachineBasicBlock std::optional getBBID() const { return BBID; } - const SmallVector &getPrefetchTargetIndexes() const { - return PrefetchTargetIndexes; + const SmallVector &getPrefetchTargetSubblockIndexes() const { + return PrefetchTargetSubblockIndexes; } - void setPrefetchTargetIndexes(const SmallVector &V) { - PrefetchTargetIndexes = V; + void setPrefetchTargetSubblockIndexes(const SmallVector &V) { + PrefetchTargetSubblockIndexes = V; } /// Returns the section ID of this basic block. diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 2d6f6687fe456..72cf557d51e03 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1986,17 +1986,20 @@ void AsmPrinter::emitFunctionBody() { emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - SmallVector PrefetchTargets = MBB.getPrefetchTargetIndexes(); + SmallVector PrefetchTargets = MBB.getPrefetchTargetSubblockIndexes(); auto PrefetchTargetIt = PrefetchTargets.begin(); unsigned NumCalls = 0; + // Helper to emit a symbol for the prefetch target and proceed to the next + // one. auto EmitPrefetchTargetSymbolIfNeeded = [&]() { - if (PrefetchTargetIt == PrefetchTargets.end() || - NumCalls < *PrefetchTargetIt) - return; + if (PrefetchTargetIt == PrefetchTargets.end()) return; + if (NumCalls < *PrefetchTargetIt) return; MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + Twine("_") + utostr(*PrefetchTargetIt)); + // If the function is weak-linkage it may be replaced by a strong version, + // in which case the prefetch targets should also be replaced. OutStreamer->emitSymbolAttribute( PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? 
MCSA_Weak : MCSA_Global); @@ -2159,6 +2162,8 @@ void AsmPrinter::emitFunctionBody() { for (auto &Handler : Handlers) Handler->endInstruction(); } + // If the block ends with a call, we may need to emit a prefetch target + // at the end. EmitPrefetchTargetSymbolIfNeeded(); // We must emit temporary symbol for the end of this basic block, if either diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 29afb46a317a8..e241ccbbee263 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -12,9 +12,9 @@ /// This pass inserts code prefetch instructions according to the prefetch /// directives in the basic block section profile. The target of a prefetch can /// be the beginning of any dynamic basic block, that is the beginning of a -/// machine basic block, or immediately after a callsite. A global symbol will -/// be emitted at the position of the target so it can be addressed from the -/// prefetch instruction. +/// machine basic block, or immediately after a callsite. A global symbol is +/// emitted at the position of the target so it can be addressed from the +/// prefetch instruction from any module. //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" @@ -68,6 +68,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { "BB Sections list not enabled!"); if (hasInstrProfHashMismatch(MF)) return false; + // Set each block's prefetch targets so AsmPrinter can emit a special symbol + // there. 
SmallVector PrefetchTargets = getAnalysis() .getPrefetchTargetsForFunction(MF.getName()); @@ -78,9 +80,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { auto R = PrefetchTargetsByBBID.find(*MBB.getBBID()); if (R == PrefetchTargetsByBBID.end()) continue; - MBB.setPrefetchTargetIndexes(R->second); + MBB.setPrefetchTargetSubblockIndexes(R->second); } - return false; } From 3605b0dddd7d66df9cd5b31d9d535f61fc8729fe Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 19:17:04 +0000 Subject: [PATCH 13/24] clang-format. --- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 10 +++++----- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 9 ++++++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 20427954d22e4..a13fcb2bb841d 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -229,11 +229,11 @@ class MachineBasicBlock /// is only computed once and is cached. mutable MCSymbol *CachedMCSymbol = nullptr; - /// Contains the subblock indices in this block that are targets of code prefetching. - /// The subblock indexed `i` specifies that region after the `i`th call (or the - /// beginning of the block if `i==0`) to before the`i+1`th callsite (or the - /// end of the block). The prefetch target is always the beginning of the - /// subblock. + /// Contains the subblock indices in this block that are targets of code + /// prefetching. The subblock indexed `i` specifies that region after the + /// `i`th call (or the beginning of the block if `i==0`) to before the`i+1`th + /// callsite (or the end of the block). The prefetch target is always the + /// beginning of the subblock. SmallVector PrefetchTargetSubblockIndexes; /// Cached MCSymbol for this block (used if IsEHContTarget). 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 72cf557d51e03..fb250c4b5308a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1986,14 +1986,17 @@ void AsmPrinter::emitFunctionBody() { emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - SmallVector PrefetchTargets = MBB.getPrefetchTargetSubblockIndexes(); + SmallVector PrefetchTargets = + MBB.getPrefetchTargetSubblockIndexes(); auto PrefetchTargetIt = PrefetchTargets.begin(); unsigned NumCalls = 0; // Helper to emit a symbol for the prefetch target and proceed to the next // one. auto EmitPrefetchTargetSymbolIfNeeded = [&]() { - if (PrefetchTargetIt == PrefetchTargets.end()) return; - if (NumCalls < *PrefetchTargetIt) return; + if (PrefetchTargetIt == PrefetchTargets.end()) + return; + if (NumCalls < *PrefetchTargetIt) + return; MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + Twine("_") + From 6408bd7070d47c10c92bae014e088e358f43ce99 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 19:34:26 +0000 Subject: [PATCH 14/24] Add optimization remarks for when prefetch targets cannot be mapped. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 ++++++++ llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 6 +++--- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index fb250c4b5308a..97234f3859ca7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2168,6 +2168,14 @@ void AsmPrinter::emitFunctionBody() { // If the block ends with a call, we may need to emit a prefetch target // at the end. 
EmitPrefetchTargetSymbolIfNeeded(); + if (PrefetchTargetIt != PrefetchTargets.end()) { + MachineOptimizationRemarkMissed R( + "insert-code-prefetch", "MissingPrefetchTarget", + MF->getFunction().getSubprogram(), &MBB); + R << "failed to map " + << ore::NV("NumMissedTargets", PrefetchTargets.end() - PrefetchTargetIt) + << " prefetch targets"; + } // We must emit temporary symbol for the end of this basic block, if either // we have BBLabels enabled or if this basic blocks marks the end of a diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 5b12c85f7eeef..9319854f53289 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -93,7 +93,7 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount( return EdgeIt->second; } -SmallVector +SmallVector BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( StringRef FuncName) const { return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)) @@ -332,7 +332,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { return createProfileParseError(Twine("unsigned integer expected: '") + PrefetchTargetStr[1]); FI->second.PrefetchTargets.push_back( - BBPosition{*TargetBBID, static_cast(TargetCallsiteIndex)}); + SubblockID{*TargetBBID, static_cast(TargetCallsiteIndex)}); continue; } default: @@ -541,7 +541,7 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount( return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID); } -SmallVector +SmallVector BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction( StringRef FuncName) const { return BBSPR.getPrefetchTargetsForFunction(FuncName); diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index e241ccbbee263..57037fd818479 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -70,12 +70,12 @@ bool 
InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { return false; // Set each block's prefetch targets so AsmPrinter can emit a special symbol // there. - SmallVector PrefetchTargets = + SmallVector PrefetchTargets = getAnalysis() .getPrefetchTargetsForFunction(MF.getName()); DenseMap> PrefetchTargetsByBBID; for (const auto &Target : PrefetchTargets) - PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex); + PrefetchTargetsByBBID[Target.BBID].push_back(Target.SubblockIndex); for (auto &MBB : MF) { auto R = PrefetchTargetsByBBID.find(*MBB.getBBID()); if (R == PrefetchTargetsByBBID.end()) From a06cb9d59cdc6292e2e3e4a3c6955b771f9f690a Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 13 Nov 2025 22:01:34 +0000 Subject: [PATCH 15/24] Expand test to weak symbols. --- .../X86/basic-block-sections-code-prefetch.ll | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll index 3eb91dfdabd27..35e25952aa2f8 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll @@ -7,6 +7,9 @@ ; RUN: echo 't 1@0' >> %t ; RUN: echo 't 1@1' >> %t ; RUN: echo 't 2@1' >> %t +; RUN: echo 'f _Z3barv' >> %t +; RUN: echo 't 0@0' >> %t +; RUN: echo 't 21@1' >> %t ;; ; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s @@ -47,5 +50,12 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind { ret i32 %14 } -declare i32 @_Z3barv() #1 +define weak i32 @_Z3barv() nounwind { + %1 = call i32 @_Z3bazv() + ret i32 %1 +; CHECK: _Z3barv: +; CHECK-NEXT: .weak __llvm_prefetch_target__Z3barv_0_0 +; CHECK-NEXT: __llvm_prefetch_target__Z3barv_0_0: +} + declare i32 @_Z3bazv() #1 From ceefc56d610004bebd5515a7bd88e6a6117aee48 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Mon, 17 Nov 2025 21:56:48 +0000 
Subject: [PATCH 16/24] Change prefetch directive format to use , instead of @ --- llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 7 ++++--- .../X86/basic-block-sections-code-prefetch.ll | 12 ++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 9319854f53289..3a37982387f59 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -320,10 +320,11 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { // past-the-end element. if (FI == ProgramPathAndClusterInfo.end()) continue; - assert(Values.size() == 1); SmallVector PrefetchTargetStr; - Values[0].split(PrefetchTargetStr, '@'); - assert(PrefetchTargetStr.size() == 2); + Values[0].split(PrefetchTargetStr, ','); + if (PrefetchTargetStr.size() != 2) + return createProfileParseError( + Twine("Prefetch target target expected: ") + Value); auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll index 35e25952aa2f8..280bfef1c79b6 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll @@ -3,13 +3,13 @@ ;; Specify the bb sections profile: ; RUN: echo 'v1' > %t ; RUN: echo 'f _Z3foob' >> %t -; RUN: echo 't 0@0' >> %t -; RUN: echo 't 1@0' >> %t -; RUN: echo 't 1@1' >> %t -; RUN: echo 't 2@1' >> %t +; RUN: echo 't 0,0' >> %t +; RUN: echo 't 1,0' >> %t +; RUN: echo 't 1,1' >> %t +; RUN: echo 't 2,1' >> %t ; RUN: echo 'f _Z3barv' >> %t -; RUN: echo 't 0@0' >> %t -; RUN: echo 't 21@1' >> %t +; RUN: echo 't 0,0' >> %t +; RUN: echo 't 21,1' >> %t ;; ; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | 
FileCheck %s From 639efd746fb00a06712ec7cb5afea30b233c1254 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Mon, 17 Nov 2025 22:05:27 +0000 Subject: [PATCH 17/24] Fix the error. --- llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 3a37982387f59..05b6c1c3917e5 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -324,7 +324,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { Values[0].split(PrefetchTargetStr, ','); if (PrefetchTargetStr.size() != 2) return createProfileParseError( - Twine("Prefetch target target expected: ") + Value); + Twine("Prefetch target target expected: ") + Values[0]); auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); From cc4e3333f3d27683817a12ce4b3987263fb82bb7 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 06:42:17 +0000 Subject: [PATCH 18/24] Remove optimization remarks. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 97234f3859ca7..fb250c4b5308a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2168,14 +2168,6 @@ void AsmPrinter::emitFunctionBody() { // If the block ends with a call, we may need to emit a prefetch target // at the end. 
EmitPrefetchTargetSymbolIfNeeded(); - if (PrefetchTargetIt != PrefetchTargets.end()) { - MachineOptimizationRemarkMissed R( - "insert-code-prefetch", "MissingPrefetchTarget", - MF->getFunction().getSubprogram(), &MBB); - R << "failed to map " - << ore::NV("NumMissedTargets", PrefetchTargets.end() - PrefetchTargetIt) - << " prefetch targets"; - } // We must emit temporary symbol for the end of this basic block, if either // we have BBLabels enabled or if this basic blocks marks the end of a From 6d8bdb19d705e04cae2169a08500ab52a5bfe6cf Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 20:38:00 +0000 Subject: [PATCH 19/24] Refine and polish. --- .../CodeGen/BasicBlockSectionsProfileReader.h | 25 +++++------ llvm/include/llvm/CodeGen/MachineBasicBlock.h | 18 ++++---- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 40 ++++++++--------- .../BasicBlockSectionsProfileReader.cpp | 45 +++++++++++++++---- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 8 ++-- .../X86/basic-block-sections-code-prefetch.ll | 5 +++ 6 files changed, 84 insertions(+), 57 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index c2bc7559b9fb4..20e1b7ab68bbe 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -42,14 +42,11 @@ struct BBClusterInfo { unsigned PositionInCluster; }; -// Assuming a block is split into subblocks across its callsites, this struct -// uniquely identifies the subblock in block `BBID` which starts from right -// after call number `SubblockIndex` (or the beginning of the block if -// `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of -// the block if `SubblockIndex` is the last call in the block). 
-struct SubblockID { +// The prefetch symbol is emitted immediately after the call of the given index +// in block `BBID` (or at the beginning of the block if CallsiteIndex is -1). +struct CallsiteID { UniqueBBID BBID; - unsigned SubblockIndex; + int CallsiteIndex; }; // This represents the raw input profile for one function. @@ -60,9 +57,9 @@ struct FunctionPathAndClusterInfo { // the edge a -> b (a is not cloned). The index of the path in this vector // determines the `UniqueBBID::CloneID` of the cloned blocks in that path. SmallVector> ClonePaths; - // Code prefetch targets, specified by the subblock ID of which beginning must - // be targetted for prefetching. - SmallVector PrefetchTargets; + // Code prefetch targets, specified by the callsite ID immediately after + // which beginning must be targeted for prefetching. + SmallVector PrefetchTargets; // Node counts for each basic block. DenseMap NodeCounts; // Edge counts for each edge. @@ -99,9 +96,9 @@ class BasicBlockSectionsProfileReader { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &SinkBBID) const; - // Returns the prefetch targets (identified by their containing subblocks) for - // function `FuncName`. - SmallVector + // Returns the prefetch targets (identified by their containing callsite IDs) + // for function `FuncName`.
+ SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; private: @@ -213,7 +210,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &DestBBID) const; - SmallVector + SmallVector getPrefetchTargetsForFunction(StringRef FuncName) const; // Initializes the FunctionNameToDIFilename map for the current module and diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index a13fcb2bb841d..6f48e36b55660 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -229,12 +229,10 @@ class MachineBasicBlock /// is only computed once and is cached. mutable MCSymbol *CachedMCSymbol = nullptr; - /// Contains the subblock indices in this block that are targets of code - /// prefetching. The subblock indexed `i` specifies that region after the - /// `i`th call (or the beginning of the block if `i==0`) to before the`i+1`th - /// callsite (or the end of the block). The prefetch target is always the - /// beginning of the subblock. - SmallVector PrefetchTargetSubblockIndexes; + /// Contains the callsite indices in this block that are targets of code + /// prefetching. The index `i` specifies the `i`th call, with `-1` + /// representing the beginning of the block. + SmallVector PrefetchTargetCallsiteIndexes; /// Cached MCSymbol for this block (used if IsEHContTarget). 
mutable MCSymbol *CachedEHContMCSymbol = nullptr; @@ -717,12 +715,12 @@ class MachineBasicBlock std::optional getBBID() const { return BBID; } - const SmallVector &getPrefetchTargetSubblockIndexes() const { - return PrefetchTargetSubblockIndexes; + const SmallVector &getPrefetchTargetCallsiteIndexes() const { + return PrefetchTargetCallsiteIndexes; } - void setPrefetchTargetSubblockIndexes(const SmallVector &V) { - PrefetchTargetSubblockIndexes = V; + void setPrefetchTargetCallsiteIndexes(const SmallVector &V) { + PrefetchTargetCallsiteIndexes = V; } /// Returns the section ID of this basic block. diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index fb250c4b5308a..1fb1bd51f6d31 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1986,32 +1986,31 @@ void AsmPrinter::emitFunctionBody() { emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - SmallVector PrefetchTargets = - MBB.getPrefetchTargetSubblockIndexes(); + SmallVector PrefetchTargets = + MBB.getPrefetchTargetCallsiteIndexes(); auto PrefetchTargetIt = PrefetchTargets.begin(); - unsigned NumCalls = 0; + int CurrentCallsiteIndex = -1; // Helper to emit a symbol for the prefetch target and proceed to the next // one. auto EmitPrefetchTargetSymbolIfNeeded = [&]() { - if (PrefetchTargetIt == PrefetchTargets.end()) - return; - if (NumCalls < *PrefetchTargetIt) - return; - MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( - Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + - utostr(MBB.getBBID()->BaseID) + Twine("_") + - utostr(*PrefetchTargetIt)); - // If the function is weak-linkage it may be replaced by a strong version, - // in which case the prefetch targets should also be replaced. - OutStreamer->emitSymbolAttribute( - PrefetchTargetSymbol, - MF->getFunction().isWeakForLinker() ? 
MCSA_Weak : MCSA_Global); - OutStreamer->emitLabel(PrefetchTargetSymbol); - ++PrefetchTargetIt; + if (PrefetchTargetIt != PrefetchTargets.end() && + *PrefetchTargetIt == CurrentCallsiteIndex) { + MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( + Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + + utostr(MBB.getBBID()->BaseID) + Twine("_") + utostr(static_cast(*PrefetchTargetIt + 1))); + // If the function is weak-linkage it may be replaced by a strong + // version, in which case the prefetch targets should also be replaced. + OutStreamer->emitSymbolAttribute( + PrefetchTargetSymbol, + MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global); + OutStreamer->emitLabel(PrefetchTargetSymbol); + ++PrefetchTargetIt; + } }; for (auto &MI : MBB) { EmitPrefetchTargetSymbolIfNeeded(); + // Print the assembly for the instruction. if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && !MI.isDebugInstr()) { @@ -2152,7 +2151,7 @@ void AsmPrinter::emitFunctionBody() { if (MI.isCall()) { if (MF->getTarget().Options.BBAddrMap) OutStreamer->emitLabel(createCallsiteEndSymbol(MBB)); - ++NumCalls; + CurrentCallsiteIndex++; } if (TM.Options.EmitCallGraphSection && MI.isCall()) @@ -2165,8 +2164,7 @@ void AsmPrinter::emitFunctionBody() { for (auto &Handler : Handlers) Handler->endInstruction(); } - // If the block ends with a call, we may need to emit a prefetch target - // at the end. + // Emit the last prefetch target in case the last instruction was a call. 
EmitPrefetchTargetSymbolIfNeeded(); // We must emit temporary symbol for the end of this basic block, if either diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 05b6c1c3917e5..0f440d6a53612 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -93,7 +93,7 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount( return EdgeIt->second; } -SmallVector +SmallVector BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( StringRef FuncName) const { return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)) @@ -155,6 +155,35 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( // +-->: 5 : // .... // **************************************************************************** +// This profile can also specify prefetch targets (starting with 't') which +// instruct the compiler to emit a prefetch symbol for the given target. +// A prefetch target is specified by a pair "bbid,subblock_index" where +// bbid specifies the target basic block and subblock_index is a zero-based +// index. Subblock 0 refers to the region at the beginning of the block up to +// the first callsite. Subblock `i > 0` refers to the region immediately after +// the `i`-th callsite up to the `i+1`-th callsite (or the end of the block). +// The prefetch target is always emitted at the beginning of the subblock. +// This is the beginning of the basic block for `i = 0` and immediately after +// the `i`-th call for every `i > 0`. +// +// Example: A basic block in function "foo" with BBID 10 and two call instructions (call_A, call_B). +// This block is conceptually split into subblocks, with the prefetch target +// symbol emitted at the beginning of each subblock.
+// +// +----------------------------------+ +// | __llvm_prefetch_target_foo_10_0: | <- Subblock 0 (before call_A) +// | Instruction 1 | +// | Instruction 2 | +// | call_A (Callsite 0) | +// | __llvm_prefetch_target_foo_10_1: | <--- Subblock 1 (after call_A, +// | | before call_B) +// | Instruction 3 | +// | call_B (Callsite 1) | +// | __llvm_prefetch_target_foo_10_2: | <--- Subblock 2 (after call_B, +// | | before call_C) +// | Instruction 4 | +// +----------------------------------+ +// Error BasicBlockSectionsProfileReader::ReadV1Profile() { auto FI = ProgramPathAndClusterInfo.end(); @@ -315,7 +344,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { } continue; } - case 't': { // Prefetch target specifier. + case 't': { // Callsite target specifier. // Skip the profile when we the profile iterator (FI) refers to the // past-the-end element. if (FI == ProgramPathAndClusterInfo.end()) @@ -324,16 +353,16 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { Values[0].split(PrefetchTargetStr, ','); if (PrefetchTargetStr.size() != 2) return createProfileParseError( - Twine("Prefetch target target expected: ") + Values[0]); + Twine("Callsite target expected: ") + Values[0]); auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); - unsigned long long TargetCallsiteIndex; - if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex)) - return createProfileParseError(Twine("unsigned integer expected: '") + + long long CallsiteIndex; + if (getAsSignedInteger(PrefetchTargetStr[1], 10, CallsiteIndex)) + return createProfileParseError(Twine("signed integer expected: '") + PrefetchTargetStr[1]); FI->second.PrefetchTargets.push_back( - SubblockID{*TargetBBID, static_cast(TargetCallsiteIndex)}); + CallsiteID{*TargetBBID, static_cast(CallsiteIndex - 1)}); continue; } default: @@ -542,7 +571,7 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount( return BBSPR.getEdgeCount(FuncName, 
SrcBBID, SinkBBID); } -SmallVector +SmallVector BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction( StringRef FuncName) const { return BBSPR.getPrefetchTargetsForFunction(FuncName); diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 57037fd818479..d4c25c22417b5 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -70,17 +70,17 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { return false; // Set each block's prefetch targets so AsmPrinter can emit a special symbol // there. - SmallVector PrefetchTargets = + SmallVector PrefetchTargets = getAnalysis() .getPrefetchTargetsForFunction(MF.getName()); - DenseMap> PrefetchTargetsByBBID; + DenseMap> PrefetchTargetsByBBID; for (const auto &Target : PrefetchTargets) - PrefetchTargetsByBBID[Target.BBID].push_back(Target.SubblockIndex); + PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex); for (auto &MBB : MF) { auto R = PrefetchTargetsByBBID.find(*MBB.getBBID()); if (R == PrefetchTargetsByBBID.end()) continue; - MBB.setPrefetchTargetSubblockIndexes(R->second); + MBB.setPrefetchTargetCallsiteIndexes(R->second); } return false; } diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll index 280bfef1c79b6..8e23a30e273b5 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll @@ -7,6 +7,7 @@ ; RUN: echo 't 1,0' >> %t ; RUN: echo 't 1,1' >> %t ; RUN: echo 't 2,1' >> %t +; RUN: echo 't 4,0' >> %t ; RUN: echo 'f _Z3barv' >> %t ; RUN: echo 't 0,0' >> %t ; RUN: echo 't 21,1' >> %t @@ -48,6 +49,10 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind { 13: ; preds = %11, %9 %14 = load i32, ptr %2, align 4 ret i32 %14 +; CHECK: .LBB0_3: +; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_4_0 +; CHECK-NEXT: 
__llvm_prefetch_target__Z3foob_4_0: + } define weak i32 @_Z3barv() nounwind { From d93a5ecfd7cf0603f6f1c30a6b37d487251f5c88 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 20:38:13 +0000 Subject: [PATCH 20/24] clang-format. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 ++++---- llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 11 ++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 1fb1bd51f6d31..f87896c03536a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1986,18 +1986,18 @@ void AsmPrinter::emitFunctionBody() { emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - SmallVector PrefetchTargets = - MBB.getPrefetchTargetCallsiteIndexes(); + SmallVector PrefetchTargets = MBB.getPrefetchTargetCallsiteIndexes(); auto PrefetchTargetIt = PrefetchTargets.begin(); int CurrentCallsiteIndex = -1; // Helper to emit a symbol for the prefetch target and proceed to the next // one. auto EmitPrefetchTargetSymbolIfNeeded = [&]() { if (PrefetchTargetIt != PrefetchTargets.end() && - *PrefetchTargetIt == CurrentCallsiteIndex) { + *PrefetchTargetIt == CurrentCallsiteIndex) { MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + - utostr(MBB.getBBID()->BaseID) + Twine("_") + utostr(static_cast(*PrefetchTargetIt + 1))); + utostr(MBB.getBBID()->BaseID) + Twine("_") + + utostr(static_cast(*PrefetchTargetIt + 1))); // If the function is weak-linkage it may be replaced by a strong // version, in which case the prefetch targets should also be replaced. 
OutStreamer->emitSymbolAttribute( diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 0f440d6a53612..708080d7bbf0a 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -166,9 +166,10 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction( // This is the beginning of the basic block for `i = 0` and immediately after // the `i`-th call for every `i > 0`. // -// Example: A basic block in function "foo" with BBID 10 and two call instructions (call_A, call_B). -// This block is conceptually split into subblocks, with the prefetch target -// symbol emitted at the beginning of each subblock. +// Example: A basic block in function "foo" with BBID 10 and two call +// instructions (call_A, call_B). This block is conceptually split into +// subblocks, with the prefetch target symbol emitted at the beginning of each +// subblock. // // +----------------------------------+ // | __llvm_prefetch_target_foo_10_0: | <- Subblock 0 (before call_A) @@ -352,8 +353,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { SmallVector PrefetchTargetStr; Values[0].split(PrefetchTargetStr, ','); if (PrefetchTargetStr.size() != 2) - return createProfileParseError( - Twine("Callsite target expected: ") + Values[0]); + return createProfileParseError(Twine("Callsite target expected: ") + + Values[0]); auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); From 7cb4f6be1f9a0dce4592e6db859fb84773bb5e06 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 21:00:00 +0000 Subject: [PATCH 21/24] Change to using unsigned values for CallsiteIndex --- .../llvm/CodeGen/BasicBlockSectionsProfileReader.h | 7 ++++--- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 11 ++++++----- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 ++++---- 
llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 6 +++--- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 7 ++++++- 5 files changed, 23 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 20e1b7ab68bbe..161a810298d69 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -42,11 +42,12 @@ struct BBClusterInfo { unsigned PositionInCluster; }; -// The prefetch symbol is emitted immediately after the call of the given index -// in block `BBID` (or at the beginning of the block if CallsiteIndex is -1). +// The prefetch symbol is emitted immediately after the call of the given index, +// in block `BBID` (First call has an index of 1). Zero callsite index means the +// start of the block. struct CallsiteID { UniqueBBID BBID; - int CallsiteIndex; + unsigned CallsiteIndex; }; // This represents the raw input profile for one function. diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 6f48e36b55660..48248bd0461bc 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -230,9 +230,10 @@ class MachineBasicBlock mutable MCSymbol *CachedMCSymbol = nullptr; /// Contains the callsite indices in this block that are targets of code - /// prefetching. The index `i` specifies the `i`th call, with `-1` - /// representing the beginning of the block. - SmallVector PrefetchTargetCallsiteIndexes; + /// prefetching. The index `i` specifies the `i`th call, with zero + /// representing the beginning of the block and `1` representing the first call. + /// Must be in ascending order and without duplicates. + SmallVector PrefetchTargetCallsiteIndexes; /// Cached MCSymbol for this block (used if IsEHContTarget).
mutable MCSymbol *CachedEHContMCSymbol = nullptr; @@ -715,11 +716,11 @@ class MachineBasicBlock std::optional getBBID() const { return BBID; } - const SmallVector &getPrefetchTargetCallsiteIndexes() const { + const SmallVector &getPrefetchTargetCallsiteIndexes() const { return PrefetchTargetCallsiteIndexes; } - void setPrefetchTargetCallsiteIndexes(const SmallVector &V) { + void setPrefetchTargetCallsiteIndexes(const SmallVector &V) { PrefetchTargetCallsiteIndexes = V; } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index f87896c03536a..20e3c63b163c0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1986,14 +1986,14 @@ void AsmPrinter::emitFunctionBody() { emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - SmallVector PrefetchTargets = MBB.getPrefetchTargetCallsiteIndexes(); + SmallVector PrefetchTargets = MBB.getPrefetchTargetCallsiteIndexes(); auto PrefetchTargetIt = PrefetchTargets.begin(); - int CurrentCallsiteIndex = -1; + unsigned LastCallsiteIndex = 0; // Helper to emit a symbol for the prefetch target and proceed to the next // one. 
auto EmitPrefetchTargetSymbolIfNeeded = [&]() { if (PrefetchTargetIt != PrefetchTargets.end() && - *PrefetchTargetIt == CurrentCallsiteIndex) { + *PrefetchTargetIt == LastCallsiteIndex) { MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + Twine("_") + @@ -2151,7 +2151,7 @@ void AsmPrinter::emitFunctionBody() { if (MI.isCall()) { if (MF->getTarget().Options.BBAddrMap) OutStreamer->emitLabel(createCallsiteEndSymbol(MBB)); - CurrentCallsiteIndex++; + LastCallsiteIndex++; } if (TM.Options.EmitCallGraphSection && MI.isCall()) diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 708080d7bbf0a..8762f982f72ea 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -358,12 +358,12 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]); if (!TargetBBID) return TargetBBID.takeError(); - long long CallsiteIndex; - if (getAsSignedInteger(PrefetchTargetStr[1], 10, CallsiteIndex)) + unsigned long long CallsiteIndex; + if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, CallsiteIndex)) return createProfileParseError(Twine("signed integer expected: '") + PrefetchTargetStr[1]); FI->second.PrefetchTargets.push_back( - CallsiteID{*TargetBBID, static_cast(CallsiteIndex - 1)}); + CallsiteID{*TargetBBID, static_cast(CallsiteIndex)}); continue; } default: diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index d4c25c22417b5..5c3055f4ca1ea 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -73,9 +73,14 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { SmallVector PrefetchTargets = getAnalysis() .getPrefetchTargetsForFunction(MF.getName()); - DenseMap> 
PrefetchTargetsByBBID; + DenseMap> PrefetchTargetsByBBID; for (const auto &Target : PrefetchTargets) PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex); + // Sort and uniquify the callsite indices for every block. + for (auto &[K, V]: PrefetchTargetsByBBID) { + llvm::sort(V); + V.erase(llvm::unique(V), V.end()); + } for (auto &MBB : MF) { auto R = PrefetchTargetsByBBID.find(*MBB.getBBID()); if (R == PrefetchTargetsByBBID.end()) From 9fdf7d0b90dcba196bfd3e5de62d1b901a797939 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 21:00:12 +0000 Subject: [PATCH 22/24] clang-format. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 3 ++- llvm/lib/CodeGen/InsertCodePrefetch.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 20e3c63b163c0..d9b2450cb8a6f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1986,7 +1986,8 @@ void AsmPrinter::emitFunctionBody() { emitBasicBlockStart(MBB); DenseMap MnemonicCounts; - SmallVector PrefetchTargets = MBB.getPrefetchTargetCallsiteIndexes(); + SmallVector PrefetchTargets = + MBB.getPrefetchTargetCallsiteIndexes(); auto PrefetchTargetIt = PrefetchTargets.begin(); unsigned LastCallsiteIndex = 0; // Helper to emit a symbol for the prefetch target and proceed to the next diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp index 5c3055f4ca1ea..44864cbc99c52 100644 --- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp +++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp @@ -77,7 +77,7 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) { for (const auto &Target : PrefetchTargets) PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex); // Sort and uniquify the callsite indices for every block. 
- for (auto &[K, V]: PrefetchTargetsByBBID) { + for (auto &[K, V] : PrefetchTargetsByBBID) { llvm::sort(V); V.erase(llvm::unique(V), V.end()); } From 0c17e45c8973ee66126f326ed85b7788319727a0 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 21:04:07 +0000 Subject: [PATCH 23/24] Fix AsmPrinter. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d9b2450cb8a6f..a48d333b538ec 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1998,7 +1998,7 @@ void AsmPrinter::emitFunctionBody() { MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol( Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) + Twine("_") + - utostr(static_cast(*PrefetchTargetIt + 1))); + utostr(static_cast(*PrefetchTargetIt))); // If the function is weak-linkage it may be replaced by a strong // version, in which case the prefetch targets should also be replaced. 
OutStreamer->emitSymbolAttribute( From 500b53670976838a277ac617013e8342ae98c65b Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Wed, 19 Nov 2025 21:47:28 +0000 Subject: [PATCH 24/24] use -O0 --- .../CodeGen/X86/basic-block-sections-code-prefetch.ll | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll index 8e23a30e273b5..e5778b4b77fc2 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll @@ -7,12 +7,12 @@ ; RUN: echo 't 1,0' >> %t ; RUN: echo 't 1,1' >> %t ; RUN: echo 't 2,1' >> %t -; RUN: echo 't 4,0' >> %t +; RUN: echo 't 3,0' >> %t ; RUN: echo 'f _Z3barv' >> %t ; RUN: echo 't 0,0' >> %t ; RUN: echo 't 21,1' >> %t ;; -; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t -O0 | FileCheck %s define i32 @_Z3foob(i1 zeroext %0) nounwind { %2 = alloca i32, align 4 @@ -50,9 +50,8 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind { %14 = load i32, ptr %2, align 4 ret i32 %14 ; CHECK: .LBB0_3: -; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_4_0 -; CHECK-NEXT: __llvm_prefetch_target__Z3foob_4_0: - +; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_3_0 +; CHECK-NEXT: __llvm_prefetch_target__Z3foob_3_0: } define weak i32 @_Z3barv() nounwind {