From 28c8172d1ebee698d528c873b9c52d918693f343 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Fri, 3 Oct 2025 13:59:29 +0000 Subject: [PATCH 1/4] [AMDGPU] Add register usage diagnostics at the point of maximum register pressure. --- llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 132 +++++++++++++++++++- llvm/lib/Target/AMDGPU/GCNRegPressure.h | 31 ++++- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 30 +++++ 3 files changed, 187 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index ef63acc6355d2..fd550f4ab7f28 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -14,6 +14,7 @@ #include "GCNRegPressure.h" #include "AMDGPU.h" #include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/RegisterPressure.h" using namespace llvm; @@ -459,10 +460,14 @@ LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI, GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, - const MachineRegisterInfo &MRI) { + const MachineRegisterInfo &MRI, + GCNRegPressure::RegKind RegKind) { GCNRPTracker::LiveRegSet LiveRegs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { auto Reg = Register::index2VirtReg(I); + if (RegKind != GCNRegPressure::TOTAL_KINDS && + GCNRegPressure::getRegKind(Reg, MRI) != RegKind) + continue; if (!LIS.hasInterval(Reg)) continue; auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI); @@ -986,3 +991,128 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) { #undef PFX } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF, + GCNRegPressure::RegKind Kind, + LiveIntervals &LIS, + const MachineLoopInfo *MLI) { + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + auto &OS = dbgs(); + const char *RegName = GCNRegPressure::getName(Kind); + + unsigned MaxNumRegs = 0; + MachineInstr *MaxPressureMI = nullptr; + GCNUpwardRPTracker RPT(LIS); + for (auto &MBB : MF) { + RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot()); + for (auto &MI : reverse(MBB)) { + RPT.recede(MI); + unsigned NumRegs = RPT.getMaxPressure().getNumRegs(Kind); + if (NumRegs > MaxNumRegs) { + MaxNumRegs = NumRegs; + MaxPressureMI = &MI; + } + } + } + + SlotIndex MISlot = LIS.getInstructionIndex(*MaxPressureMI); + + // Max pressure can occur at either the early-clobber or register slot. + // Choose the maximum liveset between both slots. This is ugly but this is + // diagnostic code. + SlotIndex ECSlot = MISlot.getRegSlot(true); + SlotIndex RSlot = MISlot.getRegSlot(false); + GCNRPTracker::LiveRegSet ECLiveSet = getLiveRegs(ECSlot, LIS, MRI, Kind); + GCNRPTracker::LiveRegSet RLiveSet = getLiveRegs(RSlot, LIS, MRI, Kind); + unsigned ECNumRegs = getRegPressure(MRI, ECLiveSet).getNumRegs(Kind); + unsigned RNumRegs = getRegPressure(MRI, RLiveSet).getNumRegs(Kind); + GCNRPTracker::LiveRegSet *LiveSet = + ECNumRegs > RNumRegs ? &ECLiveSet : &RLiveSet; + SlotIndex MaxPressureSlot = ECNumRegs > RNumRegs ? ECSlot : RSlot; + assert(getRegPressure(MRI, *LiveSet).getNumRegs(Kind) == MaxNumRegs); + + // Split live registers into single-def and multi-def sets. + GCNRegPressure SDefPressure, MDefPressure; + SmallVector SDefRegs, MDefRegs; + for (auto [Reg, LaneMask] : *LiveSet) { + assert(GCNRegPressure::getRegKind(Reg, MRI) == Kind); + LiveInterval &LI = LIS.getInterval(Reg); + if (LI.getNumValNums() == 1 || + (LI.hasSubRanges() && + llvm::all_of(LI.subranges(), [](const LiveInterval::SubRange &SR) { + return SR.getNumValNums() == 1; + }))) { + SDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI); + SDefRegs.push_back(Reg); + } else { + MDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI); + MDefRegs.push_back(Reg); + } + } + unsigned SDefNumRegs = SDefPressure.getNumRegs(Kind); + unsigned MDefNumRegs = MDefPressure.getNumRegs(Kind); + assert(SDefNumRegs + MDefNumRegs == MaxNumRegs); + + auto printLoc = [&](MachineBasicBlock *MBB, SlotIndex SI) { + return Printable([&, MBB, SI](raw_ostream &OS) { + OS << SI << "@BB." << MBB->getNumber(); + if (MLI) + if (const MachineLoop *ML = MLI->getLoopFor(MBB)) + OS << " (LoopHdr BB." << ML->getHeader()->getNumber() << ", Depth " + << ML->getLoopDepth() << ")"; + }); + }; + + auto PrintRegInfo = [&](Register Reg, LaneBitmask LiveMask) { + GCNRegPressure RegPressure; + RegPressure.inc(Reg, LaneBitmask::getNone(), LiveMask, MRI); + OS << " " << printReg(Reg, TRI) << ':' + << TRI->getRegClassName(MRI.getRegClass(Reg)) << ", LiveMask " + << PrintLaneMask(LiveMask) << " (" << RegPressure.getNumRegs(Kind) << ' ' + << RegName << "s)\n"; + + // Use std::map to sort def/uses by SlotIndex. + std::map Instrs; + for (auto &MI : MRI.reg_nodbg_instructions(Reg)) { + Instrs[LIS.getInstructionIndex(MI).getRegSlot()] = &MI; + } + + for (const auto &[SI, MI] : Instrs) { + OS << " "; + if (MI->definesRegister(Reg, TRI)) + OS << "def "; + if (MI->readsRegister(Reg, TRI)) + OS << "use "; + OS << printLoc(MI->getParent(), SI) << ": " << *MI; + } + }; + + OS << "\n*** Register pressure info (" << RegName << "s) for " << MF.getName() + << " ***\n"; + OS << "Max pressure is " << MaxNumRegs << ' ' << RegName << "s at " + << printLoc(MaxPressureMI->getParent(), MaxPressureSlot) << ": " + << *MaxPressureMI; + + OS << "\nLive registers with single definition (" << SDefNumRegs << ' ' + << RegName << "s):\n"; + + // Sort OneDefRegs by number of uses (smallest first) + llvm::sort(SDefRegs, [&](Register A, Register B) { + return std::distance(MRI.use_begin(A), MRI.use_end()) < + std::distance(MRI.use_begin(B), MRI.use_end()); + }); + + for (const auto Reg : SDefRegs) { + PrintRegInfo(Reg, LiveSet->lookup(Reg)); + } + + OS << "\nLive registers with multiple definitions (" << MDefNumRegs << ' ' + << RegName << "s):\n"; + for (const auto Reg : MDefRegs) { + PrintRegInfo(Reg, LiveSet->lookup(Reg)); + } +} +#endif diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index a9c58bb90ef03..0cf5ed0513fae 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -41,6 +41,11 @@ struct GCNRegPressure { void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); } + unsigned getNumRegs(RegKind Kind) const { + assert(Kind < TOTAL_KINDS); + return Value[Kind]; + } + /// \returns the SGPR32 pressure unsigned getSGPRNum() const { return Value[SGPR]; } /// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure @@ -138,6 +143,18 @@ struct GCNRegPressure { void dump() const; + static RegKind getRegKind(unsigned Reg, const MachineRegisterInfo &MRI) { + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + const SIRegisterInfo *STI = static_cast(TRI); + return (RegKind)getRegKind(MRI.getRegClass(Reg), STI); + } + + static const char *getName(RegKind Kind) { + const char *Names[] = {"SGPR", "VGPR", "AGPR", "AVGPR"}; + assert(Kind < TOTAL_KINDS); + return Names[Kind]; + } + private: static constexpr unsigned ValueArraySize = TOTAL_KINDS * 2; @@ -294,8 +311,10 @@ class GCNRPTracker { } }; -GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, - const MachineRegisterInfo &MRI); +GCNRPTracker::LiveRegSet +getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, + const MachineRegisterInfo &MRI, + GCNRegPressure::RegKind RegKind = GCNRegPressure::TOTAL_KINDS); //////////////////////////////////////////////////////////////////////////////// // GCNUpwardRPTracker @@ -428,9 +447,6 @@ LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI, const MachineRegisterInfo &MRI, LaneBitmask LaneMaskFilter = LaneBitmask::getAll()); -GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, - const MachineRegisterInfo &MRI); - /// creates a map MachineInstr -> LiveRegSet /// R - range of iterators on instructions /// After - upon entry or exit of every instruction @@ -524,6 +540,11 @@ struct GCNRegPressurePrinter : public MachineFunctionPass { } }; +LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF, + GCNRegPressure::RegKind Kind, + LiveIntervals &LIS, + const MachineLoopInfo *MLI); + } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index bdc08101c7119..c29d65e0671dc 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -69,6 +69,21 @@ static cl::opt GCNTrackers( cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false)); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +#define DUMP_MAX_REG_PRESSURE +static cl::opt PrintMaxRPRegUsageBeforeScheduler( + "amdgpu-print-maxrp-regusage-before-scheduler", cl::Hidden, + cl::desc("Print a list of live registers along with their def/uses at the " + "point of maximum register pressure before scheduling."), + cl::init(false)); + +static cl::opt PrintMaxRPRegUsageAfterScheduler( + "amdgpu-print-maxrp-regusage-after-scheduler", cl::Hidden, + cl::desc("Print a list of live registers along with their def/uses at the " + "point of maximum register pressure after scheduling."), + cl::init(false)); +#endif + const unsigned ScheduleMetrics::ScaleFactor = 100; GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C) @@ -960,6 +975,16 @@ void GCNScheduleDAGMILive::runSchedStages() { RegionLiveOuts.buildLiveRegMap(); } +#ifdef DUMP_MAX_REG_PRESSURE + auto dumpRegUsage = [this]() { + dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI); + dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI); + LIS->dump(); + }; + if (PrintMaxRPRegUsageBeforeScheduler) + dumpRegUsage(); +#endif + GCNSchedStrategy &S = static_cast(*SchedImpl); while (S.advanceStage()) { auto Stage = createSchedStage(S.getCurrentStage()); @@ -995,6 +1020,11 @@ void GCNScheduleDAGMILive::runSchedStages() { Stage->finalizeGCNSchedStage(); } + +#ifdef DUMP_MAX_REG_PRESSURE + if (PrintMaxRPRegUsageAfterScheduler) + dumpRegUsage(); +#endif } #ifndef NDEBUG From 335007f2b36b006f84cb1bbacb8ba66408f9f18c Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Fri, 10 Oct 2025 12:56:14 +0000 Subject: [PATCH 2/4] per review fixes --- llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 28 ++++++++++----------- llvm/lib/Target/AMDGPU/GCNRegPressure.h | 12 ++++----- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 4 +-- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index fd550f4ab7f28..39d9b9fa20d97 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -1004,11 +1004,11 @@ LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF, const char *RegName = GCNRegPressure::getName(Kind); unsigned MaxNumRegs = 0; - MachineInstr *MaxPressureMI = nullptr; + const MachineInstr *MaxPressureMI = nullptr; GCNUpwardRPTracker RPT(LIS); - for (auto &MBB : MF) { + for (const MachineBasicBlock &MBB : MF) { RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot()); - for (auto &MI : reverse(MBB)) { + for (const MachineInstr &MI : reverse(MBB)) { RPT.recede(MI); unsigned NumRegs = RPT.getMaxPressure().getNumRegs(Kind); if (NumRegs > MaxNumRegs) { @@ -1056,13 +1056,13 @@ LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF, unsigned MDefNumRegs = MDefPressure.getNumRegs(Kind); assert(SDefNumRegs + MDefNumRegs == MaxNumRegs); - auto printLoc = [&](MachineBasicBlock *MBB, SlotIndex SI) { + auto printLoc = [&](const MachineBasicBlock *MBB, SlotIndex SI) { return Printable([&, MBB, SI](raw_ostream &OS) { - OS << SI << "@BB." << MBB->getNumber(); + OS << SI << ':' << printMBBReference(*MBB); if (MLI) if (const MachineLoop *ML = MLI->getLoopFor(MBB)) - OS << " (LoopHdr BB." << ML->getHeader()->getNumber() << ", Depth " - << ML->getLoopDepth() << ")"; + OS << " (LoopHdr " << printMBBReference(*ML->getHeader()) + << ", Depth " << ML->getLoopDepth() << ")"; }); }; @@ -1075,8 +1075,8 @@ LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF, << RegName << "s)\n"; // Use std::map to sort def/uses by SlotIndex. - std::map Instrs; - for (auto &MI : MRI.reg_nodbg_instructions(Reg)) { + std::map Instrs; + for (const MachineInstr &MI : MRI.reg_nodbg_instructions(Reg)) { Instrs[LIS.getInstructionIndex(MI).getRegSlot()] = &MI; } @@ -1099,19 +1099,19 @@ LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF, OS << "\nLive registers with single definition (" << SDefNumRegs << ' ' << RegName << "s):\n"; - // Sort OneDefRegs by number of uses (smallest first) + // Sort SDefRegs by number of uses (smallest first) llvm::sort(SDefRegs, [&](Register A, Register B) { - return std::distance(MRI.use_begin(A), MRI.use_end()) < - std::distance(MRI.use_begin(B), MRI.use_end()); + return std::distance(MRI.use_nodbg_begin(A), MRI.use_nodbg_end()) < + std::distance(MRI.use_nodbg_begin(B), MRI.use_nodbg_end()); }); - for (const auto Reg : SDefRegs) { + for (const Register Reg : SDefRegs) { PrintRegInfo(Reg, LiveSet->lookup(Reg)); } OS << "\nLive registers with multiple definitions (" << MDefNumRegs << ' ' << RegName << "s):\n"; - for (const auto Reg : MDefRegs) { + for (const Register Reg : MDefRegs) { PrintRegInfo(Reg, LiveSet->lookup(Reg)); } } diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index 0cf5ed0513fae..d5c4dcf33f03b 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -31,6 +31,12 @@ class SlotIndex; struct GCNRegPressure { enum RegKind { SGPR, VGPR, AGPR, AVGPR, TOTAL_KINDS }; + static constexpr const char *getName(RegKind Kind) { + const char *Names[] = {"SGPR", "VGPR", "AGPR", "AVGPR"}; + assert(Kind < TOTAL_KINDS); + return Names[Kind]; + } + GCNRegPressure() { clear(); } @@ -149,12 +155,6 @@ struct GCNRegPressure { return (RegKind)getRegKind(MRI.getRegClass(Reg), STI); } - static const char *getName(RegKind Kind) { - const char *Names[] = {"SGPR", "VGPR", "AGPR", "AVGPR"}; - assert(Kind < TOTAL_KINDS); - return Names[Kind]; - } - private: static constexpr unsigned ValueArraySize = TOTAL_KINDS * 2; diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index c29d65e0671dc..0da8fb41791e5 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -72,13 +72,13 @@ static cl::opt GCNTrackers( #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) #define DUMP_MAX_REG_PRESSURE static cl::opt PrintMaxRPRegUsageBeforeScheduler( - "amdgpu-print-maxrp-regusage-before-scheduler", cl::Hidden, + "amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure before scheduling."), cl::init(false)); static cl::opt PrintMaxRPRegUsageAfterScheduler( - "amdgpu-print-maxrp-regusage-after-scheduler", cl::Hidden, + "amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure after scheduling."), cl::init(false)); From 9349a0b760abd371d00e53ab835d31f7b2cda965 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Mon, 13 Oct 2025 12:27:12 +0000 Subject: [PATCH 3/4] remove lambda --- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 0da8fb41791e5..f164c5f147bd0 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -976,13 +976,11 @@ void GCNScheduleDAGMILive::runSchedStages() { } #ifdef DUMP_MAX_REG_PRESSURE - auto dumpRegUsage = [this]() { + if (PrintMaxRPRegUsageBeforeScheduler) { dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI); dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI); LIS->dump(); - }; - if (PrintMaxRPRegUsageBeforeScheduler) - dumpRegUsage(); + } #endif GCNSchedStrategy &S = static_cast(*SchedImpl); @@ -1022,8 +1020,11 @@ void GCNScheduleDAGMILive::runSchedStages() { } #ifdef DUMP_MAX_REG_PRESSURE - if (PrintMaxRPRegUsageAfterScheduler) - dumpRegUsage(); + if (PrintMaxRPRegUsageAfterScheduler) { + dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI); + dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI); + LIS->dump(); + } #endif } From 5ebe1d0ad0c928d0d9ff650dd4e387ae6b0fbb25 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Mon, 13 Oct 2025 12:34:42 +0000 Subject: [PATCH 4/4] formatting --- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index f164c5f147bd0..58482ea69d0b0 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -1020,7 +1020,7 @@ void GCNScheduleDAGMILive::runSchedStages() { } #ifdef DUMP_MAX_REG_PRESSURE - if (PrintMaxRPRegUsageAfterScheduler) { + if (PrintMaxRPRegUsageAfterScheduler) { dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI); dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI); LIS->dump();