diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 97a413296c55e..2c29710f8c8cb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -191,6 +191,9 @@ extern char &AMDGPUImageIntrinsicOptimizerID; void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); extern char &AMDGPUPerfHintAnalysisID; +void initializeGCNRegPressurePrinterPass(PassRegistry &); +extern char &GCNRegPressurePrinterID; + // Passes common to R600 and SI FunctionPass *createAMDGPUPromoteAlloca(); void initializeAMDGPUPromoteAllocaPass(PassRegistry&); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index dc7321cd5de9f..375df27206f7b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -428,6 +428,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeGCNPreRAOptimizationsPass(*PR); initializeGCNPreRALongBranchRegPass(*PR); initializeGCNRewritePartialRegUsesPass(*PR); + initializeGCNRegPressurePrinterPass(*PR); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 1ca0f3b6e06b8..a04c470b7b976 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "GCNRegPressure.h" +#include "AMDGPU.h" #include "llvm/CodeGen/RegisterPressure.h" using namespace llvm; @@ -31,7 +32,6 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1, return true; } - /////////////////////////////////////////////////////////////////////////////// // GCNRegPressure @@ -135,8 +135,6 @@ bool GCNRegPressure::less(const GCNSubtarget &ST, O.getVGPRNum(ST.hasGFX90AInsts())); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD Printable 
llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) { return Printable([&RP, ST](raw_ostream &OS) { OS << "VGPRs: " << RP.Value[GCNRegPressure::VGPR32] << ' ' @@ -155,7 +153,6 @@ Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST) { OS << '\n'; }); } -#endif static LaneBitmask getDefRegMask(const MachineOperand &MO, const MachineRegisterInfo &MRI) { @@ -269,6 +266,13 @@ void GCNUpwardRPTracker::reset(const MachineInstr &MI, GCNRPTracker::reset(MI, LiveRegsCopy, true); } +void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_, + const LiveRegSet &LiveRegs_) { + MRI = &MRI_; + LiveRegs = LiveRegs_; + MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_); +} + void GCNUpwardRPTracker::recede(const MachineInstr &MI) { assert(MRI && "call reset first"); @@ -418,19 +422,17 @@ bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator Begin, return advance(End); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR, const GCNRPTracker::LiveRegSet &TrackedLR, - const TargetRegisterInfo *TRI) { - return Printable([&LISLR, &TrackedLR, TRI](raw_ostream &OS) { + const TargetRegisterInfo *TRI, StringRef Pfx) { + return Printable([&LISLR, &TrackedLR, TRI, Pfx](raw_ostream &OS) { for (auto const &P : TrackedLR) { auto I = LISLR.find(P.first); if (I == LISLR.end()) { - OS << " " << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second) + OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second) << " isn't found in LIS reported set\n"; } else if (I->second != P.second) { - OS << " " << printReg(P.first, TRI) + OS << Pfx << printReg(P.first, TRI) << " masks doesn't match: LIS reported " << PrintLaneMask(I->second) << ", tracked " << PrintLaneMask(P.second) << '\n'; } @@ -438,7 +440,7 @@ Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR, for (auto const &P : LISLR) { auto I = TrackedLR.find(P.first); if (I == 
TrackedLR.end()) { - OS << " " << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second) + OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second) << " isn't found in tracked set\n"; } } @@ -467,7 +469,6 @@ bool GCNUpwardRPTracker::isValid() const { return true; } -LLVM_DUMP_METHOD Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs, const MachineRegisterInfo &MRI) { return Printable([&LiveRegs, &MRI](raw_ostream &OS) { @@ -483,7 +484,122 @@ Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs, }); } -LLVM_DUMP_METHOD void GCNRegPressure::dump() const { dbgs() << print(*this); } -#endif +static cl::opt<bool> UseDownwardTracker( + "amdgpu-print-rp-downward", + cl::desc("Use GCNDownwardRPTracker for GCNRegPressurePrinter pass"), + cl::init(false), cl::Hidden); + +char llvm::GCNRegPressurePrinter::ID = 0; +char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID; + +INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true) + +bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + const LiveIntervals &LIS = getAnalysis<LiveIntervals>(); + + auto &OS = dbgs(); + +// Leading spaces are important for YAML syntax. +#define PFX " " + + OS << "---\nname: " << MF.getName() << "\nbody: |\n"; + + auto printRP = [](const GCNRegPressure &RP) { + return Printable([&RP](raw_ostream &OS) { + OS << format(PFX " %-5d", RP.getSGPRNum()) + << format(" %-5d", RP.getVGPRNum(false)); + }); + }; + + auto ReportLISMismatchIfAny = [&](const GCNRPTracker::LiveRegSet &TrackedLR, + const GCNRPTracker::LiveRegSet &LISLR) { + if (LISLR != TrackedLR) { + OS << PFX " mis LIS: " << llvm::print(LISLR, MRI) + << reportMismatch(LISLR, TrackedLR, TRI, PFX " "); + } + }; + + // Register pressure before and at an instruction (in program order). 
+ SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP; + + for (auto &MBB : MF) { + RP.clear(); + RP.reserve(MBB.size()); + + OS << PFX; + MBB.printName(OS); + OS << ":\n"; + + SlotIndex MBBStartSlot = LIS.getSlotIndexes()->getMBBStartIdx(&MBB); + SlotIndex MBBEndSlot = LIS.getSlotIndexes()->getMBBEndIdx(&MBB); + + GCNRPTracker::LiveRegSet LiveIn, LiveOut; + GCNRegPressure RPAtMBBEnd; + + if (UseDownwardTracker) { + if (MBB.empty()) { + LiveIn = LiveOut = getLiveRegs(MBBStartSlot, LIS, MRI); + RPAtMBBEnd = getRegPressure(MRI, LiveIn); + } else { + GCNDownwardRPTracker RPT(LIS); + RPT.reset(MBB.front()); + + LiveIn = RPT.getLiveRegs(); + + while (!RPT.advanceBeforeNext()) { + GCNRegPressure RPBeforeMI = RPT.getPressure(); + RPT.advanceToNext(); + RP.emplace_back(RPBeforeMI, RPT.getPressure()); + } + + LiveOut = RPT.getLiveRegs(); + RPAtMBBEnd = RPT.getPressure(); + } + } else { + GCNUpwardRPTracker RPT(LIS); + RPT.reset(MRI, MBBEndSlot); + RPT.moveMaxPressure(); // Clear max pressure. + + LiveOut = RPT.getLiveRegs(); + RPAtMBBEnd = RPT.getPressure(); + + for (auto &MI : reverse(MBB)) { + RPT.recede(MI); + if (!MI.isDebugInstr()) + RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure()); + } + + LiveIn = RPT.getLiveRegs(); + } + + OS << PFX " Live-in: " << llvm::print(LiveIn, MRI); + if (!UseDownwardTracker) + ReportLISMismatchIfAny(LiveIn, getLiveRegs(MBBStartSlot, LIS, MRI)); + + OS << PFX " SGPR VGPR\n"; + int I = 0; + for (auto &MI : MBB) { + if (!MI.isDebugInstr()) { + auto &[RPBeforeInstr, RPAtInstr] = + RP[UseDownwardTracker ? 
I : (RP.size() - 1 - I)]; + ++I; + OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << " "; + } else + OS << PFX " "; + MI.print(OS); + } + OS << printRP(RPAtMBBEnd) << '\n'; + + OS << PFX " Live-out:" << llvm::print(LiveOut, MRI); + if (UseDownwardTracker) + ReportLISMismatchIfAny(LiveOut, getLiveRegs(MBBEndSlot, LIS, MRI)); + } + OS << "...\n"; + return false; + +#undef PFX +} \ No newline at end of file diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index 72e18acc1b8e4..c750fe74749e2 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -128,6 +128,8 @@ class GCNRPTracker { void clearMaxPressure() { MaxPressure.clear(); } + GCNRegPressure getPressure() const { return CurPressure; } + // returns MaxPressure, resetting it decltype(MaxPressure) moveMaxPressure() { auto Res = MaxPressure; @@ -140,6 +142,9 @@ class GCNRPTracker { } }; +GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, + const MachineRegisterInfo &MRI); + class GCNUpwardRPTracker : public GCNRPTracker { public: GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {} @@ -148,6 +153,14 @@ class GCNUpwardRPTracker : public GCNRPTracker { // filling live regs upon this point using LIS void reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr); + // reset tracker and set live register set to the specified value. + void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_); + + // reset tracker at the specified slot index. 
+ void reset(const MachineRegisterInfo &MRI_, SlotIndex SI) { + reset(MRI_, llvm::getLiveRegs(SI, LIS, MRI_)); + } + // move to the state just above the MI void recede(const MachineInstr &MI); @@ -196,10 +209,6 @@ LaneBitmask getLiveLaneMask(unsigned Reg, const LiveIntervals &LIS, const MachineRegisterInfo &MRI); -GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, - const LiveIntervals &LIS, - const MachineRegisterInfo &MRI); - /// creates a map MachineInstr -> LiveRegSet /// R - range of iterators on instructions /// After - upon entry or exit of every instruction @@ -275,7 +284,22 @@ Printable print(const GCNRPTracker::LiveRegSet &LiveRegs, Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR, const GCNRPTracker::LiveRegSet &TrackedL, - const TargetRegisterInfo *TRI); + const TargetRegisterInfo *TRI, StringRef Pfx = " "); + +struct GCNRegPressurePrinter : public MachineFunctionPass { + static char ID; + +public: + GCNRegPressurePrinter() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LiveIntervals>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; } // end namespace llvm diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir new file mode 100644 index 0000000000000..d53050167e98b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir @@ -0,0 +1,462 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD + + +--- +name: trivial +tracksRegLiveness: 
true +body: | + ; RP-LABEL: name: trivial + ; RP: bb.0: + ; RP-NEXT: Live-in: + ; RP-NEXT: SGPR VGPR + ; RP-NEXT: 0 0 + ; RP-NEXT: 0 1 %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec + ; RP-NEXT: 0 1 + ; RP-NEXT: 2 1 %1:sgpr_64 = IMPLICIT_DEF + ; RP-NEXT: 2 1 + ; RP-NEXT: Live-out: %0:0000000000000003 %1:000000000000000F + ; RP-NEXT: bb.1: + ; RP-NEXT: Live-in: %0:0000000000000003 %1:000000000000000F + ; RP-NEXT: SGPR VGPR + ; RP-NEXT: 2 1 + ; RP-NEXT: Live-out: %0:0000000000000003 %1:000000000000000F + ; RP-NEXT: bb.2: + ; RP-NEXT: Live-in: %0:0000000000000003 %1:000000000000000F + ; RP-NEXT: SGPR VGPR + ; RP-NEXT: 2 1 + ; RP-NEXT: 2 1 S_NOP 0, implicit %0:vgpr_32, implicit %1:sgpr_64 + ; RP-NEXT: 0 0 + ; RP-NEXT: Live-out: + bb.0: + %0:vgpr_32 = V_MOV_B32_e32 42, implicit $exec + %1:sgpr_64 = IMPLICIT_DEF + bb.1: + + bb.2: + S_NOP 0, implicit %0, implicit %1 +... +--- +name: live_through_test +tracksRegLiveness: true +body: | + ; RPU-LABEL: name: live_through_test + ; RPU: bb.0: + ; RPU-NEXT: Live-in: + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 0 + ; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF + ; RPU-NEXT: 3 0 + ; RPU-NEXT: Live-out: %0:00000000000000F3 + ; RPU-NEXT: bb.1: + ; RPU-NEXT: Live-in: %0:00000000000000F3 + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 3 0 + ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128 + ; RPU-NEXT: 2 0 + ; RPU-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF + ; RPU-NEXT: 3 0 + ; RPU-NEXT: 3 0 %0.sub1:sgpr_128 = IMPLICIT_DEF + ; RPU-NEXT: 3 0 + ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 + ; RPU-NEXT: 2 0 + ; RPU-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF + ; RPU-NEXT: 3 0 + ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 + ; RPU-NEXT: 2 0 + ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128 + ; RPU-NEXT: 2 0 + ; RPU-NEXT: Live-out: %0:00000000000000C3 + ; RPU-NEXT: bb.2: + ; RPU-NEXT: Live-in: %0:00000000000000C3 + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 2 0 + ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128 + ; 
RPU-NEXT: 0 0 + ; RPU-NEXT: Live-out: + ; + ; RPD-LABEL: name: live_through_test + ; RPD: bb.0: + ; RPD-NEXT: Live-in: + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 0 + ; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF + ; RPD-NEXT: 3 0 + ; RPD-NEXT: Live-out: %0:00000000000000F3 + ; RPD-NEXT: bb.1: + ; RPD-NEXT: Live-in: %0:00000000000000F3 + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 3 0 + ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128 + ; RPD-NEXT: 2 0 + ; RPD-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF + ; RPD-NEXT: 3 0 + ; RPD-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF + ; RPD-NEXT: 3 0 + ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 + ; RPD-NEXT: 2 0 + ; RPD-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF + ; RPD-NEXT: 3 0 + ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 + ; RPD-NEXT: 2 0 + ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128 + ; RPD-NEXT: 2 0 + ; RPD-NEXT: Live-out: %0:00000000000000C3 + ; RPD-NEXT: bb.2: + ; RPD-NEXT: Live-in: %0:00000000000000C3 + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 2 0 + ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128 + ; RPD-NEXT: 0 0 + ; RPD-NEXT: Live-out: + bb.0: + %0:sgpr_128 = IMPLICIT_DEF + bb.1: + + S_NOP 0, implicit %0.sub0 ; kill sub0 + %0.sub0 = IMPLICIT_DEF ; redef sub0 + + %0.sub1:sgpr_128 = IMPLICIT_DEF ; redef sub1 + + S_NOP 0, implicit %0.sub2 ; kill sub2 + %0.sub2:sgpr_128 = IMPLICIT_DEF ; redef sub2 + S_NOP 0, implicit %0.sub2 ; kill sub2 + + S_NOP 0, implicit %0.sub3 ; use sub3, live-through + + bb.2: + S_NOP 0, implicit %0.sub3, implicit %0.sub0 +... + +# This testcase shows the problem with LiveIntervals: it doesn't create +# subranges for undefined but used subregisters. Upward tracker is able to see +# the use of undefined subregister and tracks it correctly. 
+--- +name: upward_problem_lis_subregs_mismatch +tracksRegLiveness: true +body: | + ; RPU-LABEL: name: upward_problem_lis_subregs_mismatch + ; RPU: bb.0: + ; RPU-NEXT: Live-in: + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 0 + ; RPU-NEXT: 0 1 undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec + ; RPU-NEXT: 0 1 + ; RPU-NEXT: 0 2 undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec + ; RPU-NEXT: 0 2 + ; RPU-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C + ; RPU-NEXT: bb.1: + ; RPU-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 2 + ; RPU-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C + ; RPU-NEXT: bb.2: + ; RPU-NEXT: Live-in: %0:000000000000000F %1:000000000000000F + ; RPU-NEXT: mis LIS: %0:0000000000000003 %1:000000000000000C + ; RPU-NEXT: %0 masks doesn't match: LIS reported 0000000000000003, tracked 000000000000000F + ; RPU-NEXT: %1 masks doesn't match: LIS reported 000000000000000C, tracked 000000000000000F + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 4 + ; RPU-NEXT: 0 4 S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64 + ; RPU-NEXT: 0 0 + ; RPU-NEXT: Live-out: + ; + ; RPD-LABEL: name: upward_problem_lis_subregs_mismatch + ; RPD: bb.0: + ; RPD-NEXT: Live-in: + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 0 + ; RPD-NEXT: 0 1 undef %0.sub0:vreg_64 = V_MOV_B32_e32 42, implicit $exec + ; RPD-NEXT: 0 1 + ; RPD-NEXT: 0 2 undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec + ; RPD-NEXT: 0 2 + ; RPD-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C + ; RPD-NEXT: bb.1: + ; RPD-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 2 + ; RPD-NEXT: Live-out: %0:0000000000000003 %1:000000000000000C + ; RPD-NEXT: bb.2: + ; RPD-NEXT: Live-in: %0:0000000000000003 %1:000000000000000C + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 2 + ; RPD-NEXT: 0 2 S_NOP 0, implicit %0:vreg_64, implicit %1:vreg_64 + ; RPD-NEXT: 0 0 + ; RPD-NEXT: Live-out: + bb.0: + undef %0.sub0:vreg_64 = 
V_MOV_B32_e32 42, implicit $exec + undef %1.sub1:vreg_64 = V_MOV_B32_e32 33, implicit $exec + + bb.1: + + bb.2: + S_NOP 0, implicit %0, implicit %1 +... +--- +name: only_dbg_value_sched_region +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + waveLimiter: true +body: | + ; RPU-LABEL: name: only_dbg_value_sched_region + ; RPU: bb.0: + ; RPU-NEXT: Live-in: + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 0 + ; RPU-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0 + ; RPU-NEXT: 0 1 + ; RPU-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 3 + ; RPU-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 5 + ; RPU-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec + ; RPU-NEXT: 0 6 + ; RPU-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec + ; RPU-NEXT: 0 7 + ; RPU-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec + ; RPU-NEXT: 0 8 + ; RPU-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64 + ; RPU-NEXT: 0 9 + ; RPU-NEXT: 0 9 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec + ; RPU-NEXT: 0 8 + ; RPU-NEXT: 0 8 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec + ; RPU-NEXT: 0 7 + ; RPU-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 6 + ; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 7 + ; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 9 + ; RPU-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 11 + ; RPU-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 12 + ; RPU-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF + ; RPU-NEXT: 0 13 + ; RPU-NEXT: 0 14 %13:vgpr_32 = IMPLICIT_DEF + ; RPU-NEXT: 0 14 + ; RPU-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 16 + ; RPU-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 18 + ; RPU-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF + ; RPU-NEXT: 0 19 + ; RPU-NEXT: 0 20 
%17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; RPU-NEXT: 0 20 + ; RPU-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; RPU-NEXT: 0 21 + ; RPU-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec + ; RPU-NEXT: 0 20 + ; RPU-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: 0 20 + ; RPU-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec + ; RPU-NEXT: 0 16 + ; RPU-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 15 + ; RPU-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 14 + ; RPU-NEXT: 0 14 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: 0 12 + ; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 10 + ; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 10 + ; RPU-NEXT: 0 11 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec + ; RPU-NEXT: 0 9 + ; RPU-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32 + ; RPU-NEXT: 0 5 + ; RPU-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec + ; RPU-NEXT: 0 2 + ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003 + ; RPU-NEXT: bb.1: + ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003 + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: 0 2 + ; RPU-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: 0 2 + ; RPU-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode + ; RPU-NEXT: DBG_VALUE + ; RPU-NEXT: 0 2 + ; RPU-NEXT: Live-out: %0:0000000000000003 
%16:0000000000000003 + ; RPU-NEXT: bb.2: + ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003 + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 2 + ; RPU-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003 + ; RPU-NEXT: bb.3: + ; RPU-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003 + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 2 + ; RPU-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32 + ; RPU-NEXT: 0 1 + ; RPU-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32 + ; RPU-NEXT: 0 0 + ; RPU-NEXT: 0 0 S_ENDPGM 0 + ; RPU-NEXT: 0 0 + ; RPU-NEXT: Live-out: + ; + ; RPD-LABEL: name: only_dbg_value_sched_region + ; RPD: bb.0: + ; RPD-NEXT: Live-in: + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 0 + ; RPD-NEXT: 0 1 %0:vgpr_32 = COPY $vgpr0 + ; RPD-NEXT: 0 1 + ; RPD-NEXT: 0 3 %1:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 3 + ; RPD-NEXT: 0 5 %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec + ; RPD-NEXT: 0 5 + ; RPD-NEXT: 0 6 %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec + ; RPD-NEXT: 0 6 + ; RPD-NEXT: 0 7 undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec + ; RPD-NEXT: 0 7 + ; RPD-NEXT: 0 8 %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec + ; RPD-NEXT: 0 8 + ; RPD-NEXT: 0 10 %5:vreg_64 = COPY %2:vreg_64 + ; RPD-NEXT: 0 9 + ; RPD-NEXT: 0 10 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec + ; RPD-NEXT: 0 8 + ; RPD-NEXT: 0 9 dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec + ; RPD-NEXT: 0 7 + ; RPD-NEXT: 0 8 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec + ; RPD-NEXT: 0 6 + ; RPD-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 7 + ; RPD-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 9 + ; RPD-NEXT: 0 11 %10:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 11 + ; RPD-NEXT: 0 12 undef %11.sub1:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 12 + ; RPD-NEXT: 0 13 %12:vgpr_32 = IMPLICIT_DEF + ; RPD-NEXT: 0 13 + ; RPD-NEXT: 0 14 
%13:vgpr_32 = IMPLICIT_DEF + ; RPD-NEXT: 0 14 + ; RPD-NEXT: 0 16 %14:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 16 + ; RPD-NEXT: 0 18 %15:vreg_64 = IMPLICIT_DEF + ; RPD-NEXT: 0 18 + ; RPD-NEXT: 0 19 %16:vgpr_32 = IMPLICIT_DEF + ; RPD-NEXT: 0 19 + ; RPD-NEXT: 0 20 %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; RPD-NEXT: 0 20 + ; RPD-NEXT: 0 21 %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; RPD-NEXT: 0 21 + ; RPD-NEXT: 0 22 undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec + ; RPD-NEXT: 0 20 + ; RPD-NEXT: 0 21 %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: 0 20 + ; RPD-NEXT: 0 20 GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec + ; RPD-NEXT: 0 16 + ; RPD-NEXT: 0 17 %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec + ; RPD-NEXT: 0 15 + ; RPD-NEXT: 0 16 %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec + ; RPD-NEXT: 0 14 + ; RPD-NEXT: 0 15 dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: 0 12 + ; RPD-NEXT: 0 13 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec + ; RPD-NEXT: 0 10 + ; RPD-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec + ; RPD-NEXT: 0 10 + ; RPD-NEXT: 0 12 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec + ; RPD-NEXT: 0 9 + ; RPD-NEXT: 0 9 S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32 + ; RPD-NEXT: 0 5 + ; RPD-NEXT: 0 5 GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec + ; RPD-NEXT: 0 2 + ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003 + ; RPD-NEXT: bb.1: + ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003 + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: 0 2 + ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, 
implicit-def $mode, implicit $mode + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: 0 2 + ; RPD-NEXT: 0 2 S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode + ; RPD-NEXT: DBG_VALUE + ; RPD-NEXT: 0 2 + ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003 + ; RPD-NEXT: bb.2: + ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003 + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 2 + ; RPD-NEXT: Live-out: %0:0000000000000003 %16:0000000000000003 + ; RPD-NEXT: bb.3: + ; RPD-NEXT: Live-in: %0:0000000000000003 %16:0000000000000003 + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 2 + ; RPD-NEXT: 0 2 S_NOP 0, implicit %0:vgpr_32 + ; RPD-NEXT: 0 1 + ; RPD-NEXT: 0 1 S_NOP 0, implicit %16:vgpr_32 + ; RPD-NEXT: 0 0 + ; RPD-NEXT: 0 0 S_ENDPGM 0 + ; RPD-NEXT: 0 0 + ; RPD-NEXT: Live-out: + bb.0: + liveins: $vgpr0 + + %0:vgpr_32 = COPY $vgpr0 + %1:vreg_64 = IMPLICIT_DEF + %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, implicit $exec + %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, implicit $exec + undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0, %0, implicit $exec + %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec + %5:vreg_64 = COPY %2 + undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0, %5.sub0, implicit $mode, implicit $exec + %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1, %5.sub0, implicit $mode, implicit $exec + %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, implicit $exec + %8:vreg_64 = IMPLICIT_DEF + %9:vreg_64 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + undef %11.sub1:vreg_64 = IMPLICIT_DEF + %12:vgpr_32 = IMPLICIT_DEF + %13:vgpr_32 = IMPLICIT_DEF + %14:vreg_64 = IMPLICIT_DEF + %15:vreg_64 = IMPLICIT_DEF + %16:vgpr_32 = IMPLICIT_DEF + %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7, %2.sub0, implicit $mode, implicit $exec + %19.sub1:vreg_64 = V_ADD_F32_e32 %3, %3, implicit $mode, implicit $exec + DBG_VALUE + GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, implicit $exec + %11.sub0:vreg_64 = 
GLOBAL_LOAD_DWORD %9, 0, 0, implicit $exec + %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, implicit $exec + %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, implicit $exec + DBG_VALUE + DBG_VALUE + %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, implicit $exec + %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, implicit $exec + %23:vreg_64 = V_LSHLREV_B64_e64 2, %8, implicit $exec + S_NOP 0, implicit %13, implicit %23.sub0, implicit %12, implicit %17 + GLOBAL_STORE_DWORD %15, %18, 0, 0, implicit $exec + + bb.1: + DBG_VALUE + S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode + DBG_VALUE + DBG_VALUE + S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode + DBG_VALUE + + bb.3: + + bb.2: + S_NOP 0, implicit %0 + S_NOP 0, implicit %16 + S_ENDPGM 0 +... +