[AMDGPU] GCNRegPressure printing pass for testing. #70031

vpykhtin · 2023-10-24T11:46:16Z

Take a look at the tests to see the output.

This probably should be also enabled for llc tests, I haven't tried it yet.

* Added printing of live-in, live-out and live-through sets. * Empty BB aren't skipped now.

llvmbot · 2023-10-24T11:47:27Z

@llvm/pr-subscribers-backend-amdgpu

Author: Valery Pykhtin (vpykhtin)

Changes

Take a look at the tests to see the output.

This probably should be also enabled for llc tests, I haven't tried it yet.

Patch is 48.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70031.diff

6 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+3)
(modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+1)
(modified) llvm/lib/Target/AMDGPU/GCNRegPressure.cpp (+75)
(modified) llvm/lib/Target/AMDGPU/GCNRegPressure.h (+17)
(modified) llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir (+125-17)
(modified) llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir (+416-3)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 97a413296c55e55..2c29710f8c8cb46 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -191,6 +191,9 @@ extern char &AMDGPUImageIntrinsicOptimizerID;
 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
 extern char &AMDGPUPerfHintAnalysisID;
 
+void initializeGCNRegPressurePrinterPass(PassRegistry &);
+extern char &GCNRegPressurePrinterID;
+
 // Passes common to R600 and SI
 FunctionPass *createAMDGPUPromoteAlloca();
 void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dc7321cd5de9fcd..375df27206f7b41 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -428,6 +428,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeGCNPreRAOptimizationsPass(*PR);
   initializeGCNPreRALongBranchRegPass(*PR);
   initializeGCNRewritePartialRegUsesPass(*PR);
+  initializeGCNRegPressurePrinterPass(*PR);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 1ca0f3b6e06b823..1db4e9dd151d847 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "GCNRegPressure.h"
+#include "AMDGPU.h"
 #include "llvm/CodeGen/RegisterPressure.h"
 
 using namespace llvm;
@@ -487,3 +488,77 @@ LLVM_DUMP_METHOD
 void GCNRegPressure::dump() const { dbgs() << print(*this); }
 
 #endif
+
+char llvm::GCNRegPressurePrinter::ID = 0;
+char &llvm::GCNRegPressurePrinterID = GCNRegPressurePrinter::ID;
+
+INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true)
+
+bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+  GCNUpwardRPTracker RPT(LIS);
+
+  auto &OS = dbgs();
+
+// Leading spaces are important for YAML syntax.
+#define PFX "  "
+
+  OS << "---\nname: " << MF.getName() << "\nbody:             |\n";
+
+  auto printRP = [](const GCNRegPressure &RP) {
+    return Printable([&RP](raw_ostream &OS) {
+      OS << format(PFX "  %-5d", RP.getSGPRNum())
+         << format(" %-5d", RP.getVGPRNum(false));
+    });
+  };
+
+  // Register pressure before and at an instruction (in program order).
+  SmallVector<std::pair<GCNRegPressure, GCNRegPressure>, 16> RP;
+
+  for (auto &MBB : MF) {
+    OS << PFX;
+    MBB.printName(OS);
+    OS << ":\n";
+
+    if (MBB.empty()) {
+      SlotIndex MBBSI = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
+      GCNRPTracker::LiveRegSet LRThrough = getLiveRegs(MBBSI, LIS, MRI);
+      GCNRegPressure RP = getRegPressure(MRI, LRThrough);
+      OS << PFX "  Live-through:" << llvm::print(LRThrough, MRI);
+      OS << PFX "  SGPR  VGPR\n" << printRP(RP) << '\n';
+      continue;
+    }
+
+    RPT.reset(MBB.instr_back());
+    RPT.moveMaxPressure(); // Clear max pressure.
+
+    GCNRPTracker::LiveRegSet LRAtMBBEnd = RPT.getLiveRegs();
+    GCNRegPressure RPAtMBBEnd = RPT.getPressure();
+
+    RP.clear();
+    RP.reserve(MBB.size());
+    for (auto &MI : reverse(MBB)) {
+      RPT.recede(MI);
+      RP.emplace_back(RPT.getPressure(), RPT.moveMaxPressure());
+    }
+
+    OS << PFX "  Live-in:" << llvm::print(RPT.getLiveRegs(), MRI);
+    OS << PFX "  SGPR  VGPR\n";
+    auto I = RP.rbegin();
+    for (auto &MI : MBB) {
+      auto &[RPBeforeInstr, RPAtInstr] = *I++;
+      OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << "  ";
+      MI.print(OS);
+    }
+    OS << printRP(RPAtMBBEnd) << '\n';
+    OS << PFX "  Live-out:" << llvm::print(LRAtMBBEnd, MRI);
+  }
+  OS << "...\n";
+  return false;
+
+#undef PFX
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 72e18acc1b8e494..f2256f68c2c7037 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -128,6 +128,8 @@ class GCNRPTracker {
 
   void clearMaxPressure() { MaxPressure.clear(); }
 
+  GCNRegPressure getPressure() const { return CurPressure; }
+
   // returns MaxPressure, resetting it
   decltype(MaxPressure) moveMaxPressure() {
     auto Res = MaxPressure;
@@ -277,6 +279,21 @@ Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
                          const GCNRPTracker::LiveRegSet &TrackedL,
                          const TargetRegisterInfo *TRI);
 
+struct GCNRegPressurePrinter : public MachineFunctionPass {
+  static char ID;
+
+public:
+  GCNRegPressurePrinter() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LiveIntervals>();
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
index e4f56cc328e4782..138c8e785dec280 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
 
 # The sequence of DBG_VALUEs forms a scheduling region with 0 real
 # instructions. The RegPressure tracker would end up skipping over any
@@ -27,33 +28,33 @@ body:             |
   ; CHECK-NEXT:   [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec
   ; CHECK-NEXT:   [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, implicit $exec
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]]
-  ; CHECK-NEXT:   undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   undef [[V_ADD_F32_e32_:%[0-9]+]].sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   dead undef [[V_ADD_F32_e32_:%[0-9]+]].sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec
   ; CHECK-NEXT:   [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec
-  ; CHECK-NEXT:   undef %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+  ; CHECK-NEXT:   undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
   ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
-  ; CHECK-NEXT:   %4.sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
-  ; CHECK-NEXT:   undef %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
-  ; CHECK-NEXT:   %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec
-  ; CHECK-NEXT:   GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, implicit $exec
-  ; CHECK-NEXT:   undef %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec
+  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec
+  ; CHECK-NEXT:   undef [[V_ADD_F32_e32_1:%[0-9]+]].sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[V_ADD_F32_e32_1:%[0-9]+]].sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   GLOBAL_STORE_DWORDX2 [[V_ADD_F32_e32_1]], [[V_MOV_B32_e32_]], 32, 0, implicit $exec
+  ; CHECK-NEXT:   undef [[GLOBAL_LOAD_DWORD2:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec
   ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec
-  ; CHECK-NEXT:   %11.sub1:vreg_64 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, implicit $exec
+  ; CHECK-NEXT:   [[GLOBAL_LOAD_DWORD2:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
-  ; CHECK-NEXT:   dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, implicit $exec
-  ; CHECK-NEXT:   dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, implicit $exec
-  ; CHECK-NEXT:   dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
+  ; CHECK-NEXT:   dead [[GLOBAL_LOAD_DWORD3:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[GLOBAL_LOAD_DWORD2]], 0, 0, implicit $exec
+  ; CHECK-NEXT:   dead [[GLOBAL_LOAD_DWORD4:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, implicit $exec
+  ; CHECK-NEXT:   dead [[GLOBAL_LOAD_DWORD5:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, implicit $exec
   ; CHECK-NEXT:   [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; CHECK-NEXT:   [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF2]], implicit $exec
   ; CHECK-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; CHECK-NEXT:   S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_e64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_]]
-  ; CHECK-NEXT:   GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
+  ; CHECK-NEXT:   [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64_e64 2, [[DEF2]], implicit $exec
+  ; CHECK-NEXT:   [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_e64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_1]]
+  ; CHECK-NEXT:   GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_2]], 0, 0, implicit $exec
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors: %bb.2(0x80000000)
@@ -65,9 +66,114 @@ body:             |
   ; CHECK-NEXT:   S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   S_NOP 0, implicit [[COPY]]
   ; CHECK-NEXT:   S_NOP 0, implicit [[DEF8]]
   ; CHECK-NEXT:   S_ENDPGM 0
+  ;
+  ; RP-LABEL: name: only_dbg_value_sched_region
+  ; RP: bb.0:
+  ; RP-NEXT:   Live-in:
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   0     0
+  ; RP-NEXT:   0     1      %0:vgpr_32 = COPY $vgpr0
+  ; RP-NEXT:   0     1
+  ; RP-NEXT:   0     3      %1:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     3
+  ; RP-NEXT:   0     5      %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     5
+  ; RP-NEXT:   0     6      %3:vgpr_32 = GLOBAL_LOAD_DWORD %1:vreg_64, 8, 0, implicit $exec
+  ; RP-NEXT:   0     6
+  ; RP-NEXT:   0     7      undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0:vgpr_32, %0:vgpr_32, implicit $exec
+  ; RP-NEXT:   0     7
+  ; RP-NEXT:   0     8      %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+  ; RP-NEXT:   0     8
+  ; RP-NEXT:   0     10     %5:vreg_64 = COPY %2:vreg_64
+  ; RP-NEXT:   0     9
+  ; RP-NEXT:   0     9      undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+  ; RP-NEXT:   0     8
+  ; RP-NEXT:   0     8      dead %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec
+  ; RP-NEXT:   0     7
+  ; RP-NEXT:   0     8      %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     6
+  ; RP-NEXT:   0     7      %8:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     7
+  ; RP-NEXT:   0     9      %9:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     9
+  ; RP-NEXT:   0     11     %10:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     11
+  ; RP-NEXT:   0     12     undef %11.sub1:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     12
+  ; RP-NEXT:   0     13     %12:vgpr_32 = IMPLICIT_DEF
+  ; RP-NEXT:   0     13
+  ; RP-NEXT:   0     14     %13:vgpr_32 = IMPLICIT_DEF
+  ; RP-NEXT:   0     14
+  ; RP-NEXT:   0     16     %14:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     16
+  ; RP-NEXT:   0     18     %15:vreg_64 = IMPLICIT_DEF
+  ; RP-NEXT:   0     18
+  ; RP-NEXT:   0     19     %16:vgpr_32 = IMPLICIT_DEF
+  ; RP-NEXT:   0     19
+  ; RP-NEXT:   0     20     %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; RP-NEXT:   0     20
+  ; RP-NEXT:   0     21     %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; RP-NEXT:   0     21
+  ; RP-NEXT:   0     22     undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7:vgpr_32, %2.sub0:vreg_64, implicit $mode, implicit $exec
+  ; RP-NEXT:   0     20
+  ; RP-NEXT:   0     21     %19.sub1:vreg_64 = V_ADD_F32_e32 %3:vgpr_32, %3:vgpr_32, implicit $mode, implicit $exec
+  ; RP-NEXT:   0     20
+  ; RP-NEXT:   0     20     GLOBAL_STORE_DWORDX2 %19:vreg_64, %4:vreg_64, 32, 0, implicit $exec
+  ; RP-NEXT:   0     16
+  ; RP-NEXT:   0     17     %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     15
+  ; RP-NEXT:   0     16     %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     14
+  ; RP-NEXT:   0     14     dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     12
+  ; RP-NEXT:   0     12     dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     10
+  ; RP-NEXT:   0     10     dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+  ; RP-NEXT:   0     10
+  ; RP-NEXT:   0     11     %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
+  ; RP-NEXT:   0     9
+  ; RP-NEXT:   0     9      S_NOP 0, implicit %13:vgpr_32, implicit %23.sub0:vreg_64, implicit %12:vgpr_32, implicit %17:vgpr_32
+  ; RP-NEXT:   0     5
+  ; RP-NEXT:   0     5      GLOBAL_STORE_DWORD %15:vreg_64, %18:vgpr_32, 0, 0, implicit $exec
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   Live-out: %0:0000000000000003 %16:0000000000000003
+  ; RP-NEXT: bb.1:
+  ; RP-NEXT:   Live-in: %0:0000000000000003 %16:0000000000000003
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   0     2      S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   0     0      DBG_VALUE
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   0     0      DBG_VALUE
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   0     0      DBG_VALUE
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   0     2      S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   Live-out: %0:0000000000000003 %16:0000000000000003
+  ; RP-NEXT: bb.2:
+  ; RP-NEXT:   Live-through: %0:0000000000000003 %16:0000000000000003
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   0     2
+  ; RP-NEXT: bb.3:
+  ; RP-NEXT:   Live-in: %0:0000000000000003 %16:0000000000000003
+  ; RP-NEXT:   SGPR  VGPR
+  ; RP-NEXT:   0     2
+  ; RP-NEXT:   0     2      S_NOP 0, implicit %0:vgpr_32
+  ; RP-NEXT:   0     1
+  ; RP-NEXT:   0     1      S_NOP 0, implicit %16:vgpr_32
+  ; RP-NEXT:   0     0
+  ; RP-NEXT:   0     0      S_ENDPGM 0
+  ; RP-NEXT:   0     0
+  ; RP-NEXT:   Live-out:
   bb.0:
     liveins: $vgpr0
 
@@ -111,6 +217,8 @@ body:             |
     DBG_VALUE
     S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
 
+  bb.3:
+
   bb.2:
     S_NOP 0, implicit %0
     S_NOP 0, implicit %16
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index f8c7be8e414ca15..3f33e795e1e6ad0 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -1,4 +1,6 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP
 
 --- |
   %struct.widget.0 = type { float, i32, i32 }
@@ -171,8 +173,6 @@
 ...
 ---
 
-# CHECK: name: sched_dbg_value_crash
-# CHECK: DBG_VALUE %99, $noreg, !5, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8
 
 name:            sched_dbg_value_crash
 alignment:       1
@@ -197,7 +197,420 @@ constants:
 body:             |
   bb.0.bb:
     liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
-
+    ; CHECK-LABEL: name: sched_dbg_value_crash
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 24, 0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[DEF]], [[COPY2]], implicit-def dead $vcc, implicit $exec
+    ; CHECK-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[V_ADD_CO_U32_e32_]], 12, [[S_LOAD_DWORDX2_IMM]], 0, implicit $exec
+    ; CHECK-NEXT: dead [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 32, 0
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM3:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY1]], 4, 0
+    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 4, 0, implicit $exec
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[V_MAD_I64_I32_e64_2:%[0-9]+]]:vreg_64, dead [[V_MAD_I64_I32_e64_3:%[0-9]+]]:sreg_64 = V_MAD_I64_I32_e64 [[GLOBAL_LOAD_DWORD]], [[DEF1]], 0, 0, implicit $exec
+    ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM:%[0-9]+]].sub0:sreg_64_xexec = S_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM4]], 0, 0
+    ; CHECK-NEXT: undef [[S_LOAD_DWORD_IMM1:%[0-9]+]].sub0:sreg_64_xexec = S_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM4]], 4, 0
+    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[V_MAD_I64_I32_e64_2]], 32, 0, implicit $exec
+    ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM5:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
+    ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]].sub1:sreg_64_xexec = S_MOV_B32 0
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32_xm0 = S_LSHR_B32 [[S_LOAD_DWORDX2_IMM3]].sub0, 16, implicit-def d...
[truncated]

rampitec · 2023-10-24T21:37:54Z

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+  GCNUpwardRPTracker RPT(LIS);


I thought we are using GCNDownwardRPTracker in the scheduling.

What kind of difference are we expecting between the two?
Would it makes sense if it was configurable?

Sorry I had to provide more context about this.

Piotr has found the problem with upward tracker D33289 and Jay suggested to make a debug pass to test our RP values before fixing the tracker.

I think we should test both trackers and in theory they should give the same results so we can do an upward and downward runs and check their output with a single Filecheck prefix. If they don't match test update tool will warn about this and we can add a second and third prefixes to see the difference.

I've faced a situation when the trackers disagree, let me add the downward tracker option to this pass and I'll try to illustrate how it works.

Very briefly:

undef %0.sub1:sgpr_64 = ... use %0

LiveIntervals creates only a subrange for sub1 subreg but not a subrange for undefined sub0, but sub0 is used when %0 is accessed.

They disagree from the very first attempt and it is very inconvenient to have two Filecheck prefixes for upward and downward trackers because it's not easy to compare. We can track RP both ways and print the difference at each position if it exists.

perlfu · 2023-10-25T05:09:10Z

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

+    MBB.printName(OS);
+    OS << ":\n";
+
+    if (MBB.empty()) {


I think this could be tweaked to avoid duplication of the printing logic.
It would also be helpful to have live-through output for all blocks, even if this can be inferred from live-in versus live-out, it is helpful to human readers.

Thanks, good idea to add live-through for every block.

The logic cannot be fully deduplicate though, because trackers require an instruction to start with and we don't have it for empty blocks, so I used SlotIndex instead.

perlfu · 2023-10-25T05:10:31Z

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+  GCNUpwardRPTracker RPT(LIS);


What kind of difference are we expecting between the two?
Would it makes sense if it was configurable?

* downward tracker * skip dbg values

vpykhtin · 2023-10-25T21:18:30Z

I've added downward tracker as a separate pass to show how it looks.

github-actions · 2023-10-25T21:28:45Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

vpykhtin · 2023-10-26T14:44:48Z

Improved version:

added live-through registers sets.
added check if tracked live register set matches the set reported by LiveIntervals.
debug values on BB boundaries are correctly skipped now.
minimized special processing of empty basic blocks (I had to modify upward tracker interface).
added testcase to illustrate LiveIntervals problem with undefined subregs,
please take a look at upward_problem_lis_subregs_mismatch testcase.
other review issues solved.

I decided to leave separate passes for upward and downward trackers.

This is close to the final version but I need to try running this pass with llc lit tests.

…nes. * Fixed formatting.

vpykhtin · 2023-10-27T09:52:44Z

but I need to try running this pass with llc lit tests.

I believe my printing pass is inapplicable for .ll tests because I cannot run separate passes there?

I think I'm done with this patch.

P.S. If I understood correctly -print<...> syntax is only available with the new pass manager which isn't available for our target yet?

jayfoad · 2023-10-27T10:05:09Z

P.S. If I understood correctly -print<...> syntax is only available with the new pass manager which isn't available for our target yet?

Yeah, it does not support MachineIR passes (for any target) yet.

jayfoad · 2023-10-27T13:00:18Z

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

+    GCNRPTracker::LiveRegSet LRThr = getIntersection(LRAtMBBBegin, LRAtMBBEnd);
+    OS << PFX "  Live-thr:" << llvm::print(LRThr, MRI);


Is this what people normally mean by "live through"? I wondered if it should only include things that have no defs or uses in the block.

I thought "live through" means that a register isn't redefined in the block. However I forgot that we can have redefinitions after register coalescer and using only intersection isn't enough to tell if a register keeps its value through the block. I can use the result of intersection to check if a register is redefined (or used?)

Sorry, I mean using registers from the intersection I need to perform additional pass on def/used for a given block.

IMHO it shall mean 'no defs or kills in the block'. Uses itself are included.

Should be enough to test whether an MBB is in one liverange segment of reg or subreg.

Fixed live-through register set calculation, assuming live-through register is in live-in and live-out sets, has no redefinitions but may have uses in the block.

jayfoad · 2023-10-27T13:08:25Z

llvm/test/CodeGen/AMDGPU/regpressure_printer.mir

+# This testcase shows the problem with LiveIntervals: it doesn't create
+# subranges for undefined but used subregisters. Upward tracker is able to see
+# the use of undefined subregister and tracks it correctly.


I would say that undef-read values should not contribute to register pressure, because the register allocator does not have to assign them a distinct register. So LIS is "right" and RPU is "wrong".

But it cannot allocate half of register if it's required by an instruction?

Between the first V_MOV and the last S_NOP, only half of %0:vreg_64 is live so the allocator only needs to dedicate one VGPR to it. All other VGPRs can be reused for other things, so they should not contribute to the register pressure.

Right, given that both trackers are already optimistic in that they believe the allocator can reuse for example, unused subregs of %0.sub0:sgpr_128 use, so this is the same situation here.

jayfoad · 2023-10-27T13:11:32Z

llvm/test/CodeGen/AMDGPU/regpressure_printer.mir

+  ; RPU-NEXT:   0     7
+  ; RPU-NEXT:   0     8      %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec
+  ; RPU-NEXT:   0     8
+  ; RPU-NEXT:   0     10     %5:vreg_64 = COPY %2:vreg_64


RPU and RPD are both wrong here. Pressure "at" this instruction should be 9.

jayfoad · 2023-10-27T13:13:03Z

llvm/test/CodeGen/AMDGPU/regpressure_printer.mir

+  ; RPD-NEXT:   0     8
+  ; RPD-NEXT:   0     10     %5:vreg_64 = COPY %2:vreg_64
+  ; RPD-NEXT:   0     9
+  ; RPD-NEXT:   0     10     undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0:vreg_64, %5.sub0:vreg_64, implicit $mode, implicit $exec


RPD seems particularly wrong here. Pressure does not go as high as 10 here.

vpykhtin · 2023-11-01T11:25:31Z

I'm really sorry for spam here, wrong move within VSCode.

vpykhtin · 2023-11-01T12:47:43Z

Should we move on and submit this patch? @jayfoad do you have concerns about live-through register set computation or others? I believe fixing trackers should go to another PR.

jayfoad

Should we move on and submit this patch?

Yes!

@jayfoad do you have concerns about live-through register set computation or others?

I personally have no interest in the live-through part. You could remove it from this patch, but I don't mind if others want to keep it.

I believe fixing trackers should go to another PR.

Agreed.

vpykhtin · 2023-11-01T22:03:39Z

I've removed live-through registers printing from this PR and will submit it separately.

vpykhtin added 2 commits October 19, 2023 10:07

[AMDGPU] GCNRegPressure printing pass for testing.

89aec40

Various improvents:

a9a33cd

* Added printing of live-in, live-out and live-through sets. * Empty BB aren't skipped now.

vpykhtin requested review from jayfoad, arsenm, kerbowa and rampitec October 24, 2023 11:46

llvmbot added the backend:AMDGPU label Oct 24, 2023

vpykhtin self-assigned this Oct 24, 2023

vpykhtin requested review from perlfu and piotrAMD October 24, 2023 11:53

rampitec reviewed Oct 24, 2023

View reviewed changes

perlfu reviewed Oct 25, 2023

View reviewed changes

Added:

814ccb9

* downward tracker * skip dbg values

arsenm requested changes Oct 26, 2023

View reviewed changes

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp Outdated Show resolved Hide resolved

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp Show resolved Hide resolved

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp Outdated Show resolved Hide resolved

vpykhtin added 2 commits October 26, 2023 16:12

restore tests

abb8cb2

Improved version + comments.

cd242e6

* Fixed release build by removing NDEBUG guards around printing routi…

8f8857a

…nes. * Fixed formatting.

arsenm approved these changes Oct 27, 2023

View reviewed changes

jayfoad reviewed Oct 27, 2023

View reviewed changes

Fixed live-throught register set calculation, added a test.

839e325

vpykhtin requested review from JDevlieghere and Endilll as code owners November 1, 2023 11:18

vpykhtin requested review from nikic and a team as code owners November 1, 2023 11:18

vpykhtin force-pushed the rp_printer branch from 8ff2697 to 839e325 Compare November 1, 2023 11:23

vpykhtin removed request for a team, nikic, JDevlieghere and Endilll November 1, 2023 11:24

Merge branch 'main' into rp_printer

6e64f6e

jayfoad approved these changes Nov 1, 2023

View reviewed changes

vpykhtin added 2 commits November 1, 2023 19:50

Move live-through register set printing to another patch.

c3d37bd

Merge branch 'main' into rp_printer

4685d87

vpykhtin merged commit e808f8a into llvm:main Nov 1, 2023

vpykhtin deleted the rp_printer branch November 1, 2023 22:01

		GCNRPTracker::LiveRegSet LRThr = getIntersection(LRAtMBBBegin, LRAtMBBEnd);
		OS << PFX " Live-thr:" << llvm::print(LRThr, MRI);

[AMDGPU] GCNRegPressure printing pass for testing. #70031

[AMDGPU] GCNRegPressure printing pass for testing. #70031

Uh oh!

Conversation

vpykhtin commented Oct 24, 2023

Uh oh!

llvmbot commented Oct 24, 2023

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

vpykhtin commented Oct 25, 2023

Uh oh!

github-actions bot commented Oct 25, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

vpykhtin commented Oct 26, 2023

Uh oh!

vpykhtin commented Oct 27, 2023

Uh oh!

jayfoad commented Oct 27, 2023

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

vpykhtin commented Nov 1, 2023

Uh oh!

vpykhtin commented Nov 1, 2023

Uh oh!

jayfoad left a comment

Choose a reason for hiding this comment

Uh oh!

vpykhtin commented Nov 1, 2023

Uh oh!

Uh oh!

github-actions bot commented Oct 25, 2023 •

edited

Loading