-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Fix GCNUpwardRPTracker: max register pressure on defs. #74422
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Valery Pykhtin (vpykhtin) ChangesTreat a defined register as fully live "at" the instruction and update maximum pressure accordingly. Fixes #73786. First part from #74328. Tests will be presubmitted separately, please take a look at 6223a02 for the diff. Full diff: https://github.com/llvm/llvm-project/pull/74422.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 5ebf834377f2c..fd8f0bebd3bec 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -274,32 +274,42 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (MI.isDebugInstr())
return;
- auto DecrementDef = [this](const MachineOperand &MO) {
+ // Kill all defs.
+ GCNRegPressure DefPressure, ECDefPressure;
+ bool HasECDefs = false;
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual())
+ continue;
+
Register Reg = MO.getReg();
+ LaneBitmask DefMask = getDefRegMask(MO, *MRI);
+
+ // Treat a def as fully live at the moment of definition: keep a record.
+ if (MO.isEarlyClobber()) {
+ ECDefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
+ HasECDefs = true;
+ } else
+ DefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
+
auto I = LiveRegs.find(Reg);
if (I == LiveRegs.end())
- return;
+ continue;
LaneBitmask &LiveMask = I->second;
LaneBitmask PrevMask = LiveMask;
- LiveMask &= ~getDefRegMask(MO, *MRI);
+ LiveMask &= ~DefMask;
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
if (LiveMask.none())
LiveRegs.erase(I);
- };
-
- // Decrement non-early-clobber defs.
- SmallVector<const MachineOperand *, 2> EarlyClobberDefs;
- for (const MachineOperand &MO : MI.all_defs()) {
- if (!MO.getReg().isVirtual())
- continue;
- if (!MO.isEarlyClobber())
- DecrementDef(MO);
- else
- EarlyClobberDefs.push_back(&MO);
}
- // Increment uses.
+ // Update MaxPressure with defs pressure.
+ DefPressure += CurPressure;
+ if (HasECDefs)
+ DefPressure += ECDefPressure;
+ MaxPressure = max(DefPressure, MaxPressure);
+
+ // Make uses alive.
SmallVector<RegisterMaskPair, 8> RegUses;
collectVirtualRegUses(RegUses, MI, LIS, *MRI);
for (const RegisterMaskPair &U : RegUses) {
@@ -309,13 +319,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
}
- // Point of maximum pressure: non-early-clobber defs are decremented and uses
- // are incremented.
- MaxPressure = max(CurPressure, MaxPressure);
-
- // Now decrement early clobber defs.
- for (const MachineOperand *MO : EarlyClobberDefs)
- DecrementDef(*MO);
+ // Update MaxPressure with uses plus early-clobber defs pressure.
+ MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
+ : max(CurPressure, MaxPressure);
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index e21bf10d795ba..4100970fe1a96 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -85,6 +85,18 @@ struct GCNRegPressure {
return !(*this == O);
}
+ GCNRegPressure &operator+=(const GCNRegPressure &RHS) {
+ for (unsigned I = 0; I < TOTAL_KINDS; ++I)
+ Value[I] += RHS.Value[I];
+ return *this;
+ }
+
+ GCNRegPressure &operator-=(const GCNRegPressure &RHS) {
+ for (unsigned I = 0; I < TOTAL_KINDS; ++I)
+ Value[I] -= RHS.Value[I];
+ return *this;
+ }
+
void dump() const;
private:
@@ -105,6 +117,20 @@ inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
return Res;
}
+inline GCNRegPressure operator+(const GCNRegPressure &P1,
+ const GCNRegPressure &P2) {
+ GCNRegPressure Sum = P1;
+ Sum += P2;
+ return Sum;
+}
+
+inline GCNRegPressure operator-(const GCNRegPressure &P1,
+ const GCNRegPressure &P2) {
+ GCNRegPressure Diff = P1;
+ Diff -= P2;
+ return Diff;
+}
+
class GCNRPTracker {
public:
using LiveRegSet = DenseMap<unsigned, LaneBitmask>;
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
index a1722c42b189f..f0c5ba489ef3d 100644
--- a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
+++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null -verify-machineinstrs --run-pass=amdgpu-print-rp %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPU
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null -verify-machineinstrs --run-pass=amdgpu-print-rp -amdgpu-print-rp-downward %s 2>&1 >/dev/null | FileCheck %s --check-prefix=RP --check-prefix=RPD
---
@@ -47,87 +47,46 @@ body: |
name: live_through_test
tracksRegLiveness: true
body: |
- ; RPU-LABEL: name: live_through_test
- ; RPU: bb.0:
- ; RPU-NEXT: Live-in:
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: Live-out: %0:00000000000000F3
- ; RPU-NEXT: Live-thr:
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: bb.1:
- ; RPU-NEXT: Live-in: %0:00000000000000F3
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: Live-out: %0:00000000000000C3
- ; RPU-NEXT: Live-thr: %0:00000000000000C0
- ; RPU-NEXT: 1 0
- ; RPU-NEXT: bb.2:
- ; RPU-NEXT: Live-in: %0:00000000000000C3
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: Live-out:
- ; RPU-NEXT: Live-thr:
- ; RPU-NEXT: 0 0
- ;
- ; RPD-LABEL: name: live_through_test
- ; RPD: bb.0:
- ; RPD-NEXT: Live-in:
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: Live-out: %0:00000000000000F3
- ; RPD-NEXT: Live-thr:
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: bb.1:
- ; RPD-NEXT: Live-in: %0:00000000000000F3
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: Live-out: %0:00000000000000C3
- ; RPD-NEXT: Live-thr: %0:00000000000000C0
- ; RPD-NEXT: 1 0
- ; RPD-NEXT: bb.2:
- ; RPD-NEXT: Live-in: %0:00000000000000C3
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: Live-out:
- ; RPD-NEXT: Live-thr:
- ; RPD-NEXT: 0 0
+ ; RP-LABEL: name: live_through_test
+ ; RP: bb.0:
+ ; RP-NEXT: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: Live-out: %0:00000000000000F3
+ ; RP-NEXT: Live-thr:
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: bb.1:
+ ; RP-NEXT: Live-in: %0:00000000000000F3
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: Live-out: %0:00000000000000C3
+ ; RP-NEXT: Live-thr: %0:00000000000000C0
+ ; RP-NEXT: 1 0
+ ; RP-NEXT: bb.2:
+ ; RP-NEXT: Live-in: %0:00000000000000C3
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
+ ; RP-NEXT: Live-thr:
+ ; RP-NEXT: 0 0
bb.0:
%0:sgpr_128 = IMPLICIT_DEF
bb.1:
@@ -223,7 +182,7 @@ body: |
; RPU-NEXT: 0 7
; RPU-NEXT: 0 7 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 6
- ; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF
; RPU-NEXT: 0 7
; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
; RPU-NEXT: 0 9
@@ -262,7 +221,7 @@ body: |
; RPU-NEXT: 0 12
; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 10
- ; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 10
; RPU-NEXT: 0 10 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
; RPU-NEXT: 0 9
@@ -550,7 +509,7 @@ body: |
; RPU-NEXT: 0 0
; RPU-NEXT: 0 0 $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0
; RPU-NEXT: 0 0
- ; RPU-NEXT: 0 0 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
+ ; RPU-NEXT: 0 1 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
; RPU-NEXT: 0 0
; RPU-NEXT: 0 0 S_CMP_GT_U32 $sgpr0, 15, implicit-def $scc
; RPU-NEXT: 0 0
@@ -569,7 +528,7 @@ body: |
; RPU-NEXT: 0 1
; RPU-NEXT: 0 1 $m0 = S_MOV_B32 killed $sgpr0
; RPU-NEXT: 0 1
- ; RPU-NEXT: 0 1 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
+ ; RPU-NEXT: 0 16 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
; RPU-NEXT: 0 1
; RPU-NEXT: Live-out: %0:0000000000000C00
; RPU-NEXT: Live-thr:
@@ -666,3 +625,100 @@ body: |
EXP_DONE 0, %49:vgpr_32, undef %51:vgpr_32, undef %53:vgpr_32, undef %55:vgpr_32, -1, 0, 1, implicit $exec
S_ENDPGM 0
...
+---
+name: test_partially_used_def
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPU-LABEL: name: test_partially_used_def
+ ; RPU: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPU-NEXT: 4 0
+ ; RPU-NEXT: 4 0 %1:sgpr_128 = COPY %0:sgpr_128
+ ; RPU-NEXT: 1 0
+ ; RPU-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ; RPU-NEXT: Live-thr:
+ ; RPU-NEXT: 0 0
+ ;
+ ; RPD-LABEL: name: test_partially_used_def
+ ; RPD: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPD-NEXT: 4 0
+ ; RPD-NEXT: 8 0 %1:sgpr_128 = COPY %0:sgpr_128
+ ; RPD-NEXT: 1 0
+ ; RPD-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
+ ; RPD-NEXT: Live-thr:
+ ; RPD-NEXT: 0 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr_128 = COPY %0:sgpr_128
+ S_NOP 0, implicit %1.sub1
+...
+---
+name: test_partially_used_early_clobber_def
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RP-LABEL: name: test_partially_used_early_clobber_def
+ ; RP: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RP-NEXT: 4 0
+ ; RP-NEXT: 8 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128
+ ; RP-NEXT: 1 0
+ ; RP-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
+ ; RP-NEXT: Live-thr:
+ ; RP-NEXT: 0 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ early-clobber %1:sgpr_128 = COPY %0:sgpr_128
+ S_NOP 0, implicit %1.sub1
+...
+---
+name: test_partially_used_def_and_early_clobber_def
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPU-LABEL: name: test_partially_used_def_and_early_clobber_def
+ ; RPU: Live-in:
+ ; RPU-NEXT: SGPR VGPR
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPU-NEXT: 4 0
+ ; RPU-NEXT: 16 0 %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128
+ ; RPU-NEXT: 6 0
+ ; RPU-NEXT: 6 0 S_NOP 0, implicit %1.sub1:sgpr_128, implicit %2.sub0_sub1:sgpr_128, implicit %3.sub0_sub1_sub2:sgpr_128
+ ; RPU-NEXT: 0 0
+ ; RPU-NEXT: Live-out:
+ ; RPU-NEXT: Live-thr:
+ ; RPU-NEXT: 0 0
+ ;
+ ; RPD-LABEL: name: test_partially_used_def_and_early_clobber_def
+ ; RPD: Live-in:
+ ; RPD-NEXT: SGPR VGPR
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RPD-NEXT: 4 0
+ ; RPD-NEXT: 20 0 %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128
+ ; RPD-NEXT: 6 0
+ ; RPD-NEXT: 6 0 S_NOP 0, implicit %1.sub1:sgpr_128, implicit %2.sub0_sub1:sgpr_128, implicit %3.sub0_sub1_sub2:sgpr_128
+ ; RPD-NEXT: 0 0
+ ; RPD-NEXT: Live-out:
+ ; RPD-NEXT: Live-thr:
+ ; RPD-NEXT: 0 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def early-clobber %4:sgpr_128
+ S_NOP 0, implicit %1.sub1, implicit %2.sub0_sub1, implicit %3.sub0_sub1_sub2
+...
|
auto DecrementDef = [this](const MachineOperand &MO) { | ||
// Kill all defs. | ||
GCNRegPressure DefPressure, ECDefPressure; | ||
bool HasECDefs = false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just to check my understanding - adding the variable HasECDefs
is not really needed, but it saves some cpu cycles as it avoids adding pressures with all zeros in a common non-clobber case (DefPressure += ECDefPressure
). Is that right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, I decided to add it because early-clobbers are rare.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe not worth it, since GCNRegPressure::operator+
is just 6 integer add instructions - but I don't really mind either way.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Thank you, @piotrAMD! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
The idea here is that all defs (whether they are early-clobber or not) are simultaneously live "at" the instruction, right? Hopefully that matches how the register allocator handles them.
auto DecrementDef = [this](const MachineOperand &MO) { | ||
// Kill all defs. | ||
GCNRegPressure DefPressure, ECDefPressure; | ||
bool HasECDefs = false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe not worth it, since GCNRegPressure::operator+
is just 6 integer add instructions - but I don't really mind either way.
right, thank you Jay! |
822ba7a
to
f05d16c
Compare
f05d16c
to
9ea29dc
Compare
Treat a defined register as fully live "at" the instruction and update maximum pressure accordingly.
Fixes #73786. First part from #74328.
Tests will be presubmitted separately, please take a look at 6223a02 for the diff.