Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
10a0962
Scoring system for rematerializations
lucas-rami Aug 11, 2025
48d1231
Format
lucas-rami Aug 11, 2025
3f55867
Address feedback + fix failing test
lucas-rami Aug 12, 2025
c459495
Remove REMAT_DEBUG and break ties in score
lucas-rami Aug 13, 2025
64e077e
Fix failing tests and rollback mask change
lucas-rami Aug 13, 2025
15faedc
Address more feedback
lucas-rami Sep 30, 2025
2793aa8
Rebase for new test + improve comment
lucas-rami Sep 30, 2025
029a2c6
Refactor scoring system + Remove always benef/latency calc
lucas-rami Oct 7, 2025
cdba9b8
Fix tests
lucas-rami Oct 7, 2025
5552bf4
Improve code for frequency-based calculations
lucas-rami Oct 8, 2025
f57f3bd
Add empty region test
lucas-rami Oct 9, 2025
fde49a3
Correctly derive (sub)reg size and frequency fix
lucas-rami Oct 10, 2025
b805a71
Walk over all regions to compute AchievedOcc
lucas-rami Oct 14, 2025
b0f4cd6
Use std::max for freq calc.
lucas-rami Oct 15, 2025
43c740c
Simplify score calculation and improve debug
lucas-rami Oct 20, 2025
b5f0950
Early exit on maxWavesPerEU and rebase
lucas-rami Nov 4, 2025
5f07ff3
Fix tests and remove arg from TII.rematerialize
lucas-rami Nov 17, 2025
bbc341d
[NFC][AMDGPU] Refactor common code computing excess register pressur…
jmmartinez Nov 12, 2025
e331b29
Unacceptably large test
jmmartinez Nov 18, 2025
cd41eb1
[AMDGPU] Rematerialize VGPR candidates when SGPR spills to VGPR over …
jmmartinez Nov 12, 2025
f013974
Remove undef from test (it still preserves the test behaviour before a…
jmmartinez Nov 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 61 additions & 53 deletions llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,46 @@ void GCNRegPressure::inc(unsigned Reg,
Value[RegKind] += Sign;
}

namespace {

/// Amount by which a register pressure exceeds a set of SGPR/VGPR limits,
/// broken down per register kind. Every field is zero when the pressure fits
/// within the corresponding limit.
///
/// This type is local to this file, hence the anonymous namespace.
struct RegExcess {
  /// SGPRs above MaxSGPRs.
  unsigned SGPR = 0;
  /// Excess of RP.getVGPRNum(ST.hasGFX90AInsts()), plus the VGPRs needed to
  /// hold the excess SGPRs, over MaxVGPRs.
  unsigned VGPR = 0;
  /// Excess of RP.getVGPRNum(false), plus the VGPRs needed to hold the excess
  /// SGPRs, over the subtarget's addressable ArchVGPR count.
  unsigned ArchVGPR = 0;
  /// AGPRs above the addressable ArchVGPR count on subtargets with GFX90A
  /// instructions, or above MaxVGPRs otherwise.
  unsigned AGPR = 0;

  /// Returns true if any register kind is over its limit.
  bool anyExcess() const { return SGPR || VGPR || ArchVGPR || AGPR; }

  /// Returns true if any of the VGPR-kind excesses is non-zero. The SGPR field
  /// is deliberately not tested: excess SGPRs are already folded into the VGPR
  /// and ArchVGPR figures as the number of VGPRs required to hold them.
  bool spillsToMemory() const { return VGPR || ArchVGPR || AGPR; }

  /// Computes the excess of \p RP over \p MaxSGPRs and \p MaxVGPRs for the
  /// subtarget of \p MF.
  RegExcess(const MachineFunction &MF, const GCNRegPressure &RP,
            unsigned MaxSGPRs, unsigned MaxVGPRs) {
    const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

    // Saturating subtraction: spelled out explicitly instead of relying on
    // unsigned wraparound followed by a narrowing conversion to int.
    auto ExcessOver = [](unsigned Num, unsigned Limit) -> unsigned {
      return Num > Limit ? Num - Limit : 0u;
    };

    SGPR = ExcessOver(RP.getSGPRNum(), MaxSGPRs);

    // The number of virtual VGPRs required to handle excess SGPR: one VGPR per
    // WaveSize excess SGPRs, rounded up.
    const unsigned WaveSize = ST.getWavefrontSize();
    const unsigned VGPRForSGPRSpills = divideCeil(SGPR, WaveSize);

    const unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();

    // Unified excess pressure conditions, accounting for VGPRs used for SGPR
    // spills.
    VGPR = ExcessOver(RP.getVGPRNum(ST.hasGFX90AInsts()) + VGPRForSGPRSpills,
                      MaxVGPRs);

    // Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
    // spills.
    ArchVGPR =
        ExcessOver(RP.getVGPRNum(false) + VGPRForSGPRSpills, MaxArchVGPRs);

    // AGPR excess pressure conditions: the limit depends on whether the
    // subtarget has GFX90A instructions.
    const unsigned AGPRLimit = ST.hasGFX90AInsts() ? MaxArchVGPRs : MaxVGPRs;
    AGPR = ExcessOver(RP.getAGPRNum(), AGPRLimit);
  }
};

} // end anonymous namespace

bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
Expand Down Expand Up @@ -125,61 +165,24 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);

// SGPR excess pressure conditions
unsigned ExcessSGPR = std::max(static_cast<int>(getSGPRNum() - MaxSGPRs), 0);
unsigned OtherExcessSGPR =
std::max(static_cast<int>(O.getSGPRNum() - MaxSGPRs), 0);

auto WaveSize = ST.getWavefrontSize();
// The number of virtual VGPRs required to handle excess SGPR
unsigned VGPRForSGPRSpills = (ExcessSGPR + (WaveSize - 1)) / WaveSize;
unsigned OtherVGPRForSGPRSpills =
(OtherExcessSGPR + (WaveSize - 1)) / WaveSize;
RegExcess Excess(MF, *this, MaxSGPRs, MaxVGPRs);
RegExcess OtherExcess(MF, O, MaxSGPRs, MaxVGPRs);

unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();

// Unified excess pressure conditions, accounting for VGPRs used for SGPR
// spills
unsigned ExcessVGPR =
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) +
VGPRForSGPRSpills - MaxVGPRs),
0);
unsigned OtherExcessVGPR =
std::max(static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) +
OtherVGPRForSGPRSpills - MaxVGPRs),
0);
// Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
// spills
unsigned ExcessArchVGPR = std::max(
static_cast<int>(getVGPRNum(false) + VGPRForSGPRSpills - MaxArchVGPRs),
0);
unsigned OtherExcessArchVGPR =
std::max(static_cast<int>(O.getVGPRNum(false) + OtherVGPRForSGPRSpills -
MaxArchVGPRs),
0);
// AGPR excess pressure conditions
unsigned ExcessAGPR = std::max(
static_cast<int>(ST.hasGFX90AInsts() ? (getAGPRNum() - MaxArchVGPRs)
: (getAGPRNum() - MaxVGPRs)),
0);
unsigned OtherExcessAGPR = std::max(
static_cast<int>(ST.hasGFX90AInsts() ? (O.getAGPRNum() - MaxArchVGPRs)
: (O.getAGPRNum() - MaxVGPRs)),
0);

bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
bool OtherExcessRP = OtherExcessSGPR || OtherExcessVGPR ||
OtherExcessArchVGPR || OtherExcessAGPR;
bool ExcessRP = Excess.anyExcess();
bool OtherExcessRP = OtherExcess.anyExcess();

// Give second precedence to the reduced number of spills to hold the register
// pressure.
if (ExcessRP || OtherExcessRP) {
// The difference in excess VGPR pressure, after including VGPRs used for
// SGPR spills
int VGPRDiff = ((OtherExcessVGPR + OtherExcessArchVGPR + OtherExcessAGPR) -
(ExcessVGPR + ExcessArchVGPR + ExcessAGPR));
int VGPRDiff =
((OtherExcess.VGPR + OtherExcess.ArchVGPR + OtherExcess.AGPR) -
(Excess.VGPR + Excess.ArchVGPR + Excess.AGPR));

int SGPRDiff = OtherExcessSGPR - ExcessSGPR;
int SGPRDiff = OtherExcess.SGPR - Excess.SGPR;

if (VGPRDiff != 0)
return VGPRDiff > 0;
Expand Down Expand Up @@ -413,15 +416,15 @@ bool GCNRPTarget::isSaveBeneficial(Register Reg) const {
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);

RegExcess Excess(MF, RP, MaxSGPRs, MaxVGPRs);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't MaxVGPRs be MaxUnifiedVGPRs? I don't think we want the function's behavior to change when we don't have SGPR spills, and it looks like this will cause the function to report new beneficial saves in cases where the target has already been reached (e.g. gfx942, MaxUnifiedVGPRs=512, MaxVGPRs=256).

Copy link
Contributor Author

@jmmartinez jmmartinez Nov 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're right. In that case it should be:

RegExcess Excess(MF, RP, MaxSGPRs, UnifiedRF ? MaxUnifiedVGPRs : MaxVGPRs);

PS: I have to test this because all these register classes are quite confusing


if (SRI->isSGPRClass(RC))
return RP.getSGPRNum() > MaxSGPRs;
unsigned NumVGPRs =
SRI->isAGPRClass(RC) ? RP.getAGPRNum() : RP.getArchVGPRNum();
// The addressable limit must always be respected.
if (NumVGPRs > MaxVGPRs)
return true;
// For unified RFs, combined VGPR usage limit must be respected as well.
return UnifiedRF && RP.getVGPRNum(true) > MaxUnifiedVGPRs;
return Excess.SGPR;

if (SRI->isAGPRClass(RC))
return Excess.AGPR;

return Excess.VGPR || Excess.ArchVGPR;
}

bool GCNRPTarget::satisfied() const {
Expand All @@ -432,6 +435,11 @@ bool GCNRPTarget::satisfied() const {
return true;
}

bool GCNRPTarget::spillsToMemory() const {
RegExcess Excess(MF, RP, MaxSGPRs, MaxVGPRs);
return Excess.spillsToMemory();
}

///////////////////////////////////////////////////////////////////////////////
// GCNRPTracker

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/GCNRegPressure.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ class GCNRPTarget {

/// Whether the current RP is at or below the defined pressure target.
bool satisfied() const;
bool spillsToMemory() const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
friend raw_ostream &operator<<(raw_ostream &OS, const GCNRPTarget &Target) {
Expand Down
Loading
Loading