-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU] Register allocation anti-hints to reduce MFMA hazard NOPs #156943
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
mssefat
wants to merge
20
commits into
llvm:main
Choose a base branch
from
mssefat:reg-hint-avoid-hazard
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
20 commits
Select commit
Hold shift + click to select a range
7ef2da8
[AMDGPU] Improve register allocation to reduce MFMA hazard NOPs
mssefat 7d3f015
Rebase
mssefat f8df624
rebase test files
mssefat 2d8ef8b
Restored SIRegisterInfo files
mssefat f0f214b
Restored SIMachineFunctionInfo files
mssefat 9b0ca6f
Updated sources to support anti-hint mechanism
mssefat e34e16b
Made anti-hints map conditional in MIRYamlMapping
mssefat 887b556
Updated tests
mssefat 19d7119
Updated mir test
mssefat cd0f88d
Renamed test file
mssefat 7a8d17e
Added print and parse tests
mssefat 3a84b9b
Fixed typo
mssefat b893331
Fixed typo
mssefat f04eb48
Fixed test!
mssefat e82a0e2
Fixed test
mssefat a9b05ec
Fixed typo
mssefat c7a4140
[AMDGPU] Anti-hints in register allocation
mssefat 893d522
Modified flag name to reflect anti-hints
mssefat 9fe1c13
[NFC] Restore and remove to move MIR serialization changes to separat…
mssefat 6d8e044
Named operand and stable partition applied
mssefat File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,12 +37,19 @@ | |
#include "SIRegisterInfo.h" | ||
#include "llvm/CodeGen/LiveIntervals.h" | ||
#include "llvm/CodeGen/MachineFunctionPass.h" | ||
#include "llvm/CodeGen/Register.h" | ||
#include "llvm/InitializePasses.h" | ||
|
||
using namespace llvm; | ||
|
||
#define DEBUG_TYPE "amdgpu-pre-ra-optimizations" | ||
|
||
// Hidden command-line switch, enabled by default, that gates the code in | ||
// GCNPreRAOptimizationsImpl::run() which adds register-allocation anti-hints | ||
// for MFMA registers. Pass -amdgpu-anti-hints-for-mfma=false to turn it off. | ||
static cl::opt<bool> | ||
EnableAntiHintsForMFMARegs("amdgpu-anti-hints-for-mfma", cl::Hidden, | ||
cl::desc("Enable Anti-Hints for " | ||
"MFMA in GCNPreRAOptimizations stage."), | ||
cl::init(true)); | ||
|
||
namespace { | ||
|
||
class GCNPreRAOptimizationsImpl { | ||
|
@@ -247,6 +254,88 @@ bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) { | |
TRI = ST.getRegisterInfo(); | ||
|
||
bool Changed = false; | ||
// Add RA anti-hints to reduce MFMA hazard NOPs | ||
if (EnableAntiHintsForMFMARegs && ST.hasMAIInsts()) { | ||
// Max lookback window for RAW or WAW hazard | ||
constexpr unsigned MaxLookbackWindow = 19; | ||
for (const MachineBasicBlock &MBB : MF) { | ||
SmallVector<SmallVector<Register, 4>, 16> RecentMFMAs; | ||
for (const MachineInstr &MI : MBB) { | ||
if (MI.isDebugInstr()) | ||
continue; | ||
|
||
// Handle MFMA instructions | ||
if (SIInstrInfo::isMFMA(MI)) { | ||
SmallVector<Register, 4> MFMARegisters; | ||
// Helper to get named operand | ||
auto collectNamedOperand = [&](AMDGPU::OpName OpName, | ||
const char *OpNameStr) { | ||
const MachineOperand *MO = TII->getNamedOperand(MI, OpName); | ||
if (!MO) { | ||
LLVM_DEBUG(dbgs() << " Named operand " << OpNameStr | ||
<< " not found\n"); | ||
return; | ||
} | ||
if (MO->isReg() && MO->getReg().isVirtual()) { | ||
Register Reg = MO->getReg(); | ||
const TargetRegisterClass *RC = MRI->getRegClass(Reg); | ||
// Only consider VGPRs | ||
if (TRI->hasVGPRs(RC)) | ||
MFMARegisters.push_back(Reg); | ||
LLVM_DEBUG(dbgs() << " Collected " << OpNameStr << " : " | ||
<< printReg(Reg, TRI) << "\n"); | ||
} | ||
}; | ||
|
||
// Collect destination and source C registers | ||
collectNamedOperand(AMDGPU::OpName::vdst, "vdst"); // Destination | ||
collectNamedOperand(AMDGPU::OpName::src2, | ||
"src2"); // Matrix C (accumulator) | ||
if (!MFMARegisters.empty()) { | ||
RecentMFMAs.emplace_back(std::move(MFMARegisters)); | ||
// Maintain window | ||
if (RecentMFMAs.size() > MaxLookbackWindow) | ||
RecentMFMAs.erase(RecentMFMAs.begin()); | ||
} | ||
continue; | ||
} | ||
bool ShouldCheckReuse = MI.mayLoad() || MI.mayStore() || MI.isCopy() || | ||
SIInstrInfo::isVALU(MI); | ||
// Skip non-relevant instructions, or skip until at least one MFMA is | ||
// encountered | ||
if (!ShouldCheckReuse || RecentMFMAs.empty()) | ||
continue; | ||
|
||
// Process operands that might reuse MFMA registers | ||
for (const MachineOperand &MO : MI.operands()) { | ||
if (!MO.isReg() || !MO.getReg().isVirtual()) | ||
continue; | ||
|
||
const Register CandidateReg = MO.getReg(); | ||
const TargetRegisterClass *CandidateRC = | ||
MRI->getRegClass(CandidateReg); | ||
|
||
// Only process VGPR registers | ||
if (!TRI->isVGPRClass(CandidateRC)) | ||
continue; | ||
for (auto It = RecentMFMAs.rbegin(); It != RecentMFMAs.rend(); ++It) { | ||
const SmallVector<Register, 4> &MFMARegs = *It; | ||
for (Register MFMAReg : MFMARegs) { | ||
// Check if MFMA register is dead at current instruction | ||
const LiveInterval &MFMAInterval = LIS->getInterval(MFMAReg); | ||
const SlotIndex CurrentSlot = | ||
LIS->getInstructionIndex(MI).getRegSlot(); | ||
if (!MFMAInterval.liveAt(CurrentSlot)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are you storing the SlotIndex for all the MFMAs, if you're only interested in the CurrentSlot? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed it. Please check. |
||
// Add bi-directional anti-hints | ||
MRI->addRegAllocationAntiHints(CandidateReg, MFMAReg); | ||
MRI->addRegAllocationAntiHints(MFMAReg, CandidateReg); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { | ||
Register Reg = Register::index2VirtReg(I); | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed it. Please check.