Skip to content

Commit

Permalink
[AMDGPU] Support GFX12 VDSDIR instructions WAITVMSRC operand in GCNHa…
Browse files Browse the repository at this point in the history
…zardRecognizer (#77628)

Modify GCNHazardRecognizer::fixLdsDirectVMEMHazard() so that the waitvsrc
operand of gfx12 DS_PARAM_LOAD or DS_DIRECT_LOAD instructions is set
appropriately depending on whether a hazard is found, rather than
inserting an S_WAITCNT_DEPCTR instruction when a hazard needs to be
mitigated.

Co-authored-by: Stephen Thomas <Stephen.Thomas@amd.com>
  • Loading branch information
jayfoad and stepthomas committed Jan 11, 2024
1 parent 40d5c2b commit b120dae
Show file tree
Hide file tree
Showing 4 changed files with 405 additions and 5 deletions.
17 changes: 12 additions & 5 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1450,20 +1450,27 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
return false;
return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
};
auto IsExpiredFn = [](const MachineInstr &I, int) {
bool LdsdirCanWait = ST.hasLdsWaitVMSRC();
auto IsExpiredFn = [this, LdsdirCanWait](const MachineInstr &I, int) {
return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0);
AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0) ||
(LdsdirCanWait && SIInstrInfo::isLDSDIR(I) &&
!TII.getNamedOperand(I, AMDGPU::OpName::waitvsrc)->getImm());
};

if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
std::numeric_limits<int>::max())
return false;

BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
.addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
if (LdsdirCanWait) {
TII.getNamedOperand(*MI, AMDGPU::OpName::waitvsrc)->setImm(0);
} else {
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
.addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
}

return true;
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

// True when getGeneration() is GFX11 or later. By its name this gates
// LDS-direct support (NOTE(review): confirm against the callers).
bool hasLdsDirect() const { return getGeneration() >= GFX11; }

// True when getGeneration() is GFX12 or later.
// GCNHazardRecognizer::fixLdsDirectVMEMHazard() queries this to decide
// whether to mitigate a hazard by setting the LDSDIR instruction's waitvsrc
// operand to 0 instead of inserting an S_WAITCNT_DEPCTR instruction.
bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }

// True when getGeneration() is GFX11 or later. By its name this indicates
// that the VALU partial-forwarding hazard applies to the subtarget
// (NOTE(review): confirm against the hazard recognizer's use of it).
bool hasVALUPartialForwardingHazard() const {
return getGeneration() >= GFX11;
}
Expand Down
Loading

0 comments on commit b120dae

Please sign in to comment.