Skip to content

Commit

Permalink
[AMDGPU] Add functions for composing and decomposing S_WAITCNT_DEPCTR op…
Browse files Browse the repository at this point in the history
…erands

Add functions AMDGPU::DepCtr::encodeField*() and AMDGPU::DepCtr::decodeField*()
for each of vm_vsrc, va_vdst and sa_sdst. These are now used in
AMDGPUInsertDelayAlu and GCNHazardRecognizer so as to make working with
S_WAITCNT_DEPCTR operands easier and more readable.

Differential Revision: https://reviews.llvm.org/D154424
  • Loading branch information
stepthomas committed Jul 4, 2023
1 parent d732965 commit 8aedad0
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 12 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass {
MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B64)
return true;
if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
(MI.getOperand(0).getImm() & 0xf000) == 0)
AMDGPU::DepCtr::decodeFieldVaVdst(MI.getOperand(0).getImm()) == 0)
return true;
return false;
}
Expand Down
23 changes: 12 additions & 11 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1170,7 +1170,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
(MI.getOpcode() == AMDGPU::S_WAITCNT &&
!MI.getOperand(0).getImm()) ||
(MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
MI.getOperand(0).getImm() == 0xffe3);
AMDGPU::DepCtr::decodeFieldVmVsrc(MI.getOperand(0).getImm()) == 0);
};

if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
Expand All @@ -1180,7 +1180,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
const SIInstrInfo *TII = ST.getInstrInfo();
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
.addImm(0xffe3);
.addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
return true;
}

Expand Down Expand Up @@ -1293,7 +1293,8 @@ bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
return true;
}
if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
(MI.getOperand(0).getImm() & 0xfffe) == 0xfffe)
AMDGPU::DepCtr::encodeFieldSaSdst(MI.getOperand(0).getImm(), 0) ==
0xfffe)
return true;
return false;
};
Expand All @@ -1304,7 +1305,7 @@ bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {

BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
.addImm(0xfffe);
.addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));
return true;
}

Expand Down Expand Up @@ -1452,7 +1453,7 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
I.getOperand(0).getImm() == 0xffe3);
AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0);
};

if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
Expand All @@ -1461,7 +1462,7 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {

BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
.addImm(0xffe3);
.addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));

return true;
}
Expand Down Expand Up @@ -1523,7 +1524,7 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
I.getOperand(0).getImm() == 0x0fff))
AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
return HazardExpired;

// Track registers writes
Expand Down Expand Up @@ -1685,10 +1686,10 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
return false;

// Hazard is observed - insert a wait on va_vdst counter to ensure hazard is
// avoided (mask 0x0fff achieves this).
// avoided.
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
.addImm(0x0fff);
.addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0));

return true;
}
Expand Down Expand Up @@ -2779,7 +2780,7 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
auto IsExpiredFn = [&MRI, this](const MachineInstr &I, int) {
// s_waitcnt_depctr sa_sdst(0) mitigates hazard.
if (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
!(I.getOperand(0).getImm() & 0x1))
AMDGPU::DepCtr::decodeFieldSaSdst(I.getOperand(0).getImm()) == 0)
return true;

// VALU access to any SGPR or literal constant other than HazardReg
Expand Down Expand Up @@ -2829,7 +2830,7 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
// Add s_waitcnt_depctr sa_sdst(0) after SALU write.
BuildMI(*MI->getParent(), NextMI, MI->getDebugLoc(),
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
.addImm(0xfffe);
.addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));

// SALU write may be s_getpc in a bundle.
if (MI->getOpcode() == AMDGPU::S_GETPC_B64) {
Expand Down
54 changes: 54 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,24 @@ unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}

/// \returns the width, in bits, of the VmVsrc field.
inline unsigned getVmVsrcBitWidth() {
  const unsigned NumBits = 3;
  return NumBits;
}

/// \returns the bit shift applied to the VmVsrc field.
inline unsigned getVmVsrcBitShift() {
  const unsigned FirstBit = 2;
  return FirstBit;
}

/// \returns the width, in bits, of the VaVdst field.
inline unsigned getVaVdstBitWidth() {
  const unsigned NumBits = 4;
  return NumBits;
}

/// \returns the bit shift applied to the VaVdst field.
inline unsigned getVaVdstBitShift() {
  const unsigned FirstBit = 12;
  return FirstBit;
}

/// \returns the width, in bits, of the SaSdst field.
inline unsigned getSaSdstBitWidth() {
  const unsigned NumBits = 1;
  return NumBits;
}

/// \returns the bit shift applied to the SaSdst field.
inline unsigned getSaSdstBitShift() {
  const unsigned FirstBit = 0;
  return FirstBit;
}

} // end namespace anonymous

namespace llvm {
Expand Down Expand Up @@ -1501,6 +1519,42 @@ int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
STI);
}

/// Reads the VmVsrc field out of the depctr immediate \p Encoded by handing
/// the VmVsrc shift and width to unpackBits.
unsigned decodeFieldVmVsrc(unsigned Encoded) {
  const unsigned FieldShift = getVmVsrcBitShift();
  const unsigned FieldWidth = getVmVsrcBitWidth();
  return unpackBits(Encoded, FieldShift, FieldWidth);
}

/// Reads the VaVdst field out of the depctr immediate \p Encoded by handing
/// the VaVdst shift and width to unpackBits.
unsigned decodeFieldVaVdst(unsigned Encoded) {
  const unsigned FieldShift = getVaVdstBitShift();
  const unsigned FieldWidth = getVaVdstBitWidth();
  return unpackBits(Encoded, FieldShift, FieldWidth);
}

/// Reads the SaSdst field out of the depctr immediate \p Encoded by handing
/// the SaSdst shift and width to unpackBits.
unsigned decodeFieldSaSdst(unsigned Encoded) {
  const unsigned FieldShift = getSaSdstBitShift();
  const unsigned FieldWidth = getSaSdstBitWidth();
  return unpackBits(Encoded, FieldShift, FieldWidth);
}

/// Combines \p VmVsrc with the existing depctr immediate \p Encoded by
/// passing both to packBits along with the VmVsrc shift and width.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
  const unsigned FieldShift = getVmVsrcBitShift();
  const unsigned FieldWidth = getVmVsrcBitWidth();
  return packBits(VmVsrc, Encoded, FieldShift, FieldWidth);
}

/// Builds a depctr immediate carrying \p VmVsrc, starting from a base
/// immediate of 0xffff and delegating to the two-argument overload.
unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
  const unsigned BaseImm = 0xffff;
  return encodeFieldVmVsrc(BaseImm, VmVsrc);
}

/// Combines \p VaVdst with the existing depctr immediate \p Encoded by
/// passing both to packBits along with the VaVdst shift and width.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
  const unsigned FieldShift = getVaVdstBitShift();
  const unsigned FieldWidth = getVaVdstBitWidth();
  return packBits(VaVdst, Encoded, FieldShift, FieldWidth);
}

/// Builds a depctr immediate carrying \p VaVdst, starting from a base
/// immediate of 0xffff and delegating to the two-argument overload.
unsigned encodeFieldVaVdst(unsigned VaVdst) {
  const unsigned BaseImm = 0xffff;
  return encodeFieldVaVdst(BaseImm, VaVdst);
}

/// Combines \p SaSdst with the existing depctr immediate \p Encoded by
/// passing both to packBits along with the SaSdst shift and width.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
  const unsigned FieldShift = getSaSdstBitShift();
  const unsigned FieldWidth = getSaSdstBitWidth();
  return packBits(SaSdst, Encoded, FieldShift, FieldWidth);
}

/// Builds a depctr immediate carrying \p SaSdst, starting from a base
/// immediate of 0xffff and delegating to the two-argument overload.
unsigned encodeFieldSaSdst(unsigned SaSdst) {
  const unsigned BaseImm = 0xffff;
  return encodeFieldSaSdst(BaseImm, SaSdst);
}

} // namespace DepCtr

//===----------------------------------------------------------------------===//
Expand Down
27 changes: 27 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,33 @@ bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
bool &IsDefault, const MCSubtargetInfo &STI);

/// Extracts the va_vdst field from an S_WAITCNT_DEPCTR operand.
/// \param Encoded the depctr immediate to read the field from.
/// \returns Decoded VaVdst from given immediate \p Encoded.
unsigned decodeFieldVaVdst(unsigned Encoded);

/// Extracts the vm_vsrc field from an S_WAITCNT_DEPCTR operand.
/// \param Encoded the depctr immediate to read the field from.
/// \returns Decoded VmVsrc from given immediate \p Encoded.
unsigned decodeFieldVmVsrc(unsigned Encoded);

/// Extracts the sa_sdst field from an S_WAITCNT_DEPCTR operand.
/// \param Encoded the depctr immediate to read the field from.
/// \returns Decoded SaSdst from given immediate \p Encoded.
unsigned decodeFieldSaSdst(unsigned Encoded);

/// Builds an S_WAITCNT_DEPCTR operand from a vm_vsrc value alone. The
/// definition forwards to the two-argument overload with a base immediate of
/// 0xffff.
/// \param VmVsrc the vm_vsrc value to encode.
/// \returns \p VmVsrc as an encoded Depctr immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc);

/// Places a vm_vsrc value into an existing S_WAITCNT_DEPCTR operand.
/// NOTE(review): the other bits of \p Encoded are presumably left untouched;
/// confirm against packBits.
/// \param Encoded the depctr immediate to start from.
/// \param VmVsrc the vm_vsrc value to encode.
/// \returns \p Encoded combined with encoded \p VmVsrc.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);

/// Builds an S_WAITCNT_DEPCTR operand from a va_vdst value alone. The
/// definition forwards to the two-argument overload with a base immediate of
/// 0xffff.
/// \param VaVdst the va_vdst value to encode.
/// \returns \p VaVdst as an encoded Depctr immediate.
unsigned encodeFieldVaVdst(unsigned VaVdst);

/// Places a va_vdst value into an existing S_WAITCNT_DEPCTR operand.
/// NOTE(review): the other bits of \p Encoded are presumably left untouched;
/// confirm against packBits.
/// \param Encoded the depctr immediate to start from.
/// \param VaVdst the va_vdst value to encode.
/// \returns \p Encoded combined with encoded \p VaVdst.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);

/// Builds an S_WAITCNT_DEPCTR operand from an sa_sdst value alone. The
/// definition forwards to the two-argument overload with a base immediate of
/// 0xffff.
/// \param SaSdst the sa_sdst value to encode.
/// \returns \p SaSdst as an encoded Depctr immediate.
unsigned encodeFieldSaSdst(unsigned SaSdst);

/// Places an sa_sdst value into an existing S_WAITCNT_DEPCTR operand.
/// NOTE(review): the other bits of \p Encoded are presumably left untouched;
/// confirm against packBits.
/// \param Encoded the depctr immediate to start from.
/// \param SaSdst the sa_sdst value to encode.
/// \returns \p Encoded combined with encoded \p SaSdst.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);

} // namespace DepCtr

namespace Exp {
Expand Down

0 comments on commit 8aedad0

Please sign in to comment.