diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp index 27036eb02153e..7619a39bac9c1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp @@ -51,7 +51,7 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass { MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B64) return true; if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && - (MI.getOperand(0).getImm() & 0xf000) == 0) + AMDGPU::DepCtr::decodeFieldVaVdst(MI.getOperand(0).getImm()) == 0) return true; return false; } diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index dce3ac90c3b26..59dd1cd6f93c4 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1170,7 +1170,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) { (MI.getOpcode() == AMDGPU::S_WAITCNT && !MI.getOperand(0).getImm()) || (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && - MI.getOperand(0).getImm() == 0xffe3); + AMDGPU::DepCtr::decodeFieldVmVsrc(MI.getOperand(0).getImm()) == 0); }; if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == @@ -1180,7 +1180,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) { const SIInstrInfo *TII = ST.getInstrInfo(); BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_WAITCNT_DEPCTR)) - .addImm(0xffe3); + .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0)); return true; } @@ -1293,7 +1293,8 @@ bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) { return true; } if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && - (MI.getOperand(0).getImm() & 0xfffe) == 0xfffe) + AMDGPU::DepCtr::encodeFieldSaSdst(MI.getOperand(0).getImm(), 0) == + 0xfffe) return true; return false; }; @@ -1304,7 +1305,7 @@ bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) { BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 
TII->get(AMDGPU::S_WAITCNT_DEPCTR)) - .addImm(0xfffe); + .addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0)); return true; } @@ -1452,7 +1453,7 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) { return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) || (I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) || (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && - I.getOperand(0).getImm() == 0xffe3); + AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0); }; if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == @@ -1461,7 +1462,7 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) { BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_WAITCNT_DEPCTR)) - .addImm(0xffe3); + .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0)); return true; } @@ -1523,7 +1524,7 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) { if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) || SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) || (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && - I.getOperand(0).getImm() == 0x0fff)) + AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0)) return HazardExpired; // Track registers writes @@ -1685,10 +1686,10 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) { return false; // Hazard is observed - insert a wait on va_dst counter to ensure hazard is - // avoided (mask 0x0fff achieves this). + // avoided. BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_WAITCNT_DEPCTR)) - .addImm(0x0fff); + .addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0)); return true; } @@ -2779,7 +2780,7 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) { auto IsExpiredFn = [&MRI, this](const MachineInstr &I, int) { // s_waitcnt_depctr sa_sdst(0) mitigates hazard. 
if (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && - !(I.getOperand(0).getImm() & 0x1)) + AMDGPU::DepCtr::decodeFieldSaSdst(I.getOperand(0).getImm()) == 0) return true; // VALU access to any SGPR or literal constant other than HazardReg @@ -2829,7 +2830,7 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) { // Add s_waitcnt_depctr sa_sdst(0) after SALU write. BuildMI(*MI->getParent(), NextMI, MI->getDebugLoc(), TII.get(AMDGPU::S_WAITCNT_DEPCTR)) - .addImm(0xfffe); + .addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0)); // SALU write may be s_getpc in a bundle. if (MI->getOpcode() == AMDGPU::S_GETPC_B64) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index d6156a3d1881e..7c6d6a125373c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -95,6 +95,24 @@ unsigned getVmcntBitWidthHi(unsigned VersionMajor) { return (VersionMajor == 9 || VersionMajor == 10) ? 
2 : 0; } +/// \returns Width in bits of the VmVsrc field of a DepCtr immediate. +inline unsigned getVmVsrcBitWidth() { return 3; } + +/// \returns Bit shift of the VmVsrc field within a DepCtr immediate. +inline unsigned getVmVsrcBitShift() { return 2; } + +/// \returns Width in bits of the VaVdst field of a DepCtr immediate. +inline unsigned getVaVdstBitWidth() { return 4; } + +/// \returns Bit shift of the VaVdst field within a DepCtr immediate. +inline unsigned getVaVdstBitShift() { return 12; } + +/// \returns Width in bits of the SaSdst field of a DepCtr immediate. +inline unsigned getSaSdstBitWidth() { return 1; } + +/// \returns Bit shift of the SaSdst field within a DepCtr immediate. +inline unsigned getSaSdstBitShift() { return 0; } + } // end namespace anonymous namespace llvm { @@ -1501,6 +1519,42 @@ int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, STI); } +unsigned decodeFieldVmVsrc(unsigned Encoded) { + return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); +} + +unsigned decodeFieldVaVdst(unsigned Encoded) { + return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth()); +} + +unsigned decodeFieldSaSdst(unsigned Encoded) { + return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth()); +} + +unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) { + return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); +} + +unsigned encodeFieldVmVsrc(unsigned VmVsrc) { + return encodeFieldVmVsrc(0xffff, VmVsrc); +} + +unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) { + return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth()); +} + +unsigned encodeFieldVaVdst(unsigned VaVdst) { + return encodeFieldVaVdst(0xffff, VaVdst); +} + +unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) { + return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth()); +} + +unsigned encodeFieldSaSdst(unsigned SaSdst) { + return encodeFieldSaSdst(0xffff, SaSdst); +} + } // namespace DepCtr //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 
4f14547b8709a..dd8c3f73ac25b 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -977,6 +977,33 @@ bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI); +/// \returns The VaVdst field decoded from the DepCtr immediate \p Encoded. +unsigned decodeFieldVaVdst(unsigned Encoded); + +/// \returns The VmVsrc field decoded from the DepCtr immediate \p Encoded. +unsigned decodeFieldVmVsrc(unsigned Encoded); + +/// \returns The SaSdst field decoded from the DepCtr immediate \p Encoded. +unsigned decodeFieldSaSdst(unsigned Encoded); + +/// \returns \p VmVsrc encoded into a DepCtr immediate, starting from 0xffff. +unsigned encodeFieldVmVsrc(unsigned VmVsrc); + +/// \returns \p Encoded combined with encoded \p VmVsrc. +unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc); + +/// \returns \p VaVdst encoded into a DepCtr immediate, starting from 0xffff. +unsigned encodeFieldVaVdst(unsigned VaVdst); + +/// \returns \p Encoded combined with encoded \p VaVdst. +unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst); + +/// \returns \p SaSdst encoded into a DepCtr immediate, starting from 0xffff. +unsigned encodeFieldSaSdst(unsigned SaSdst); + +/// \returns \p Encoded combined with encoded \p SaSdst. +unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst); + } // namespace DepCtr namespace Exp {