AMDGPU/SIInsertWaitcnts: classify GFX12 cache invalidate / WB instructions
with a file-local helper, getInvOrWBWaitEventType(), which returns the wait
event type for GLOBAL_INV, GLOBAL_WB and GLOBAL_WBINV and an empty optional
for any other opcode. The helper replaces both the opcode switch in
getVmemWaitEventType() and SIInstrInfo::isGFX12CacheInvOrWBInst().

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 70db7b4918515..23490ce1f511d 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -224,6 +224,21 @@ VmemType getVmemType(const MachineInstr &Inst) {
   return VMEM_NOSAMPLER;
 }
 
+// Return the WaitEventType that tracks Inst if it is a cache invalidate or WB
+// instruction (GLOBAL_INV, GLOBAL_WB, GLOBAL_WBINV), else std::nullopt.
+std::optional<WaitEventType> getInvOrWBWaitEventType(const MachineInstr &Inst) {
+  switch (Inst.getOpcode()) {
+  // FIXME: GLOBAL_INV needs to be tracked with xcnt too.
+  case AMDGPU::GLOBAL_INV:
+    return VMEM_READ_ACCESS; // tracked using loadcnt
+  case AMDGPU::GLOBAL_WB:
+  case AMDGPU::GLOBAL_WBINV:
+    return VMEM_WRITE_ACCESS; // tracked using storecnt
+  default:
+    return {};
+  }
+}
+
 unsigned &getCounterRef(AMDGPU::Waitcnt &Wait, InstCounterType T) {
   switch (T) {
   case LOAD_CNT:
@@ -528,19 +543,9 @@ class SIInsertWaitcnts {
   }
 
   // Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM
-  // instruction.
+  // instruction that is not an invalidate or WB instruction, which are
+  // checked for using getInvOrWBWaitEventType().
   WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
-    switch (Inst.getOpcode()) {
-    // FIXME: GLOBAL_INV needs to be tracked with xcnt too.
-    case AMDGPU::GLOBAL_INV:
-      return VMEM_READ_ACCESS; // tracked using loadcnt
-    case AMDGPU::GLOBAL_WB:
-    case AMDGPU::GLOBAL_WBINV:
-      return VMEM_WRITE_ACCESS; // tracked using storecnt
-    default:
-      break;
-    }
-
     // Maps VMEM access types to their corresponding WaitEventType.
     static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = {
         VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
@@ -2271,8 +2276,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
       ScoreBrackets->updateByEvent(LDS_ACCESS, Inst);
     }
   } else if (TII->isFLAT(Inst)) {
-    if (SIInstrInfo::isGFX12CacheInvOrWBInst(Inst.getOpcode())) {
-      ScoreBrackets->updateByEvent(getVmemWaitEventType(Inst), Inst);
+    if (std::optional<WaitEventType> ET = getInvOrWBWaitEventType(Inst)) {
+      ScoreBrackets->updateByEvent(*ET, Inst);
       return;
     }
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index c66985a19685b..c739b76c500f9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1091,11 +1091,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
            Opcode == AMDGPU::DS_GWS_BARRIER;
   }
 
-  static bool isGFX12CacheInvOrWBInst(unsigned Opc) {
-    return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB ||
-           Opc == AMDGPU::GLOBAL_WBINV;
-  }
-
   static bool isF16PseudoScalarTrans(unsigned Opcode) {
     return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
            Opcode == AMDGPU::V_S_LOG_F16_e64 ||