diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 70db7b4918515..6b8b2c6ef3fde 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -111,8 +111,7 @@ struct HardwareLimits { }; #define AMDGPU_DECLARE_WAIT_EVENTS(DECL) \ - DECL(VMEM_ACCESS) /* vmem read & write */ \ - DECL(VMEM_READ_ACCESS) /* vmem read */ \ + DECL(VMEM_ACCESS) /* vmem read & write (pre-gfx10), vmem read (gfx10+) */ \ DECL(VMEM_SAMPLER_READ_ACCESS) /* vmem SAMPLER read (gfx12+ only) */ \ DECL(VMEM_BVH_READ_ACCESS) /* vmem BVH read (gfx12+ only) */ \ DECL(VMEM_WRITE_ACCESS) /* vmem write that is not scratch */ \ @@ -362,8 +361,8 @@ class WaitcntGeneratorPreGFX12 : public WaitcntGenerator { assert(ST); static const unsigned WaitEventMaskForInstPreGFX12[NUM_INST_CNTS] = { - eventMask({VMEM_ACCESS, VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS, - VMEM_BVH_READ_ACCESS}), + eventMask( + {VMEM_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS}), eventMask({SMEM_ACCESS, LDS_ACCESS, GDS_ACCESS, SQ_MESSAGE}), eventMask({EXP_GPR_LOCK, GDS_GPR_LOCK, VMW_GPR_LOCK, EXP_PARAM_ACCESS, EXP_POS_ACCESS, EXP_LDS_ACCESS}), @@ -399,7 +398,7 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator { assert(ST); static const unsigned WaitEventMaskForInstGFX12Plus[NUM_INST_CNTS] = { - eventMask({VMEM_ACCESS, VMEM_READ_ACCESS}), + eventMask({VMEM_ACCESS}), eventMask({LDS_ACCESS, GDS_ACCESS}), eventMask({EXP_GPR_LOCK, GDS_GPR_LOCK, VMW_GPR_LOCK, EXP_PARAM_ACCESS, EXP_POS_ACCESS, EXP_LDS_ACCESS}), @@ -533,7 +532,7 @@ class SIInsertWaitcnts { switch (Inst.getOpcode()) { // FIXME: GLOBAL_INV needs to be tracked with xcnt too. case AMDGPU::GLOBAL_INV: - return VMEM_READ_ACCESS; // tracked using loadcnt + return VMEM_ACCESS; // tracked using loadcnt case AMDGPU::GLOBAL_WB: case AMDGPU::GLOBAL_WBINV: return VMEM_WRITE_ACCESS; // tracked using storecnt @@ -543,7 +542,7 @@ class SIInsertWaitcnts { // Maps VMEM access types to their corresponding WaitEventType. static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = { - VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS}; + VMEM_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS}; assert(SIInstrInfo::isVMEM(Inst)); // LDS DMA loads are also stores, but on the LDS side. On the VMEM side @@ -559,7 +558,7 @@ class SIInsertWaitcnts { return VMEM_WRITE_ACCESS; } if (!ST->hasExtendedWaitCounts() || SIInstrInfo::isFLAT(Inst)) - return VMEM_READ_ACCESS; + return VMEM_ACCESS; return VmemReadMapping[getVmemType(Inst)]; }