Skip to content

Commit

Permalink
[AMDGPU] Handle LDS DMA and LDS_DIRECT hazards
Browse files Browse the repository at this point in the history
There shall be 1 wait state between M0 write and LDS DMA/LDS_DIRECT use.

Differential Revision: https://reviews.llvm.org/D124550
  • Loading branch information
rampitec committed May 4, 2022
1 parent bc78c09 commit 63f21f4
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 14 deletions.
33 changes: 19 additions & 14 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,11 @@ static bool isPermlane(const MachineInstr &MI) {
Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
}

// Identifies LDS DMA instructions: instructions flagged as VALU that are also
// MUBUF or FLAT memory operations.
static bool isLdsDma(const MachineInstr &MI) {
  if (!SIInstrInfo::isVALU(MI))
    return false;

  return SIInstrInfo::isMUBUF(MI) || SIInstrInfo::isFLAT(MI);
}

static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
AMDGPU::OpName::simm16);
Expand Down Expand Up @@ -226,12 +231,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
return HazardType;

if (ST.hasReadM0MovRelInterpHazard() &&
(TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
checkReadM0Hazards(MI) > 0)
return HazardType;

if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
if (((ST.hasReadM0MovRelInterpHazard() &&
(TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
(ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
(ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
(ST.hasReadM0LdsDirectHazard() &&
MI->readsRegister(AMDGPU::LDS_DIRECT))) &&
checkReadM0Hazards(MI) > 0)
return HazardType;

Expand Down Expand Up @@ -351,11 +356,11 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (isRFE(MI->getOpcode()))
return std::max(WaitStates, checkRFEHazards(MI));

if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
isSMovRel(MI->getOpcode())))
return std::max(WaitStates, checkReadM0Hazards(MI));

if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
if ((ST.hasReadM0MovRelInterpHazard() &&
(TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))) ||
(ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI)) ||
(ST.hasReadM0LdsDmaHazard() && isLdsDma(*MI)) ||
(ST.hasReadM0LdsDirectHazard() && MI->readsRegister(AMDGPU::LDS_DIRECT)))
return std::max(WaitStates, checkReadM0Hazards(MI));

if (SIInstrInfo::isMAI(*MI))
Expand Down Expand Up @@ -1014,10 +1019,10 @@ int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {

/// Computes the M0 read hazard for \p MI.
///
/// The result is the required wait-state count (1) minus the value reported by
/// getWaitStatesSinceDef for an SALU definition of M0. Callers treat a result
/// greater than zero as a hazard, or feed it to std::max as a no-op count.
int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  // A single wait state is required between the M0 write and its reader. The
  // same value also serves as the search limit passed to getWaitStatesSinceDef.
  const int ReadM0WaitStates = 1;
  // Only SALU instructions are considered as hazardous writers of M0.
  auto IsHazardFn = [TII](const MachineInstr &MI) { return TII->isSALU(MI); };
  return ReadM0WaitStates -
         getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates);
}

void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
getGeneration() <= AMDGPUSubtarget::GFX9;
}

// Whether a wait state is needed between a write to M0 and a following LDS
// DMA instruction. Reported for every subtarget of the GFX9 generation.
bool hasReadM0LdsDmaHazard() const {
return getGeneration() == AMDGPUSubtarget::GFX9;
}

// Whether a wait state is needed between a write to M0 and a following
// instruction that reads LDS_DIRECT. Reported for every subtarget of the GFX9
// generation.
bool hasReadM0LdsDirectHazard() const {
return getGeneration() == AMDGPUSubtarget::GFX9;
}

// Accessor for the HasVcmpxPermlaneHazard subtarget flag.
bool hasVcmpxPermlaneHazard() const {
return HasVcmpxPermlaneHazard;
}
Expand Down
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/AMDGPU/hazard.mir
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,27 @@ body: |
S_SENDMSG 3, implicit $exec, implicit $m0
S_ENDPGM 0
...

# M0 is written and then immediately read by an LDS DMA buffer store; under the
# GFX9 check prefix a single S_NOP 0 is expected between the two instructions.
# GCN-LABEL: name: buffer_store_lds_dword
# GCN: $m0 = S_MOV_B32 0
# GFX9-NEXT: S_NOP 0
# GCN-NEXT: BUFFER_STORE_LDS_DWORD
---
name: buffer_store_lds_dword
body: |
bb.0:
$m0 = S_MOV_B32 0
BUFFER_STORE_LDS_DWORD $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $m0
...

# M0 is written and then immediately followed by a V_MOV_B32 that reads
# $lds_direct; under the GFX9 check prefix a single S_NOP 0 is expected between
# the two instructions.
# GCN-LABEL: name: lds_direct_read_m0
# GCN: $m0 = S_MOV_B32 0
# GFX9-NEXT: S_NOP 0
# GCN-NEXT: V_MOV_B32
---
name: lds_direct_read_m0
body: |
bb.0:
$m0 = S_MOV_B32 0
$vgpr0 = V_MOV_B32_e32 $lds_direct, implicit $exec, implicit $m0
...
49 changes: 49 additions & 0 deletions llvm/test/CodeGen/AMDGPU/lds-dma-hazards.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s

# M0 write followed directly by a buffer load to LDS (ADDR64 form); one S_NOP 0
# is expected to be inserted between them.
# GCN-LABEL: name: buffer_load_dword_lds
# GCN: $m0 = S_MOV_B32 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: BUFFER_LOAD_DWORD_LDS_ADDR64
---
name: buffer_load_dword_lds
body: |
bb.0:
$m0 = S_MOV_B32 0
BUFFER_LOAD_DWORD_LDS_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec, implicit $m0
...

# M0 write followed directly by an LDS DMA buffer store; one S_NOP 0 is
# expected to be inserted between them.
# GCN-LABEL: name: buffer_store_lds_dword
# GCN: $m0 = S_MOV_B32 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: BUFFER_STORE_LDS_DWORD
---
name: buffer_store_lds_dword
body: |
bb.0:
$m0 = S_MOV_B32 0
BUFFER_STORE_LDS_DWORD $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $m0
...

# M0 write followed directly by a global load to LDS; one S_NOP 0 is expected
# to be inserted between them.
# GCN-LABEL: name: global_load_lds_dword
# GCN: $m0 = S_MOV_B32 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: GLOBAL_LOAD_LDS_DWORD
---
name: global_load_lds_dword
body: |
bb.0:
$m0 = S_MOV_B32 0
GLOBAL_LOAD_LDS_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $m0
...

# M0 write followed directly by a scratch load to LDS; one S_NOP 0 is expected
# to be inserted between them.
# GCN-LABEL: name: scratch_load_lds_dword
# GCN: $m0 = S_MOV_B32 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: SCRATCH_LOAD_LDS_DWORD
---
name: scratch_load_lds_dword
body: |
bb.0:
$m0 = S_MOV_B32 0
SCRATCH_LOAD_LDS_DWORD $vgpr2, 0, 0, implicit $exec, implicit $m0
...

0 comments on commit 63f21f4

Please sign in to comment.