From 37d5d3e40cd60401988c9c3f67fc9ed75d21714b Mon Sep 17 00:00:00 2001 From: Stephen Thomas Date: Tue, 17 Jan 2023 13:26:29 +0000 Subject: [PATCH] [AMDGPU] Support GFX12 VDSDIR instructions WAITVMSRC operand in GCNHazardRecognizer Modify GCNHazardRecognizer::fixLdsDirectVMEMHazard() so the waitvsrc operand in gfx12 DS_PARAM_LOAD or DS_DIRECT_LOAD instructions is set appropriately depending on whether a hazard is found or not, rather than inserting an S_WAITCNT_DEPCTR instruction if a hazard needs to be mitigated. --- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 17 +- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 2 + ...zards.mir => lds-direct-hazards-gfx11.mir} | 0 .../AMDGPU/lds-direct-hazards-gfx12.mir | 391 ++++++++++++++++++ 4 files changed, 405 insertions(+), 5 deletions(-) rename llvm/test/CodeGen/AMDGPU/{lds-direct-hazards.mir => lds-direct-hazards-gfx11.mir} (100%) create mode 100644 llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index a7d8ff0242b801..bcd93e30d6c2d1 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1450,20 +1450,27 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) { return false; return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI); }; - auto IsExpiredFn = [](const MachineInstr &I, int) { + bool LdsdirCanWait = ST.hasLdsWaitVMSRC(); + auto IsExpiredFn = [this, LdsdirCanWait](const MachineInstr &I, int) { return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) || (I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) || (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && - AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0); + AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0) || + (LdsdirCanWait && SIInstrInfo::isLDSDIR(I) && + !TII.getNamedOperand(I, AMDGPU::OpName::waitvsrc)->getImm()); }; if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == std::numeric_limits::max()) return false; - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII.get(AMDGPU::S_WAITCNT_DEPCTR)) - .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0)); + if (LdsdirCanWait) { + TII.getNamedOperand(*MI, AMDGPU::OpName::waitvsrc)->setImm(0); + } else { + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII.get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0)); + } return true; } diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index f6f37f5170a403..85d062a9a6f5e8 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1128,6 +1128,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasLdsDirect() const { return getGeneration() >= GFX11; } + bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; } + bool hasVALUPartialForwardingHazard() const { return getGeneration() >= GFX11; } diff --git a/llvm/test/CodeGen/AMDGPU/lds-direct-hazards.mir b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx11.mir similarity index 100% rename from llvm/test/CodeGen/AMDGPU/lds-direct-hazards.mir rename to llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx11.mir diff --git a/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir new file mode 100644 index 00000000000000..1ef6ce88e61106 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir @@ -0,0 +1,391 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: lds_param_load_no_war +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_no_war + ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_va_vdst0_war +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_va_vdst0_war + ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_va_vdst0_war_salu +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_va_vdst0_war_salu + ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: $m0 = S_MOV_B32 killed $sgpr0 + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $m0 = S_MOV_B32 killed $sgpr0 + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_va_vdst1_war +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_va_vdst1_war + ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 1, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_va_vdst10_war +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_va_vdst10_war + ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 10, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_va_vdst10_waw +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_va_vdst10_waw + ; GCN: $vgpr1 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 10, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr1 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_va_vdst20_war +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_va_vdst20_war + ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr12 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr13 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr14 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr15 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr16 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr17 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr18 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr19 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr20 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr21 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr12 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr13 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr14 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr15 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr16 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr17 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr18 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr19 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr20 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr21 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_valu_war_trans +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_valu_war_trans + ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr2 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr2 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec + $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_trans_war_valu +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_trans_war_valu + ; GCN: $vgpr0 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 0, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec + $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_valu_war_vmem +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_valu_war_vmem + ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) + ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) + $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_valu_war_lds +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_valu_war_lds + ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr10 = DS_READ_B32 $vgpr2, 0, 0, implicit $m0, implicit $exec + ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr10 = DS_READ_B32 $vgpr2, 0, 0, implicit $m0, implicit $exec + $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_valu_war_ldsdir +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_valu_war_ldsdir + ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr10 = DS_PARAM_LOAD 0, 1, 15, 1, implicit $m0, implicit $exec + ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 4, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr10 = DS_PARAM_LOAD 0, 1, 15, 1, implicit $m0, implicit $exec + $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr1 = DS_PARAM_LOAD 0, 0, 4, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_vmem_war +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_vmem_war + ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 0, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_vmem_war_valu +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_vmem_war_valu + ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) + ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) + $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_vmem_war_exp +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_vmem_war_exp + ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) + ; GCN-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) + EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_vmem_war_waitcnt_depctr +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr + ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) + ; GCN-NEXT: S_WAITCNT_DEPCTR 65507 + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) + S_WAITCNT_DEPCTR 65507 + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_param_load_vmem_war_waitcnt_depctr2 +body: | + bb.0: + ; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr2 + ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) + ; GCN-NEXT: S_WAITCNT_DEPCTR 65535 + ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 0, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) + S_WAITCNT_DEPCTR 65535 + $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_direct_load_no_war +body: | + bb.0: + ; GCN-LABEL: name: lds_direct_load_no_war + ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_DIRECT_LOAD 15, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec + $vgpr1 = DS_DIRECT_LOAD 0, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +... + +--- +name: lds_direct_load_va_vdst0_war +body: | + bb.0: + ; GCN-LABEL: name: lds_direct_load_va_vdst0_war + ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr1 = DS_DIRECT_LOAD 0, 1, implicit $m0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr1 = DS_DIRECT_LOAD 15, 1, implicit $m0, implicit $exec + S_ENDPGM 0 +...