diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index ca7dfa734e94d..afb1dce8743bf 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -1396,13 +1396,13 @@ multiclass WMMAInst; - let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in { + let isConvergent = 1, Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in { let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = convertibleTo3Addr in { def _twoaddr # Suffix : VOP3P_Pseudo; } } if convertibleTo3Addr then { - let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in { + let isConvergent = 1, Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in { let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in { def _threeaddr # Suffix : VOP3P_Pseudo; } diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-convergent.mir b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-convergent.mir index df3e780c61f46..955cf0dbe38d4 100644 --- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-convergent.mir +++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-convergent.mir @@ -1,11 +1,16 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 + +# machine-sink must not sink WMMA* instructions. +# Ensure that WMMA instructions are marked as convergent to prevent +# machine-sink from sinking them. + # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx12-generic -run-pass=machine-sink %s -o - | FileCheck %s --- -name: wmma_test +name: wmma_test_WMMA_F32_16X16X16_F16_w32_threeaddr tracksRegLiveness: true body: | - ; CHECK-LABEL: name: wmma_test + ; CHECK-LABEL: name: wmma_test_WMMA_F32_16X16X16_F16_w32_threeaddr ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: {{ $}} @@ -40,3 +45,147 @@ body: | S_ENDPGM 0 ... 
+# V_WMMA_F32_16X16X16_F16_twoaddr_w32 is defined in bb.0 and only used by the COPY in bb.1; the CHECK lines expect machine-sink to leave it in bb.0.
+---
+name: wmma_test_V_WMMA_F32_16X16X16_F16_twoaddr_w32
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: wmma_test_V_WMMA_F32_16X16X16_F16_twoaddr_w32
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: %vsrc:vreg_256 = IMPLICIT_DEF
+  ; CHECK-NEXT: %ssrc:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT: early-clobber %vdst:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc, 0, 0, implicit $exec
+  ; CHECK-NEXT: %sdst:sreg_32 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT: S_BRANCH %bb.1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: %vcopy:vgpr_32 = COPY %vdst.sub0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT: SI_END_CF %sdst, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT: S_ENDPGM 0
+  bb.0:
+    %vsrc:vreg_256 = IMPLICIT_DEF
+    %ssrc:sreg_32 = IMPLICIT_DEF
+    early-clobber %vdst:vreg_256 = V_WMMA_F32_16X16X16_F16_twoaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc, 0, 0, implicit $exec
+    %sdst:sreg_32 = SI_IF %ssrc:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+  bb.1:
+    %vcopy:vgpr_32 = COPY %vdst.sub0
+  bb.2:
+    SI_END_CF %sdst:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...
+# V_WMMA_I32_16X16X16_IU8_twoaddr_w32 is defined in bb.0 and only used by the COPY in bb.1; the CHECK lines expect machine-sink to leave it in bb.0.
+---
+name: wmma_test_V_WMMA_I32_16X16X16_IU8_twoaddr_w32
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: wmma_test_V_WMMA_I32_16X16X16_IU8_twoaddr_w32
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: %vsrc:vreg_128 = IMPLICIT_DEF
+  ; CHECK-NEXT: %vsrc2:vreg_256 = IMPLICIT_DEF
+  ; CHECK-NEXT: %ssrc:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT: early-clobber %vdst:vreg_256 = V_WMMA_I32_16X16X16_IU8_twoaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc2, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT: %sdst:sreg_32 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT: S_BRANCH %bb.1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: %vcopy:vgpr_32 = COPY %vdst.sub0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT: SI_END_CF %sdst, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT: S_ENDPGM 0
+  bb.0:
+    %vsrc:vreg_128 = IMPLICIT_DEF
+    %vsrc2:vreg_256 = IMPLICIT_DEF
+    %ssrc:sreg_32 = IMPLICIT_DEF
+    early-clobber %vdst:vreg_256 = V_WMMA_I32_16X16X16_IU8_twoaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc2, 0, 0, 0, implicit $exec
+    %sdst:sreg_32 = SI_IF %ssrc:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+  bb.1:
+    %vcopy:vgpr_32 = COPY %vdst.sub0
+  bb.2:
+    SI_END_CF %sdst:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...
+# V_WMMA_BF16_16X16X16_BF16_threeaddr_w32 is defined in bb.0 and only used by the COPY in bb.1; the CHECK lines expect machine-sink to leave it in bb.0.
+---
+name: wmma_test_V_WMMA_BF16_16X16X16_BF16_threeaddr_w32
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: wmma_test_V_WMMA_BF16_16X16X16_BF16_threeaddr_w32
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: %vsrc:vreg_256 = IMPLICIT_DEF
+  ; CHECK-NEXT: %ssrc:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT: early-clobber %vdst:vreg_256 = V_WMMA_BF16_16X16X16_BF16_threeaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc, 0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT: %sdst:sreg_32 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT: S_BRANCH %bb.1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: %vcopy:vgpr_32 = COPY %vdst.sub0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT: SI_END_CF %sdst, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT: S_ENDPGM 0
+  bb.0:
+    %vsrc:vreg_256 = IMPLICIT_DEF
+    %ssrc:sreg_32 = IMPLICIT_DEF
+    early-clobber %vdst:vreg_256 = V_WMMA_BF16_16X16X16_BF16_threeaddr_w32 8, %vsrc, 8, %vsrc, 8, %vsrc, 0, 0, 0, 0, implicit $exec
+    %sdst:sreg_32 = SI_IF %ssrc:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+  bb.1:
+    %vcopy:vgpr_32 = COPY %vdst.sub0
+  bb.2:
+    SI_END_CF %sdst:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...
+# V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr is defined in bb.0 and only used by the COPY in bb.1; the CHECK lines expect machine-sink to leave it in bb.0.
+---
+name: wmma_test_V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: wmma_test_V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: %vsrc256:vreg_256 = IMPLICIT_DEF
+  ; CHECK-NEXT: %vsrc512:vreg_512 = IMPLICIT_DEF
+  ; CHECK-NEXT: %ssrc:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT: early-clobber %vdst:vreg_256 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr %vsrc512, %vsrc512, 8, %vsrc256, 0, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT: %sdst:sreg_32 = SI_IF %ssrc, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT: S_BRANCH %bb.1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: %vcopy:vgpr_32 = COPY %vdst.sub0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT: SI_END_CF %sdst, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT: S_ENDPGM 0
+  bb.0:
+    %vsrc256:vreg_256 = IMPLICIT_DEF
+    %vsrc512:vreg_512 = IMPLICIT_DEF
+    %ssrc:sreg_32 = IMPLICIT_DEF
+    early-clobber %vdst:vreg_256 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr %vsrc512, %vsrc512, 8, %vsrc256, 0, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
+    %sdst:sreg_32 = SI_IF %ssrc:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+  bb.1:
+    %vcopy:vgpr_32 = COPY %vdst.sub0
+  bb.2:
+    SI_END_CF %sdst:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...