Original file line number Diff line number Diff line change
Expand Up @@ -536,10 +536,10 @@ define internal void @use256vgprs() {
; GFX10WGP-WAVE64: NumVgprs: 256
; GFX10CU-WAVE32: NumVgprs: 256
; GFX10CU-WAVE64: NumVgprs: 256
; GFX11WGP-WAVE32: NumVgprs: 128
; GFX11WGP-WAVE64: NumVgprs: 128
; GFX11CU-WAVE32: NumVgprs: 128
; GFX11CU-WAVE64: NumVgprs: 128
; GFX11WGP-WAVE32: NumVgprs: 256
; GFX11WGP-WAVE64: NumVgprs: 256
; GFX11CU-WAVE32: NumVgprs: 256
; GFX11CU-WAVE64: NumVgprs: 256
define amdgpu_kernel void @f256() #256 {
call void @use256vgprs()
ret void
Expand All @@ -555,8 +555,8 @@ attributes #256 = { nounwind "amdgpu-flat-work-group-size"="256,256" }
; GFX10WGP-WAVE64: NumVgprs: 256
; GFX10CU-WAVE32: NumVgprs: 128
; GFX10CU-WAVE64: NumVgprs: 128
; GFX11WGP-WAVE32: NumVgprs: 128
; GFX11WGP-WAVE64: NumVgprs: 128
; GFX11WGP-WAVE32: NumVgprs: 256
; GFX11WGP-WAVE64: NumVgprs: 256
; GFX11CU-WAVE32: NumVgprs: 128
; GFX11CU-WAVE64: NumVgprs: 128
define amdgpu_kernel void @f512() #512 {
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/coalescer-early-clobber-subreg.mir
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ body: |
; CHECK-LABEL: name: foo1
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1
; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VRegOrLds_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VRegOrLds_32 */, def undef early-clobber %2.sub1
; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
; CHECK-NEXT: S_ENDPGM 0
INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0:vgpr_32, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1:vgpr_32
Expand All @@ -41,7 +41,7 @@ body: |
; CHECK-LABEL: name: foo2
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0
; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VRegOrLds_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VRegOrLds_32 */, def undef %2.sub0
; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
; CHECK-NEXT: S_ENDPGM 0
INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1:vgpr_32, 1835018 /* regdef:VGPR_32 */, def %0:vgpr_32
Expand All @@ -62,7 +62,7 @@ body: |
; CHECK-LABEL: name: foo3
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1
; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VRegOrLds_32 */, def undef %2.sub0, 1835019 /* regdef-ec:VRegOrLds_32 */, def undef early-clobber %2.sub1
; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
; CHECK-NEXT: S_ENDPGM 0
INLINEASM &"", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1:vgpr_32, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0:vgpr_32
Expand All @@ -83,7 +83,7 @@ body: |
; CHECK-LABEL: name: foo4
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VGPR_32 */, def undef %2.sub0
; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VRegOrLds_32 */, def undef early-clobber %2.sub1, 1835018 /* regdef:VRegOrLds_32 */, def undef %2.sub0
; CHECK-NEXT: FLAT_STORE_DWORDX2 $vgpr0_vgpr1, %2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
; CHECK-NEXT: S_ENDPGM 0
INLINEASM &"", 0 /* attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0:vgpr_32, 1835018 /* regdef:VGPR_32 */, def %1:vgpr_32
Expand Down
84 changes: 84 additions & 0 deletions llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX10 %s

# GFX10-LABEL: name: test_fmamk_reg_imm_f16
# GFX10: %2:vgpr_32 = IMPLICIT_DEF
# GFX10-NOT: V_MOV_B32
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f16
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = COPY %0.sub1
%2 = V_MOV_B32_e32 1078523331, implicit $exec
%3 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
...

# GFX10-LABEL: name: test_fmamk_imm_reg_f16
# GFX10: %2:vgpr_32 = IMPLICIT_DEF
# GFX10-NOT: V_MOV_B32
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_imm_reg_f16
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = COPY %0.sub1
%2 = V_MOV_B32_e32 1078523331, implicit $exec
%3 = V_FMAC_F16_e32 %2, killed %0.sub0, killed %1, implicit $mode, implicit $exec
...

# GFX10-LABEL: name: test_fmaak_f16
# GFX10: %1:vgpr_32 = IMPLICIT_DEF
# GFX10-NOT: V_MOV_B32
# GFX10: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
---
name: test_fmaak_f16
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = V_MOV_B32_e32 1078523331, implicit $exec
%2 = V_FMAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit $mode, implicit $exec
...

# GFX10-LABEL: name: test_fmaak_inline_literal_f16
# GFX10: %1:vgpr_32 = IMPLICIT_DEF
# GFX10-NOT: V_MOV_B32
# GFX10: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec

---
name: test_fmaak_inline_literal_f16
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0', virtual-reg: '%0' }
body: |
bb.0:
liveins: $vgpr0
%0:vgpr_32 = COPY killed $vgpr0
%1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
%2:vgpr_32 = V_FMAC_F16_e32 16384, killed %0, %1, implicit $mode, implicit $exec
S_ENDPGM 0
...

101 changes: 101 additions & 0 deletions llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx1100 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX11 %s

---
name: test_fmamk_reg_imm_f16
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
- { id: 4, class: vgpr_32 }
body: |
bb.0:
; GFX11-LABEL: name: test_fmamk_reg_imm_f16
; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
%0 = IMPLICIT_DEF
%1 = COPY %0.sub1
%2 = COPY %0.sub0
%3 = V_MOV_B32_e32 1078523331, implicit $exec
%4 = V_FMAC_F16_t16_e64 0, killed %2, 0, %3, 0, killed %1, 0, 0, implicit $mode, implicit $exec
...

---
name: test_fmamk_imm_reg_f16
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
- { id: 4, class: vgpr_32 }
body: |
bb.0:
; GFX11-LABEL: name: test_fmamk_imm_reg_f16
; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
%0 = IMPLICIT_DEF
%1 = COPY %0.sub1
%2 = COPY %0.sub0
%3 = V_MOV_B32_e32 1078523331, implicit $exec
%4 = V_FMAC_F16_t16_e64 0, %2, 0, killed %3, 0, killed %1, 0, 0, implicit $mode, implicit $exec
...

---
name: test_fmaak_f16
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
- { id: 4, class: vgpr_32 }
body: |
bb.0:
; GFX11-LABEL: name: test_fmaak_f16
; GFX11: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
%0 = IMPLICIT_DEF
%1 = COPY %0.sub0
%2 = COPY %0.sub1
%3 = V_MOV_B32_e32 1078523331, implicit $exec
%4 = V_FMAC_F16_t16_e64 0, killed %1, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
...

---
name: test_fmaak_inline_literal_f16
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0', virtual-reg: '%0' }
body: |
bb.0:
liveins: $vgpr0
; GFX11-LABEL: name: test_fmaak_inline_literal_f16
; GFX11: liveins: $vgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
%0:vgpr_32 = COPY killed $vgpr0
%1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
%2:vgpr_32 = V_FMAC_F16_t16_e64 0, 16384, 0, killed %0, 0, %1, 0, 0, implicit $mode, implicit $exec
S_ENDPGM 0
...

24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
define amdgpu_kernel void @s_input_output_i128() {
; GFX908-LABEL: name: s_input_output_i128
; GFX908: bb.0 (%ir-block.0):
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6881290 /* regdef:SGPR_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]]
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: s_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6881290 /* regdef:SGPR_128 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]]
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=s"()
call void asm sideeffect "; use $0", "s"(i128 %val)
Expand All @@ -26,15 +26,15 @@ define amdgpu_kernel void @s_input_output_i128() {
define amdgpu_kernel void @v_input_output_i128() {
; GFX908-LABEL: name: v_input_output_i128
; GFX908: bb.0 (%ir-block.0):
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def %4
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:VReg_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:VReg_128 */, [[COPY]]
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5832713 /* reguse:VReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: v_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def %4
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:VReg_128_Align2 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4980745 /* reguse:VReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6160393 /* reguse:VReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=v"()
call void asm sideeffect "; use $0", "v"(i128 %val)
Expand All @@ -44,15 +44,15 @@ define amdgpu_kernel void @v_input_output_i128() {
define amdgpu_kernel void @a_input_output_i128() {
; GFX908-LABEL: name: a_input_output_i128
; GFX908: bb.0 (%ir-block.0):
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4718602 /* regdef:AReg_128 */, def %4
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:AReg_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4718601 /* reguse:AReg_128 */, [[COPY]]
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:AReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: a_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4915210 /* regdef:AReg_128_Align2 */, def %4
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:AReg_128_Align2 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:AReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:AReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = call i128 asm sideeffect "; def $0", "=a"()
call void asm sideeffect "; use $0", "a"(i128 %val)
Expand Down
53 changes: 53 additions & 0 deletions llvm/test/CodeGen/AMDGPU/mode-register-fptrunc.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=CHECK
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=CHECK
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-mode-register %s -o - | FileCheck %s --check-prefixes=GFX11

---
name: ftrunc_upward

body: |
bb.0:
liveins: $sgpr0
; CHECK-LABEL: name: ftrunc_upward
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
; CHECK-NEXT: $vgpr1 = V_CVT_F16_F32_e32 $vgpr0, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
; GFX11-LABEL: name: ftrunc_upward
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 129, implicit-def $mode, implicit $mode
; GFX11-NEXT: $vgpr1 = V_CVT_F16_F32_t16_e64 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr1 = FPTRUNC_UPWARD_PSEUDO $vgpr0, implicit $mode, implicit $exec
S_ENDPGM 0
...
---
name: ftrunc_downward

body: |
bb.0:
liveins: $sgpr0
; CHECK-LABEL: name: ftrunc_downward
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
; CHECK-NEXT: $vgpr0 = V_CVT_F16_F32_e32 $vgpr1, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
; GFX11-LABEL: name: ftrunc_downward
; GFX11: liveins: $sgpr0
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
; GFX11-NEXT: S_SETREG_IMM32_B32 1, 193, implicit-def $mode, implicit $mode
; GFX11-NEXT: $vgpr0 = V_CVT_F16_F32_t16_e64 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
; GFX11-NEXT: S_ENDPGM 0
$vgpr1 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
$vgpr0 = FPTRUNC_DOWNWARD_PSEUDO $vgpr1, implicit $mode, implicit $exec
S_ENDPGM 0
...
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX908: bb.0 (%ir-block.0):
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX908-NEXT: {{ $}}
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:AGPR_32 */, undef %5:agpr_32
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def %26
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef %5:agpr_32
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:VReg_128 */, def %26
; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %26
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2949130 /* regdef:VReg_64 */, def %23
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3211274 /* regdef:VReg_64 */, def %23
; REGALLOC-GFX908-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]]
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1)
Expand All @@ -34,10 +34,10 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX908-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
; PEI-GFX908-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX908-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:AGPR_32 */, undef renamable $agpr0
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef renamable $agpr0
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5832714 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2949130 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3211274 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
; PEI-GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec
Expand All @@ -57,10 +57,10 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX90A: bb.0 (%ir-block.0):
; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX90A-NEXT: {{ $}}
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:AGPR_32 */, undef %5:agpr_32
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def %25
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef %5:agpr_32
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:VReg_128_Align2 */, def %25
; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %25
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64_Align2 */, def %23
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64_Align2 */, def %23
; REGALLOC-GFX90A-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, [[COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1)
; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset.cast, addrspace 4)
Expand All @@ -79,10 +79,10 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX90A-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
; PEI-GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:AGPR_32 */, undef renamable $agpr0
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1900553 /* reguse:AGPR_32 */, undef renamable $agpr0
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1
; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
; PEI-GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
; PEI-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ define i32 @zext_fadd_f16(half %x, half %y) {
; GFX9: v_fma_f16 [[FMA:v[0-9]+]], v0, v1, v2
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, [[FMA]]

; GFX10Plus: v_fmac_f16_e32 [[FMA:v[0-9]+]], v0, v1
; GFX10Plus: v_fmac_f16{{_e64|_e32}} [[FMA:v[0-9]+]], v0, v1
; GFX10Plus-NEXT: v_and_b32_e32 v0, 0xffff, [[FMA]]
define i32 @zext_fma_f16(half %x, half %y, half %z) {
%fma = call half @llvm.fma.f16(half %x, half %y, half %z)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX10
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX11

---
name: mad_cvv_f32
Expand All @@ -10,6 +11,11 @@ body: |
; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: mad_cvv_f32
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = V_MAD_F32_e64 0, 1092616192, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
Expand All @@ -25,6 +31,11 @@ body: |
; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: mad_vcv_f32
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, 1092616192, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
Expand All @@ -40,6 +51,11 @@ body: |
; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: mad_vvc_f32
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec
Expand All @@ -55,6 +71,11 @@ body: |
; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: mad_vsc_f32
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$sgpr1 = IMPLICIT_DEF
$vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec
Expand All @@ -70,6 +91,11 @@ body: |
; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: fma_cvv_f32
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = V_FMA_F32_e64 0, 1092616192, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
Expand All @@ -85,6 +111,11 @@ body: |
; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: fma_vcv_f32
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, 1092616192, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
Expand All @@ -100,6 +131,11 @@ body: |
; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: fma_vvc_f32
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec
Expand All @@ -115,6 +151,11 @@ body: |
; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: fma_vsc_f32
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$sgpr1 = IMPLICIT_DEF
$vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec
Expand All @@ -130,6 +171,11 @@ body: |
; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: mad_cvv_f16
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = V_MAD_F16_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
Expand All @@ -145,6 +191,11 @@ body: |
; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: mad_vcv_f16
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
Expand All @@ -160,6 +211,11 @@ body: |
; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: mad_vvc_f16
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec
Expand All @@ -175,6 +231,11 @@ body: |
; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: mad_vsc_f16
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$sgpr1 = IMPLICIT_DEF
$vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec
Expand All @@ -190,6 +251,11 @@ body: |
; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_FMAMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: fma_cvv_f16
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_FMAMK_F16_t16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = V_FMA_F16_gfx9_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
Expand All @@ -205,6 +271,11 @@ body: |
; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_FMAMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: fma_vcv_f16
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_FMAMK_F16_t16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
Expand All @@ -220,6 +291,11 @@ body: |
; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_FMAAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: fma_vvc_f16
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_FMAAK_F16_t16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$vgpr1 = IMPLICIT_DEF
$vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec
Expand All @@ -235,6 +311,11 @@ body: |
; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF
; GFX10-NEXT: $vgpr2 = V_FMAAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr2
; GFX11-LABEL: name: fma_vsc_f16
; GFX11: $vgpr0 = IMPLICIT_DEF
; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF
; GFX11-NEXT: $vgpr2 = V_FMAAK_F16_t16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec
; GFX11-NEXT: SI_RETURN implicit $vgpr2
$vgpr0 = IMPLICIT_DEF
$sgpr1 = IMPLICIT_DEF
$vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 {
; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def undef %22.sub0
; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 1966090 /* regdef:VGPR_32 */, def undef %22.sub0
; GCN-NEXT: undef %24.sub0:av_64 = COPY %22.sub0
; GCN-NEXT: SI_SPILL_AV64_SAVE %24, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
; GCN-NEXT: GLOBAL_STORE_DWORDX4 undef %16:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `<4 x i32> addrspace(1)* undef`, addrspace 1)
; GCN-NEXT: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
; GCN-NEXT: undef %23.sub0:vreg_64 = COPY [[SI_SPILL_AV64_RESTORE]].sub0
; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2949129 /* reguse:VReg_64 */, %23
; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3211273 /* reguse:VReg_64 */, %23
; GCN-NEXT: S_ENDPGM 0
%v0 = call i32 asm sideeffect "; def $0", "=v"()
%tmp = insertelement <2 x i32> undef, i32 %v0, i32 0
Expand Down
34 changes: 34 additions & 0 deletions llvm/test/CodeGen/AMDGPU/true16-ra-f128-fail.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# RUN: not llc -march=amdgcn -mcpu=gfx1100 -debug-only=regalloc -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck --check-prefixes=CHECK %s
# REQUIRES: asserts

--- |
define amdgpu_ps void @e32() {
ret void
}
...


---
name: e32
tracksRegLiveness: true
machineFunctionInfo:
stackPtrOffsetReg: $sgpr32

body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127
; CHECK: error: ran out of registers during register allocation
; CHECK: [[REG1:vgpr[0-9]+]] = V_ADD_F16_t16_e32
; CHECK: SI_SPILL_V32_SAVE $[[REG1]]
%0:vgpr_32_lo128 = V_ADD_F16_t16_e32 $vgpr0, $vgpr1, implicit $exec, implicit $mode
S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
S_NOP 0, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79
S_NOP 0, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95
S_NOP 0, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111
S_NOP 0, implicit $vgpr112, implicit $vgpr113, implicit $vgpr114, implicit $vgpr115, implicit $vgpr116, implicit $vgpr117, implicit $vgpr118, implicit $vgpr119, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127
S_ENDPGM 0, implicit %0
...

55 changes: 55 additions & 0 deletions llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s

--- |
define amdgpu_ps void @e32() #0 {
ret void
}

define amdgpu_ps void @e64() #0 {
ret void
}

...


---
name: e32
tracksRegLiveness: true

body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127
; GCN-LABEL: name: e32
; GCN: renamable $vgpr128 = V_ADD_F16_e32 $vgpr0, $vgpr1, implicit $exec, implicit $mode
%0:vgpr_32 = V_ADD_F16_e32 $vgpr0, $vgpr1, implicit $exec, implicit $mode
S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
S_NOP 0, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79
S_NOP 0, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95
S_NOP 0, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111
S_NOP 0, implicit $vgpr112, implicit $vgpr113, implicit $vgpr114, implicit $vgpr115, implicit $vgpr116, implicit $vgpr117, implicit $vgpr118, implicit $vgpr119, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127
S_ENDPGM 0, implicit %0
...

---
name: e64
tracksRegLiveness: true

body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127
; GCN-LABEL: name: e64
; GCN: renamable $vgpr128 = V_ADD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec, implicit $mode
%0:vgpr_32 = V_ADD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec, implicit $mode
S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
S_NOP 0, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
S_NOP 0, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79
S_NOP 0, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95
S_NOP 0, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111
S_NOP 0, implicit $vgpr112, implicit $vgpr113, implicit $vgpr114, implicit $vgpr115, implicit $vgpr116, implicit $vgpr117, implicit $vgpr118, implicit $vgpr119, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127
S_ENDPGM 0, implicit %0
...
84 changes: 2 additions & 82 deletions llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx1100 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx1100 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GCN %s

# GCN-LABEL: name: test_fmamk_reg_imm_f32
# GCN: %2:vgpr_32 = IMPLICIT_DEF
Expand Down Expand Up @@ -62,65 +62,6 @@ body: |
...

# GCN-LABEL: name: test_fmamk_reg_imm_f16
# GCN: %2:vgpr_32 = IMPLICIT_DEF
# GCN-NOT: V_MOV_B32
# GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f16
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = COPY %0.sub1
%2 = V_MOV_B32_e32 1078523331, implicit $exec
%3 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
...

# GCN-LABEL: name: test_fmamk_imm_reg_f16
# GCN: %2:vgpr_32 = IMPLICIT_DEF
# GCN-NOT: V_MOV_B32
# GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_imm_reg_f16
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = COPY %0.sub1
%2 = V_MOV_B32_e32 1078523331, implicit $exec
%3 = V_FMAC_F16_e32 %2, killed %0.sub0, killed %1, implicit $mode, implicit $exec
...

# GCN-LABEL: name: test_fmaak_f16
# GCN: %1:vgpr_32 = IMPLICIT_DEF
# GCN-NOT: V_MOV_B32
# GCN: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
---
name: test_fmaak_f16
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = V_MOV_B32_e32 1078523331, implicit $exec
%2 = V_FMAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit $mode, implicit $exec
...

# GCN-LABEL: name: test_fmaak_sgpr_src0_f32
# GCN: %1:vgpr_32 = IMPLICIT_DEF
Expand Down Expand Up @@ -207,27 +148,6 @@ body: |
...

# GCN-LABEL: name: test_fmaak_inline_literal_f16
# GCN: %1:vgpr_32 = IMPLICIT_DEF
# GCN-NOT: V_MOV_B32
# GCN: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec

---
name: test_fmaak_inline_literal_f16
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0', virtual-reg: '%0' }
body: |
bb.0:
liveins: $vgpr0
%0:vgpr_32 = COPY killed $vgpr0
%1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
%2:vgpr_32 = V_FMAC_F16_e32 16384, killed %0, %1, implicit $mode, implicit $exec
S_ENDPGM 0
...

# GCN-LABEL: name: test_fmamk_reg_imm_f32_2_folds
# GCN: %2:vgpr_32 = IMPLICIT_DEF
Expand Down
50 changes: 28 additions & 22 deletions llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,17 @@ body: |
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: V_CMPX_EQ_I16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: V_CMP_CLASS_F16_e32_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_t16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp1]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec
; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
; GCN-NEXT: V_CMP_NGE_F16_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_NGE_F16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F16_e64_dpp]], 10101, implicit-def $scc
; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, [[V_CMP_NGE_F16_t16_e64_]], 0, [[COPY]], 0, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc
; GCN-NEXT: V_CMP_GT_I32_e32_dpp [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
Expand All @@ -38,33 +40,37 @@ body: |
; unsafe to combine cmpx
%5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
V_CMPX_EQ_I16_e32 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
V_CMPX_EQ_I16_t16_nosdst_e64 %5, %0, implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec
%6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
V_CMP_CLASS_F16_e32 %6, %0, implicit-def $vcc, implicit $mode, implicit $exec
%7:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %6, %0, implicit-def $vcc, implicit $mode, implicit $exec
%7:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%8:sgpr_32 = V_CMP_GE_F16_e64 1, %7, 0, %0, 1, implicit $mode, implicit $exec
%8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%9:sgpr_32 = V_CMP_GE_F16_t16_e64 1, %8, 0, %0, 1, implicit $mode, implicit $exec
; unsafe to combine cmpx
%9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
V_CMPX_GT_U32_nosdst_e64 %9, %0, implicit-def $exec, implicit $mode, implicit $exec
%10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
V_CMPX_GT_U32_nosdst_e64 %10, %0, implicit-def $exec, implicit $mode, implicit $exec
%11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, %0, implicit $mode, implicit $exec
; shrink
%13:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%14:sgpr_32 = V_CMP_NGE_F16_e64 0, %13, 0, %0, 0, implicit $mode, implicit $exec
%14:sgpr_32 = V_CMP_NGE_F32_e64 0, %13, 0, %0, 0, implicit $mode, implicit $exec
; do not shrink, sdst used
; do not shrink True16 instructions
%15:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%16:sgpr_32 = V_CMP_NGE_F16_e64 0, %15, 0, %0, 0, implicit $mode, implicit $exec
%17:sgpr_32 = S_AND_B32 %16, 10101, implicit-def $scc
%16:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, %16, 0, %0, 0, implicit $mode, implicit $exec
; do not shrink, sdst used
%17:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
%18:sgpr_32 = V_CMP_NGE_F32_e64 0, %17, 0, %0, 0, implicit $mode, implicit $exec
%19:sgpr_32 = S_AND_B32 %18, 10101, implicit-def $scc
; commute
%18:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
V_CMP_LT_I32_e32 %0, %18, implicit-def $vcc, implicit $exec
%20:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
V_CMP_LT_I32_e32 %0, %20, implicit-def $vcc, implicit $exec
...
---
Expand All @@ -83,9 +89,9 @@ body: |
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
; GCN-NEXT: V_CMP_CLASS_F16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec
; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1
%2:vgpr_32 = IMPLICIT_DEF
Expand All @@ -94,9 +100,9 @@ body: |
; Do not combine VOPC when row_mask or bank_mask is not 0xf
; All cases are covered by generic rules for creating DPP instructions
%4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
V_CMP_CLASS_F16_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
%99:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
%5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
%6:sgpr_32 = V_CMP_GE_F16_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
%6:sgpr_32 = V_CMP_GE_F32_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
...
498 changes: 498 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s

Large diffs are not rendered by default.

1,473 changes: 1,473 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s

Large diffs are not rendered by default.

228 changes: 228 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error %s

v_add_f16_e32 v255, v1, v2
// GFX11: error: operands are not valid for this GPU or mode

v_fmaak_f16_e32 v255, v1, v2, 0xfe0b
// GFX11: error: operands are not valid for this GPU or mode

v_fmac_f16_e32 v255, v1, v2
// GFX11: error: operands are not valid for this GPU or mode

v_fmamk_f16_e32 v255, v1, 0xfe0b, v3
// GFX11: error: operands are not valid for this GPU or mode

v_ldexp_f16_e32 v255, v1, v2
// GFX11: error: operands are not valid for this GPU or mode

v_max_f16_e32 v255, v1, v2
// GFX11: error: operands are not valid for this GPU or mode

v_min_f16_e32 v255, v1, v2
// GFX11: error: operands are not valid for this GPU or mode

v_mul_f16_e32 v255, v1, v2
// GFX11: error: operands are not valid for this GPU or mode

v_sub_f16_e32 v255, v1, v2
// GFX11: error: operands are not valid for this GPU or mode

v_subrev_f16_e32 v255, v1, v2
// GFX11: error: operands are not valid for this GPU or mode

v_add_f16_e32 v5, v255, v2
// GFX11: error: operands are not valid for this GPU or mode

v_fmaak_f16_e32 v5, v255, v2, 0xfe0b
// GFX11: error: operands are not valid for this GPU or mode

v_fmac_f16_e32 v5, v255, v2
// GFX11: error: operands are not valid for this GPU or mode

v_fmamk_f16_e32 v5, v255, 0xfe0b, v3
// GFX11: error: operands are not valid for this GPU or mode

v_ldexp_f16_e32 v5, v255, v2
// GFX11: error: operands are not valid for this GPU or mode

v_max_f16_e32 v5, v255, v2
// GFX11: error: operands are not valid for this GPU or mode

v_min_f16_e32 v5, v255, v2
// GFX11: error: operands are not valid for this GPU or mode

v_mul_f16_e32 v5, v255, v2
// GFX11: error: operands are not valid for this GPU or mode

v_sub_f16_e32 v5, v255, v2
// GFX11: error: operands are not valid for this GPU or mode

v_subrev_f16_e32 v5, v255, v2
// GFX11: error: operands are not valid for this GPU or mode

v_add_f16_e32 v5, v1, v255
// GFX11: error: operands are not valid for this GPU or mode

v_fmaak_f16_e32 v5, v1, v255, 0xfe0b
// GFX11: error: operands are not valid for this GPU or mode

v_fmac_f16_e32 v5, v1, v255
// GFX11: error: operands are not valid for this GPU or mode

v_fmamk_f16_e32 v5, v1, 0xfe0b, v255
// GFX11: error: operands are not valid for this GPU or mode

v_max_f16_e32 v5, v1, v255
// GFX11: error: operands are not valid for this GPU or mode

v_min_f16_e32 v5, v1, v255
// GFX11: error: operands are not valid for this GPU or mode

v_mul_f16_e32 v5, v1, v255
// GFX11: error: operands are not valid for this GPU or mode

v_sub_f16_e32 v5, v1, v255
// GFX11: error: operands are not valid for this GPU or mode

v_subrev_f16_e32 v5, v1, v255
// GFX11: error: operands are not valid for this GPU or mode

v_add_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_fmac_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_ldexp_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_max_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_min_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_mul_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_sub_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_subrev_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_add_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_fmac_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_ldexp_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_max_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_min_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_mul_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_sub_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_subrev_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_add_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_fmac_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_max_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_min_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_mul_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_sub_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_subrev_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_add_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_fmac_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_ldexp_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_max_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_min_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_mul_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_sub_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_subrev_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_add_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_fmac_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_ldexp_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_max_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_min_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_mul_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_sub_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_subrev_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_add_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_fmac_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_max_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_min_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_mul_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_sub_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

v_subrev_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: error: operands are not valid for this GPU or mode

192 changes: 192 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s

v_add_f16 v255, v1, v2
// GFX11: v_add_f16_e64

v_fmac_f16 v255, v1, v2
// GFX11: v_fmac_f16_e64

v_ldexp_f16 v255, v1, v2
// GFX11: v_ldexp_f16_e64

v_max_f16 v255, v1, v2
// GFX11: v_max_f16_e64

v_min_f16 v255, v1, v2
// GFX11: v_min_f16_e64

v_mul_f16 v255, v1, v2
// GFX11: v_mul_f16_e64

v_sub_f16 v255, v1, v2
// GFX11: v_sub_f16_e64

v_subrev_f16 v255, v1, v2
// GFX11: v_subrev_f16_e64

v_add_f16 v5, v255, v2
// GFX11: v_add_f16_e64

v_fmac_f16 v5, v255, v2
// GFX11: v_fmac_f16_e64

v_ldexp_f16 v5, v255, v2
// GFX11: v_ldexp_f16_e64

v_max_f16 v5, v255, v2
// GFX11: v_max_f16_e64

v_min_f16 v5, v255, v2
// GFX11: v_min_f16_e64

v_mul_f16 v5, v255, v2
// GFX11: v_mul_f16_e64

v_sub_f16 v5, v255, v2
// GFX11: v_sub_f16_e64

v_subrev_f16 v5, v255, v2
// GFX11: v_subrev_f16_e64

v_add_f16 v5, v1, v255
// GFX11: v_add_f16_e64

v_fmac_f16 v5, v1, v255
// GFX11: v_fmac_f16_e64

v_max_f16 v5, v1, v255
// GFX11: v_max_f16_e64

v_min_f16 v5, v1, v255
// GFX11: v_min_f16_e64

v_mul_f16 v5, v1, v255
// GFX11: v_mul_f16_e64

v_sub_f16 v5, v1, v255
// GFX11: v_sub_f16_e64

v_subrev_f16 v5, v1, v255
// GFX11: v_subrev_f16_e64

v_add_f16 v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_add_f16_e64

v_ldexp_f16 v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_ldexp_f16_e64

v_max_f16 v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_max_f16_e64

v_min_f16 v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_min_f16_e64

v_mul_f16 v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_mul_f16_e64

v_sub_f16 v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_sub_f16_e64

v_subrev_f16 v255, v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_subrev_f16_e64

v_add_f16 v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: v_add_f16_e64

v_ldexp_f16 v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: v_ldexp_f16_e64

v_max_f16 v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: v_max_f16_e64

v_min_f16 v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: v_min_f16_e64

v_mul_f16 v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: v_mul_f16_e64

v_sub_f16 v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: v_sub_f16_e64

v_subrev_f16 v5, v255, v2 quad_perm:[3,2,1,0]
// GFX11: v_subrev_f16_e64

v_add_f16 v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_add_f16_e64

v_max_f16 v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_max_f16_e64

v_min_f16 v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_min_f16_e64

v_mul_f16 v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_mul_f16_e64

v_sub_f16 v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_sub_f16_e64

v_subrev_f16 v5, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_subrev_f16_e64

v_add_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_add_f16_e64

v_ldexp_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_ldexp_f16_e64

v_max_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_max_f16_e64

v_min_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_min_f16_e64

v_mul_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_mul_f16_e64

v_sub_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_sub_f16_e64

v_subrev_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_subrev_f16_e64

v_add_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_add_f16_e64

v_ldexp_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_ldexp_f16_e64

v_max_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_max_f16_e64

v_min_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_min_f16_e64

v_mul_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_mul_f16_e64

v_sub_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_sub_f16_e64

v_subrev_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_subrev_f16_e64

v_add_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_add_f16_e64

v_max_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_max_f16_e64

v_min_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_min_f16_e64

v_mul_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_mul_f16_e64

v_sub_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_sub_f16_e64

v_subrev_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_subrev_f16_e64

1,973 changes: 1,973 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s

Large diffs are not rendered by default.

1,973 changes: 1,973 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s

Large diffs are not rendered by default.

542 changes: 542 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s

Large diffs are not rendered by default.

542 changes: 542 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s

Large diffs are not rendered by default.