-
Notifications
You must be signed in to change notification settings - Fork 11.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][MC] Remove incorrect `_e32` suffix from `v_dot2c_f32_f16` and `v_dot4c_i32_i8`
#77993
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian). Changes: The two VOP2 instructions cannot be encoded as VOP3. Fix #54691. Full diff: https://github.com/llvm/llvm-project/pull/77993.diff (9 files affected):
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 48d4e259bc1cec..8a06fb91489cc8 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -2520,16 +2520,22 @@ multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
VOP2_Real_dpp_gfx10<op>,
VOP2_Real_dpp8_gfx10<op>;
+multiclass VOP2Only_Real_DOT_ACC_gfx10<bits<6> op> {
+ let IsSingle = 1 in
+ defm NAME : VOP2_Real_e32_gfx10<op>;
+ defm NAME : VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>;
+}
+
let SubtargetPredicate = HasDot5Insts in {
defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
// NB: Opcode conflicts with V_DOT8C_I32_I4
// This opcode exists in gfx 10.1* only
- defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
+ defm V_DOT2C_F32_F16 : VOP2Only_Real_DOT_ACC_gfx10<0x02>;
}
let SubtargetPredicate = HasDot6Insts in {
defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>;
- defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>;
+ defm V_DOT4C_I32_I8 : VOP2Only_Real_DOT_ACC_gfx10<0x0d>;
}
let SubtargetPredicate = HasDot4Insts in {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
index 6a79ad85a9a287..d2608055eb491c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
@@ -13,7 +13,7 @@ define i32 @v_sdot4(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: v_sdot4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
+; GFX10-NEXT: v_dot4c_i32_i8 v2, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 false)
@@ -78,7 +78,7 @@ define i32 @v_sdot4_cast_v4i8(<4 x i8> %a, <4 x i8> %b, i32 %c) {
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v6
; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2
; GFX10-NEXT: v_or3_b32 v1, v3, v4, v5
-; GFX10-NEXT: v_dot4c_i32_i8_e32 v8, v0, v1
+; GFX10-NEXT: v_dot4c_i32_i8 v8, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v0, v8
; GFX10-NEXT: s_setpc_b64 s[30:31]
%a.cast = bitcast <4 x i8> %a to i32
@@ -99,7 +99,7 @@ define i32 @v_sdot4_fnegf32_a(float %a, i32 %b, i32 %c) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
-; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
+; GFX10-NEXT: v_dot4c_i32_i8 v2, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg float %a
@@ -120,7 +120,7 @@ define i32 @v_sdot4_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
-; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
+; GFX10-NEXT: v_dot4c_i32_i8 v2, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
diff --git a/llvm/test/CodeGen/AMDGPU/fdot2.ll b/llvm/test/CodeGen/AMDGPU/fdot2.ll
index 8573cd4d1fe136..b7baad61651f76 100644
--- a/llvm/test/CodeGen/AMDGPU/fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdot2.ll
@@ -54,7 +54,7 @@ entry:
; GFX906: v_mac_f32_e32
; GFX906-DL-UNSAFE: v_dot2_f32_f16
-; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32
+; GFX10-DL-UNSAFE: v_dot2c_f32_f16
; GFX906-CONTRACT: v_dot2_f32_f16
@@ -95,7 +95,7 @@ entry:
; GFX906: v_mac_f32_e32
; GFX906-DL-UNSAFE: v_dot2_f32_f16
-; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32
+; GFX10-DL-UNSAFE: v_dot2c_f32_f16
; GFX906-CONTRACT: v_dot2_f32_f16
; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
diff --git a/llvm/test/CodeGen/AMDGPU/idot2.ll b/llvm/test/CodeGen/AMDGPU/idot2.ll
index 56f72ac9d9e8c6..9da07ea04ded59 100644
--- a/llvm/test/CodeGen/AMDGPU/idot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot2.ll
@@ -2855,7 +2855,7 @@ define amdgpu_kernel void @notsdot2_sext8(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc0c0001
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
; GFX10-DL-NEXT: s_endpgm
ptr addrspace(1) %src2,
diff --git a/llvm/test/CodeGen/AMDGPU/idot4s.ll b/llvm/test/CodeGen/AMDGPU/idot4s.ll
index 5c44ba008df04e..fdd913867c8f89 100644
--- a/llvm/test/CodeGen/AMDGPU/idot4s.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot4s.ll
@@ -127,7 +127,7 @@ define amdgpu_kernel void @idot4_acc32(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v0, s2
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -336,7 +336,7 @@ define amdgpu_kernel void @idot4_acc16(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: global_load_dword v3, v0, s[6:7]
; GFX10-DL-NEXT: global_load_sshort v4, v1, s[2:3]
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v4, v2, v3
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v4, v2, v3
; GFX10-DL-NEXT: global_store_short v1, v4, s[2:3]
; GFX10-DL-NEXT: s_endpgm
;
@@ -710,7 +710,7 @@ define amdgpu_kernel void @idot4_multiuse_mul1(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mad_i32_i24 v0, v0, v3, s2
; GFX10-DL-NEXT: v_mov_b32_e32 v3, 0
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -906,7 +906,7 @@ define amdgpu_kernel void @idot4_acc32_vecMul(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v0, s2
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -1335,7 +1335,7 @@ define amdgpu_kernel void @idot4_acc32_2ele(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc0c0100
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -1513,7 +1513,7 @@ define amdgpu_kernel void @idot4_acc32_3ele(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc020100
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -1698,7 +1698,7 @@ define amdgpu_kernel void @idot4_acc32_3ele_permuted(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc020003
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -1870,7 +1870,7 @@ define amdgpu_kernel void @idot4_acc32_opt(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: global_load_dword v2, v0, s[6:7]
; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -2070,7 +2070,7 @@ define amdgpu_kernel void @idot4_acc32_3src(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v1, s0
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v1, v3, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v1, v3, v0
; GFX10-DL-NEXT: global_store_dword v2, v1, s[6:7]
; GFX10-DL-NEXT: s_endpgm
;
@@ -2276,7 +2276,7 @@ define amdgpu_kernel void @idot4_acc32_3src_3ele(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v1, s0
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v1, v2, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v1, v2, v0
; GFX10-DL-NEXT: global_store_dword v3, v1, s[6:7]
; GFX10-DL-NEXT: s_endpgm
;
@@ -2479,7 +2479,7 @@ define amdgpu_kernel void @idot4_bad_source(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_perm_b32 v1, v1, v1, 0xc0c0201
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mad_i32_i24 v0, v0, s2, s3
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -2674,7 +2674,7 @@ define amdgpu_kernel void @idot4_commutative(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc020100
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -2874,7 +2874,7 @@ define amdgpu_kernel void @idot4_acc32_3src_3ele_src0(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v1, s0
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v1, v2, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v1, v2, v0
; GFX10-DL-NEXT: global_store_dword v3, v1, s[6:7]
; GFX10-DL-NEXT: s_endpgm
;
@@ -3105,7 +3105,7 @@ define amdgpu_kernel void @idot4_4src(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_or_b32_e32 v1, v2, v1
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
index 3ced3765b91436..fdf1b7db426527 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
@@ -26,7 +26,7 @@ entry:
; GCN-LABEL: {{^}}test_llvm_amdgcn_fdot2_no_clamp
; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX940: v_dot2c_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX10: {{v_dot2c_f32_f16_e32|v_dot2acc_f32_f16}} v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GFX10: {{v_dot2c_f32_f16|v_dot2acc_f32_f16}} v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_fdot2_no_clamp(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
index 08dbe29c5de4e5..7770fc02d50706 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
@@ -29,7 +29,7 @@ entry:
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_no_clamp
; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX10: v_dot4c_i32_i8_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GFX10: v_dot4c_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GF11: v_dot4_i32_iu8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} neg_lo:[1,1,0]{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot4_no_clamp(
ptr addrspace(1) %r,
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1011-xdl-insts.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1011-xdl-insts.txt
index 914b6a7db7ddeb..7397316bbf92bb 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1011-xdl-insts.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1011-xdl-insts.txt
@@ -1,10 +1,10 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1011 -disassemble -show-encoding < %s | FileCheck %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1012 -disassemble -show-encoding < %s | FileCheck %s
-# CHECK: v_dot2c_f32_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04]
+# CHECK: v_dot2c_f32_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04]
0x01,0x05,0x0a,0x04
-# CHECK: v_dot2c_f32_f16_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x05]
+# CHECK: v_dot2c_f32_f16 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x05]
0x01,0x05,0xfe,0x05
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00]
@@ -85,10 +85,10 @@
# CHECK: v_dot2c_f32_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x80,0x00]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x80,0x00
-# CHECK: v_dot4c_i32_i8_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x1a]
+# CHECK: v_dot4c_i32_i8 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x1a]
0x01,0x05,0x0a,0x1a
-# CHECK: v_dot4c_i32_i8_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x1b]
+# CHECK: v_dot4c_i32_i8 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x1b]
0x01,0x05,0xfe,0x1b
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1011_dlops.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1011_dlops.txt
index 972673542f4cea..4689a40e936e40 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1011_dlops.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1011_dlops.txt
@@ -29,7 +29,7 @@
# GFX10: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c]
0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c
-# GFX10: v_dot2c_f32_f16_e32 v5, v1, v2
+# GFX10: v_dot2c_f32_f16 v5, v1, v2
0x01,0x05,0x0a,0x04
# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
@@ -44,7 +44,7 @@
# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05
-# GFX10: v_dot4c_i32_i8_e32 v5, v1, v2
+# GFX10: v_dot4c_i32_i8 v5, v1, v2
0x01,0x05,0x0a,0x1a
# GFX10: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
@llvm/pr-subscribers-mc Author: Shilei Tian (shiltian). Changes: The two VOP2 instructions cannot be encoded as VOP3. Fix #54691. Full diff: https://github.com/llvm/llvm-project/pull/77993.diff (9 files affected):
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 48d4e259bc1cec..8a06fb91489cc8 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -2520,16 +2520,22 @@ multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
VOP2_Real_dpp_gfx10<op>,
VOP2_Real_dpp8_gfx10<op>;
+multiclass VOP2Only_Real_DOT_ACC_gfx10<bits<6> op> {
+ let IsSingle = 1 in
+ defm NAME : VOP2_Real_e32_gfx10<op>;
+ defm NAME : VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>;
+}
+
let SubtargetPredicate = HasDot5Insts in {
defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
// NB: Opcode conflicts with V_DOT8C_I32_I4
// This opcode exists in gfx 10.1* only
- defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
+ defm V_DOT2C_F32_F16 : VOP2Only_Real_DOT_ACC_gfx10<0x02>;
}
let SubtargetPredicate = HasDot6Insts in {
defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>;
- defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>;
+ defm V_DOT4C_I32_I8 : VOP2Only_Real_DOT_ACC_gfx10<0x0d>;
}
let SubtargetPredicate = HasDot4Insts in {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
index 6a79ad85a9a287..d2608055eb491c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
@@ -13,7 +13,7 @@ define i32 @v_sdot4(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: v_sdot4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
+; GFX10-NEXT: v_dot4c_i32_i8 v2, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 false)
@@ -78,7 +78,7 @@ define i32 @v_sdot4_cast_v4i8(<4 x i8> %a, <4 x i8> %b, i32 %c) {
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v6
; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2
; GFX10-NEXT: v_or3_b32 v1, v3, v4, v5
-; GFX10-NEXT: v_dot4c_i32_i8_e32 v8, v0, v1
+; GFX10-NEXT: v_dot4c_i32_i8 v8, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v0, v8
; GFX10-NEXT: s_setpc_b64 s[30:31]
%a.cast = bitcast <4 x i8> %a to i32
@@ -99,7 +99,7 @@ define i32 @v_sdot4_fnegf32_a(float %a, i32 %b, i32 %c) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
-; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
+; GFX10-NEXT: v_dot4c_i32_i8 v2, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg float %a
@@ -120,7 +120,7 @@ define i32 @v_sdot4_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
-; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
+; GFX10-NEXT: v_dot4c_i32_i8 v2, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
diff --git a/llvm/test/CodeGen/AMDGPU/fdot2.ll b/llvm/test/CodeGen/AMDGPU/fdot2.ll
index 8573cd4d1fe136..b7baad61651f76 100644
--- a/llvm/test/CodeGen/AMDGPU/fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdot2.ll
@@ -54,7 +54,7 @@ entry:
; GFX906: v_mac_f32_e32
; GFX906-DL-UNSAFE: v_dot2_f32_f16
-; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32
+; GFX10-DL-UNSAFE: v_dot2c_f32_f16
; GFX906-CONTRACT: v_dot2_f32_f16
@@ -95,7 +95,7 @@ entry:
; GFX906: v_mac_f32_e32
; GFX906-DL-UNSAFE: v_dot2_f32_f16
-; GFX10-DL-UNSAFE: v_dot2c_f32_f16_e32
+; GFX10-DL-UNSAFE: v_dot2c_f32_f16
; GFX906-CONTRACT: v_dot2_f32_f16
; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
diff --git a/llvm/test/CodeGen/AMDGPU/idot2.ll b/llvm/test/CodeGen/AMDGPU/idot2.ll
index 56f72ac9d9e8c6..9da07ea04ded59 100644
--- a/llvm/test/CodeGen/AMDGPU/idot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot2.ll
@@ -2855,7 +2855,7 @@ define amdgpu_kernel void @notsdot2_sext8(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc0c0001
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
; GFX10-DL-NEXT: s_endpgm
ptr addrspace(1) %src2,
diff --git a/llvm/test/CodeGen/AMDGPU/idot4s.ll b/llvm/test/CodeGen/AMDGPU/idot4s.ll
index 5c44ba008df04e..fdd913867c8f89 100644
--- a/llvm/test/CodeGen/AMDGPU/idot4s.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot4s.ll
@@ -127,7 +127,7 @@ define amdgpu_kernel void @idot4_acc32(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v0, s2
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -336,7 +336,7 @@ define amdgpu_kernel void @idot4_acc16(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: global_load_dword v3, v0, s[6:7]
; GFX10-DL-NEXT: global_load_sshort v4, v1, s[2:3]
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v4, v2, v3
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v4, v2, v3
; GFX10-DL-NEXT: global_store_short v1, v4, s[2:3]
; GFX10-DL-NEXT: s_endpgm
;
@@ -710,7 +710,7 @@ define amdgpu_kernel void @idot4_multiuse_mul1(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mad_i32_i24 v0, v0, v3, s2
; GFX10-DL-NEXT: v_mov_b32_e32 v3, 0
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -906,7 +906,7 @@ define amdgpu_kernel void @idot4_acc32_vecMul(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v0, s2
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -1335,7 +1335,7 @@ define amdgpu_kernel void @idot4_acc32_2ele(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc0c0100
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -1513,7 +1513,7 @@ define amdgpu_kernel void @idot4_acc32_3ele(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc020100
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -1698,7 +1698,7 @@ define amdgpu_kernel void @idot4_acc32_3ele_permuted(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc020003
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -1870,7 +1870,7 @@ define amdgpu_kernel void @idot4_acc32_opt(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: global_load_dword v2, v0, s[6:7]
; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -2070,7 +2070,7 @@ define amdgpu_kernel void @idot4_acc32_3src(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v1, s0
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v1, v3, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v1, v3, v0
; GFX10-DL-NEXT: global_store_dword v2, v1, s[6:7]
; GFX10-DL-NEXT: s_endpgm
;
@@ -2276,7 +2276,7 @@ define amdgpu_kernel void @idot4_acc32_3src_3ele(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v1, s0
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v1, v2, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v1, v2, v0
; GFX10-DL-NEXT: global_store_dword v3, v1, s[6:7]
; GFX10-DL-NEXT: s_endpgm
;
@@ -2479,7 +2479,7 @@ define amdgpu_kernel void @idot4_bad_source(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_perm_b32 v1, v1, v1, 0xc0c0201
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mad_i32_i24 v0, v0, s2, s3
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v0, v1, v2
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v0, v1, v2
; GFX10-DL-NEXT: global_store_dword v3, v0, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -2674,7 +2674,7 @@ define amdgpu_kernel void @idot4_commutative(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_perm_b32 v1, v2, v2, 0xc020100
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
@@ -2874,7 +2874,7 @@ define amdgpu_kernel void @idot4_acc32_3src_3ele_src0(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v1, s0
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v1, v2, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v1, v2, v0
; GFX10-DL-NEXT: global_store_dword v3, v1, s[6:7]
; GFX10-DL-NEXT: s_endpgm
;
@@ -3105,7 +3105,7 @@ define amdgpu_kernel void @idot4_4src(ptr addrspace(1) %src1,
; GFX10-DL-NEXT: v_or_b32_e32 v1, v2, v1
; GFX10-DL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-DL-NEXT: v_mov_b32_e32 v2, s2
-; GFX10-DL-NEXT: v_dot4c_i32_i8_e32 v2, v1, v0
+; GFX10-DL-NEXT: v_dot4c_i32_i8 v2, v1, v0
; GFX10-DL-NEXT: global_store_dword v3, v2, s[0:1]
; GFX10-DL-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
index 3ced3765b91436..fdf1b7db426527 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
@@ -26,7 +26,7 @@ entry:
; GCN-LABEL: {{^}}test_llvm_amdgcn_fdot2_no_clamp
; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX940: v_dot2c_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX10: {{v_dot2c_f32_f16_e32|v_dot2acc_f32_f16}} v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GFX10: {{v_dot2c_f32_f16|v_dot2acc_f32_f16}} v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_fdot2_no_clamp(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
index 08dbe29c5de4e5..7770fc02d50706 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
@@ -29,7 +29,7 @@ entry:
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_no_clamp
; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GFX10: v_dot4c_i32_i8_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GFX10: v_dot4c_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GF11: v_dot4_i32_iu8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} neg_lo:[1,1,0]{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_sdot4_no_clamp(
ptr addrspace(1) %r,
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1011-xdl-insts.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1011-xdl-insts.txt
index 914b6a7db7ddeb..7397316bbf92bb 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1011-xdl-insts.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1011-xdl-insts.txt
@@ -1,10 +1,10 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1011 -disassemble -show-encoding < %s | FileCheck %s
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1012 -disassemble -show-encoding < %s | FileCheck %s
-# CHECK: v_dot2c_f32_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04]
+# CHECK: v_dot2c_f32_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04]
0x01,0x05,0x0a,0x04
-# CHECK: v_dot2c_f32_f16_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x05]
+# CHECK: v_dot2c_f32_f16 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x05]
0x01,0x05,0xfe,0x05
# CHECK: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0x00]
@@ -85,10 +85,10 @@
# CHECK: v_dot2c_f32_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x80,0x00]
0xfa,0x04,0x0a,0x04,0x01,0xe4,0x80,0x00
-# CHECK: v_dot4c_i32_i8_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x1a]
+# CHECK: v_dot4c_i32_i8 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x1a]
0x01,0x05,0x0a,0x1a
-# CHECK: v_dot4c_i32_i8_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x1b]
+# CHECK: v_dot4c_i32_i8 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x1b]
0x01,0x05,0xfe,0x1b
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x1a,0x01,0xe4,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1011_dlops.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1011_dlops.txt
index 972673542f4cea..4689a40e936e40 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1011_dlops.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1011_dlops.txt
@@ -29,7 +29,7 @@
# GFX10: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c]
0x00,0x40,0x19,0xcc,0x01,0x05,0x0e,0x1c
-# GFX10: v_dot2c_f32_f16_e32 v5, v1, v2
+# GFX10: v_dot2c_f32_f16 v5, v1, v2
0x01,0x05,0x0a,0x04
# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
@@ -44,7 +44,7 @@
# GFX10: v_dot2c_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05
-# GFX10: v_dot4c_i32_i8_e32 v5, v1, v2
+# GFX10: v_dot4c_i32_i8 v5, v1, v2
0x01,0x05,0x0a,0x1a
# GFX10: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
defm NAME : VOP2_Real_e32_gfx10<op>; | ||
defm NAME : VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>; | ||
} | ||
|
||
let SubtargetPredicate = HasDot5Insts in { | ||
defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll probably need to do the same thing for GFX9 in this patch once the GFX10 fix looks good and is finalized.
Do you want to strip _e32 on all subtargets? If so, you can set IsSingle = 1 in the VOPProfile for these instructions instead. If it varies per target then this looks ok. |
…d `v_dot4c_i32_i8` The two VOP2 instructions cannot be encoded as VOP3. Fix llvm#54691.
@@ -2520,16 +2520,22 @@ multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> : | |||
VOP2_Real_dpp_gfx10<op>, | |||
VOP2_Real_dpp8_gfx10<op>; | |||
|
|||
multiclass VOP2Only_Real_DOT_ACC_gfx10<bits<6> op> : VOP2_Real_dpp_gfx10<op>, | |||
VOP2_Real_dpp8_gfx10<op> { | |||
let IsSingle = 1 in |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure what IsSingle means
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://reviews.llvm.org/D99408 first introduced it. (It is also where I got to know what the `_e32` and `_e64` suffixes are. :-)
…d `v_dot4c_i32_i8` (llvm#77993) The two VOP2 instructions cannot be encoded as VOP3. Fix llvm#54691.
The two VOP2 instructions cannot be encoded as VOP3.
Fix #54691.