-
Notifications
You must be signed in to change notification settings - Fork 15k
[AMDGPU] Add clamp support to v_add_{max|min}_{i|u}32 #164489
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Add clamp support to v_add_{max|min}_{i|u}32 #164489
Conversation
|
@llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) ChangesPatch is 29.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/164489.diff 6 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 42ec8ba876a8f..7cce033b30141 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -775,10 +775,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in {
- defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP_I32_I32_I32_I32>;
- defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP_I32_I32_I32_I32>;
+ defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
+ defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
}
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
diff --git a/llvm/test/CodeGen/AMDGPU/add-max.ll b/llvm/test/CodeGen/AMDGPU/add-max.ll
index 00c66563572ea..b3a70577b6979 100644
--- a/llvm/test/CodeGen/AMDGPU/add-max.ll
+++ b/llvm/test/CodeGen/AMDGPU/add-max.ll
@@ -5,7 +5,7 @@
define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_u32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_u32_e64 v0, v0, v1, v2
+; GCN-NEXT: v_add_max_u32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -16,7 +16,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_u32_svv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, v1
+; GCN-NEXT: v_add_max_u32 v0, s0, v0, v1
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -27,7 +27,7 @@ define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_max_u32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
; SDAG-LABEL: add_max_u32_ssv:
; SDAG: ; %bb.0:
-; SDAG-NEXT: v_add_max_u32_e64 v0, s0, s1, v0
+; SDAG-NEXT: v_add_max_u32 v0, s0, s1, v0
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: add_max_u32_ssv:
@@ -59,7 +59,7 @@ define amdgpu_ps float @add_max_u32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c
define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
; GCN-LABEL: add_max_u32_vsi:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_u32_e64 v0, v0, s0, 4
+; GCN-NEXT: v_add_max_u32 v0, v0, s0, 4
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 4)
@@ -70,7 +70,7 @@ define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
; GCN-LABEL: add_max_u32_svl:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, 0x64
+; GCN-NEXT: v_add_max_u32 v0, s0, v0, 0x64
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umax.i32(i32 %add, i32 100)
@@ -81,7 +81,7 @@ define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
; SDAG-LABEL: add_max_u32_slv:
; SDAG: ; %bb.0:
-; SDAG-NEXT: v_add_max_u32_e64 v0, 0x64, s0, v0
+; SDAG-NEXT: v_add_max_u32 v0, 0x64, s0, v0
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: add_max_u32_slv:
@@ -99,7 +99,7 @@ define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_i32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_max_i32_e64 v0, v0, v1, v2
+; GCN-NEXT: v_add_max_i32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.smax.i32(i32 %add, i32 %c)
@@ -110,7 +110,7 @@ define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_min_u32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_min_u32_e64 v0, v0, v1, v2
+; GCN-NEXT: v_add_min_u32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.umin.i32(i32 %add, i32 %c)
@@ -121,7 +121,7 @@ define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: add_min_i32_vvv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_add_min_i32_e64 v0, v0, v1, v2
+; GCN-NEXT: v_add_min_i32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%max = call i32 @llvm.smin.i32(i32 %add, i32 %c)
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 7ee0015f15997..711d57baac15f 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -39137,7 +39137,7 @@ define bfloat @v_sitofp_i64_to_bf16(i64 %x) {
; GFX1250-NEXT: v_ashrrev_i32_e32 v2, 31, v2
; GFX1250-NEXT: v_add_nc_u32_e32 v2, 32, v2
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_add_min_u32_e64 v2, v3, -1, v2
+; GFX1250-NEXT: v_add_min_u32 v2, v3, -1, v2
; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v2, v[0:1]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0
@@ -39487,8 +39487,8 @@ define <2 x bfloat> @v_sitofp_v2i64_to_v2bf16(<2 x i64> %x) {
; GFX1250-NEXT: v_dual_ashrrev_i32 v5, 31, v5 :: v_dual_ashrrev_i32 v4, 31, v4
; GFX1250-NEXT: v_dual_add_nc_u32 v5, 32, v5 :: v_dual_add_nc_u32 v4, 32, v4
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_add_min_u32_e64 v5, v7, -1, v5
-; GFX1250-NEXT: v_add_min_u32_e64 v4, v6, -1, v4
+; GFX1250-NEXT: v_add_min_u32 v5, v7, -1, v5
+; GFX1250-NEXT: v_add_min_u32 v4, v6, -1, v4
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v5, v[0:1]
; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v4, v[2:3]
@@ -39979,9 +39979,9 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX1250TRUE16-NEXT: v_dual_add_nc_u32 v7, 32, v7 :: v_dual_add_nc_u32 v6, 32, v6
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250TRUE16-NEXT: v_ashrrev_i32_e32 v8, 31, v8
-; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v7, v10, -1, v7
+; GFX1250TRUE16-NEXT: v_add_min_u32 v7, v10, -1, v7
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v6, v9, -1, v6
+; GFX1250TRUE16-NEXT: v_add_min_u32 v6, v9, -1, v6
; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[2:3], v7, v[2:3]
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[4:5], v6, v[4:5]
@@ -39991,7 +39991,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX1250TRUE16-NEXT: v_min_u32_e32 v4, 1, v4
; GFX1250TRUE16-NEXT: v_or_b32_e32 v2, v3, v2
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v8, v11, -1, v8
+; GFX1250TRUE16-NEXT: v_add_min_u32 v8, v11, -1, v8
; GFX1250TRUE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v4, v5, v4 bitop3:0x54
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2
@@ -40027,8 +40027,8 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX1250FAKE16-NEXT: v_dual_ashrrev_i32 v6, 31, v6 :: v_dual_ashrrev_i32 v7, 31, v7
; GFX1250FAKE16-NEXT: v_dual_add_nc_u32 v6, 32, v6 :: v_dual_add_nc_u32 v7, 32, v7
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v6, v10, -1, v6
-; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v7, v11, -1, v7
+; GFX1250FAKE16-NEXT: v_add_min_u32 v6, v10, -1, v6
+; GFX1250FAKE16-NEXT: v_add_min_u32 v7, v11, -1, v7
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
@@ -40038,7 +40038,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX1250FAKE16-NEXT: v_min_u32_e32 v0, 1, v0
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250FAKE16-NEXT: v_or_b32_e32 v2, v3, v2
-; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v8, v9, -1, v8
+; GFX1250FAKE16-NEXT: v_add_min_u32 v8, v9, -1, v8
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v2, v2
@@ -40656,18 +40656,18 @@ define <4 x bfloat> @v_sitofp_v4i64_to_v4bf16(<4 x i64> %x) {
; GFX1250-NEXT: v_dual_add_nc_u32 v9, 32, v9 :: v_dual_add_nc_u32 v8, 32, v8
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1250-NEXT: v_dual_ashrrev_i32 v10, 31, v10 :: v_dual_bitop2_b32 v11, v0, v1 bitop3:0x14
-; GFX1250-NEXT: v_add_min_u32_e64 v9, v13, -1, v9
+; GFX1250-NEXT: v_add_min_u32 v9, v13, -1, v9
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_add_min_u32_e64 v8, v12, -1, v8
+; GFX1250-NEXT: v_add_min_u32 v8, v12, -1, v8
; GFX1250-NEXT: v_dual_ashrrev_i32 v11, 31, v11 :: v_dual_add_nc_u32 v10, 32, v10
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-NEXT: v_lshlrev_b64_e32 v[4:5], v9, v[4:5]
; GFX1250-NEXT: v_lshlrev_b64_e32 v[6:7], v8, v[6:7]
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1250-NEXT: v_add_nc_u32_e32 v11, 32, v11
-; GFX1250-NEXT: v_add_min_u32_e64 v10, v14, -1, v10
+; GFX1250-NEXT: v_add_min_u32 v10, v14, -1, v10
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_add_min_u32_e64 v11, v15, -1, v11
+; GFX1250-NEXT: v_add_min_u32 v11, v15, -1, v11
; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v10, v[2:3]
; GFX1250-NEXT: v_min_u32_e32 v6, 1, v6
; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
index d3b44eb788444..8160544ddea4d 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -218,64 +218,76 @@ v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
// GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_i32 v2, s4, v7, v8
-// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_i32 v2, v4, 0, 1
-// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
v_add_min_i32 v2, v4, 3, s2
-// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
v_add_min_i32 v2, s4, 4, v2
-// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
v_add_min_i32 v2, v4, v7, 12345
-// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_i32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04]
v_add_max_i32 v2, s4, v7, v8
-// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
v_add_max_i32 v2, v4, 0, 1
-// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
v_add_max_i32 v2, v4, 3, s2
-// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
v_add_max_i32 v2, s4, 4, v2
-// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
v_add_max_i32 v2, v4, v7, 12345
-// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_i32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_max_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5e,0xd6,0x01,0x05,0x0e,0x04]
v_add_min_u32 v2, s4, v7, v8
-// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_u32 v2, v4, 0, 1
-// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
v_add_min_u32 v2, v4, 3, s2
-// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
v_add_min_u32 v2, s4, 4, v2
-// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
v_add_min_u32 v2, v4, v7, 12345
-// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_u32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04]
v_add_max_u32 v2, s4, v7, v8
-// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
v_add_max_u32 v2, v4, 0, 1
-// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
v_add_max_u32 v2, v4, 3, s2
-// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
v_add_max_u32 v2, s4, 4, v2
-// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
v_add_max_u32 v2, v4, v7, 12345
-// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_max_u32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04]
v_cvt_pk_bf16_f32 v5, v1, v2
// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
index 98d07ac1ece27..d913bd2db504b 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -218,64 +218,76 @@ v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
// GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_i32 v2, s4, v7, v8
-// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
v_add_min_i32 v2, v4, 0, 1
-// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
v_add_min_i32 v2, v4, 3, s2
-// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
v_add_min_i32 v2, s4, 4, v2
-// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
v_add_min_i32 v2, v4, v7, 12345
-// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+
+v_add_min_i32 v0, v1, v2, v3 clamp
+// GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04]
v_add_max_i32 v2, s4, v7, v8
-// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
+// GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
v_add_max_i32 v2, v4, 0, 1
-// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
+// GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
v_add_max_i32 v2, v4, 3, s2
-// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
+// GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
v_add_max_i32 v2, s4, 4, v2
-// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
+// GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
v_add_max_i32 v2, v4, v7, 12345
-// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
+// GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03...
[truncated]
|
|
|
||
| 0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00 | ||
| # GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] | ||
| # GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These changes are surprising though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is correct though, as this is VOP3 only.

No description provided.