-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[GlobalISel] Handle more commutable instructions in commute_constant_to_rhs
#87424
[GlobalISel] Handle more commutable instructions in commute_constant_to_rhs
#87424
Conversation
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like most of these are untested; we should add some tests.
Created using spr 1.3.4 [skip ci]
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-backend-amdgpu Author: None (darkbuck) ChangesFull diff: https://github.com/llvm/llvm-project/pull/87424.diff 8 Files Affected:
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 778ff7e437eb50..8568a7ae90e56c 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -443,14 +443,20 @@ def select_constant_cmp: GICombineRule<
// TODO: handle compares (currently not marked as isCommutable)
def commute_int_constant_to_rhs : GICombineRule<
(defs root:$root),
- (match (wip_match_opcode G_ADD, G_MUL, G_AND, G_OR, G_XOR):$root,
+ (match (wip_match_opcode G_ADD, G_MUL, G_AND, G_OR, G_XOR,
+ G_SMIN, G_SMAX, G_UMIN, G_UMAX, G_UADDO, G_SADDO,
+ G_UMULO, G_SMULO, G_UMULH, G_SMULH,
+ G_UADDSAT, G_SADDSAT, G_SMULFIX, G_UMULFIX,
+ G_SMULFIXSAT, G_UMULFIXSAT):$root,
[{ return Helper.matchCommuteConstantToRHS(*${root}); }]),
(apply [{ Helper.applyCommuteBinOpOperands(*${root}); }])
>;
def commute_fp_constant_to_rhs : GICombineRule<
(defs root:$root),
- (match (wip_match_opcode G_FADD, G_FMUL):$root,
+ (match (wip_match_opcode G_FADD, G_FMUL, G_FMINNUM, G_FMAXNUM,
+ G_FMINNUM_IEEE, G_FMAXNUM_IEEE,
+ G_FMINIMUM, G_FMAXIMUM):$root,
[{ return Helper.matchCommuteFPConstantToRHS(*${root}); }]),
(apply [{ Helper.applyCommuteBinOpOperands(*${root}); }])
>;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir
index 8c4300d9e7329f..03e507f5eaa7fb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir
@@ -11,7 +11,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
- ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s16) = G_FMAXIMUM [[C]], [[COPY]]
+ ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s16) = G_FMAXIMUM [[COPY]], [[C]]
; CHECK-NEXT: $h0 = COPY [[FMAXIMUM]](s16)
; CHECK-NEXT: RET_ReallyLR implicit $h0
%0:_(s16) = COPY $h0
@@ -33,7 +33,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
- ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s32) = G_FMAXIMUM [[C]], [[COPY]]
+ ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s32) = G_FMAXIMUM [[COPY]], [[C]]
; CHECK-NEXT: $s0 = COPY [[FMAXIMUM]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%0:_(s32) = COPY $s0
@@ -55,7 +55,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
- ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s64) = G_FMAXIMUM [[C]], [[COPY]]
+ ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s64) = G_FMAXIMUM [[COPY]], [[C]]
; CHECK-NEXT: $d0 = COPY [[FMAXIMUM]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s64) = COPY $d0
@@ -77,7 +77,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
- ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(s64) = G_FMINIMUM [[C]], [[COPY]]
+ ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(s64) = G_FMINIMUM [[COPY]], [[C]]
; CHECK-NEXT: $d0 = COPY [[FMINIMUM]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s64) = COPY $d0
@@ -100,7 +100,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
- ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<8 x s16>) = G_FMAXIMUM [[BUILD_VECTOR]], [[COPY]]
+ ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<8 x s16>) = G_FMAXIMUM [[COPY]], [[BUILD_VECTOR]]
; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<8 x s16>) = COPY $q0
@@ -125,7 +125,7 @@ body: |
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
- ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXIMUM [[BUILD_VECTOR]], [[BITCAST]]
+ ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXIMUM [[BITCAST]], [[BUILD_VECTOR]]
; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%1:_(<2 x s64>) = COPY $q0
@@ -150,7 +150,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
- ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMAXIMUM [[BUILD_VECTOR]], [[COPY]]
+ ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMAXIMUM [[COPY]], [[BUILD_VECTOR]]
; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<2 x s64>) = COPY $q0
@@ -174,7 +174,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
- ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMINIMUM [[BUILD_VECTOR]], [[COPY]]
+ ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMINIMUM [[COPY]], [[BUILD_VECTOR]]
; CHECK-NEXT: $q0 = COPY [[FMINIMUM]](<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<2 x s64>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll
index 7badf4732fd0d4..ae0a9b1c7c4f1f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll
@@ -4,7 +4,7 @@ define half @test_s16(half %a) #0 {
; CHECK-LABEL: test_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fmax h0, h1, h0
+; CHECK-NEXT: fmax h0, h0, h1
; CHECK-NEXT: ret
entry:
%fcmp = fcmp olt half %a, 0.0
@@ -16,7 +16,7 @@ define float @test_s32(float %a) #0 {
; CHECK-LABEL: test_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fmax s0, s1, s0
+; CHECK-NEXT: fmax s0, s0, s1
; CHECK-NEXT: ret
entry:
%fcmp = fcmp olt float %a, 0.0
@@ -28,7 +28,7 @@ define double @test_s64(double %a) #0 {
; CHECK-LABEL: test_s64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fmax d0, d1, d0
+; CHECK-NEXT: fmax d0, d0, d1
; CHECK-NEXT: ret
entry:
%fcmp = fcmp olt double %a, 0.0
@@ -40,7 +40,7 @@ define <4 x half> @test_v4s16(<4 x half> %a) #0 {
; CHECK-LABEL: test_v4s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: fmax v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: fmax v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
entry:
%fcmp = fcmp olt <4 x half> %a, zeroinitializer
@@ -52,7 +52,7 @@ define <8 x half> @test_v8s16(<8 x half> %a) #0 {
; CHECK-LABEL: test_v8s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: fmax v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: fmax v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
entry:
%fcmp = fcmp olt <8 x half> %a, zeroinitializer
@@ -64,7 +64,7 @@ define <2 x float> @test_v2s32(<2 x float> %a) #0 {
; CHECK-LABEL: test_v2s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: fmax v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: fmax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
entry:
%fcmp = fcmp olt <2 x float> %a, zeroinitializer
@@ -76,7 +76,7 @@ define <4 x float> @test_v4s32(<4 x float> %a) #0 {
; CHECK-LABEL: test_v4s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: fmax v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: fmax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
entry:
%fcmp = fcmp olt <4 x float> %a, zeroinitializer
@@ -88,7 +88,7 @@ define <2 x double> @test_v2s64(<2 x double> %a) #0 {
; CHECK-LABEL: test_v2s64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: fmax v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: fmax v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
entry:
%fcmp = fcmp olt <2 x double> %a, zeroinitializer
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
index ee0e83c5e07632..020761352148f2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
@@ -254,8 +254,8 @@ body: |
; CHECK-NEXT: %one_s32:_(s32) = G_ANYEXT %one(s16)
; CHECK-NEXT: %one_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %one_s32(s32), %undef(s32)
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], %two_splat
- ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE %zero_undef, [[FMUL]]
- ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE %one_undef, [[FMAXNUM_IEEE]]
+ ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FMUL]], %zero_undef
+ ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], %one_undef
; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%two:_(s16) = G_FCONSTANT half 0xH4000
@@ -306,7 +306,7 @@ body: |
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], %two_splat
; CHECK-NEXT: %snan_undef_fcan:_(<2 x s16>) = G_FCANONICALIZE %snan_undef
; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE %snan_undef_fcan, [[FMUL]]
- ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE %qnan_undef, [[FMAXNUM_IEEE]]
+ ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], %qnan_undef
; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%two:_(s16) = G_FCONSTANT half 0xH4000
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir
index d6321dae3aa7e5..67e6de1ce76449 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir
@@ -318,7 +318,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %val:_(s32) = COPY $vgpr4
; CHECK-NEXT: %k255:_(s32) = G_CONSTANT i32 255
- ; CHECK-NEXT: %umin0:_(s32) = G_UMIN %k255, %val
+ ; CHECK-NEXT: %umin0:_(s32) = G_UMIN %val, %k255
; CHECK-NEXT: $vgpr0 = COPY %umin0(s32)
%ptr0:_(p1) = COPY $vgpr0_vgpr1
%ptr1:_(p1) = COPY $vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
index dc13dee4f148ac..1d94d76da148f7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
@@ -145,10 +145,10 @@ define <2 x i16> @test_max_K0min_K1Val__v2i16(<2 x i16> %a) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 17
; GFX8-NEXT: v_min_i16_e32 v1, 17, v0
-; GFX8-NEXT: v_min_i16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_min_i16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v2, -12
; GFX8-NEXT: v_max_i16_e32 v1, -12, v1
-; GFX8-NEXT: v_max_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_max_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
index 7e38762e7b559c..a8233054db9bc6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
@@ -145,10 +145,10 @@ define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 17
; GFX8-NEXT: v_min_u16_e32 v1, 17, v0
-; GFX8-NEXT: v_min_u16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_min_u16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v2, 12
; GFX8-NEXT: v_max_u16_e32 v1, 12, v1
-; GFX8-NEXT: v_max_u16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_max_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 07480a0ce0c2e7..cc0f7e2ca5a54c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -983,7 +983,7 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
; CHECK-NEXT: v_mul_lo_u32 v6, v4, v5
; CHECK-NEXT: v_mul_lo_u32 v7, v3, v5
-; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3
+; CHECK-NEXT: v_mul_hi_u32 v8, v3, v5
; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v6, v3
; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; CHECK-NEXT: v_mul_lo_u32 v8, v4, v7
@@ -1010,7 +1010,7 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7
; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc
; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5
-; CHECK-NEXT: v_mul_hi_u32 v7, v5, v3
+; CHECK-NEXT: v_mul_hi_u32 v7, v3, v5
; CHECK-NEXT: v_mul_lo_u32 v5, v4, v5
; CHECK-NEXT: v_mul_lo_u32 v8, v4, v6
; CHECK-NEXT: v_mul_hi_u32 v9, v3, v6
@@ -1058,7 +1058,7 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) {
; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
; CHECK-NEXT: v_mul_lo_u32 v6, v3, v2
-; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
+; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; CHECK-NEXT: v_mul_lo_u32 v4, v4, v2
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
@@ -1265,10 +1265,10 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4
-; GISEL-NEXT: v_mul_hi_u32 v9, v4, v9
+; GISEL-NEXT: v_mul_hi_u32 v9, v9, v4
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8
; GISEL-NEXT: v_mul_lo_u32 v12, v5, v4
-; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
+; GISEL-NEXT: v_mul_hi_u32 v5, v5, v4
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; GISEL-NEXT: v_mul_lo_u32 v7, v7, v4
@@ -1339,7 +1339,7 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
; CGP-NEXT: v_mul_lo_u32 v8, v6, v7
; CGP-NEXT: v_mul_lo_u32 v9, v5, v7
-; CGP-NEXT: v_mul_hi_u32 v10, v7, v5
+; CGP-NEXT: v_mul_hi_u32 v10, v5, v7
; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v5
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
; CGP-NEXT: v_mul_lo_u32 v10, v6, v9
@@ -1366,7 +1366,7 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc
; CGP-NEXT: v_mul_lo_u32 v8, v5, v7
-; CGP-NEXT: v_mul_hi_u32 v9, v7, v5
+; CGP-NEXT: v_mul_hi_u32 v9, v5, v7
; CGP-NEXT: v_mul_lo_u32 v7, v6, v7
; CGP-NEXT: v_mul_lo_u32 v10, v6, v8
; CGP-NEXT: v_mul_hi_u32 v11, v5, v8
@@ -1433,10 +1433,10 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8
; CGP-NEXT: v_mul_lo_u32 v9, v7, v4
-; CGP-NEXT: v_mul_hi_u32 v7, v4, v7
+; CGP-NEXT: v_mul_hi_u32 v7, v7, v4
; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
; CGP-NEXT: v_mul_lo_u32 v11, v5, v4
-; CGP-NEXT: v_mul_hi_u32 v5, v4, v5
+; CGP-NEXT: v_mul_hi_u32 v5, v5, v4
; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8
; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
; CGP-NEXT: v_mul_lo_u32 v8, v8, v4
|
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add a MIR test for these combines. You can just have one function per opcode and then copy-paste it. Ideally each case should check both the easy case (just a constant on the LHS) and the G_CONSTANT_FOLD_BARRIER
case in matchCommuteConstantToRHS.
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
Created using spr 1.3.4
@Pierre-vh could you review newly added tests? |
…with different operand patterns. Created using spr 1.3.4
No description provided.