[InstCombine] Avoid breaking SPF by `foldICmpUsingKnownBits` #82472

dtcxzyw · 2024-02-21T08:27:14Z

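This patch moves the `foldICmpUsingKnownBits` call in `visitICmpInst` to after the min/max select pattern flavor (SPF) check, so that it no longer rewrites compares that belong to a min/max select pattern and thereby breaks the SPF.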

I found this when investigating a pattern found in abseil-cpp/arg.cc: https://alive2.llvm.org/ce/z/MsP-Vb. But this patch doesn't fix it :(

PR Link: llvm/llvm-project#82472

llvmbot · 2024-02-22T20:41:41Z

@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

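This patch moves the `foldICmpUsingKnownBits` call in `visitICmpInst` to after the min/max select pattern flavor (SPF) check, so that it no longer rewrites compares that belong to a min/max select pattern and thereby breaks the SPF.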

I found this when investigating a pattern found in abseil-cpp/arg.cc: https://alive2.llvm.org/ce/z/MsP-Vb. But this patch doesn't fix it :(

Full diff: https://github.com/llvm/llvm-project/pull/82472.diff

4 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+3-3)
(modified) llvm/test/Transforms/InstCombine/minmax-fold.ll (+6-8)
(modified) llvm/test/Transforms/InstCombine/sadd_sat.ll (+19-19)
(modified) llvm/test/Transforms/InstCombine/select_meta.ll (+8-9)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 49e597171b1c6f..db24e891d307ee 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -7034,9 +7034,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
   if (Instruction *Res = foldICmpUsingBoolRange(I))
     return Res;
 
-  if (Instruction *Res = foldICmpUsingKnownBits(I))
-    return Res;
-
   if (Instruction *Res = foldICmpTruncWithTruncOrExt(I, Q))
     return Res;
 
@@ -7057,6 +7054,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
         return nullptr;
     }
 
+  if (Instruction *Res = foldICmpUsingKnownBits(I))
+    return Res;
+
   // Do this after checking for min/max to prevent infinite looping.
   if (Instruction *Res = foldICmpWithZero(I))
     return Res;
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
index 1f7837c109b3f1..57437c325fcd51 100644
--- a/llvm/test/Transforms/InstCombine/minmax-fold.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -316,8 +316,7 @@ define i32 @test73(i32 %x) {
 ; SMAX(SMAX(X, 36), 75) -> SMAX(X, 75)
 define i32 @test74(i32 %x) {
 ; CHECK-LABEL: @test74(
-; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 36)
-; CHECK-NEXT:    [[RETVAL:%.*]] = call i32 @llvm.umax.i32(i32 [[COND]], i32 75)
+; CHECK-NEXT:    [[RETVAL:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 75)
 ; CHECK-NEXT:    ret i32 [[RETVAL]]
 ;
   %cmp = icmp slt i32 %x, 36
@@ -499,10 +498,9 @@ define i32 @clamp_check_for_no_infinite_loop3(i32 %i) {
 ; CHECK-LABEL: @clamp_check_for_no_infinite_loop3(
 ; CHECK-NEXT:    br i1 true, label [[TRUELABEL:%.*]], label [[FALSELABEL:%.*]]
 ; CHECK:       truelabel:
-; CHECK-NEXT:    [[I3:%.*]] = call i32 @llvm.smax.i32(i32 [[I:%.*]], i32 1)
-; CHECK-NEXT:    [[I6:%.*]] = call i32 @llvm.umin.i32(i32 [[I3]], i32 2)
-; CHECK-NEXT:    [[I7:%.*]] = shl nuw nsw i32 [[I6]], 2
-; CHECK-NEXT:    ret i32 [[I7]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[I:%.*]], 2
+; CHECK-NEXT:    [[I6:%.*]] = select i1 [[TMP1]], i32 4, i32 8
+; CHECK-NEXT:    ret i32 [[I6]]
 ; CHECK:       falselabel:
 ; CHECK-NEXT:    ret i32 0
 ;
@@ -1391,8 +1389,8 @@ entry:
 define i32 @twoway_clamp_gt(i32 %num) {
 ; CHECK-LABEL: @twoway_clamp_gt(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S1:%.*]] = call i32 @llvm.smax.i32(i32 [[NUM:%.*]], i32 13767)
-; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.umin.i32(i32 [[S1]], i32 13768)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 [[NUM:%.*]], 13768
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP0]], i32 13767, i32 13768
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/sadd_sat.ll b/llvm/test/Transforms/InstCombine/sadd_sat.ll
index 5ccb6f92b6c722..01394eddb82fce 100644
--- a/llvm/test/Transforms/InstCombine/sadd_sat.ll
+++ b/llvm/test/Transforms/InstCombine/sadd_sat.ll
@@ -624,7 +624,7 @@ define i32 @sadd_sat32_zext(i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[A:%.*]] to i64
 ; CHECK-NEXT:    [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64
 ; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 2147483647)
+; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647)
 ; CHECK-NEXT:    [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT]] to i32
 ; CHECK-NEXT:    ret i32 [[CONV7]]
 ;
@@ -679,10 +679,10 @@ entry:
 define i32 @ashrA(i64 %a, i32 %b) {
 ; CHECK-LABEL: @ashrA(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP1]], i32 [[B:%.*]])
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[CONV:%.*]] = lshr i64 [[A:%.*]], 32
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[CONV]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP0]], i32 [[B:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
   %conv = ashr i64 %a, 32
@@ -697,10 +697,10 @@ entry:
 define i32 @ashrB(i32 %a, i64 %b) {
 ; CHECK-LABEL: @ashrB(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[B:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP1]], i32 [[A:%.*]])
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[CONV1:%.*]] = lshr i64 [[B:%.*]], 32
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[CONV1]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP0]], i32 [[A:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
   %conv = sext i32 %a to i64
@@ -717,12 +717,12 @@ entry:
 define i32 @ashrAB(i64 %a, i64 %b) {
 ; CHECK-LABEL: @ashrAB(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[B:%.*]], 32
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP2]], i32 [[TMP3]])
-; CHECK-NEXT:    ret i32 [[TMP4]]
+; CHECK-NEXT:    [[CONV:%.*]] = lshr i64 [[A:%.*]], 32
+; CHECK-NEXT:    [[CONV1:%.*]] = lshr i64 [[B:%.*]], 32
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[CONV1]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[CONV]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
   %conv = ashr i64 %a, 32
@@ -805,10 +805,10 @@ entry:
 define <2 x i8> @ashrv2i8_s(<2 x i16> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @ashrv2i8_s(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 8, i16 8>
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i16> [[TMP0]] to <2 x i8>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[TMP1]], <2 x i8> [[B:%.*]])
-; CHECK-NEXT:    ret <2 x i8> [[TMP2]]
+; CHECK-NEXT:    [[CONV:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 8, i16 8>
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc <2 x i16> [[CONV]] to <2 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[TMP0]], <2 x i8> [[B:%.*]])
+; CHECK-NEXT:    ret <2 x i8> [[TMP1]]
 ;
 entry:
   %conv = ashr <2 x i16> %a, <i16 8, i16 8>
diff --git a/llvm/test/Transforms/InstCombine/select_meta.ll b/llvm/test/Transforms/InstCombine/select_meta.ll
index df1e5a82ad5d15..cd133101736cfc 100644
--- a/llvm/test/Transforms/InstCombine/select_meta.ll
+++ b/llvm/test/Transforms/InstCombine/select_meta.ll
@@ -171,8 +171,7 @@ define i32 @test72(i32 %x) {
 ; SMAX(SMAX(X, 36), 75) -> SMAX(X, 75)
 define i32 @test74(i32 %x) {
 ; CHECK-LABEL: @test74(
-; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 36)
-; CHECK-NEXT:    [[RETVAL:%.*]] = call i32 @llvm.umax.i32(i32 [[COND]], i32 75)
+; CHECK-NEXT:    [[RETVAL:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 75)
 ; CHECK-NEXT:    ret i32 [[RETVAL]]
 ;
   %cmp = icmp slt i32 %x, 36
@@ -317,7 +316,7 @@ define <2 x i32> @not_cond_vec_undef(<2 x i1> %c, <2 x i32> %tv, <2 x i32> %fv)
 
 define i64 @select_add(i1 %cond, i64 %x, i64 %y) {
 ; CHECK-LABEL: @select_add(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i64 [[Y:%.*]], i64 0, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i64 [[Y:%.*]], i64 0, !prof [[PROF0]], !unpredictable [[META2:![0-9]+]]
 ; CHECK-NEXT:    [[RET:%.*]] = add i64 [[OP]], [[X:%.*]]
 ; CHECK-NEXT:    ret i64 [[RET]]
 ;
@@ -328,7 +327,7 @@ define i64 @select_add(i1 %cond, i64 %x, i64 %y) {
 
 define <2 x i32> @select_or(<2 x i1> %cond, <2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_or(
-; CHECK-NEXT:    [[OP:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> zeroinitializer, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> zeroinitializer, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = or <2 x i32> [[OP]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
@@ -339,7 +338,7 @@ define <2 x i32> @select_or(<2 x i1> %cond, <2 x i32> %x, <2 x i32> %y) {
 
 define i17 @select_sub(i1 %cond, i17 %x, i17 %y) {
 ; CHECK-LABEL: @select_sub(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i17 [[Y:%.*]], i17 0, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i17 [[Y:%.*]], i17 0, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = sub i17 [[X:%.*]], [[OP]]
 ; CHECK-NEXT:    ret i17 [[RET]]
 ;
@@ -350,7 +349,7 @@ define i17 @select_sub(i1 %cond, i17 %x, i17 %y) {
 
 define i128 @select_ashr(i1 %cond, i128 %x, i128 %y) {
 ; CHECK-LABEL: @select_ashr(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i128 [[Y:%.*]], i128 0, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i128 [[Y:%.*]], i128 0, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = ashr i128 [[X:%.*]], [[OP]]
 ; CHECK-NEXT:    ret i128 [[RET]]
 ;
@@ -361,7 +360,7 @@ define i128 @select_ashr(i1 %cond, i128 %x, i128 %y) {
 
 define double @select_fmul(i1 %cond, double %x, double %y) {
 ; CHECK-LABEL: @select_fmul(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], double [[Y:%.*]], double 1.000000e+00, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], double [[Y:%.*]], double 1.000000e+00, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = fmul double [[OP]], [[X:%.*]]
 ; CHECK-NEXT:    ret double [[RET]]
 ;
@@ -372,7 +371,7 @@ define double @select_fmul(i1 %cond, double %x, double %y) {
 
 define <2 x float> @select_fdiv(i1 %cond, <2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @select_fdiv(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = fdiv <2 x float> [[X:%.*]], [[OP]]
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
@@ -391,5 +390,5 @@ define <2 x float> @select_fdiv(i1 %cond, <2 x float> %x, <2 x float> %y) {
 ;.
 ; CHECK: [[PROF0]] = !{!"branch_weights", i32 2, i32 10}
 ; CHECK: [[PROF1]] = !{!"branch_weights", i32 10, i32 2}
-; CHECK: [[META2:![0-9]+]] = !{}
+; CHECK: [[META2]] = !{}
 ;.

nikic

Based on the test diffs, we're clearly missing a umax(smax) -> smax fold. But that should be addressed by adding the fold. Please add some test coverage for the benefit that this is supposed to produce.

dtcxzyw · 2024-02-23T18:57:43Z

> Based on the test diffs, we're clearly missing a umax(smax) -> smax fold. But that should be addressed by adding the fold. Please add some test coverage for the benefit that this is supposed to produce.

Alive2: https://alive2.llvm.org/ce/z/wfEj-e

[InstCombine] Fold umax(smax)/smin(umin) with non-negative constants (#82929)

This patch extends `reassociateMinMaxWithConstants` to fold the following patterns:

```
umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
smin (umin X, nneg C0), nneg C1 --> umin X, (smin/umin C0, C1)
```

Alive2: https://alive2.llvm.org/ce/z/wfEj-e

Addresses the review comment on #82472.

dtcxzyw mentioned this pull request Feb 22, 2024

pre-commit: test PR82478 dtcxzyw/llvm-opt-benchmark#257

Closed

[InstCombine] Avoid breaking SPF by foldICmpUsingKnownBits

9ecd542

dtcxzyw force-pushed the perf/fold-icmp-avoid-breaking-spf branch from f201752 to 9ecd542 Compare February 22, 2024 19:04

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Feb 22, 2024

pre-commit: test PR82472

3153544

PR Link: llvm/llvm-project#82472

dtcxzyw mentioned this pull request Feb 22, 2024

pre-commit: test PR82472 dtcxzyw/llvm-opt-benchmark#261

Open

dtcxzyw requested a review from goldsteinn February 22, 2024 20:41

dtcxzyw marked this pull request as ready for review February 22, 2024 20:41

dtcxzyw requested a review from nikic as a code owner February 22, 2024 20:41

llvmbot added the llvm:transforms label Feb 22, 2024

nikic requested changes Feb 22, 2024

View reviewed changes

dtcxzyw mentioned this pull request Feb 25, 2024

[InstCombine] Fold umax(smax)/smin(umin) with non-negative constants #82929

Merged

dtcxzyw marked this pull request as draft February 29, 2024 14:56

dtcxzyw mentioned this pull request Mar 3, 2024

[InstCombine] Fix fail to fold (A >> C1) Pred C2 if shr is used multiple times #83430 #83563

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[InstCombine] Avoid breaking SPF by `foldICmpUsingKnownBits` #82472

[InstCombine] Avoid breaking SPF by `foldICmpUsingKnownBits` #82472

dtcxzyw commented Feb 21, 2024 •

edited

llvmbot commented Feb 22, 2024

nikic left a comment

dtcxzyw commented Feb 23, 2024

[InstCombine] Avoid breaking SPF by foldICmpUsingKnownBits #82472

Are you sure you want to change the base?

[InstCombine] Avoid breaking SPF by foldICmpUsingKnownBits #82472

Conversation

dtcxzyw commented Feb 21, 2024 • edited

llvmbot commented Feb 22, 2024

nikic left a comment

Choose a reason for hiding this comment

dtcxzyw commented Feb 23, 2024

[InstCombine] Avoid breaking SPF by `foldICmpUsingKnownBits` #82472

[InstCombine] Avoid breaking SPF by `foldICmpUsingKnownBits` #82472

dtcxzyw commented Feb 21, 2024 •

edited