Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[InstCombine] Avoid breaking SPF by foldICmpUsingKnownBits #82472

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

dtcxzyw
Copy link
Member

@dtcxzyw dtcxzyw commented Feb 21, 2024

This patch moves foldICmpUsingKnownBits after the SPF test to avoid breaking SPFs.

I found this when investigating a pattern found in abseil-cpp/arg.cc: https://alive2.llvm.org/ce/z/MsP-Vb. But this patch doesn't fix it :(

@dtcxzyw dtcxzyw force-pushed the perf/fold-icmp-avoid-breaking-spf branch from f201752 to 9ecd542 Compare February 22, 2024 19:04
dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Feb 22, 2024
@dtcxzyw dtcxzyw marked this pull request as ready for review February 22, 2024 20:41
@dtcxzyw dtcxzyw requested a review from nikic as a code owner February 22, 2024 20:41
@llvmbot
Copy link
Collaborator

llvmbot commented Feb 22, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

This patch moves foldICmpUsingKnownBits after the SPF test to avoid breaking SPFs.

I found this when investigating a pattern found in abseil-cpp/arg.cc: https://alive2.llvm.org/ce/z/MsP-Vb. But this patch doesn't fix it :(


Full diff: https://github.com/llvm/llvm-project/pull/82472.diff

4 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+3-3)
  • (modified) llvm/test/Transforms/InstCombine/minmax-fold.ll (+6-8)
  • (modified) llvm/test/Transforms/InstCombine/sadd_sat.ll (+19-19)
  • (modified) llvm/test/Transforms/InstCombine/select_meta.ll (+8-9)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 49e597171b1c6f..db24e891d307ee 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -7034,9 +7034,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
   if (Instruction *Res = foldICmpUsingBoolRange(I))
     return Res;
 
-  if (Instruction *Res = foldICmpUsingKnownBits(I))
-    return Res;
-
   if (Instruction *Res = foldICmpTruncWithTruncOrExt(I, Q))
     return Res;
 
@@ -7057,6 +7054,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
         return nullptr;
     }
 
+  if (Instruction *Res = foldICmpUsingKnownBits(I))
+    return Res;
+
   // Do this after checking for min/max to prevent infinite looping.
   if (Instruction *Res = foldICmpWithZero(I))
     return Res;
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
index 1f7837c109b3f1..57437c325fcd51 100644
--- a/llvm/test/Transforms/InstCombine/minmax-fold.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -316,8 +316,7 @@ define i32 @test73(i32 %x) {
 ; SMAX(SMAX(X, 36), 75) -> SMAX(X, 75)
 define i32 @test74(i32 %x) {
 ; CHECK-LABEL: @test74(
-; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 36)
-; CHECK-NEXT:    [[RETVAL:%.*]] = call i32 @llvm.umax.i32(i32 [[COND]], i32 75)
+; CHECK-NEXT:    [[RETVAL:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 75)
 ; CHECK-NEXT:    ret i32 [[RETVAL]]
 ;
   %cmp = icmp slt i32 %x, 36
@@ -499,10 +498,9 @@ define i32 @clamp_check_for_no_infinite_loop3(i32 %i) {
 ; CHECK-LABEL: @clamp_check_for_no_infinite_loop3(
 ; CHECK-NEXT:    br i1 true, label [[TRUELABEL:%.*]], label [[FALSELABEL:%.*]]
 ; CHECK:       truelabel:
-; CHECK-NEXT:    [[I3:%.*]] = call i32 @llvm.smax.i32(i32 [[I:%.*]], i32 1)
-; CHECK-NEXT:    [[I6:%.*]] = call i32 @llvm.umin.i32(i32 [[I3]], i32 2)
-; CHECK-NEXT:    [[I7:%.*]] = shl nuw nsw i32 [[I6]], 2
-; CHECK-NEXT:    ret i32 [[I7]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[I:%.*]], 2
+; CHECK-NEXT:    [[I6:%.*]] = select i1 [[TMP1]], i32 4, i32 8
+; CHECK-NEXT:    ret i32 [[I6]]
 ; CHECK:       falselabel:
 ; CHECK-NEXT:    ret i32 0
 ;
@@ -1391,8 +1389,8 @@ entry:
 define i32 @twoway_clamp_gt(i32 %num) {
 ; CHECK-LABEL: @twoway_clamp_gt(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S1:%.*]] = call i32 @llvm.smax.i32(i32 [[NUM:%.*]], i32 13767)
-; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.umin.i32(i32 [[S1]], i32 13768)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 [[NUM:%.*]], 13768
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP0]], i32 13767, i32 13768
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/sadd_sat.ll b/llvm/test/Transforms/InstCombine/sadd_sat.ll
index 5ccb6f92b6c722..01394eddb82fce 100644
--- a/llvm/test/Transforms/InstCombine/sadd_sat.ll
+++ b/llvm/test/Transforms/InstCombine/sadd_sat.ll
@@ -624,7 +624,7 @@ define i32 @sadd_sat32_zext(i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[A:%.*]] to i64
 ; CHECK-NEXT:    [[CONV1:%.*]] = zext i32 [[B:%.*]] to i64
 ; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 2147483647)
+; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647)
 ; CHECK-NEXT:    [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT]] to i32
 ; CHECK-NEXT:    ret i32 [[CONV7]]
 ;
@@ -679,10 +679,10 @@ entry:
 define i32 @ashrA(i64 %a, i32 %b) {
 ; CHECK-LABEL: @ashrA(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP1]], i32 [[B:%.*]])
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[CONV:%.*]] = lshr i64 [[A:%.*]], 32
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[CONV]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP0]], i32 [[B:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
   %conv = ashr i64 %a, 32
@@ -697,10 +697,10 @@ entry:
 define i32 @ashrB(i32 %a, i64 %b) {
 ; CHECK-LABEL: @ashrB(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[B:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP1]], i32 [[A:%.*]])
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[CONV1:%.*]] = lshr i64 [[B:%.*]], 32
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[CONV1]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP0]], i32 [[A:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
   %conv = sext i32 %a to i64
@@ -717,12 +717,12 @@ entry:
 define i32 @ashrAB(i64 %a, i64 %b) {
 ; CHECK-LABEL: @ashrAB(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[B:%.*]], 32
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP2]], i32 [[TMP3]])
-; CHECK-NEXT:    ret i32 [[TMP4]]
+; CHECK-NEXT:    [[CONV:%.*]] = lshr i64 [[A:%.*]], 32
+; CHECK-NEXT:    [[CONV1:%.*]] = lshr i64 [[B:%.*]], 32
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[CONV1]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[CONV]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
   %conv = ashr i64 %a, 32
@@ -805,10 +805,10 @@ entry:
 define <2 x i8> @ashrv2i8_s(<2 x i16> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @ashrv2i8_s(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 8, i16 8>
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i16> [[TMP0]] to <2 x i8>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[TMP1]], <2 x i8> [[B:%.*]])
-; CHECK-NEXT:    ret <2 x i8> [[TMP2]]
+; CHECK-NEXT:    [[CONV:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 8, i16 8>
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc <2 x i16> [[CONV]] to <2 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[TMP0]], <2 x i8> [[B:%.*]])
+; CHECK-NEXT:    ret <2 x i8> [[TMP1]]
 ;
 entry:
   %conv = ashr <2 x i16> %a, <i16 8, i16 8>
diff --git a/llvm/test/Transforms/InstCombine/select_meta.ll b/llvm/test/Transforms/InstCombine/select_meta.ll
index df1e5a82ad5d15..cd133101736cfc 100644
--- a/llvm/test/Transforms/InstCombine/select_meta.ll
+++ b/llvm/test/Transforms/InstCombine/select_meta.ll
@@ -171,8 +171,7 @@ define i32 @test72(i32 %x) {
 ; SMAX(SMAX(X, 36), 75) -> SMAX(X, 75)
 define i32 @test74(i32 %x) {
 ; CHECK-LABEL: @test74(
-; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 36)
-; CHECK-NEXT:    [[RETVAL:%.*]] = call i32 @llvm.umax.i32(i32 [[COND]], i32 75)
+; CHECK-NEXT:    [[RETVAL:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 75)
 ; CHECK-NEXT:    ret i32 [[RETVAL]]
 ;
   %cmp = icmp slt i32 %x, 36
@@ -317,7 +316,7 @@ define <2 x i32> @not_cond_vec_undef(<2 x i1> %c, <2 x i32> %tv, <2 x i32> %fv)
 
 define i64 @select_add(i1 %cond, i64 %x, i64 %y) {
 ; CHECK-LABEL: @select_add(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i64 [[Y:%.*]], i64 0, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i64 [[Y:%.*]], i64 0, !prof [[PROF0]], !unpredictable [[META2:![0-9]+]]
 ; CHECK-NEXT:    [[RET:%.*]] = add i64 [[OP]], [[X:%.*]]
 ; CHECK-NEXT:    ret i64 [[RET]]
 ;
@@ -328,7 +327,7 @@ define i64 @select_add(i1 %cond, i64 %x, i64 %y) {
 
 define <2 x i32> @select_or(<2 x i1> %cond, <2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_or(
-; CHECK-NEXT:    [[OP:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> zeroinitializer, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> zeroinitializer, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = or <2 x i32> [[OP]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
@@ -339,7 +338,7 @@ define <2 x i32> @select_or(<2 x i1> %cond, <2 x i32> %x, <2 x i32> %y) {
 
 define i17 @select_sub(i1 %cond, i17 %x, i17 %y) {
 ; CHECK-LABEL: @select_sub(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i17 [[Y:%.*]], i17 0, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i17 [[Y:%.*]], i17 0, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = sub i17 [[X:%.*]], [[OP]]
 ; CHECK-NEXT:    ret i17 [[RET]]
 ;
@@ -350,7 +349,7 @@ define i17 @select_sub(i1 %cond, i17 %x, i17 %y) {
 
 define i128 @select_ashr(i1 %cond, i128 %x, i128 %y) {
 ; CHECK-LABEL: @select_ashr(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i128 [[Y:%.*]], i128 0, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i128 [[Y:%.*]], i128 0, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = ashr i128 [[X:%.*]], [[OP]]
 ; CHECK-NEXT:    ret i128 [[RET]]
 ;
@@ -361,7 +360,7 @@ define i128 @select_ashr(i1 %cond, i128 %x, i128 %y) {
 
 define double @select_fmul(i1 %cond, double %x, double %y) {
 ; CHECK-LABEL: @select_fmul(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], double [[Y:%.*]], double 1.000000e+00, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], double [[Y:%.*]], double 1.000000e+00, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = fmul double [[OP]], [[X:%.*]]
 ; CHECK-NEXT:    ret double [[RET]]
 ;
@@ -372,7 +371,7 @@ define double @select_fmul(i1 %cond, double %x, double %y) {
 
 define <2 x float> @select_fdiv(i1 %cond, <2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @select_fdiv(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = fdiv <2 x float> [[X:%.*]], [[OP]]
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
@@ -391,5 +390,5 @@ define <2 x float> @select_fdiv(i1 %cond, <2 x float> %x, <2 x float> %y) {
 ;.
 ; CHECK: [[PROF0]] = !{!"branch_weights", i32 2, i32 10}
 ; CHECK: [[PROF1]] = !{!"branch_weights", i32 10, i32 2}
-; CHECK: [[META2:![0-9]+]] = !{}
+; CHECK: [[META2]] = !{}
 ;.

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the test diffs, we're clearly missing a umax(smax) -> smax fold. But that should be addressed by adding the fold. Please add some test coverage for the benefit that this is supposed to produce.

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Feb 23, 2024

Based on the test diffs, we're clearly missing a umax(smax) -> smax fold. But that should be addressed by adding the fold. Please add some test coverage for the benefit that this is supposed to produce.

Alive2: https://alive2.llvm.org/ce/z/wfEj-e

dtcxzyw added a commit that referenced this pull request Feb 25, 2024
…82929)

This patch extends `reassociateMinMaxWithConstants` to fold the
following patterns:
```
umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
smin (umin X, nneg C0), nneg C1 --> umin X, (smin/umin C0, C1)
```
Alive2: https://alive2.llvm.org/ce/z/wfEj-e

Address the comment
#82472 (review).
@dtcxzyw dtcxzyw marked this pull request as draft February 29, 2024 14:56
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants