Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CVP] Propagate constant range on icmp at use level #73767

Open
wants to merge 2 commits into
base: main
Choose a base branch
from

Conversation

XChy
Copy link
Member

@XChy XChy commented Nov 29, 2023

Solve a shallow version of #71383.
ProcessICmp in CVP is used to flip the signedness of icmp predicate. This patch reuses the constant range to directly fold icmp at use level.

However, this patch cannot solve #71383 now, for InstCombine canonicalizes select cond, binop(a, C1), op(a, C2) to binop(a, select cond, C1, C2).

@llvmbot
Copy link
Collaborator

llvmbot commented Nov 29, 2023

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: XChy (XChy)

Changes

Solve a shallow version of #71383.
ProcessICmp in CVP is used to flip the signedness of icmp predicate. This patch reuses the constant range to directly fold icmp at use level.

However, this patch cannot solve #71383 now, for InstCombine canonicalizes select cond, binop(a, C1), op(a, C2) to binop(a, select cond, C1, C2).


Full diff: https://github.com/llvm/llvm-project/pull/73767.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp (+19-4)
  • (modified) llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll (+102)
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 1bdc441d18ea65e..9e947a7e6d39c83 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -275,11 +275,26 @@ static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
   if (!Cmp->isSigned())
     return false;
 
+  auto LHSRange = LVI->getConstantRangeAtUse(Cmp->getOperandUse(0));
+  auto RHSRange = LVI->getConstantRangeAtUse(Cmp->getOperandUse(1));
+
+  // NOTE: It's not the original purpose.
+  // Do simple constant propagation at use level,
+  if (Cmp->hasOneUse()) {
+    if (LHSRange.icmp(Cmp->getPredicate(), RHSRange)) {
+      Cmp->replaceAllUsesWith(Constant::getAllOnesValue(Cmp->getType()));
+      return true;
+    }
+
+    if (LHSRange.icmp(Cmp->getInversePredicate(), RHSRange)) {
+      Cmp->replaceAllUsesWith(Constant::getNullValue(Cmp->getType()));
+      return true;
+    }
+  }
+
   ICmpInst::Predicate UnsignedPred =
-      ConstantRange::getEquivalentPredWithFlippedSignedness(
-          Cmp->getPredicate(),
-          LVI->getConstantRangeAtUse(Cmp->getOperandUse(0)),
-          LVI->getConstantRangeAtUse(Cmp->getOperandUse(1)));
+      ConstantRange::getEquivalentPredWithFlippedSignedness(Cmp->getPredicate(),
+                                                            LHSRange, RHSRange);
 
   if (UnsignedPred == ICmpInst::Predicate::BAD_ICMP_PREDICATE)
     return false;
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
index 101820a4c65f23b..ff9b5df666860c4 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
@@ -1455,3 +1455,105 @@ entry:
   %select = select i1 %cmp1, i1 %cmp2, i1 false
   ret i1 %select
 }
+
+define i1 @select_ternary_icmp1(i32 noundef %a) {
+; CHECK-LABEL: @select_ternary_icmp1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 3
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[A]], 5
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[A]], 7
+; CHECK-NEXT:    [[COND_V:%.*]] = select i1 [[CMP]], i1 true, i1 [[CMP2]]
+; CHECK-NEXT:    ret i1 [[COND_V]]
+;
+entry:
+  %cmp = icmp slt i32 %a, 3
+  %cmp1 = icmp slt i32 %a, 5
+  %cmp2 = icmp slt i32 %a, 7
+  %cond.v = select i1 %cmp, i1 %cmp1, i1 %cmp2
+  ret i1 %cond.v
+}
+
+define i1 @select_ternary_icmp2(i32 noundef %a) {
+; CHECK-LABEL: @select_ternary_icmp2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 5
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[A]], 7
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[A]], 3
+; CHECK-NEXT:    [[COND_V:%.*]] = select i1 [[CMP]], i1 true, i1 false
+; CHECK-NEXT:    ret i1 [[COND_V]]
+;
+entry:
+  %cmp = icmp slt i32 %a, 5
+  %cmp1 = icmp slt i32 %a, 7
+  %cmp2 = icmp slt i32 %a, 3
+  %cond.v = select i1 %cmp, i1 %cmp1, i1 %cmp2
+  ret i1 %cond.v
+}
+
+define i1 @select_ternary_icmp3(i32 noundef %a) {
+; CHECK-LABEL: @select_ternary_icmp3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 7
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[A]], 3
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[A]], 5
+; CHECK-NEXT:    [[COND_V:%.*]] = select i1 [[CMP]], i1 [[CMP1]], i1 false
+; CHECK-NEXT:    ret i1 [[COND_V]]
+;
+entry:
+  %cmp = icmp slt i32 %a, 7
+  %cmp1 = icmp slt i32 %a, 3
+  %cmp2 = icmp slt i32 %a, 5
+  %cond.v = select i1 %cmp, i1 %cmp1, i1 %cmp2
+  ret i1 %cond.v
+}
+
+define i1 @select_ternary_icmp3_reverse(i32 noundef %a) {
+; CHECK-LABEL: @select_ternary_icmp3_reverse(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 3
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[A]], 5
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp uge i32 [[A]], 7
+; CHECK-NEXT:    [[COND_V:%.*]] = select i1 [[CMP]], i1 false, i1 [[CMP2]]
+; CHECK-NEXT:    ret i1 [[COND_V]]
+;
+entry:
+  %cmp = icmp slt i32 %a, 3
+  %cmp1 = icmp sge i32 %a, 5
+  %cmp2 = icmp sge i32 %a, 7
+  %cond.v = select i1 %cmp, i1 %cmp1, i1 %cmp2
+  ret i1 %cond.v
+}
+
+define i1 @select_ternary_icmp_fail1(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: @select_ternary_icmp_fail1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 3
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[B:%.*]], 5
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[B]], 7
+; CHECK-NEXT:    [[COND_V:%.*]] = select i1 [[CMP]], i1 [[CMP1]], i1 [[CMP2]]
+; CHECK-NEXT:    ret i1 [[COND_V]]
+;
+entry:
+  %cmp = icmp slt i32 %a, 3
+  %cmp1 = icmp slt i32 %b, 5
+  %cmp2 = icmp slt i32 %b, 7
+  %cond.v = select i1 %cmp, i1 %cmp1, i1 %cmp2
+  ret i1 %cond.v
+}
+
+define i1 @select_ternary_icmp_fail2(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: @select_ternary_icmp_fail2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 3
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sge i32 5, [[B:%.*]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sge i32 7, [[B]]
+; CHECK-NEXT:    [[COND_V:%.*]] = select i1 [[CMP]], i1 [[CMP1]], i1 [[CMP2]]
+; CHECK-NEXT:    ret i1 [[COND_V]]
+;
+entry:
+  %cmp = icmp slt i32 %a, 3
+  %cmp1 = icmp sge i32 5, %b
+  %cmp2 = icmp sge i32 7, %b
+  %cond.v = select i1 %cmp, i1 %cmp1, i1 %cmp2
+  ret i1 %cond.v
+}

@nikic
Copy link
Contributor

nikic commented Nov 29, 2023

This is duplicating the functionality of getPredicateAt() -- would it be better to introduce getPredicateAtUse()?

@XChy
Copy link
Member Author

XChy commented Nov 29, 2023

This is duplicating the functionality of getPredicateAt() -- would it be better to introduce getPredicateAtUse()?

I think so. I would introduce it in LazyValueInfo later.

@XChy
Copy link
Member Author

XChy commented Dec 4, 2023

ping.

1 similar comment
@XChy
Copy link
Member Author

XChy commented Dec 11, 2023

ping.

@XChy XChy requested review from dtcxzyw and topperc December 13, 2023 11:12
@nikic
Copy link
Contributor

nikic commented Dec 13, 2023

In 7de592b I have refactored the getConstantRangeAtUse() implementation to be based on an internal getValueAtUse() method that works on ValueLatticeElement. I think with that, it should be possible to integrate the implementations.

@XChy
Copy link
Member Author

XChy commented Dec 13, 2023

In 7de592b I have refactored the getConstantRangeAtUse() implementation to be based on an internal getValueAtUse() method that works on ValueLatticeElement. I think with that, it should be possible to integrate the implementations.

OK, I will rebase on it and integrate them.

@XChy XChy force-pushed the select-arm-fix branch 4 times, most recently from 232525c to 8a1fc0d Compare December 13, 2023 13:42
@XChy
Copy link
Member Author

XChy commented Dec 13, 2023

The implementations are integrated. Now constantFoldCmp and processICmp request range from LVI separately (don't share ConstantRange) to improve readibility.

llvm/test/Transforms/CorrelatedValuePropagation/basic.ll Outdated Show resolved Hide resolved
llvm/lib/Analysis/LazyValueInfo.cpp Outdated Show resolved Hide resolved
llvm/include/llvm/Analysis/LazyValueInfo.h Outdated Show resolved Hide resolved
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Please wait for additional approval from other reviewers.

@XChy
Copy link
Member Author

XChy commented Dec 15, 2023

Rebase this PR on latest main to avoid test failure in CI.

@XChy
Copy link
Member Author

XChy commented Dec 19, 2023

@nikic , what's your opinion on this patch?

@dtcxzyw
Copy link
Member

dtcxzyw commented Dec 19, 2023

Could you please have a look at dtcxzyw/llvm-opt-benchmark@a71358f?

Our CI detected a regression in simdjson.cpp.

--- a/bench/simdjson/optimized/simdjson.cpp.ll
+++ b/bench/simdjson/optimized/simdjson.cpp.ll
@@ -4867,8 +4867,10 @@ _ZNK8simdjson7haswell12_GLOBAL__N_16stage116buf_block_readerILm128EE13get_remain
   call fastcc void @_ZN8simdjson7haswell12_GLOBAL__N_16stage111bit_indexer5writeEjm(ptr noundef nonnull align 8 dereferenceable(8) %indexer.i.i, i32 noundef %conv.i.i, i64 noundef %15)
   %16 = getelementptr inbounds i8, ptr %indexer.i, i64 16
   %this.val.i.i = load i64, ptr %16, align 16
-  %tobool.i.not.i.not.i.i = icmp eq i64 %this.val.i.i, 0
+  %tobool.i.not.i.i.i = icmp ne i64 %this.val.i.i, 0
+  %tobool.i.not.i.not.i.i = xor i1 %tobool.i.not.i.i.i, true
   %brmerge.i.i = select i1 %cmp.i.i, i1 true, i1 %tobool.i.not.i.not.i.i
+  %tobool.i.not.i.mux.i.i = select i1 %cmp.i.i, i1 %tobool.i.not.i.i.i, i1 false
   br i1 %brmerge.i.i, label %if.end.i25.i, label %_ZN8simdjson7haswell12_GLOBAL__N_16stage123json_structural_indexer5indexILm128EEENS_10error_codeEPKhmRNS0_25dom_parser_implementationENS_11stage1_modeE.exit
 
 if.end.i25.i:                                     ; preds = %_ZNK8simdjson7haswell12_GLOBAL__N_16stage116buf_block_readerILm128EE13get_remainderEPh.exit.i

@XChy
Copy link
Member Author

XChy commented Dec 19, 2023

@dtcxzyw , there is the example for your report: https://alive2.llvm.org/ce/z/aausVv. And pipeline: https://godbolt.org/z/YETvK3Yhj.
CVP folds %cmp5 to false, impacting on InstCombine, SimplifyCFG, InstCombine pipeline. And it fails to invert related instructions.

@dtcxzyw
Copy link
Member

dtcxzyw commented Jan 23, 2024

Ping?

@XChy
Copy link
Member Author

XChy commented Jan 23, 2024

#73767 (comment) has been resolved now for the improvement in InstCombine. Any other review opinions?

@dtcxzyw
Copy link
Member

dtcxzyw commented Feb 4, 2024

Ping.

1 similar comment
@dtcxzyw
Copy link
Member

dtcxzyw commented Feb 25, 2024

Ping.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants