-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ValueTracking] Use select condition to help infer bits of arms #84699
[ValueTracking] Use select condition to help infer bits of arms #84699
Conversation
@llvm/pr-subscribers-llvm-analysis Author: None (goldsteinn) Changes
Full diff: https://github.com/llvm/llvm-project/pull/84699.diff 3 Files Affected:
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 6d0e79e11eed43..070a53e6411313 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1026,6 +1026,20 @@ static void computeKnownBitsFromOperator(const Operator *I,
computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+ // See what condition implies about the bits of the two select arms.
+ if (!Known.isConstant()) {
+ KnownBits KnownFromCond(Known.getBitWidth());
+ computeKnownBitsFromCond(I->getOperand(2), I->getOperand(0),
+ KnownFromCond, Depth + 1, Q, /*Invert=*/true);
+ Known = Known.unionWith(KnownFromCond);
+ }
+ if (!Known2.isConstant()) {
+ KnownBits KnownFromCond(Known2.getBitWidth());
+ computeKnownBitsFromCond(I->getOperand(1), I->getOperand(0),
+ KnownFromCond, Depth + 1, Q, /*Invert=*/false);
+ Known2 = Known2.unionWith(KnownFromCond);
+ }
+
// Only known if known in both the LHS and RHS.
Known = Known.intersectWith(Known2);
break;
diff --git a/llvm/test/Analysis/ScalarEvolution/max-expr-cache.ll b/llvm/test/Analysis/ScalarEvolution/max-expr-cache.ll
index e1c05c4b431f39..c9128522efcdaa 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-expr-cache.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-expr-cache.ll
@@ -226,21 +226,21 @@ define void @umax(i32 %tmp3) {
; CHECK-NEXT: %tmp48 = select i1 %tmp47, i32 %tmp44, i32 %tmp46
; CHECK-NEXT: --> ((7 + (256 umin {%tmp3,+,-256}<%bb4>))<nuw><nsw> umax (256 umin (1 + (256 umin (1 + (256 umin (1 + (256 umin (1 + (256 umin (1 + (256 umin (1 + (256 umin (1 + (256 umin {%tmp3,+,-256}<%bb4>))<nuw><nsw> umin {%tmp3,+,-256}<%bb4>))<nuw><nsw> umin {%tmp3,+,-256}<%bb4>))<nuw><nsw> umin {%tmp3,+,-256}<%bb4>))<nuw><nsw> umin {%tmp3,+,-256}<%bb4>))<nuw><nsw> umin {%tmp3,+,-256}<%bb4>))<nuw><nsw> umin {%tmp3,+,-256}<%bb4>))<nuw><nsw> umin {%tmp3,+,-256}<%bb4>)) U: [7,264) S: [7,264) Exits: <<Unknown>> LoopDispositions: { %bb4: Computable, %bb53: Invariant }
; CHECK-NEXT: %tmp49 = ashr i32 %tmp48, 3
-; CHECK-NEXT: --> %tmp49 U: [-268435456,268435456) S: [-268435456,268435456) Exits: <<Unknown>> LoopDispositions: { %bb4: Variant, %bb53: Invariant }
+; CHECK-NEXT: --> %tmp49 U: [0,128) S: [0,128) Exits: <<Unknown>> LoopDispositions: { %bb4: Variant, %bb53: Invariant }
; CHECK-NEXT: %tmp51 = select i1 %tmp50, i32 %tmp49, i32 0
-; CHECK-NEXT: --> %tmp49 U: [-268435456,268435456) S: [-268435456,268435456) Exits: <<Unknown>> LoopDispositions: { %bb4: Variant, %bb53: Invariant }
+; CHECK-NEXT: --> %tmp49 U: [0,128) S: [0,128) Exits: <<Unknown>> LoopDispositions: { %bb4: Variant, %bb53: Invariant }
; CHECK-NEXT: %tmp52 = zext i32 %tmp51 to i64
-; CHECK-NEXT: --> (zext i32 %tmp49 to i64) U: [0,4294967296) S: [0,4294967296) Exits: <<Unknown>> LoopDispositions: { %bb4: Variant, %bb53: Invariant }
+; CHECK-NEXT: --> (zext i32 %tmp49 to i64) U: [0,128) S: [0,128) Exits: <<Unknown>> LoopDispositions: { %bb4: Variant, %bb53: Invariant }
; CHECK-NEXT: %tmp54 = phi i64 [ undef, %bb4 ], [ %tmp59, %bb53 ]
; CHECK-NEXT: --> {undef,+,1}<nsw><%bb53> U: full-set S: full-set Exits: (-1 + (zext i32 %tmp49 to i64))<nsw> LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp55 = trunc i64 %tmp54 to i32
; CHECK-NEXT: --> {(trunc i64 undef to i32),+,1}<%bb53> U: full-set S: full-set Exits: (-1 + %tmp49)<nsw> LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp56 = shl nsw i32 %tmp55, 3
-; CHECK-NEXT: --> {(8 * (trunc i64 undef to i32)),+,8}<%bb53> U: [0,-7) S: [-2147483648,2147483641) Exits: (-8 + (8 * %tmp49)<nsw>) LoopDispositions: { %bb53: Computable, %bb4: Variant }
+; CHECK-NEXT: --> {(8 * (trunc i64 undef to i32)),+,8}<%bb53> U: [0,-7) S: [-2147483648,2147483641) Exits: (-8 + (8 * %tmp49)<nuw><nsw>)<nsw> LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp57 = sext i32 %tmp56 to i64
-; CHECK-NEXT: --> (sext i32 {(8 * (trunc i64 undef to i32)),+,8}<%bb53> to i64) U: [0,-7) S: [-2147483648,2147483641) Exits: (sext i32 (-8 + (8 * %tmp49)<nsw>) to i64) LoopDispositions: { %bb53: Computable, %bb4: Variant }
+; CHECK-NEXT: --> (sext i32 {(8 * (trunc i64 undef to i32)),+,8}<%bb53> to i64) U: [0,-7) S: [-2147483648,2147483641) Exits: (-8 + (8 * (zext i32 %tmp49 to i64))<nuw><nsw>)<nsw> LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp58 = getelementptr inbounds i8, ptr null, i64 %tmp57
-; CHECK-NEXT: --> ((sext i32 {(8 * (trunc i64 undef to i32)),+,8}<%bb53> to i64) + null) U: [0,-7) S: [-2147483648,2147483641) Exits: ((sext i32 (-8 + (8 * %tmp49)<nsw>) to i64) + null) LoopDispositions: { %bb53: Computable, %bb4: Variant }
+; CHECK-NEXT: --> ((sext i32 {(8 * (trunc i64 undef to i32)),+,8}<%bb53> to i64) + null) U: [0,-7) S: [-2147483648,2147483641) Exits: (-8 + (8 * (zext i32 %tmp49 to i64))<nuw><nsw> + null) LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp59 = add nsw i64 %tmp54, 1
; CHECK-NEXT: --> {(1 + undef),+,1}<nsw><%bb53> U: full-set S: full-set Exits: (zext i32 %tmp49 to i64) LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp62 = add nuw nsw i64 %tmp5, 1
diff --git a/llvm/test/Analysis/ValueTracking/knownbits-select-from-cond.ll b/llvm/test/Analysis/ValueTracking/knownbits-select-from-cond.ll
new file mode 100644
index 00000000000000..edf499cc96020e
--- /dev/null
+++ b/llvm/test/Analysis/ValueTracking/knownbits-select-from-cond.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+define i8 @select_condition_implies_highbits_op1(i8 %xx, i8 %y) {
+; CHECK-LABEL: @select_condition_implies_highbits_op1(
+; CHECK-NEXT: [[X:%.*]] = and i8 [[XX:%.*]], 15
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i8 [[Y:%.*]], 3
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], i8 [[Y]], i8 [[X]]
+; CHECK-NEXT: [[R:%.*]] = or disjoint i8 [[SEL]], 32
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and i8 %xx, 15
+ %cond = icmp ult i8 %y, 3
+ %sel = select i1 %cond, i8 %y, i8 %x
+ %r = add i8 %sel, 32
+ ret i8 %r
+}
+
+define i8 @select_condition_implies_highbits_op2(i8 %xx, i8 %y) {
+; CHECK-LABEL: @select_condition_implies_highbits_op2(
+; CHECK-NEXT: [[X:%.*]] = and i8 [[XX:%.*]], 15
+; CHECK-NEXT: [[COND:%.*]] = icmp ugt i8 [[Y:%.*]], 3
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], i8 [[X]], i8 [[Y]]
+; CHECK-NEXT: [[R:%.*]] = or disjoint i8 [[SEL]], 32
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and i8 %xx, 15
+ %cond = icmp ugt i8 %y, 3
+ %sel = select i1 %cond, i8 %x, i8 %y
+ %r = add i8 %sel, 32
+ ret i8 %r
+}
+
+define i8 @select_condition_implies_highbits_op1_and(i8 %xx, i8 %y, i1 %other_cond) {
+; CHECK-LABEL: @select_condition_implies_highbits_op1_and(
+; CHECK-NEXT: [[X:%.*]] = and i8 [[XX:%.*]], 15
+; CHECK-NEXT: [[COND0:%.*]] = icmp ult i8 [[Y:%.*]], 3
+; CHECK-NEXT: [[COND:%.*]] = and i1 [[COND0]], [[OTHER_COND:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], i8 [[Y]], i8 [[X]]
+; CHECK-NEXT: [[R:%.*]] = or disjoint i8 [[SEL]], 32
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and i8 %xx, 15
+ %cond0 = icmp ult i8 %y, 3
+ %cond = and i1 %cond0, %other_cond
+ %sel = select i1 %cond, i8 %y, i8 %x
+ %r = add i8 %sel, 32
+ ret i8 %r
+}
+
+define i8 @select_condition_implies_highbits_op2_or(i8 %xx, i8 %y, i1 %other_cond) {
+; CHECK-LABEL: @select_condition_implies_highbits_op2_or(
+; CHECK-NEXT: [[X:%.*]] = and i8 [[XX:%.*]], 15
+; CHECK-NEXT: [[COND0:%.*]] = icmp ugt i8 [[Y:%.*]], 3
+; CHECK-NEXT: [[COND:%.*]] = or i1 [[COND0]], [[OTHER_COND:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], i8 [[X]], i8 [[Y]]
+; CHECK-NEXT: [[R:%.*]] = or disjoint i8 [[SEL]], 32
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and i8 %xx, 15
+ %cond0 = icmp ugt i8 %y, 3
+ %cond = or i1 %cond0, %other_cond
+ %sel = select i1 %cond, i8 %x, i8 %y
+ %r = add i8 %sel, 32
+ ret i8 %r
+}
|
0f32527
to
0310ed0
Compare
0310ed0
to
a4b5288
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The implementation looks correct to me now, but unfortunately this has also made it too expensive: http://llvm-compile-time-tracker.com/compare.php?from=1bee754fc62ed053e347891e8967b645e95ddbae&to=a4b5288b2bdd57cc31551977d5413abbb95413a9&stat=instructions:u
cest la vie Ill see if I can speed it up |
If we have something like `(select (icmp ult x, 8), x, y)`, we can use the `(icmp ult x, 8)` to help compute the knownbits of `x`.
Moved the undef check to the very end, seems to handle the bulk of the compile time regressions: |
a4b5288
to
ed056b0
Compare
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
return Res; | ||
|
||
// Finally make sure the information we found is valid. This is relatively | ||
// expensive so its left for the very end. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// expensive so its left for the very end. | |
// expensive so it's left for the very end. |
If we have something like
(select (icmp ult x, 8), x, y)
, we can usethe
(icmp ult x, 8)
to help compute the knownbits ofx
.