-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Make the (icmp eq/ne (and X, Y), X)
canonicalization work for non-const operands
#84688
Conversation
@llvm/pr-subscribers-llvm-transforms Author: None (goldsteinn) ChangesWe currently do: As well, we can also do: Proofs: https://alive2.llvm.org/ce/z/yeWH3E Full diff: https://github.com/llvm/llvm-project/pull/84688.diff 7 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 5b412a52e1644a..3a65bd03d60c34 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4651,6 +4651,22 @@ static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &Q,
if (Pred == ICmpInst::ICMP_UGE)
return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
+ if (ICmpInst::isEquality(Pred) && Op0->hasOneUse()) {
+ // icmp (X & Y) eq/ne Y --> (X | ~Y) eq/ne -1 if Y is freely invertible and
+ // Y is non-constant. If Y is constant this form is preferable (and
+ // canonicalize too it elsewhere).
+ if (!match(Op1, m_ImmConstant()) &&
+ IC.isFreeToInvert(Op1, Op1->hasOneUse() || Op1->hasNUses(2)))
+ return new ICmpInst(Pred,
+ IC.Builder.CreateOr(A, IC.Builder.CreateNot(Op1)),
+ Constant::getAllOnesValue(Op1->getType()));
+ // icmp (X & Y) eq/ne Y --> (~X & Y) eq/ne 0 if X is freely invertible.
+ if (IC.isFreeToInvert(A, A->hasOneUse()))
+ return new ICmpInst(Pred,
+ IC.Builder.CreateAnd(Op1, IC.Builder.CreateNot(A)),
+ Constant::getNullValue(Op1->getType()));
+ }
+
return nullptr;
}
@@ -5185,9 +5201,6 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
if (Value *V = foldMultiplicationOverflowCheck(I))
return replaceInstUsesWith(I, V);
- if (Instruction *R = foldICmpAndXX(I, Q, *this))
- return R;
-
if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
return replaceInstUsesWith(I, V);
@@ -5427,21 +5440,6 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
}
}
- // canoncalize:
- // (icmp eq/ne (and X, C), X)
- // -> (icmp eq/ne (and X, ~C), 0)
- {
- Constant *CMask;
- A = nullptr;
- if (match(Op0, m_OneUse(m_And(m_Specific(Op1), m_ImmConstant(CMask)))))
- A = Op1;
- else if (match(Op1, m_OneUse(m_And(m_Specific(Op0), m_ImmConstant(CMask)))))
- A = Op0;
- if (A)
- return new ICmpInst(Pred, Builder.CreateAnd(A, Builder.CreateNot(CMask)),
- Constant::getNullValue(A->getType()));
- }
-
if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) {
// A == (A^B) -> B == 0
Value *OtherVal = A == Op0 ? B : A;
@@ -7221,6 +7219,11 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
foldICmpCommutative(I.getSwappedPredicate(), Op1, Op0, I))
return Res;
+ // Need this to be after foldICmpCommutative so we do mask folds before
+ // transforming the `and`.
+ if (Instruction *R = foldICmpAndXX(I, Q, *this))
+ return R;
+
if (I.isCommutative()) {
if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) {
replaceOperand(I, 0, Pair->first);
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll
index dfd67eae8aafd4..85a3cae7ee07f8 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll
@@ -269,9 +269,8 @@ define i1 @n0(i8 %x, i8 %y, i8 %notx) {
define i1 @n1(i8 %x, i8 %y) {
; CHECK-LABEL: @n1(
; CHECK-NEXT: [[T0:%.*]] = shl nuw i8 1, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
-; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%t0 = shl i8 1, %y ; not -1
@@ -284,9 +283,9 @@ define i1 @n1(i8 %x, i8 %y) {
define i1 @n2(i8 %x, i8 %y) {
; CHECK-LABEL: @n2(
; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], 1
-; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[T0]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP2]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%t0 = shl i8 -1, %y
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll
index 608e133ec7f73c..95b8381095abcb 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll
@@ -269,9 +269,8 @@ define i1 @n0(i8 %x, i8 %y, i8 %notx) {
define i1 @n1(i8 %x, i8 %y) {
; CHECK-LABEL: @n1(
; CHECK-NEXT: [[T0:%.*]] = shl nuw i8 1, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
-; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%t0 = shl i8 1, %y ; not -1
@@ -284,9 +283,9 @@ define i1 @n1(i8 %x, i8 %y) {
define i1 @n2(i8 %x, i8 %y) {
; CHECK-LABEL: @n2(
; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], 1
-; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[T0]], -2
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP2]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%t0 = shl i8 -1, %y
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll
index a65be1e9ceeca3..946bb03e04f7e4 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll
@@ -251,9 +251,9 @@ define i1 @n1(i8 %x, i8 %y) {
; CHECK-LABEL: @n1(
; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: call void @use8(i8 [[T0]])
-; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
-; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[T0]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP2]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%t0 = shl i8 -1, %y ; not 1
@@ -268,9 +268,9 @@ define i1 @n2(i8 %x, i8 %y) {
; CHECK-LABEL: @n2(
; CHECK-NEXT: [[T0:%.*]] = shl nuw i8 1, [[Y:%.*]]
; CHECK-NEXT: call void @use8(i8 [[T0]])
-; CHECK-NEXT: [[T1:%.*]] = add nuw i8 [[T0]], 1
-; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = sub nuw i8 -2, [[T0]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP2]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%t0 = shl i8 1, %y
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll
index f156d9bf007cbb..63d406d54179fc 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll
@@ -251,9 +251,9 @@ define i1 @n1(i8 %x, i8 %y) {
; CHECK-LABEL: @n1(
; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: call void @use8(i8 [[T0]])
-; CHECK-NEXT: [[T1:%.*]] = add i8 [[T0]], -1
-; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[T0]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP2]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%t0 = shl i8 -1, %y ; not 1
@@ -268,9 +268,9 @@ define i1 @n2(i8 %x, i8 %y) {
; CHECK-LABEL: @n2(
; CHECK-NEXT: [[T0:%.*]] = shl nuw i8 1, [[Y:%.*]]
; CHECK-NEXT: call void @use8(i8 [[T0]])
-; CHECK-NEXT: [[T1:%.*]] = add nuw i8 [[T0]], 1
-; CHECK-NEXT: [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
-; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = sub nuw i8 -2, [[T0]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP2]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%t0 = shl i8 1, %y
diff --git a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
index 640a95b0561602..9a7eacb303a8ac 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
@@ -22,11 +22,11 @@ define i1 @src_is_mask_zext(i16 %x_in, i8 %y) {
define i1 @src_is_mask_zext_fail_not_mask(i16 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_zext_fail_not_mask(
-; CHECK-NEXT: [[X:%.*]] = xor i16 [[X_IN:%.*]], 123
; CHECK-NEXT: [[M_IN:%.*]] = lshr i8 -2, [[Y:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = zext i8 [[M_IN]] to i16
-; CHECK-NEXT: [[AND:%.*]] = and i16 [[X]], [[MASK]]
-; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[AND]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i16 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[TMP1]], [[MASK]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i16 %x_in, 123
@@ -99,12 +99,12 @@ define i1 @src_is_mask_and(i8 %x_in, i8 %y, i8 %z) {
define i1 @src_is_mask_and_fail_mixed(i8 %x_in, i8 %y, i8 %z) {
; CHECK-LABEL: @src_is_mask_and_fail_mixed(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[MY:%.*]] = ashr i8 -8, [[Y:%.*]]
; CHECK-NEXT: [[MZ:%.*]] = lshr i8 -1, [[Z:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = and i8 [[MY]], [[MZ]]
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[MASK]]
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[X]], [[AND]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -152,11 +152,11 @@ define i1 @src_is_mask_xor(i8 %x_in, i8 %y) {
define i1 @src_is_mask_xor_fail_notmask(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_xor_fail_notmask(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[Y:%.*]]
; CHECK-NEXT: [[NOTMASK:%.*]] = xor i8 [[TMP1]], [[Y]]
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[NOTMASK]]
-; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[NOTMASK]], [[TMP2]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP3]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -342,12 +342,12 @@ define i1 @src_is_mask_umin(i8 %x_in, i8 %y, i8 %z) {
define i1 @src_is_mask_umin_fail_mismatch(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_umin_fail_mismatch(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.umin.i8(i8 [[YMASK]], i8 -32)
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[MASK]], [[X]]
-; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -494,12 +494,12 @@ define i1 @src_is_notmask_lshr_shl(i8 %x_in, i8 %y) {
define i1 @src_is_notmask_lshr_shl_fail_mismatch_shifts(i8 %x_in, i8 %y, i8 %z) {
; CHECK-LABEL: @src_is_notmask_lshr_shl_fail_mismatch_shifts(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[MASK_SHR:%.*]] = lshr i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[NMASK:%.*]] = shl i8 [[MASK_SHR]], [[Z:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = xor i8 [[NMASK]], -1
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[MASK]]
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], [[MASK]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
index e95c72b75f97df..a83572bff3251a 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
@@ -238,9 +238,9 @@ define i1 @icmp_sle_negx_y_fail_maybe_zero(i8 %x, i8 %y) {
define i1 @icmp_eq_x_invertable_y_todo(i8 %x, i1 %y) {
; CHECK-LABEL: @icmp_eq_x_invertable_y_todo(
-; CHECK-NEXT: [[YY:%.*]] = select i1 [[Y:%.*]], i8 7, i8 24
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[YY]], [[X:%.*]]
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[Y:%.*]], i8 -8, i8 -25
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%yy = select i1 %y, i8 7, i8 24
@@ -251,9 +251,8 @@ define i1 @icmp_eq_x_invertable_y_todo(i8 %x, i1 %y) {
define i1 @icmp_eq_x_invertable_y(i8 %x, i8 %y) {
; CHECK-LABEL: @icmp_eq_x_invertable_y(
-; CHECK-NEXT: [[YY:%.*]] = xor i8 [[Y:%.*]], -1
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[YY]], [[X:%.*]]
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], [[X]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%yy = xor i8 %y, -1
@@ -264,9 +263,9 @@ define i1 @icmp_eq_x_invertable_y(i8 %x, i8 %y) {
define i1 @icmp_eq_x_invertable_y2_todo(i8 %x, i1 %y) {
; CHECK-LABEL: @icmp_eq_x_invertable_y2_todo(
-; CHECK-NEXT: [[YY:%.*]] = select i1 [[Y:%.*]], i8 7, i8 24
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[YY]], [[X:%.*]]
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[YY]], [[AND]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[Y:%.*]], i8 -8, i8 -25
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%yy = select i1 %y, i8 7, i8 24
@@ -277,9 +276,8 @@ define i1 @icmp_eq_x_invertable_y2_todo(i8 %x, i1 %y) {
define i1 @icmp_eq_x_invertable_y2(i8 %x, i8 %y) {
; CHECK-LABEL: @icmp_eq_x_invertable_y2(
-; CHECK-NEXT: [[YY:%.*]] = xor i8 [[Y:%.*]], -1
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[YY]], [[X:%.*]]
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], [[YY]]
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP1]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%yy = xor i8 %y, -1
|
(icmp eq/ne (and X, Y), X)
canonicalization work for non-const operands
@@ -7221,6 +7219,11 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { | |||
foldICmpCommutative(I.getSwappedPredicate(), Op1, Op0, I)) | |||
return Res; | |||
|
|||
// Need this to be after foldICmpCommutative so we do mask folds before | |||
// transforming the `and`. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you share an example why this is necessary?
This kind of reordering is generally unreliable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We end up breaking ~Mask
patterns. Here are the regressions if we keep in its original place.
modified llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll
@@ -16,7 +16,8 @@
define i1 @p0(i8 %x, i8 %y) {
; CHECK-LABEL: @p0(
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and i8 [[T0]], [[X:%.*]]
; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
@@ -33,7 +34,8 @@ define i1 @p0(i8 %x, i8 %y) {
define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @p1_vec(
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and <2 x i8> [[T0]], [[X:%.*]]
; CHECK-NEXT: [[RET:%.*]] = icmp eq <2 x i8> [[X_HIGHBITS]], zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
@@ -46,7 +48,8 @@ define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @p2_vec_undef0(
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and <3 x i8> [[T0]], [[X:%.*]]
; CHECK-NEXT: [[RET:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
; CHECK-NEXT: ret <3 x i1> [[RET]]
;
@@ -59,7 +62,8 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @p3_vec_undef0(
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i8> <i8 -1, i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and <3 x i8> [[T0]], [[X:%.*]]
; CHECK-NEXT: [[RET:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
; CHECK-NEXT: ret <3 x i1> [[RET]]
;
@@ -72,7 +76,8 @@ define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
define <3 x i1> @p4_vec_undef2(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @p4_vec_undef2(
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and <3 x i8> [[T0]], [[X:%.*]]
; CHECK-NEXT: [[RET:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
; CHECK-NEXT: ret <3 x i1> [[RET]]
;
@@ -91,8 +96,9 @@ declare i8 @gen8()
define i1 @c0(i8 %y) {
; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and i8 [[X]], [[T0]]
; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
@@ -106,8 +112,9 @@ define i1 @c0(i8 %y) {
define i1 @c1(i8 %y) {
; CHECK-LABEL: @c1(
+; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and i8 [[X]], [[T0]]
; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
@@ -121,8 +128,9 @@ define i1 @c1(i8 %y) {
define i1 @c2(i8 %y) {
; CHECK-LABEL: @c2(
+; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and i8 [[X]], [[T0]]
; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
@@ -144,7 +152,7 @@ define i1 @oneuse0(i8 %x, i8 %y) {
; CHECK-LABEL: @oneuse0(
; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: call void @use8(i8 [[T0]])
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and i8 [[T0]], [[X:%.*]]
; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
@@ -161,7 +169,8 @@ define i1 @oneuse1(i8 %x, i8 %y) {
; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
; CHECK-NEXT: call void @use8(i8 [[T1]])
-; CHECK-NEXT: [[RET:%.*]] = icmp uge i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%t0 = shl i8 -1, %y
@@ -195,7 +204,8 @@ define i1 @oneuse3(i8 %x, i8 %y) {
; CHECK-NEXT: call void @use8(i8 [[T0]])
; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
; CHECK-NEXT: call void @use8(i8 [[T1]])
-; CHECK-NEXT: [[RET:%.*]] = icmp uge i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%t0 = shl i8 -1, %y
modified llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll
@@ -16,7 +16,8 @@
define i1 @p0(i8 %x, i8 %y) {
; CHECK-LABEL: @p0(
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and i8 [[T0]], [[X:%.*]]
; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
@@ -33,7 +34,8 @@ define i1 @p0(i8 %x, i8 %y) {
define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @p1_vec(
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T0:%.*]] = shl nsw <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and <2 x i8> [[T0]], [[X:%.*]]
; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i8> [[X_HIGHBITS]], zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[RET]]
;
@@ -46,7 +48,8 @@ define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @p2_vec_undef0(
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and <3 x i8> [[T0]], [[X:%.*]]
; CHECK-NEXT: [[RET:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
; CHECK-NEXT: ret <3 x i1> [[RET]]
;
@@ -59,7 +62,8 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @p3_vec_undef0(
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i8> <i8 -1, i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and <3 x i8> [[T0]], [[X:%.*]]
; CHECK-NEXT: [[RET:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
; CHECK-NEXT: ret <3 x i1> [[RET]]
;
@@ -72,7 +76,8 @@ define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
define <3 x i1> @p4_vec_undef2(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @p4_vec_undef2(
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and <3 x i8> [[T0]], [[X:%.*]]
; CHECK-NEXT: [[RET:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
; CHECK-NEXT: ret <3 x i1> [[RET]]
;
@@ -91,8 +96,9 @@ declare i8 @gen8()
define i1 @c0(i8 %y) {
; CHECK-LABEL: @c0(
+; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and i8 [[X]], [[T0]]
; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
@@ -106,8 +112,9 @@ define i1 @c0(i8 %y) {
define i1 @c1(i8 %y) {
; CHECK-LABEL: @c1(
+; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and i8 [[X]], [[T0]]
; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
@@ -121,8 +128,9 @@ define i1 @c1(i8 %y) {
define i1 @c2(i8 %y) {
; CHECK-LABEL: @c2(
+; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[X:%.*]] = call i8 @gen8()
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and i8 [[X]], [[T0]]
; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
@@ -144,7 +152,7 @@ define i1 @oneuse0(i8 %x, i8 %y) {
; CHECK-LABEL: @oneuse0(
; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: call void @use8(i8 [[T0]])
-; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[X_HIGHBITS:%.*]] = and i8 [[T0]], [[X:%.*]]
; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
@@ -161,7 +169,8 @@ define i1 @oneuse1(i8 %x, i8 %y) {
; CHECK-NEXT: [[T0:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
; CHECK-NEXT: call void @use8(i8 [[T1]])
-; CHECK-NEXT: [[RET:%.*]] = icmp ult i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%t0 = shl i8 -1, %y
@@ -195,7 +204,8 @@ define i1 @oneuse3(i8 %x, i8 %y) {
; CHECK-NEXT: call void @use8(i8 [[T0]])
; CHECK-NEXT: [[T1:%.*]] = xor i8 [[T0]], -1
; CHECK-NEXT: call void @use8(i8 [[T1]])
-; CHECK-NEXT: [[RET:%.*]] = icmp ult i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[T0]], [[X:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[RET]]
;
%t0 = shl i8 -1, %y
modified llvm/test/Transforms/InstCombine/icmp-and-lowbit-mask.ll
@@ -5,10 +5,11 @@ declare void @use.i8(i8)
declare void @use.i16(i16)
define i1 @src_is_mask_zext(i16 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_zext(
-; CHECK-NEXT: [[X:%.*]] = xor i16 [[X_IN:%.*]], 123
; CHECK-NEXT: [[M_IN:%.*]] = lshr i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = zext i8 [[M_IN]] to i16
-; CHECK-NEXT: [[R:%.*]] = icmp ule i16 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i16 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[TMP1]], [[MASK]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i16 %x_in, 123
@@ -80,11 +81,12 @@ define i1 @src_is_mask_sext_fail_multiuse(i16 %x_in, i8 %y) {
define i1 @src_is_mask_and(i8 %x_in, i8 %y, i8 %z) {
; CHECK-LABEL: @src_is_mask_and(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[MY:%.*]] = lshr i8 7, [[Y:%.*]]
; CHECK-NEXT: [[MZ:%.*]] = lshr i8 -1, [[Z:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = and i8 [[MY]], [[MZ]]
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -119,10 +121,11 @@ define i1 @src_is_mask_and_fail_mixed(i8 %x_in, i8 %y, i8 %z) {
define i1 @src_is_mask_or(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_or(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[MY:%.*]] = lshr i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = and i8 [[MY]], 7
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -136,10 +139,11 @@ define i1 @src_is_mask_or(i8 %x_in, i8 %y) {
define i1 @src_is_mask_xor(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_xor(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[MASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -170,11 +174,12 @@ define i1 @src_is_mask_xor_fail_notmask(i8 %x_in, i8 %y) {
define i1 @src_is_mask_select(i8 %x_in, i8 %y, i1 %cond) {
; CHECK-LABEL: @src_is_mask_select(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[MASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -246,12 +251,13 @@ define i1 @src_is_mask_shl_lshr_fail_not_allones(i8 %x_in, i8 %y, i1 %cond) {
define i1 @src_is_mask_lshr(i8 %x_in, i8 %y, i8 %z, i1 %cond) {
; CHECK-LABEL: @src_is_mask_lshr(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[SMASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
; CHECK-NEXT: [[MASK:%.*]] = lshr i8 [[SMASK]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -266,12 +272,13 @@ define i1 @src_is_mask_lshr(i8 %x_in, i8 %y, i8 %z, i1 %cond) {
define i1 @src_is_mask_ashr(i8 %x_in, i8 %y, i8 %z, i1 %cond) {
; CHECK-LABEL: @src_is_mask_ashr(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[SMASK:%.*]] = select i1 [[COND:%.*]], i8 [[YMASK]], i8 15
; CHECK-NEXT: [[MASK:%.*]] = ashr i8 [[SMASK]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -286,10 +293,11 @@ define i1 @src_is_mask_ashr(i8 %x_in, i8 %y, i8 %z, i1 %cond) {
define i1 @src_is_mask_p2_m1(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_p2_m1(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[P2ORZ:%.*]] = shl i8 2, [[Y:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = add i8 [[P2ORZ]], -1
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -302,11 +310,12 @@ define i1 @src_is_mask_p2_m1(i8 %x_in, i8 %y) {
define i1 @src_is_mask_umax(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_umax(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.umax.i8(i8 [[YMASK]], i8 3)
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -321,12 +330,13 @@ define i1 @src_is_mask_umax(i8 %x_in, i8 %y) {
define i1 @src_is_mask_umin(i8 %x_in, i8 %y, i8 %z) {
; CHECK-LABEL: @src_is_mask_umin(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[ZMASK:%.*]] = lshr i8 15, [[Z:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.umin.i8(i8 [[YMASK]], i8 [[ZMASK]])
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -362,11 +372,12 @@ define i1 @src_is_mask_umin_fail_mismatch(i8 %x_in, i8 %y) {
define i1 @src_is_mask_smax(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_smax(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.smax.i8(i8 [[YMASK]], i8 -1)
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -381,11 +392,12 @@ define i1 @src_is_mask_smax(i8 %x_in, i8 %y) {
define i1 @src_is_mask_smin(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_smin(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[Y_M1:%.*]] = add i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YMASK:%.*]] = xor i8 [[Y_M1]], [[Y]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.smin.i8(i8 [[YMASK]], i8 0)
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -400,10 +412,11 @@ define i1 @src_is_mask_smin(i8 %x_in, i8 %y) {
define i1 @src_is_mask_bitreverse_not_mask(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_mask_bitreverse_not_mask(
-; CHECK-NEXT: [[X:%.*]] = xor i8 [[X_IN:%.*]], 123
; CHECK-NEXT: [[NMASK:%.*]] = shl nsw i8 -1, [[Y:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[NMASK]])
-; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[MASK]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X_IN:%.*]], -124
+; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[MASK]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -418,9 +431,11 @@ define i1 @src_is_mask_bitreverse_not_mask(i8 %x_in, i8 %y) {
define i1 @src_is_notmask_sext(i16 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_notmask_sext(
; CHECK-NEXT: [[M_IN:%.*]] = shl i8 -8, [[Y:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = xor i16 [[X_IN:%.*]], -124
-; CHECK-NEXT: [[TMP2:%.*]] = sext i8 [[M_IN]] to i16
-; CHECK-NEXT: [[R:%.*]] = icmp uge i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[M_IN]], -1
+; CHECK-NEXT: [[MASK:%.*]] = sext i8 [[TMP1]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = xor i16 [[X_IN:%.*]], -128
+; CHECK-NEXT: [[TMP3:%.*]] = or i16 [[TMP2]], [[MASK]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[TMP3]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i16 %x_in, 123
@@ -479,8 +494,10 @@ define i1 @src_is_notmask_shl_fail_multiuse_invert(i8 %x_in, i8 %y, i1 %cond) {
define i1 @src_is_notmask_lshr_shl(i8 %x_in, i8 %y) {
; CHECK-LABEL: @src_is_notmask_lshr_shl(
; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i8 -1, [[Y:%.*]]
+; CHECK-NEXT: [[MASK:%.*]] = xor i8 [[TMP1]], -1
; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[X_IN:%.*]], -124
-; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], [[MASK]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP3]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i8 %x_in, 123
@@ -516,8 +533,10 @@ define i1 @src_is_notmask_ashr(i16 %x_in, i8 %y, i16 %z) {
; CHECK-NEXT: [[M_IN:%.*]] = shl i8 -32, [[Y:%.*]]
; CHECK-NEXT: [[NMASK:%.*]] = sext i8 [[M_IN]] to i16
; CHECK-NEXT: [[NMASK_SHR:%.*]] = ashr i16 [[NMASK]], [[Z:%.*]]
+; CHECK-NEXT: [[MASK:%.*]] = xor i16 [[NMASK_SHR]], -1
; CHECK-NEXT: [[TMP1:%.*]] = xor i16 [[X_IN:%.*]], -124
-; CHECK-NEXT: [[R:%.*]] = icmp uge i16 [[TMP1]], [[NMASK_SHR]]
+; CHECK-NEXT: [[TMP2:%.*]] = or i16 [[TMP1]], [[MASK]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[TMP2]], -1
; CHECK-NEXT: ret i1 [[R]]
;
%x = xor i16 %x_in, 123
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
originally I started handling some of these manually, but figured it made more sense to just ensure mask folds run first then to essentially re-handle all the patterns in the new form.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we canonicalize to a new form, then we do need to be able to handle that new form in transforms, otherwise it doesn't make a lot of sense...
Simplifying (-1 << y) | x == 0
to x >> y == 0
does seem like a generally valuable fold to me as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
its &
not |
I think (https://alive2.llvm.org/ce/z/V56XYP). I did try adding it and it cleans up about half the regressions but it doesn't handle things like:
define i1 @src_is_notmask_ashr(i16 %x_in, i8 %y, i16 %z)
AFAICT, the or
case needs to be re-inverted and sent back through foldICmpWithLowBitMaskedVal
at which point I figured just moving it was better.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Posted: #84691 for the shl -1
case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you are right actually, its non-nonsensical to label something a canonicalization but then have it work for less folds...
I will handle all regressions and move this back to its original place.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@nikic
Okay, Ive looked into this fair bit. I am back on the side this is more canonical. I have patches lined up to cleanup some of the regessions.
The remaining ones run into basically the following issue. not
instructions as a tool for transforms are a fixed resource. If a transform expected to be able to use not
instructions more profitably, I think it makes sense to order it first.
Some of these cases (non-single-use not) could theoretically be undone by iterating the use list of V
in getFreelyInverted
, but that seems a bit overkill.
Other than that, I don't really see a better approach that "try to order the most profitable first".
My preference would be to get some patches up that cover holes in the current folding that this patch highlighted, then submit this patch with the explicit "after foldICmpWithLowBitMaskedVal
"
Is that reasonable?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please share a new diff of what regresses without the foldICmpWithLowBitMasked special case, now that your other changes have landed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah nevermind, can just look at the diff for the 3rd commit.
4ba8827
to
9b6938d
Compare
9b6938d
to
28ceece
Compare
Proof: https://alive2.llvm.org/ce/z/TAFmPw This is a lemma for clearing up some of the regressions that llvm#84688 causes.
Proof: https://alive2.llvm.org/ce/z/TAFmPw This is a lemma for clearing up some of the regressions that #84688 causes. Closes #84868
… y)` This cleans up basically all the regressions assosiated from llvm#84688 Proof of all new cases: https://alive2.llvm.org/ce/z/5yYWLb
… y)` This cleans up basically all the regressions assosiated from llvm#84688 Proof of all new cases: https://alive2.llvm.org/ce/z/5yYWLb
… y)` This cleans up basically all the regressions assosiated from #84688 Proof of all new cases: https://alive2.llvm.org/ce/z/5yYWLb Closes #85445
28ceece
to
5e6ea3c
Compare
@@ -238,9 +238,9 @@ define i1 @icmp_sle_negx_y_fail_maybe_zero(i8 %x, i8 %y) { | |||
|
|||
define i1 @icmp_eq_x_invertable_y_todo(i8 %x, i1 %y) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know what does the TODO
mean here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was todo before this patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be good to rename the function and add some header comments :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh no, these are from before we had isFreeToInvert...
This is from a year-old series (where isFreeToInvert...
was later on).
@@ -4709,6 +4709,26 @@ static Instruction *foldICmpAndXX(ICmpInst &I, const SimplifyQuery &Q, | |||
if (Pred == ICmpInst::ICMP_UGE) | |||
return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1); | |||
|
|||
if (ICmpInst::isEquality(Pred) && Op0->hasOneUse()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need a multi-use test.
// Y is non-constant. If Y is constant this form is preferable (and | ||
// canonicalize too it elsewhere). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you explain this?
// Since we may be consuming a `not` here, first check if we match | ||
// `foldICmpWithLowBitMaskedVal` as it is a "better" user of `not` | ||
// instructions. | ||
if (Value *R = foldICmpWithLowBitMaskedVal(Pred, Op0, Op1, Q, IC)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add some tests to demonstrate this behavior.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just split it into a 3rd commit, so you can see the regressions/fixups between commit 2/3
5e6ea3c
to
e2d8443
Compare
… y)` This cleans up basically all the regressions assosiated from llvm#84688 Proof of all new cases: https://alive2.llvm.org/ce/z/5yYWLb Closes llvm#85445
ping |
…x, y), {x,y}); NFC
…work for non-const operands We currently do: `(icmp eq/ne (and X, Y), Y)` -> `(icmp eq/ne (and ~X, Y), 0)` if `X` is constant. We can make this more general and do it if `X` is freely invertable (i.e say `X = ~Z`). As well, we can also do: `(icmp eq/ne (and X, Y), Y)` -> `(icmp eq/ne (or X, ~Y), -1)` If `Y` is freely invertible. Proofs: https://alive2.llvm.org/ce/z/yeWH3E Differential Revision: https://reviews.llvm.org/D159059
…, Y` -> `(icmp eq/ne (~X, Y), 0)` This issue is the canonicalization can consume `not` instruction which are a limitted resource and are use to enable multiple transforms. In this case `foldICmpWithLowBitMaskedVal` is a "better" user of `not` instructions, so just check if that has a result first.
rebased |
e2d8443
to
b38251b
Compare
ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The first two commits LGTM. I don't think we need the last one. The cases it improves look very artificial to me (there's basically an extra-use not hanging around that can get reused -- we could cook up test cases like this for other transforms as well, but I doubt they're relevant). If you do want to have that one, I think it's best to make a separate PR so @dtcxzyw can check whether the problematic patterns occur in the wild. But in the meantime I don't mind regressing them.
Done, see: #93656 |
…work for non-const operands We currently do: `(icmp eq/ne (and X, Y), Y)` -> `(icmp eq/ne (and ~X, Y), 0)` if `X` is constant. We can make this more general and do it if `X` is freely invertable (i.e say `X = ~Z`). As well, we can also do: `(icmp eq/ne (and X, Y), Y)` -> `(icmp eq/ne (or X, ~Y), -1)` If `Y` is freely invertible. Proofs: https://alive2.llvm.org/ce/z/yeWH3E Differential Revision: https://reviews.llvm.org/D159059 Closes llvm#84688
We currently do:
(icmp eq/ne (and X, Y), Y)
->(icmp eq/ne (and ~X, Y), 0)
if
X
is constant. We can make this more general and do it ifX
isfreely invertable (i.e say
X = ~Z
).As well, we can also do:
(icmp eq/ne (and X, Y), Y)
->(icmp eq/ne (or X, ~Y), -1)
If
Y
is freely invertible.Proofs: https://alive2.llvm.org/ce/z/yeWH3E