[InstCombine] Fold `(icmp eq/ne (xor x, y), C1)` even if multiuse #87275

goldsteinn · 2024-04-01T19:09:17Z

[InstCombine] Add tests for folding (icmp eq/ne (xor x, C0), C1); NFC
[InstCombine] Fold (icmp eq/ne (xor x, y), C1) even if multiuse

Two folds unlocked:
(icmp eq/ne (xor x, C0), C1) -> (icmp eq/ne x, C2)
(icmp eq/ne (xor x, y), 0) -> (icmp eq/ne x, y)

This fixes regressions assosiated with #87180

llvmbot · 2024-04-01T19:09:47Z

@llvm/pr-subscribers-llvm-transforms

Author: None (goldsteinn)

Changes

[InstCombine] Add tests for folding (icmp eq/ne (xor x, C0), C1); NFC
[InstCombine] Fold (icmp eq/ne (xor x, y), C1) even if multiuse

Full diff: https://github.com/llvm/llvm-project/pull/87275.diff

5 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+7-9)
(modified) llvm/test/Transforms/InstCombine/icmp-equality-xor.ll (+41-1)
(modified) llvm/test/Transforms/InstCombine/icmp-or.ll (+3-3)
(modified) llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll (+7-7)
(modified) llvm/test/Transforms/InstCombine/select.ll (+2-1)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index db302d7e526844..1200f08cef78f1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3461,15 +3461,13 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
     break;
   }
   case Instruction::Xor:
-    if (BO->hasOneUse()) {
-      if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
-        // For the xor case, we can xor two constants together, eliminating
-        // the explicit xor.
-        return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
-      } else if (C.isZero()) {
-        // Replace ((xor A, B) != 0) with (A != B)
-        return new ICmpInst(Pred, BOp0, BOp1);
-      }
+    if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
+      // For the xor case, we can xor two constants together, eliminating
+      // the explicit xor.
+      return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
+    } else if (C.isZero()) {
+      // Replace ((xor A, B) != 0) with (A != B)
+      return new ICmpInst(Pred, BOp0, BOp1);
     }
     break;
   case Instruction::Or: {
diff --git a/llvm/test/Transforms/InstCombine/icmp-equality-xor.ll b/llvm/test/Transforms/InstCombine/icmp-equality-xor.ll
index f5d5ef32c81e81..e8a78df6d5f756 100644
--- a/llvm/test/Transforms/InstCombine/icmp-equality-xor.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-equality-xor.ll
@@ -88,7 +88,7 @@ define i1 @cmpeq_xor_cst1_commuted(i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 10
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
-  %b2 = mul i32 %b, %b ; thwart complexity-based canonicalization
+  %b2 = mul i32 %b, %b  ; thwart complexity-based canonicalization
   %c = xor i32 %a, 10
   %cmp = icmp eq i32 %b2, %c
   ret i1 %cmp
@@ -145,3 +145,43 @@ entry:
   %cmp = icmp ne <2 x i8> %xor, <i8 9, i8 79>
   ret <2 x i1> %cmp
 }
+
+declare void @use.i8(i8)
+define i1 @fold_xorC_eq0_multiuse(i8 %x, i8 %y) {
+; CHECK-LABEL: @fold_xorC_eq0_multiuse(
+; CHECK-NEXT:    [[XX:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[X]], [[Y]]
+; CHECK-NEXT:    call void @use.i8(i8 [[XX]])
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %xx = xor i8 %x, %y
+  %r = icmp eq i8 %xx, 0
+  call void @use.i8(i8 %xx)
+  ret i1 %r
+}
+
+define i1 @fold_xorC_eq1_multiuse_fail(i8 %x, i8 %y) {
+; CHECK-LABEL: @fold_xorC_eq1_multiuse_fail(
+; CHECK-NEXT:    [[XX:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[XX]], 1
+; CHECK-NEXT:    call void @use.i8(i8 [[XX]])
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %xx = xor i8 %x, %y
+  %r = icmp eq i8 %xx, 1
+  call void @use.i8(i8 %xx)
+  ret i1 %r
+}
+
+define i1 @fold_xorC_neC_multiuse(i8 %x) {
+; CHECK-LABEL: @fold_xorC_neC_multiuse(
+; CHECK-NEXT:    [[XX:%.*]] = xor i8 [[X:%.*]], 45
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[X]], 110
+; CHECK-NEXT:    call void @use.i8(i8 [[XX]])
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %xx = xor i8 %x, 45
+  %r = icmp ne i8 %xx, 67
+  call void @use.i8(i8 %xx)
+  ret i1 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-or.ll b/llvm/test/Transforms/InstCombine/icmp-or.ll
index 922845c1e7e2d8..dce8b6a3b282e6 100644
--- a/llvm/test/Transforms/InstCombine/icmp-or.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-or.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S | FileCheck %s
 
 declare void @use(i8)
 
@@ -434,7 +434,7 @@ define i1 @icmp_or_xor_2_3_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) {
 ; CHECK-NEXT:    [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = or i64 [[XOR]], [[XOR1]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[OR]], 0
-; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[XOR]], 0
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[X1]], [[Y1]]
 ; CHECK-NEXT:    [[OR1:%.*]] = or i1 [[CMP]], [[CMP_1]]
 ; CHECK-NEXT:    ret i1 [[OR1]]
 ;
@@ -455,7 +455,7 @@ define i1 @icmp_or_xor_2_4_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) {
 ; CHECK-NEXT:    [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = or i64 [[XOR]], [[XOR1]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[OR]], 0
-; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[XOR1]], 0
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[X2]], [[Y2]]
 ; CHECK-NEXT:    [[OR1:%.*]] = or i1 [[CMP]], [[CMP_1]]
 ; CHECK-NEXT:    ret i1 [[OR1]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll b/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll
index cd05022b0d35da..e5906e7a969730 100644
--- a/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll
+++ b/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll
@@ -7,9 +7,9 @@
 
 define zeroext i1 @test1(i32 %lhs, i32 %rhs) {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[LHS:%.*]], 5
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[XOR]], 10
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[XOR]], [[RHS:%.*]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[LHS:%.*]], 15
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[LHS]], [[RHS:%.*]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP1]], 5
 ; CHECK-NEXT:    [[SEL:%.*]] = or i1 [[CMP1]], [[CMP2]]
 ; CHECK-NEXT:    ret i1 [[SEL]]
 ;
@@ -23,9 +23,9 @@ define zeroext i1 @test1(i32 %lhs, i32 %rhs) {
 
 define zeroext i1 @test1_logical(i32 %lhs, i32 %rhs) {
 ; CHECK-LABEL: @test1_logical(
-; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[LHS:%.*]], 5
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[XOR]], 10
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[XOR]], [[RHS:%.*]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[LHS:%.*]], 15
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[LHS]], [[RHS:%.*]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP1]], 5
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i1 true, i1 [[CMP2]]
 ; CHECK-NEXT:    ret i1 [[SEL]]
 ;
@@ -40,7 +40,7 @@ define zeroext i1 @test1_logical(i32 %lhs, i32 %rhs) {
 define zeroext i1 @test2(i32 %lhs, i32 %rhs) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[LHS:%.*]], [[RHS:%.*]]
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[XOR]], 0
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[LHS]], [[RHS]]
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[XOR]], 32
 ; CHECK-NEXT:    [[SEL:%.*]] = xor i1 [[CMP1]], [[CMP2]]
 ; CHECK-NEXT:    ret i1 [[SEL]]
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 278cabdff9ed3e..30bbc86b091494 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -3698,7 +3698,8 @@ exit:
 define i32 @src_select_xxory_eq0_xorxy_y(i32 %x, i32 %y) {
 ; CHECK-LABEL: @src_select_xxory_eq0_xorxy_y(
 ; CHECK-NEXT:    [[XOR0:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[XOR0]], i32 0, i32 [[Y]]
+; CHECK-NEXT:    [[XOR:%.*]] = select i1 [[XOR0]], i32 [[X]], i32 0
+; CHECK-NEXT:    [[COND:%.*]] = xor i32 [[XOR]], [[Y]]
 ; CHECK-NEXT:    ret i32 [[COND]]
 ;
   %xor = xor i32 %x, %y

goldsteinn · 2024-04-01T19:11:08Z

llvm/test/Transforms/InstCombine/icmp-or.ll

@@ -455,7 +455,7 @@ define i1 @icmp_or_xor_2_4_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) {
 ; CHECK-NEXT:    [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = or i64 [[XOR]], [[XOR1]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[OR]], 0
-; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[XOR1]], 0
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[X2]], [[Y2]]


We end up simplifying this entire thing on the next iteration.

Do I understand right that this is because this drops the use count of xor1, which enables an optimization of cmp?

We could probably handle this by following a deeper chain inside handleUseCountDecrement(), though I'm not sure that would really be worthwhile. I'm fine with landing this as-is for now.

In any case, it would be good to add a comment to the top of the test to explain why there is the no-verify-fixpoint.

Do I understand right that this is because this drops the use count of xor1, which enables an optimization of cmp?

I'm 100% sure what is unlocked.
It could also be we are hitting some analysis pattern in the new form.

We could probably handle this by following a deeper chain inside handleUseCountDecrement(), though I'm not sure that would really be worthwhile. I'm fine with landing this as-is for now.

In any case, it would be good to add a comment to the top of the test to explain why there is the no-verify-fixpoint.

Kk

Yeah its the creation of new one use. I added a comment explaining.
Think handling in handleUseCountDecrement() requires pretty deep adding. Doubt its worth it. Esp when we run InstCombine multiple times in the real pipeline.

goldsteinn · 2024-04-01T19:11:32Z

llvm/test/Transforms/InstCombine/select.ll

@@ -3698,7 +3698,8 @@ exit:
 define i32 @src_select_xxory_eq0_xorxy_y(i32 %x, i32 %y) {
 ; CHECK-LABEL: @src_select_xxory_eq0_xorxy_y(
 ; CHECK-NEXT:    [[XOR0:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[XOR0]], i32 0, i32 [[Y]]
+; CHECK-NEXT:    [[XOR:%.*]] = select i1 [[XOR0]], i32 [[X]], i32 0
+; CHECK-NEXT:    [[COND:%.*]] = xor i32 [[XOR]], [[Y]]


This should be cleaned up by: #73362

Nevermind, had to cleaned up with bespoke code.

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

PR Link: llvm/llvm-project#87275

nikic

Can you please move the foldSelectValueEquivalence() changes into a separate PR (to land first)?

nikic · 2024-04-10T06:44:25Z

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+    // would lead to an infinite replacement cycle.
+    // If we will be able to evaluate f(Y) to a constant, we can allow undef,
+    // otherwise Y cannot be undef as we might pick different values for undef
+    // in the icmp and in f(Y). Additionally


Cut off sentence.

goldsteinn · 2024-04-10T17:13:48Z

Can you please move the foldSelectValueEquivalence() changes into a separate PR (to land first)?

Done, see: #88298

goldsteinn · 2024-06-04T16:35:19Z

ping

goldsteinn · 2024-06-04T16:35:39Z

ping

NB: this depends on #88298

Two folds unlocked: `(icmp eq/ne (xor x, C0), C1)` -> `(icmp eq/ne x, C2)` `(icmp eq/ne (xor x, y), 0)` -> `(icmp eq/ne x, y)` This fixes regressions assosiated with llvm#87180

goldsteinn · 2024-06-07T04:18:09Z

Rebased, should be all green w/ the selectequivilence patch in.

nikic

LGTM

nikic · 2024-06-07T07:41:52Z

llvm/test/Transforms/InstCombine/icmp-or.ll

@@ -455,7 +455,7 @@ define i1 @icmp_or_xor_2_4_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) {
 ; CHECK-NEXT:    [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = or i64 [[XOR]], [[XOR1]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[OR]], 0
-; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[XOR1]], 0
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[X2]], [[Y2]]


Do I understand right that this is because this drops the use count of xor1, which enables an optimization of cmp?

We could probably handle this by following a deeper chain inside handleUseCountDecrement(), though I'm not sure that would really be worthwhile. I'm fine with landing this as-is for now.

In any case, it would be good to add a comment to the top of the test to explain why there is the no-verify-fixpoint.

PR Link: llvm/llvm-project#87275

dtcxzyw

LGTM.

goldsteinn requested a review from nikic as a code owner April 1, 2024 19:09

llvmbot added the llvm:transforms label Apr 1, 2024

goldsteinn changed the title ~~goldsteinn/icmp eq xor multiuse~~ [InstCombine] Fold (icmp eq/ne (xor x, y), C1) even if multiuse Apr 1, 2024

goldsteinn changed the title ~~[InstCombine] Fold (icmp eq/ne (xor x, y), C1) even if multiuse~~ [InstCombine] Fold (icmp eq/ne (xor x, y), C1) even if multiuse Apr 1, 2024

goldsteinn requested a review from dtcxzyw April 1, 2024 19:10

goldsteinn commented Apr 1, 2024

View reviewed changes

goldsteinn force-pushed the goldsteinn/icmp-eq-xor-multiuse branch 2 times, most recently from 32a4412 to a0cf8df Compare April 4, 2024 05:00

dtcxzyw reviewed Apr 4, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp Outdated Show resolved Hide resolved

goldsteinn force-pushed the goldsteinn/icmp-eq-xor-multiuse branch from a0cf8df to 41ce9ab Compare April 4, 2024 19:29

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Apr 4, 2024

pre-commit: test PR87275

17e8f5c

PR Link: llvm/llvm-project#87275

dtcxzyw mentioned this pull request Apr 4, 2024

pre-commit: test PR87275 dtcxzyw/llvm-opt-benchmark#479

Closed

nikic reviewed Apr 10, 2024

View reviewed changes

goldsteinn mentioned this pull request Apr 10, 2024

[InstCombine] Improve coverage of foldSelectValueEquivalence #88298

Closed

goldsteinn added 2 commits June 6, 2024 20:06

[InstCombine] Add tests for folding (icmp eq/ne (xor x, C0), C1); NFC

8f9dcce

[InstCombine] Fold (icmp eq/ne (xor x, y), C1) even if multiuse

7e70922

Two folds unlocked: `(icmp eq/ne (xor x, C0), C1)` -> `(icmp eq/ne x, C2)` `(icmp eq/ne (xor x, y), 0)` -> `(icmp eq/ne x, y)` This fixes regressions assosiated with llvm#87180

goldsteinn force-pushed the goldsteinn/icmp-eq-xor-multiuse branch from 41ce9ab to 7e70922 Compare June 7, 2024 04:15

nikic approved these changes Jun 7, 2024

View reviewed changes

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Jun 7, 2024

pre-commit: test PR87275

3c79b51

PR Link: llvm/llvm-project#87275

dtcxzyw mentioned this pull request Jun 7, 2024

pre-commit: test PR87275 (New) dtcxzyw/llvm-opt-benchmark#668

Open

dtcxzyw approved these changes Jun 7, 2024

View reviewed changes

goldsteinn closed this in 166c184 Jun 7, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[InstCombine] Fold `(icmp eq/ne (xor x, y), C1)` even if multiuse #87275

[InstCombine] Fold `(icmp eq/ne (xor x, y), C1)` even if multiuse #87275

goldsteinn commented Apr 1, 2024 •

edited

llvmbot commented Apr 1, 2024

goldsteinn Apr 1, 2024

nikic Jun 7, 2024

goldsteinn Jun 7, 2024

goldsteinn Jun 7, 2024

goldsteinn Apr 1, 2024

goldsteinn Apr 4, 2024

nikic left a comment

nikic Apr 10, 2024

goldsteinn commented Apr 10, 2024

goldsteinn commented Jun 4, 2024

goldsteinn commented Jun 4, 2024

goldsteinn commented Jun 7, 2024

nikic left a comment

nikic Jun 7, 2024

dtcxzyw left a comment

[InstCombine] Fold (icmp eq/ne (xor x, y), C1) even if multiuse #87275

[InstCombine] Fold (icmp eq/ne (xor x, y), C1) even if multiuse #87275

Conversation

goldsteinn commented Apr 1, 2024 • edited

llvmbot commented Apr 1, 2024

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

nikic left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

goldsteinn commented Apr 10, 2024

goldsteinn commented Jun 4, 2024

goldsteinn commented Jun 4, 2024

goldsteinn commented Jun 7, 2024

nikic left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

dtcxzyw left a comment

Choose a reason for hiding this comment

[InstCombine] Fold `(icmp eq/ne (xor x, y), C1)` even if multiuse #87275

[InstCombine] Fold `(icmp eq/ne (xor x, y), C1)` even if multiuse #87275

goldsteinn commented Apr 1, 2024 •

edited