[InstCombine] Fold ((X << nuw Z) binop nuw Y) >>u Z --> X binop nuw (Y >>u Z) #88193

AtariDreams · 2024-04-09T20:50:16Z

Proofs:
https://alive2.llvm.org/ce/z/N9dRzP
https://alive2.llvm.org/ce/z/Xrpc-Y
https://alive2.llvm.org/ce/z/BagBM6
https://alive2.llvm.org/ce/z/k2dtYZ

llvmbot · 2024-04-09T20:50:49Z

@llvm/pr-subscribers-llvm-transforms

Author: AtariDreams (AtariDreams)

Changes

I also added mul nsw/nuw 3, div 2 since this was the canonical version of ((x << 1) + x) / 2, which is a specific expression which canonicalization causes the InstCombine to miss it.

Proof: https://alive2.llvm.org/ce/z/kDVTiL

Full diff: https://github.com/llvm/llvm-project/pull/88193.diff

2 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp (+50-2)
(modified) llvm/test/Transforms/InstCombine/lshr.ll (+56)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 95aa2119e2d88b..1ae98be7f46d21 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1267,6 +1267,19 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
       match(Op1, m_SpecificIntAllowUndef(BitWidth - 1)))
     return new ZExtInst(Builder.CreateIsNotNeg(X, "isnotneg"), Ty);
 
+  // Special Case:
+  // if both the add and the shift are nuw, we can omit the AND entirely
+  // ((X << Y) nuw + Z nuw) >>u Z --> (X + (Y >>u Z))
+  Value *Y;
+  if (match(Op0, m_OneUse(m_c_NUWAdd((m_NUWShl(m_Value(X), m_Specific(Op1))),
+                                     m_Value(Y))))) {
+    Value *NewLshr = Builder.CreateLShr(Y, Op1, "", I.isExact());
+    auto *newAdd = BinaryOperator::CreateNUWAdd(NewLshr, X);
+    if (auto *Op0Bin = cast<BinaryOperator>(Op0))
+      newAdd->setHasNoSignedWrap(Op0Bin->hasNoSignedWrap());
+    return newAdd;
+  }
+
   if (match(Op1, m_APInt(C))) {
     unsigned ShAmtC = C->getZExtValue();
     auto *II = dyn_cast<IntrinsicInst>(Op0);
@@ -1283,7 +1296,6 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
       return new ZExtInst(Cmp, Ty);
     }
 
-    Value *X;
     const APInt *C1;
     if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
       if (C1->ult(ShAmtC)) {
@@ -1328,7 +1340,7 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
     // ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C)
     // TODO: Consolidate with the more general transform that starts from shl
     //       (the shifts are in the opposite order).
-    Value *Y;
+
     if (match(Op0,
               m_OneUse(m_c_Add(m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))),
                                m_Value(Y))))) {
@@ -1450,9 +1462,25 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
           NewMul->setHasNoSignedWrap(true);
           return NewMul;
         }
+
+        // Special case:
+        // lshr nuw (mul (X, 3), 1) -> add nuw nsw (X, lshr(X, 1)
+        if (ShAmtC == 1 && MulC->getZExtValue() == 3) {
+          auto *NewAdd = BinaryOperator::CreateNUWAdd(
+              X,
+              Builder.CreateLShr(X, ConstantInt::get(Ty, 1), "", I.isExact()));
+          NewAdd->setHasNoSignedWrap(true);
+          return NewAdd;
+        }
       }
     }
 
+    // // lshr nsw (mul (X, 3), 1) -> add nsw (X, lshr(X, 1)
+    if (match(Op0, m_OneUse(m_NSWMul(m_Value(X), m_SpecificInt(3)))) &&
+        ShAmtC == 1)
+      return BinaryOperator::CreateNSWAdd(
+          X, Builder.CreateLShr(X, ConstantInt::get(Ty, 1), "", I.isExact()));
+
     // Try to narrow bswap.
     // In the case where the shift amount equals the bitwidth difference, the
     // shift is eliminated.
@@ -1656,6 +1684,26 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
       if (match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
         return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty);
     }
+
+    // Special case: ashr nuw (mul (X, 3), 1) -> add nuw nsw (X, lshr(X, 1)
+    if (match(Op0, m_OneUse(m_NSWMul(m_Value(X), m_SpecificInt(3)))) &&
+        ShAmt == 1) {
+      Value *Shift;
+      if (auto *Op0Bin = cast<BinaryOperator>(Op0)) {
+        if (Op0Bin->hasNoUnsignedWrap())
+          // We can use lshr if the mul is nuw and nsw
+          Shift =
+              Builder.CreateLShr(X, ConstantInt::get(Ty, 1), "", I.isExact());
+        else
+          Shift =
+              Builder.CreateAShr(X, ConstantInt::get(Ty, 1), "", I.isExact());
+
+        auto *NewAdd = BinaryOperator::CreateNSWAdd(X, Shift);
+        NewAdd->setHasNoUnsignedWrap(Op0Bin->hasNoUnsignedWrap());
+
+        return NewAdd;
+      }
+    }
   }
 
   const SimplifyQuery Q = SQ.getWithInstruction(&I);
diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll
index 02c2bbc2819b8c..3124d05248c2eb 100644
--- a/llvm/test/Transforms/InstCombine/lshr.ll
+++ b/llvm/test/Transforms/InstCombine/lshr.ll
@@ -360,8 +360,64 @@ define <3 x i14> @mul_splat_fold_vec(<3 x i14> %x) {
   ret <3 x i14> %t
 }
 
+define i32 @mul_times_3_div_2 (i32 %x) {
+; CHECK-LABEL: @mul_times_3_div_2(
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1:%.*]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %2 = mul nsw nuw i32 %x, 3
+  %3 = lshr i32 %2, 1
+  ret i32 %3
+}
+
+  define i32 @shl_add_lshr (i32 %x, i32 %c, i32 %y) {
+; CHECK-LABEL: @shl_add_lshr(
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact i32 [[TMP2:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+  %2 = shl nuw i32 %x, %c
+  %3 = add nsw nuw i32 %2, %y
+  %4 = lshr exact i32 %3, %c
+  ret i32 %4
+}
+
+  define i32 @ashr_mul_times_3_div_2 (i32 %0) {
+; CHECK-LABEL: @ashr_mul_times_3_div_2(
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]]
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %2 = mul nsw nuw i32 %0, 3
+  %3 = ashr i32 %2, 1
+  ret i32 %3
+}
+
+define i32 @ashr_mul_times_3_div_2_exact (i32 %0) {
+; CHECK-LABEL: @ashr_mul_times_3_div_2_exact(
+; CHECK-NEXT:    [[TMP3:%.*]] = ashr exact i32 [[TMP2:%.*]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+  %2 = mul nsw i32 %0, 3
+  %3 = ashr exact i32 %2, 1
+  ret i32 %3
+}
+
 ; Negative test
 
+define i32 @mul_times_3_div_2_no_nsw (i32 %x) {
+; CHECK-LABEL: @mul_times_3_div_2_no_nsw(
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 1
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %2 = mul i32 %x, 3
+  %3 = lshr i32 %2, 1
+  ret i32 %3
+}
+
 define i32 @mul_splat_fold_wrong_mul_const(i32 %x) {
 ; CHECK-LABEL: @mul_splat_fold_wrong_mul_const(
 ; CHECK-NEXT:    [[M:%.*]] = mul nuw i32 [[X:%.*]], 65538

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

AtariDreams · 2024-04-19T14:36:13Z

@nikic What do you think about this?

AtariDreams · 2024-04-21T12:52:39Z

@arsenm What do you think about this PR?

llvm/test/Transforms/InstCombine/lshr.ll

AtariDreams · 2024-04-21T22:12:07Z

@arsenm This is ready for review and merge! (I don't have commit perms)

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

AtariDreams · 2024-05-01T12:28:55Z

@arsenm I removed the constant specific stuff and moved a generalization of that to another PR.

AtariDreams · 2024-05-07T15:23:57Z

Okay it is resolved now, and I added new tests @dtcxzyw, I apologize for the inconvenience.

dtcxzyw · 2024-05-07T15:43:17Z

@AtariDreams It doesn't modify anything :( https://github.com/llvm/llvm-project/compare/c8214019d1da25bb8211107e36461f9c1e7c764a..cdabaae43b855a47611a3a02ab2882f1dcd222dd

Don't rebase your patch if there is no conflict to be resolved.
See https://llvm.org/docs/GitHub.html#rebasing-pull-requests-and-force-pushes

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

dtcxzyw

LGTM.

Please don't force-push in this PR. The notifications flood my mailbox :(

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

llvm/test/Transforms/InstCombine/lshr.ll

…Y >>u Z) Proofs: https://alive2.llvm.org/ce/z/N9dRzP https://alive2.llvm.org/ce/z/Xrpc-Y https://alive2.llvm.org/ce/z/k2dtYZ

AtariDreams · 2024-05-07T19:13:38Z

@arsenm I do not have commit perms by the way.

…t (NFC) Fixed test for llvm#88193

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

It is inaccurate and needs to be corrected.

…t (NFC) (#91413) Fixed test for #88193

AtariDreams requested a review from nikic as a code owner April 9, 2024 20:50

llvmbot added the llvm:transforms label Apr 9, 2024

AtariDreams force-pushed the times3 branch from 60e9b96 to 8967469 Compare April 9, 2024 20:57

goldsteinn reviewed Apr 9, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp Outdated Show resolved Hide resolved

goldsteinn reviewed Apr 9, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp Outdated Show resolved Hide resolved

AtariDreams force-pushed the times3 branch 5 times, most recently from 0f47cfd to 906aa22 Compare April 11, 2024 01:37

AtariDreams requested a review from goldsteinn April 21, 2024 01:19

AtariDreams force-pushed the times3 branch from 906aa22 to 0fe7aa2 Compare April 21, 2024 01:20

AtariDreams force-pushed the times3 branch from 9faf626 to 59ca3c2 Compare April 21, 2024 13:04

arsenm added the llvm:instcombine label Apr 21, 2024

arsenm reviewed Apr 21, 2024

View reviewed changes

AtariDreams force-pushed the times3 branch 2 times, most recently from f15664c to 860242d Compare April 21, 2024 21:57

AtariDreams force-pushed the times3 branch from dd68760 to d5194aa Compare April 21, 2024 23:37

AtariDreams requested a review from arsenm April 22, 2024 00:22

AtariDreams force-pushed the times3 branch from d5194aa to fe6cf6f Compare April 22, 2024 14:17

arsenm reviewed Apr 25, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp Outdated Show resolved Hide resolved

AtariDreams force-pushed the times3 branch from fe6cf6f to 1f244c9 Compare April 26, 2024 22:17

AtariDreams requested a review from arsenm April 26, 2024 22:18

AtariDreams force-pushed the times3 branch 2 times, most recently from dde583e to 118c473 Compare April 30, 2024 05:01

AtariDreams force-pushed the times3 branch from 0223aa0 to c821401 Compare May 7, 2024 15:22

AtariDreams force-pushed the times3 branch from c821401 to cdabaae Compare May 7, 2024 15:30

goldsteinn reviewed May 7, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp Show resolved Hide resolved

goldsteinn reviewed May 7, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp Show resolved Hide resolved

dtcxzyw approved these changes May 7, 2024

View reviewed changes

AtariDreams force-pushed the times3 branch from bab0230 to e7f1124 Compare May 7, 2024 15:59

AtariDreams added 2 commits May 7, 2024 12:34

[InstCombine] Pre-commit tests (NFC)

ae4ad96

[InstCombine] Fold ((X << nuw Z) binop nuw Y) >>u Z --> X binop nuw (…

fb94d49

…Y >>u Z) Proofs: https://alive2.llvm.org/ce/z/N9dRzP https://alive2.llvm.org/ce/z/Xrpc-Y https://alive2.llvm.org/ce/z/k2dtYZ

AtariDreams force-pushed the times3 branch from e7f1124 to fb94d49 Compare May 7, 2024 16:34

arsenm approved these changes May 7, 2024

View reviewed changes

AtariDreams requested a review from goldsteinn May 7, 2024 19:15

arsenm merged commit 1e36c96 into llvm:main May 7, 2024
3 of 4 checks passed

AtariDreams deleted the times3 branch May 7, 2024 19:24

AtariDreams mentioned this pull request May 8, 2024

[InstCombine] Thwart complexity-based canonicalization in shl-add test (NFC) #91413

Merged

AtariDreams added a commit to AtariDreams/llvm-project that referenced this pull request May 8, 2024

[InstCombine] Thwart complexity-based canonicalization in shl-add tes…

fcb3c4c

…t (NFC) Fixed test for llvm#88193

dtcxzyw reviewed May 8, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp Show resolved Hide resolved

AtariDreams added a commit to AtariDreams/llvm-project that referenced this pull request May 8, 2024

[InstCombine] Fix comment from llvm#88193

2bf5a27

It is inaccurate and needs to be corrected.

AtariDreams added a commit to AtariDreams/llvm-project that referenced this pull request May 8, 2024

[InstCombine] Fix comment from llvm#88193 (NFC)

556c847

It is inaccurate and needs to be corrected.

nikic pushed a commit that referenced this pull request May 9, 2024

[InstCombine] Fix comment from #88193 (NFC) (#91427)

409ff97

It is inaccurate and needs to be corrected.

nikic pushed a commit that referenced this pull request May 9, 2024

[InstCombine] Thwart complexity-based canonicalization in shl-add tes…

ba5170f

…t (NFC) (#91413) Fixed test for #88193

nikic mentioned this pull request May 14, 2024

[InstCombine] Fold X * (2^N + 1) >> N -> X + X >> N, or directly to X if X >> N is 0 #90295

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[InstCombine] Fold ((X << nuw Z) binop nuw Y) >>u Z --> X binop nuw (Y >>u Z) #88193

[InstCombine] Fold ((X << nuw Z) binop nuw Y) >>u Z --> X binop nuw (Y >>u Z) #88193

AtariDreams commented Apr 9, 2024 •

edited

llvmbot commented Apr 9, 2024

AtariDreams commented Apr 19, 2024

AtariDreams commented Apr 21, 2024

AtariDreams commented Apr 21, 2024

AtariDreams commented May 1, 2024

AtariDreams commented May 7, 2024

dtcxzyw commented May 7, 2024

dtcxzyw left a comment

AtariDreams commented May 7, 2024

[InstCombine] Fold ((X << nuw Z) binop nuw Y) >>u Z --> X binop nuw (Y >>u Z) #88193

[InstCombine] Fold ((X << nuw Z) binop nuw Y) >>u Z --> X binop nuw (Y >>u Z) #88193

Conversation

AtariDreams commented Apr 9, 2024 • edited

llvmbot commented Apr 9, 2024

AtariDreams commented Apr 19, 2024

AtariDreams commented Apr 21, 2024

AtariDreams commented Apr 21, 2024

AtariDreams commented May 1, 2024

AtariDreams commented May 7, 2024

dtcxzyw commented May 7, 2024

dtcxzyw left a comment

Choose a reason for hiding this comment

AtariDreams commented May 7, 2024

AtariDreams commented Apr 9, 2024 •

edited