[InstCombine] Refactor matchFunnelShift to allow more pattern (NFC) #68474

HaohaiWen · 2023-10-07T09:28:27Z

Current implementation of matchFunnelShift only allows opposite shift pattern. Refactor it to allow more pattern.

llvmbot · 2023-10-07T09:29:34Z

@llvm/pr-subscribers-llvm-transforms

Changes

Current implementation of matchFunnelShift only allows opposite shift pattern. Refactor it to allow more pattern.

Full diff: https://github.com/llvm/llvm-project/pull/68474.diff

1 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (+93-79)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index cbdab3e9c5fb91d..b04e070fd19d7d1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2732,100 +2732,114 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
   // rotate matching code under visitSelect and visitTrunc?
   unsigned Width = Or.getType()->getScalarSizeInBits();
 
-  // First, find an or'd pair of opposite shifts:
-  // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
-  BinaryOperator *Or0, *Or1;
-  if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
-      !match(Or.getOperand(1), m_BinOp(Or1)))
-    return nullptr;
-
-  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
-  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
-      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
-      Or0->getOpcode() == Or1->getOpcode())
+  Instruction *Or0, *Or1;
+  if (!match(Or.getOperand(0), m_Instruction(Or0)) ||
+      !match(Or.getOperand(1), m_Instruction(Or1)))
     return nullptr;
 
-  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
-  if (Or0->getOpcode() == BinaryOperator::LShr) {
-    std::swap(Or0, Or1);
-    std::swap(ShVal0, ShVal1);
-    std::swap(ShAmt0, ShAmt1);
-  }
-  assert(Or0->getOpcode() == BinaryOperator::Shl &&
-         Or1->getOpcode() == BinaryOperator::LShr &&
-         "Illegal or(shift,shift) pair");
-
-  // Match the shift amount operands for a funnel shift pattern. This always
-  // matches a subtraction on the R operand.
-  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
-    // Check for constant shift amounts that sum to the bitwidth.
-    const APInt *LI, *RI;
-    if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
-      if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
-        return ConstantInt::get(L->getType(), *LI);
-
-    Constant *LC, *RC;
-    if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
-        match(L, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
-        match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
-        match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
-      return ConstantExpr::mergeUndefsWith(LC, RC);
-
-    // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
-    // We limit this to X < Width in case the backend re-expands the intrinsic,
-    // and has to reintroduce a shift modulo operation (InstCombine might remove
-    // it after this fold). This still doesn't guarantee that the final codegen
-    // will match this original pattern.
-    if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
-      KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
-      return KnownL.getMaxValue().ult(Width) ? L : nullptr;
-    }
+  bool IsFshl = true; // Sub on LSHR.
+  SmallVector<Value *, 3> FShiftArgs;
 
-    // For non-constant cases, the following patterns currently only work for
-    // rotation patterns.
-    // TODO: Add general funnel-shift compatible patterns.
-    if (ShVal0 != ShVal1)
+  // First, find an or'd pair of opposite shifts:
+  // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
+  if (isa<BinaryOperator>(Or0) && isa<BinaryOperator>(Or1)) {
+    Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
+    if (!match(Or0,
+               m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
+        !match(Or1,
+               m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
+        Or0->getOpcode() == Or1->getOpcode())
       return nullptr;
 
-    // For non-constant cases we don't support non-pow2 shift masks.
-    // TODO: Is it worth matching urem as well?
-    if (!isPowerOf2_32(Width))
-      return nullptr;
+    // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
+    if (Or0->getOpcode() == BinaryOperator::LShr) {
+      std::swap(Or0, Or1);
+      std::swap(ShVal0, ShVal1);
+      std::swap(ShAmt0, ShAmt1);
+    }
+    assert(Or0->getOpcode() == BinaryOperator::Shl &&
+           Or1->getOpcode() == BinaryOperator::LShr &&
+           "Illegal or(shift,shift) pair");
+
+    // Match the shift amount operands for a funnel shift pattern. This always
+    // matches a subtraction on the R operand.
+    auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
+      // Check for constant shift amounts that sum to the bitwidth.
+      const APInt *LI, *RI;
+      if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
+        if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
+          return ConstantInt::get(L->getType(), *LI);
+
+      Constant *LC, *RC;
+      if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
+          match(L,
+                m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+          match(R,
+                m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+          match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
+        return ConstantExpr::mergeUndefsWith(LC, RC);
+
+      // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
+      // We limit this to X < Width in case the backend re-expands the
+      // intrinsic, and has to reintroduce a shift modulo operation (InstCombine
+      // might remove it after this fold). This still doesn't guarantee that the
+      // final codegen will match this original pattern.
+      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
+        KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
+        return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+      }
 
-    // The shift amount may be masked with negation:
-    // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
-    Value *X;
-    unsigned Mask = Width - 1;
-    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
-        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
-      return X;
+      // For non-constant cases, the following patterns currently only work for
+      // rotation patterns.
+      // TODO: Add general funnel-shift compatible patterns.
+      if (ShVal0 != ShVal1)
+        return nullptr;
 
-    // Similar to above, but the shift amount may be extended after masking,
-    // so return the extended value as the parameter for the intrinsic.
-    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
-        match(R, m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
-                       m_SpecificInt(Mask))))
-      return L;
+      // For non-constant cases we don't support non-pow2 shift masks.
+      // TODO: Is it worth matching urem as well?
+      if (!isPowerOf2_32(Width))
+        return nullptr;
 
-    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
-        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
-      return L;
+      // The shift amount may be masked with negation:
+      // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+      Value *X;
+      unsigned Mask = Width - 1;
+      if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
+          match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
+        return X;
+
+      // Similar to above, but the shift amount may be extended after masking,
+      // so return the extended value as the parameter for the intrinsic.
+      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+          match(R,
+                m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
+                      m_SpecificInt(Mask))))
+        return L;
+
+      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+          match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
+        return L;
 
-    return nullptr;
-  };
+      return nullptr;
+    };
 
-  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
-  bool IsFshl = true; // Sub on LSHR.
-  if (!ShAmt) {
-    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
-    IsFshl = false; // Sub on SHL.
+    Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
+    if (!ShAmt) {
+      ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
+      IsFshl = false; // Sub on SHL.
+    }
+    if (!ShAmt)
+      return nullptr;
+
+    FShiftArgs = {ShVal0, ShVal1, ShAmt};
   }
-  if (!ShAmt)
+
+  if (FShiftArgs.empty())
     return nullptr;
 
   Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
   Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
-  return CallInst::Create(F, {ShVal0, ShVal1, ShAmt});
+  return CallInst::Create(F, FShiftArgs);
 }
 
 /// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.

nikic

Missing patch description, alive2 proofs and tests.

HaohaiWen · 2023-10-07T09:38:57Z

Missing patch description, alive2 proofs and tests.

This is a NFC patch. No new tests are needed.

nikic · 2023-10-07T09:45:30Z

This is a NFC patch. No new tests are needed.

Sorry, I missed the NFC in the title. Here is a diff without whitespace changes that shows that this is mostly just an indentation change: https://github.com/llvm/llvm-project/pull/68474/files?diff=split&w=1

Please put up the non-NFC patch you want to base on this. It's hard to say whether this makes sense without the dependent patch.

HaohaiWen · 2023-10-08T03:16:49Z

This is a NFC patch. No new tests are needed.

Sorry, I missed the NFC in the title. Here is a diff without whitespace changes that shows that this is mostly just an indentation change: https://github.com/llvm/llvm-project/pull/68474/files?diff=split&w=1

Please put up the non-NFC patch you want to base on this. It's hard to say whether this makes sense without the dependent patch.

non-NFC patch: 68ab662

goldsteinn · 2023-10-13T04:54:15Z

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

-    if (ShVal0 != ShVal1)
+  // First, find an or'd pair of opposite shifts:
+  // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
+  if (isa<BinaryOperator>(Or0) && isa<BinaryOperator>(Or1)) {


What is the rationale for this check? Isn't it covered by the matching of logical shifts?

But either way, if it is indeed useful, can you invert it and early return to keep nested scopes down.

We need to match either
or (shl %x, 16) (shr %y, 16)
or (shl %x, 16) (zext %y)

if (isa(Or0) && isa(Or1)) is used to filter in first case and that is the original code matched.
68ab662 filters in second case.
If we want to further reduce nested scopes. We may need to split this function to several static functions to match each case, but this requires to pass some args and may not be concise.

This code seems new? Not that its wrong or anything.

But either way don't understand why it can't be an early return. If we don't enter the if FShiftArgs will be null and we will return anyways.

This code seems new? Not that its wrong or anything.

I added this if statement in this patch.

It can be early returned in this patch, but I'd like to add another else if on 5b3b1bb. If I removed this if statement, then I need to add the if back and indent the code in the if statement on #68502. This make the diff not clear.

Okay fair enough.

goldsteinn · 2023-10-15T18:24:46Z

LGTM. @nikic you still want changes?

HaohaiWen · 2023-10-17T00:36:00Z

If no objection, I'll merge it tomorrow.

goldsteinn · 2023-10-19T03:44:34Z

Rebase you other patch then I can review.

HaohaiWen · 2023-10-19T06:07:47Z

Rebase you other patch then I can review.

Already rebased. Please take a look #68502

[InstCombine] Refactor matchFunnelShift to allow more pattern (NFC)

5b3b1bb

Current implementation of matchFunnelShift only allows opposite shift pattern. Refactor it to allow more pattern.

HaohaiWen requested a review from nikic as a code owner October 7, 2023 09:28

llvmbot added the llvm:transforms label Oct 7, 2023

HaohaiWen requested a review from williamweixiao October 7, 2023 09:30

nikic requested changes Oct 7, 2023

View reviewed changes

Merge remote-tracking branch 'origin/main' into fshl

3d85fa3

HaohaiWen mentioned this pull request Oct 13, 2023

[InstCombine] Convert or concat to fshl if opposite or concat exists #68502

Merged

HaohaiWen requested a review from goldsteinn October 13, 2023 01:07

goldsteinn reviewed Oct 13, 2023

View reviewed changes

HaohaiWen merged commit 8ff3e4f into llvm:main Oct 19, 2023
3 checks passed

HaohaiWen deleted the fshl branch October 19, 2023 01:06

madhur13490 mentioned this pull request Oct 20, 2023

Revert commit ba8565fbcb975e2d067ce3ae5a7dbaae4953edd3 madhur13490/llvm-project#3

Closed

banach-space mentioned this pull request Oct 24, 2023

[mlir][vector] Add scalable vectors to tests for vector.contract #70039

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[InstCombine] Refactor matchFunnelShift to allow more pattern (NFC) #68474

[InstCombine] Refactor matchFunnelShift to allow more pattern (NFC) #68474

HaohaiWen commented Oct 7, 2023

llvmbot commented Oct 7, 2023

nikic left a comment

HaohaiWen commented Oct 7, 2023

nikic commented Oct 7, 2023

HaohaiWen commented Oct 8, 2023

goldsteinn Oct 13, 2023

HaohaiWen Oct 13, 2023

goldsteinn Oct 13, 2023

HaohaiWen Oct 15, 2023

goldsteinn Oct 15, 2023

goldsteinn commented Oct 15, 2023

HaohaiWen commented Oct 17, 2023

goldsteinn commented Oct 19, 2023

HaohaiWen commented Oct 19, 2023

[InstCombine] Refactor matchFunnelShift to allow more pattern (NFC) #68474

[InstCombine] Refactor matchFunnelShift to allow more pattern (NFC) #68474

Conversation

HaohaiWen commented Oct 7, 2023

llvmbot commented Oct 7, 2023

nikic left a comment

Choose a reason for hiding this comment

HaohaiWen commented Oct 7, 2023

nikic commented Oct 7, 2023

HaohaiWen commented Oct 8, 2023

goldsteinn Oct 13, 2023

Choose a reason for hiding this comment

HaohaiWen Oct 13, 2023

Choose a reason for hiding this comment

goldsteinn Oct 13, 2023

Choose a reason for hiding this comment

HaohaiWen Oct 15, 2023

Choose a reason for hiding this comment

goldsteinn Oct 15, 2023

Choose a reason for hiding this comment

goldsteinn commented Oct 15, 2023

HaohaiWen commented Oct 17, 2023

goldsteinn commented Oct 19, 2023

HaohaiWen commented Oct 19, 2023