[SDAG] fold bitwise logic with shifted operands

LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z

https://alive2.llvm.org/ce/z/QmR9rR

This is a reassociation + factoring fold. The common shift operation is moved after a bitwise logic op on 2 input operands. We get simpler cases of these patterns in IR, but I suspect we would miss all of these exact tests in IR too. We also handle the simpler form of this plus several other folds in DAGCombiner::hoistLogicOpWithSameOpcodeHands().

This is a partial implementation of a transform suggested in D111530 (only handles 'or' bitwise logic as a first step - need to stamp out more tests for other opcodes). Several of the same tests added for D111530 are altered here (but not fully optimized). I'm not sure yet if this would help/hinder that patch, but this should be an improvement for all tests added with ecf606c since it removes a shift operation in those examples.

Differential Revision: https://reviews.llvm.org/D120516
llvm · Feb 27, 2022 · acb96ff · acb96ff
1 parent beb92af
commit acb96ff
Show file tree

Hide file tree

Showing 7 changed files with 231 additions and 246 deletions.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6696,6 +6696,52 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
   return SDValue();
 }
 
+/// Fold an OR node N whose operands are LogicOp (itself an OR with a shifted
+/// operand) and ShiftOp (the same kind of shift by the same amount Y) so that
+/// only one shift remains. Returns an empty SDValue if nothing matches. E.g.:
+/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
+static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
+                                 SelectionDAG &DAG) {
+  // OR only for now. TODO: This should be extended to allow AND/XOR.
+  assert(N->getOpcode() == ISD::OR && "Expected bitwise logic operation");
+
+  if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
+    return SDValue(); // Both operands of N must be single-use.
+
+  // LogicOp must use N's opcode; ShiftOp must be one of SHL, SRL, or SRA.
+  unsigned LogicOpcode = N->getOpcode();
+  unsigned ShiftOpcode = ShiftOp.getOpcode();
+  if (LogicOp.getOpcode() != LogicOpcode ||
+      !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
+        ShiftOpcode == ISD::SRA))
+    return SDValue();
+
+  // Match another shift op inside the first logic operand. Handle both commuted
+  // possibilities. NOTE(review): the inner shift is not checked for one use.
+  // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
+  // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
+  SDValue X1 = ShiftOp.getOperand(0); // Value shifted by the outer shift.
+  SDValue Y = ShiftOp.getOperand(1);  // Shift amount both shifts must share.
+  SDValue X0, Z; // X0: value shifted inside LogicOp; Z: LogicOp's other operand.
+  if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
+      LogicOp.getOperand(0).getOperand(1) == Y) {
+    X0 = LogicOp.getOperand(0).getOperand(0);
+    Z = LogicOp.getOperand(1);
+  } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
+             LogicOp.getOperand(1).getOperand(1) == Y) {
+    X0 = LogicOp.getOperand(1).getOperand(0);
+    Z = LogicOp.getOperand(0);
+  } else {
+    return SDValue(); // Neither operand of LogicOp is a matching shift by Y.
+  }
+
+  EVT VT = N->getValueType(0); // All new nodes use N's result type.
+  SDLoc DL(N);
+  SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1); // LOGIC X0, X1
+  SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y); // SH ..., Y
+  return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z); // LOGIC (SH ...), Z
+}
+
 /// OR combines for which the commuted variant will be tried as well.
 static SDValue visitORCommutative(
     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
@@ -6710,6 +6756,9 @@ static SDValue visitORCommutative(
       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
   }
 
+  if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
+    return R;
+
   auto peekThroughZext = [](SDValue V) {
     if (V->getOpcode() == ISD::ZERO_EXTEND)
       return V->getOperand(0);

diff --git a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
@@ -12,8 +12,8 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    adds x0, x0, #1
 ; CHECK-NEXT:    adcs x1, x1, xzr
-; CHECK-NEXT:    extr x8, x1, x0, #60
-; CHECK-NEXT:    orr x8, x8, x1, lsr #60
+; CHECK-NEXT:    orr x8, x0, x1
+; CHECK-NEXT:    extr x8, x1, x8, #60
 ; CHECK-NEXT:    cbnz x8, .LBB0_1
 ; CHECK-NEXT:  // %bb.2: // %exit
 ; CHECK-NEXT:    ret
@@ -32,8 +32,8 @@ exit:
 define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_srl_eq_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x1, x0, #17
-; CHECK-NEXT:    orr x8, x8, x1, lsr #17
+; CHECK-NEXT:    orr x8, x0, x1
+; CHECK-NEXT:    extr x8, x1, x8, #17
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -45,8 +45,8 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
 define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_srl_ne_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x1, x0, #17
-; CHECK-NEXT:    orr x8, x8, x1, lsr #17
+; CHECK-NEXT:    orr x8, x0, x1
+; CHECK-NEXT:    extr x8, x1, x8, #17
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -58,8 +58,8 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
 define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_eq_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x1, x0, #47
-; CHECK-NEXT:    orr x8, x8, x0, lsl #17
+; CHECK-NEXT:    orr x8, x1, x0
+; CHECK-NEXT:    extr x8, x8, x0, #47
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -71,8 +71,8 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
 define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_ne_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x1, x0, #47
-; CHECK-NEXT:    orr x8, x8, x0, lsl #17
+; CHECK-NEXT:    orr x8, x1, x0
+; CHECK-NEXT:    extr x8, x8, x0, #47
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -106,8 +106,8 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
 define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x0, x1, #47
-; CHECK-NEXT:    orr x8, x8, x1, lsl #17
+; CHECK-NEXT:    orr x8, x0, x1
+; CHECK-NEXT:    extr x8, x8, x1, #47
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -142,12 +142,12 @@ define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind {
 define i1 @opt_setcc_shl_ne_zero_i256(i256 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_ne_zero_i256:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x3, x2, #47
-; CHECK-NEXT:    extr x9, x2, x1, #47
+; CHECK-NEXT:    orr x8, x2, x0
+; CHECK-NEXT:    extr x9, x3, x2, #47
 ; CHECK-NEXT:    extr x10, x1, x0, #47
-; CHECK-NEXT:    orr x9, x9, x0, lsl #17
-; CHECK-NEXT:    orr x8, x10, x8
-; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    extr x8, x8, x1, #47
+; CHECK-NEXT:    orr x9, x10, x9
+; CHECK-NEXT:    orr x8, x8, x9
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/logic-shift.ll b/llvm/test/CodeGen/AArch64/logic-shift.ll
@@ -4,13 +4,11 @@
 define i8 @or_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: or_lshr_commute0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    orr w8, w0, w1
 ; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT:    and w9, w1, #0xff
+; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    lsr w8, w8, w2
-; CHECK-NEXT:    lsr w9, w9, w2
-; CHECK-NEXT:    orr w8, w8, w3
-; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    orr w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = lshr i8 %x0, %y
   %sh2 = lshr i8 %x1, %y
@@ -22,10 +20,9 @@ define i8 @or_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 define i32 @or_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 ; CHECK-LABEL: or_lshr_commute1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, w2
-; CHECK-NEXT:    lsr w9, w1, w2
-; CHECK-NEXT:    orr w8, w3, w8
-; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    orr w8, w0, w1
+; CHECK-NEXT:    lsr w8, w8, w2
+; CHECK-NEXT:    orr w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = lshr i32 %x0, %y
   %sh2 = lshr i32 %x1, %y
@@ -38,10 +35,9 @@ define <8 x i16> @or_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <
 ; CHECK-LABEL: or_lshr_commute2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg v2.8h, v2.8h
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ushl v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    ushl v1.8h, v1.8h, v2.8h
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v3.16b
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %sh1 = lshr <8 x i16> %x0, %y
   %sh2 = lshr <8 x i16> %x1, %y
@@ -54,10 +50,9 @@ define <2 x i64> @or_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <
 ; CHECK-LABEL: or_lshr_commute3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg v2.2d, v2.2d
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ushl v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    ushl v1.2d, v1.2d, v2.2d
-; CHECK-NEXT:    orr v0.16b, v3.16b, v0.16b
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    orr v0.16b, v0.16b, v3.16b
 ; CHECK-NEXT:    ret
   %sh1 = lshr <2 x i64> %x0, %y
   %sh2 = lshr <2 x i64> %x1, %y
@@ -69,13 +64,11 @@ define <2 x i64> @or_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <
 define i16 @or_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK-LABEL: or_ashr_commute0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    orr w8, w0, w1
 ; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT:    sxth w9, w1
+; CHECK-NEXT:    sxth w8, w8
 ; CHECK-NEXT:    asr w8, w8, w2
-; CHECK-NEXT:    asr w9, w9, w2
-; CHECK-NEXT:    orr w8, w8, w3
-; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    orr w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = ashr i16 %x0, %y
   %sh2 = ashr i16 %x1, %y
@@ -87,10 +80,9 @@ define i16 @or_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 define i64 @or_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
 ; CHECK-LABEL: or_ashr_commute1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    asr x8, x0, x2
-; CHECK-NEXT:    asr x9, x1, x2
-; CHECK-NEXT:    orr x8, x3, x8
-; CHECK-NEXT:    orr x0, x8, x9
+; CHECK-NEXT:    orr x8, x0, x1
+; CHECK-NEXT:    asr x8, x8, x2
+; CHECK-NEXT:    orr x0, x8, x3
 ; CHECK-NEXT:    ret
   %sh1 = ashr i64 %x0, %y
   %sh2 = ashr i64 %x1, %y
@@ -103,10 +95,9 @@ define <4 x i32> @or_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <
 ; CHECK-LABEL: or_ashr_commute2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg v2.4s, v2.4s
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    sshl v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    sshl v1.4s, v1.4s, v2.4s
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v3.16b
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %sh1 = ashr <4 x i32> %x0, %y
   %sh2 = ashr <4 x i32> %x1, %y
@@ -119,10 +110,9 @@ define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <
 ; CHECK-LABEL: or_ashr_commute3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg v2.16b, v2.16b
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    sshl v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    sshl v1.16b, v1.16b, v2.16b
-; CHECK-NEXT:    orr v0.16b, v3.16b, v0.16b
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    orr v0.16b, v0.16b, v3.16b
 ; CHECK-NEXT:    ret
   %sh1 = ashr <16 x i8> %x0, %y
   %sh2 = ashr <16 x i8> %x1, %y
@@ -134,10 +124,9 @@ define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <
 define i32 @or_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 ; CHECK-LABEL: or_shl_commute0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, w2
-; CHECK-NEXT:    lsl w9, w1, w2
-; CHECK-NEXT:    orr w8, w8, w3
-; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    orr w8, w0, w1
+; CHECK-NEXT:    lsl w8, w8, w2
+; CHECK-NEXT:    orr w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = shl i32 %x0, %y
   %sh2 = shl i32 %x1, %y
@@ -149,11 +138,10 @@ define i32 @or_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
 define i8 @or_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: or_shl_commute1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, w1
 ; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT:    lsl w8, w0, w2
-; CHECK-NEXT:    lsl w9, w1, w2
-; CHECK-NEXT:    orr w8, w3, w8
-; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    lsl w8, w8, w2
+; CHECK-NEXT:    orr w0, w8, w3
 ; CHECK-NEXT:    ret
   %sh1 = shl i8 %x0, %y
   %sh2 = shl i8 %x1, %y
@@ -165,10 +153,9 @@ define i8 @or_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 define <2 x i64> @or_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
 ; CHECK-LABEL: or_shl_commute2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ushl v0.2d, v0.2d, v2.2d
-; CHECK-NEXT:    ushl v1.2d, v1.2d, v2.2d
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v3.16b
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %sh1 = shl <2 x i64> %x0, %y
   %sh2 = shl <2 x i64> %x1, %y
@@ -180,10 +167,9 @@ define <2 x i64> @or_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2
 define <8 x i16> @or_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
 ; CHECK-LABEL: or_shl_commute3:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ushl v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    ushl v1.8h, v1.8h, v2.8h
-; CHECK-NEXT:    orr v0.16b, v3.16b, v0.16b
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    orr v0.16b, v0.16b, v3.16b
 ; CHECK-NEXT:    ret
   %sh1 = shl <8 x i16> %x0, %y
   %sh2 = shl <8 x i16> %x1, %y
@@ -192,6 +178,8 @@ define <8 x i16> @or_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8
   ret <8 x i16> %r
 }
 
+; negative test - mismatched shift opcodes
+
 define i64 @or_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
 ; CHECK-LABEL: or_mix_shr:
 ; CHECK:       // %bb.0:
@@ -207,6 +195,8 @@ define i64 @or_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
   ret i64 %r
 }
 
+; negative test - mixed shift amounts
+
 define i64 @or_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
 ; CHECK-LABEL: or_lshr_mix_shift_amount:
 ; CHECK:       // %bb.0:
@@ -222,6 +212,8 @@ define i64 @or_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
   ret i64 %r
 }
 
+; negative test - mismatched logic opcodes
+
 define i64 @mix_logic_lshr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
 ; CHECK-LABEL: mix_logic_lshr:
 ; CHECK:       // %bb.0: