Skip to content

Commit

Permalink
[SDAG] fold bitwise logic with shifted operands
Browse files Browse the repository at this point in the history
LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z

https://alive2.llvm.org/ce/z/QmR9rR

This is a reassociation + factoring fold. The common shift operation is moved
after a bitwise logic op on 2 input operands.
We get simpler cases of these patterns in IR, but I suspect we would miss all
of these exact tests in IR too. We also handle the simpler form of this plus
several other folds in DAGCombiner::hoistLogicOpWithSameOpcodeHands().

This is a partial implementation of a transform suggested in D111530
(only handles 'or' bitwise logic as a first step - need to stamp out more
tests for other opcodes).
Several of the same tests added for D111530 are altered here (but not
fully optimized). I'm not sure yet if this would help/hinder that patch,
but this should be an improvement for all tests added with ecf606c
since it removes a shift operation in those examples.

Differential Revision: https://reviews.llvm.org/D120516
  • Loading branch information
rotateright committed Feb 27, 2022
1 parent beb92af commit acb96ff
Show file tree
Hide file tree
Showing 7 changed files with 231 additions and 246 deletions.
49 changes: 49 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -6696,6 +6696,52 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
return SDValue();
}

/// Reassociate and factor a bitwise logic node whose operand tree contains two
/// values shifted by the same shift opcode and the same shift amount:
///   LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
/// For example:
///   ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
///
/// \p N is the outer logic node, \p LogicOp is the operand of \p N that is
/// tested for being the inner logic op, and \p ShiftOp is the operand of \p N
/// that is tested for being a shift. Returns the replacement value on success
/// or an empty SDValue if the pattern does not match.
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
                                 SelectionDAG &DAG) {
  // TODO: This should be extended to allow AND/XOR.
  assert(N->getOpcode() == ISD::OR && "Expected bitwise logic operation");

  // Bail out unless each of the two operands has exactly one use.
  if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
    return SDValue();

  // The inner logic op must use the same opcode as the outer one.
  const unsigned LogicOpcode = N->getOpcode();
  if (LogicOp.getOpcode() != LogicOpcode)
    return SDValue();

  // The other operand must be one of the three plain shift opcodes.
  const unsigned ShiftOpcode = ShiftOp.getOpcode();
  switch (ShiftOpcode) {
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
    break;
  default:
    return SDValue();
  }

  SDValue X1 = ShiftOp.getOperand(0);
  SDValue Y = ShiftOp.getOperand(1);

  // Look for a second shift (same opcode, same amount Y) among the operands of
  // the inner logic op. Operand 0 is tried before operand 1, which covers both
  // commuted forms:
  // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
  // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
  for (unsigned Idx = 0; Idx != 2; ++Idx) {
    SDValue InnerShift = LogicOp.getOperand(Idx);
    if (InnerShift.getOpcode() != ShiftOpcode ||
        !(InnerShift.getOperand(1) == Y))
      continue;

    SDValue X0 = InnerShift.getOperand(0);
    SDValue Z = LogicOp.getOperand(1 - Idx);

    // Combine the unshifted sources first, shift the result once, then apply
    // the remaining operand Z.
    EVT VT = N->getValueType(0);
    SDLoc DL(N);
    SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
    SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
    return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
  }

  // Neither operand of the inner logic op is a matching shift.
  return SDValue();
}

/// OR combines for which the commuted variant will be tried as well.
static SDValue visitORCommutative(
SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
Expand All @@ -6710,6 +6756,9 @@ static SDValue visitORCommutative(
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
}

if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
return R;

auto peekThroughZext = [](SDValue V) {
if (V->getOpcode() == ISD::ZERO_EXTEND)
return V->getOperand(0);
Expand Down
34 changes: 17 additions & 17 deletions llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
Expand Up @@ -12,8 +12,8 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds x0, x0, #1
; CHECK-NEXT: adcs x1, x1, xzr
; CHECK-NEXT: extr x8, x1, x0, #60
; CHECK-NEXT: orr x8, x8, x1, lsr #60
; CHECK-NEXT: orr x8, x0, x1
; CHECK-NEXT: extr x8, x1, x8, #60
; CHECK-NEXT: cbnz x8, .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
Expand All @@ -32,8 +32,8 @@ exit:
define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_eq_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x8, x1, x0, #17
; CHECK-NEXT: orr x8, x8, x1, lsr #17
; CHECK-NEXT: orr x8, x0, x1
; CHECK-NEXT: extr x8, x1, x8, #17
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
Expand All @@ -45,8 +45,8 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_ne_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x8, x1, x0, #17
; CHECK-NEXT: orr x8, x8, x1, lsr #17
; CHECK-NEXT: orr x8, x0, x1
; CHECK-NEXT: extr x8, x1, x8, #17
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
Expand All @@ -58,8 +58,8 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_eq_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x8, x1, x0, #47
; CHECK-NEXT: orr x8, x8, x0, lsl #17
; CHECK-NEXT: orr x8, x1, x0
; CHECK-NEXT: extr x8, x8, x0, #47
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
Expand All @@ -71,8 +71,8 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_ne_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x8, x1, x0, #47
; CHECK-NEXT: orr x8, x8, x0, lsl #17
; CHECK-NEXT: orr x8, x1, x0
; CHECK-NEXT: extr x8, x8, x0, #47
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
Expand Down Expand Up @@ -106,8 +106,8 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x8, x0, x1, #47
; CHECK-NEXT: orr x8, x8, x1, lsl #17
; CHECK-NEXT: orr x8, x0, x1
; CHECK-NEXT: extr x8, x8, x1, #47
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
Expand Down Expand Up @@ -142,12 +142,12 @@ define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind {
define i1 @opt_setcc_shl_ne_zero_i256(i256 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_ne_zero_i256:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x8, x3, x2, #47
; CHECK-NEXT: extr x9, x2, x1, #47
; CHECK-NEXT: orr x8, x2, x0
; CHECK-NEXT: extr x9, x3, x2, #47
; CHECK-NEXT: extr x10, x1, x0, #47
; CHECK-NEXT: orr x9, x9, x0, lsl #17
; CHECK-NEXT: orr x8, x10, x8
; CHECK-NEXT: orr x8, x9, x8
; CHECK-NEXT: extr x8, x8, x1, #47
; CHECK-NEXT: orr x9, x10, x9
; CHECK-NEXT: orr x8, x8, x9
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
Expand Down
74 changes: 33 additions & 41 deletions llvm/test/CodeGen/AArch64/logic-shift.ll
Expand Up @@ -4,13 +4,11 @@
define i8 @or_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK-LABEL: or_lshr_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT: and w9, w1, #0xff
; CHECK-NEXT: and w8, w8, #0xff
; CHECK-NEXT: lsr w8, w8, w2
; CHECK-NEXT: lsr w9, w9, w2
; CHECK-NEXT: orr w8, w8, w3
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = lshr i8 %x0, %y
%sh2 = lshr i8 %x1, %y
Expand All @@ -22,10 +20,9 @@ define i8 @or_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
define i32 @or_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK-LABEL: or_lshr_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr w8, w0, w2
; CHECK-NEXT: lsr w9, w1, w2
; CHECK-NEXT: orr w8, w3, w8
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: lsr w8, w8, w2
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = lshr i32 %x0, %y
%sh2 = lshr i32 %x1, %y
Expand All @@ -38,10 +35,9 @@ define <8 x i16> @or_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <
; CHECK-LABEL: or_lshr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.8h, v2.8h
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h
; CHECK-NEXT: ushl v1.8h, v1.8h, v2.8h
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%sh1 = lshr <8 x i16> %x0, %y
%sh2 = lshr <8 x i16> %x1, %y
Expand All @@ -54,10 +50,9 @@ define <2 x i64> @or_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <
; CHECK-LABEL: or_lshr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.2d, v2.2d
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d
; CHECK-NEXT: ushl v1.2d, v1.2d, v2.2d
; CHECK-NEXT: orr v0.16b, v3.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <2 x i64> %x0, %y
%sh2 = lshr <2 x i64> %x1, %y
Expand All @@ -69,13 +64,11 @@ define <2 x i64> @or_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <
define i16 @or_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
; CHECK-LABEL: or_ashr_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT: sxth w9, w1
; CHECK-NEXT: sxth w8, w8
; CHECK-NEXT: asr w8, w8, w2
; CHECK-NEXT: asr w9, w9, w2
; CHECK-NEXT: orr w8, w8, w3
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = ashr i16 %x0, %y
%sh2 = ashr i16 %x1, %y
Expand All @@ -87,10 +80,9 @@ define i16 @or_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
define i64 @or_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: or_ashr_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x8, x0, x2
; CHECK-NEXT: asr x9, x1, x2
; CHECK-NEXT: orr x8, x3, x8
; CHECK-NEXT: orr x0, x8, x9
; CHECK-NEXT: orr x8, x0, x1
; CHECK-NEXT: asr x8, x8, x2
; CHECK-NEXT: orr x0, x8, x3
; CHECK-NEXT: ret
%sh1 = ashr i64 %x0, %y
%sh2 = ashr i64 %x1, %y
Expand All @@ -103,10 +95,9 @@ define <4 x i32> @or_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <
; CHECK-LABEL: or_ashr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshl v0.4s, v0.4s, v2.4s
; CHECK-NEXT: sshl v1.4s, v1.4s, v2.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%sh1 = ashr <4 x i32> %x0, %y
%sh2 = ashr <4 x i32> %x1, %y
Expand All @@ -119,10 +110,9 @@ define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <
; CHECK-LABEL: or_ashr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.16b, v2.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshl v0.16b, v0.16b, v2.16b
; CHECK-NEXT: sshl v1.16b, v1.16b, v2.16b
; CHECK-NEXT: orr v0.16b, v3.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <16 x i8> %x0, %y
%sh2 = ashr <16 x i8> %x1, %y
Expand All @@ -134,10 +124,9 @@ define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <
define i32 @or_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK-LABEL: or_shl_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: lsl w8, w0, w2
; CHECK-NEXT: lsl w9, w1, w2
; CHECK-NEXT: orr w8, w8, w3
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: lsl w8, w8, w2
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = shl i32 %x0, %y
%sh2 = shl i32 %x1, %y
Expand All @@ -149,11 +138,10 @@ define i32 @or_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
define i8 @or_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK-LABEL: or_shl_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT: lsl w8, w0, w2
; CHECK-NEXT: lsl w9, w1, w2
; CHECK-NEXT: orr w8, w3, w8
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: lsl w8, w8, w2
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = shl i8 %x0, %y
%sh2 = shl i8 %x1, %y
Expand All @@ -165,10 +153,9 @@ define i8 @or_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
define <2 x i64> @or_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: or_shl_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d
; CHECK-NEXT: ushl v1.2d, v1.2d, v2.2d
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%sh1 = shl <2 x i64> %x0, %y
%sh2 = shl <2 x i64> %x1, %y
Expand All @@ -180,10 +167,9 @@ define <2 x i64> @or_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2
define <8 x i16> @or_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: or_shl_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h
; CHECK-NEXT: ushl v1.8h, v1.8h, v2.8h
; CHECK-NEXT: orr v0.16b, v3.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = shl <8 x i16> %x0, %y
%sh2 = shl <8 x i16> %x1, %y
Expand All @@ -192,6 +178,8 @@ define <8 x i16> @or_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8
ret <8 x i16> %r
}

; negative test - mismatched shift opcodes

define i64 @or_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: or_mix_shr:
; CHECK: // %bb.0:
Expand All @@ -207,6 +195,8 @@ define i64 @or_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
ret i64 %r
}

; negative test - mixed shift amounts

define i64 @or_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
; CHECK-LABEL: or_lshr_mix_shift_amount:
; CHECK: // %bb.0:
Expand All @@ -222,6 +212,8 @@ define i64 @or_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
ret i64 %r
}

; negative test - mismatched logic opcodes

define i64 @mix_logic_lshr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: mix_logic_lshr:
; CHECK: // %bb.0:
Expand Down

0 comments on commit acb96ff

Please sign in to comment.