diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4f669f15f8ef3a..299ba51d606d61 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5950,6 +5950,53 @@ static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask); } +/// Given a bitwise logic operation N with a matching bitwise logic operand, +/// fold a pattern where 2 of the source operands are identically shifted +/// values. For example: +/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z +static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, + SelectionDAG &DAG) { + unsigned LogicOpcode = N->getOpcode(); + assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || + LogicOpcode == ISD::XOR) + && "Expected bitwise logic operation"); + + if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse()) + return SDValue(); + + // Match another bitwise logic op and a shift. + unsigned ShiftOpcode = ShiftOp.getOpcode(); + if (LogicOp.getOpcode() != LogicOpcode || + !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL || + ShiftOpcode == ISD::SRA)) + return SDValue(); + + // Match another shift op inside the first logic operand. Handle both commuted + // possibilities. + // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z + // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z + SDValue X1 = ShiftOp.getOperand(0); + SDValue Y = ShiftOp.getOperand(1); + SDValue X0, Z; + if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode && + LogicOp.getOperand(0).getOperand(1) == Y) { + X0 = LogicOp.getOperand(0).getOperand(0); + Z = LogicOp.getOperand(1); + } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode && + LogicOp.getOperand(1).getOperand(1) == Y) { + X0 = LogicOp.getOperand(1).getOperand(0); + Z = LogicOp.getOperand(0); + } else { + return SDValue(); + } + + EVT VT = N->getValueType(0); + SDLoc DL(N); + SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1); + SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y); + return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -6219,6 +6266,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) return V; + if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) + return R; + if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG)) + return R; + // Masking the negated extension of a boolean is just the zero-extended // boolean: // and (sub 0, zext(bool X)), 1 --> zext(bool X) @@ -6696,52 +6748,6 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { return SDValue(); } -/// Given a bitwise logic operation N with a matching bitwise logic operand, -/// fold a pattern where 2 of the source operands are identically shifted -/// values. For example: -/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z -static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, - SelectionDAG &DAG) { - // TODO: This should be extended to allow AND/XOR. - assert(N->getOpcode() == ISD::OR && "Expected bitwise logic operation"); - - if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse()) - return SDValue(); - - // Match another bitwise logic op and a shift. - unsigned LogicOpcode = N->getOpcode(); - unsigned ShiftOpcode = ShiftOp.getOpcode(); - if (LogicOp.getOpcode() != LogicOpcode || - !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL || - ShiftOpcode == ISD::SRA)) - return SDValue(); - - // Match another shift op inside the first logic operand. Handle both commuted - // possibilities. - // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z - // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z - SDValue X1 = ShiftOp.getOperand(0); - SDValue Y = ShiftOp.getOperand(1); - SDValue X0, Z; - if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode && - LogicOp.getOperand(0).getOperand(1) == Y) { - X0 = LogicOp.getOperand(0).getOperand(0); - Z = LogicOp.getOperand(1); - } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode && - LogicOp.getOperand(1).getOperand(1) == Y) { - X0 = LogicOp.getOperand(1).getOperand(0); - Z = LogicOp.getOperand(0); - } else { - return SDValue(); - } - - EVT VT = N->getValueType(0); - SDLoc DL(N); - SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1); - SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y); - return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z); -} - /// OR combines for which the commuted variant will be tried as well. static SDValue visitORCommutative( SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { @@ -8394,6 +8400,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) return V; + if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) + return R; + if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG)) + return R; + // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable if (SDValue MM = unfoldMaskedMerge(N)) return MM; diff --git a/llvm/test/CodeGen/AArch64/logic-shift.ll b/llvm/test/CodeGen/AArch64/logic-shift.ll index 058458c4dcd553..7889bda08a4f6e 100644 --- a/llvm/test/CodeGen/AArch64/logic-shift.ll +++ b/llvm/test/CodeGen/AArch64/logic-shift.ll @@ -232,13 +232,11 @@ define i64 @mix_logic_lshr(i64 %x0, i64 %x1, i64 %y, i64 %z) { define i8 @xor_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) { ; CHECK-LABEL: xor_lshr_commute0: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: eor w8, w0, w1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: and w9, w1, #0xff +; CHECK-NEXT: and w8, w8, #0xff ; CHECK-NEXT: lsr w8, w8, w2 -; CHECK-NEXT: lsr w9, w9, w2 -; CHECK-NEXT: eor w8, w8, w3 -; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: eor w0, w8, w3 ; CHECK-NEXT: ret %sh1 = lshr i8 %x0, %y %sh2 = lshr i8 %x1, %y @@ -250,10 +248,9 @@ define i8 @xor_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) { define i32 @xor_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) { ; CHECK-LABEL: xor_lshr_commute1: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w0, w2 -; CHECK-NEXT: lsr w9, w1, w2 -; CHECK-NEXT: eor w8, w3, w8 -; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: lsr w8, w8, w2 +; CHECK-NEXT: eor w0, w8, w3 ; CHECK-NEXT: ret %sh1 = lshr i32 %x0, %y %sh2 = lshr i32 %x1, %y @@ -266,10 +263,9 @@ define <8 x i16> @xor_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, ; CHECK-LABEL: xor_lshr_commute2: ; CHECK: // %bb.0: ; CHECK-NEXT: neg v2.8h, v2.8h +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ushl v1.8h, v1.8h, v2.8h ; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b -; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %sh1 = lshr <8 x i16> %x0, %y %sh2 = lshr <8 x i16> %x1, %y @@ -282,10 +278,9 @@ define <2 x i64> @xor_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, ; CHECK-LABEL: xor_lshr_commute3: ; CHECK: // %bb.0: ; CHECK-NEXT: neg v2.2d, v2.2d +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ushl v1.2d, v1.2d, v2.2d -; CHECK-NEXT: eor v0.16b, v3.16b, v0.16b -; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b +; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %sh1 = lshr <2 x i64> %x0, %y %sh2 = lshr <2 x i64> %x1, %y @@ -297,13 +292,11 @@ define <2 x i64> @xor_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, define i16 @xor_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) { ; CHECK-LABEL: xor_ashr_commute0: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: eor w8, w0, w1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: sxth w9, w1 +; CHECK-NEXT: sxth w8, w8 ; CHECK-NEXT: asr w8, w8, w2 -; CHECK-NEXT: asr w9, w9, w2 -; CHECK-NEXT: eor w8, w8, w3 -; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: eor w0, w8, w3 ; CHECK-NEXT: ret %sh1 = ashr i16 %x0, %y %sh2 = ashr i16 %x1, %y @@ -315,10 +308,9 @@ define i16 @xor_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) { define i64 @xor_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) { ; CHECK-LABEL: xor_ashr_commute1: ; CHECK: // %bb.0: -; CHECK-NEXT: asr x8, x0, x2 -; CHECK-NEXT: asr x9, x1, x2 -; CHECK-NEXT: eor x8, x3, x8 -; CHECK-NEXT: eor x0, x8, x9 +; CHECK-NEXT: eor x8, x0, x1 +; CHECK-NEXT: asr x8, x8, x2 +; CHECK-NEXT: eor x0, x8, x3 ; CHECK-NEXT: ret %sh1 = ashr i64 %x0, %y %sh2 = ashr i64 %x1, %y @@ -331,10 +323,9 @@ define <4 x i32> @xor_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, ; CHECK-LABEL: xor_ashr_commute2: ; CHECK: // %bb.0: ; CHECK-NEXT: neg v2.4s, v2.4s +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: sshl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: sshl v1.4s, v1.4s, v2.4s ; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b -; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %sh1 = ashr <4 x i32> %x0, %y %sh2 = ashr <4 x i32> %x1, %y @@ -347,10 +338,9 @@ define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, ; CHECK-LABEL: xor_ashr_commute3: ; CHECK: // %bb.0: ; CHECK-NEXT: neg v2.16b, v2.16b +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: sshl v0.16b, v0.16b, v2.16b -; CHECK-NEXT: sshl v1.16b, v1.16b, v2.16b -; CHECK-NEXT: eor v0.16b, v3.16b, v0.16b -; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b +; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %sh1 = ashr <16 x i8> %x0, %y %sh2 = ashr <16 x i8> %x1, %y @@ -362,10 +352,9 @@ define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, define i32 @xor_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) { ; CHECK-LABEL: xor_shl_commute0: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, w2 -; CHECK-NEXT: lsl w9, w1, w2 -; CHECK-NEXT: eor w8, w8, w3 -; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: lsl w8, w8, w2 +; CHECK-NEXT: eor w0, w8, w3 ; CHECK-NEXT: ret %sh1 = shl i32 %x0, %y %sh2 = shl i32 %x1, %y @@ -377,11 +366,10 @@ define i32 @xor_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) { define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) { ; CHECK-LABEL: xor_shl_commute1: ; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: lsl w8, w0, w2 -; CHECK-NEXT: lsl w9, w1, w2 -; CHECK-NEXT: eor w8, w3, w8 -; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: lsl w8, w8, w2 +; CHECK-NEXT: eor w0, w8, w3 ; CHECK-NEXT: ret %sh1 = shl i8 %x0, %y %sh2 = shl i8 %x1, %y @@ -393,10 +381,9 @@ define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) { define <2 x i64> @xor_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) { ; CHECK-LABEL: xor_shl_commute2: ; CHECK: // %bb.0: +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ushl v1.2d, v1.2d, v2.2d ; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b -; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %sh1 = shl <2 x i64> %x0, %y %sh2 = shl <2 x i64> %x1, %y @@ -408,10 +395,9 @@ define <2 x i64> @xor_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, < define <8 x i16> @xor_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) { ; CHECK-LABEL: xor_shl_commute3: ; CHECK: // %bb.0: +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ushl v1.8h, v1.8h, v2.8h -; CHECK-NEXT: eor v0.16b, v3.16b, v0.16b -; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b +; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %sh1 = shl <8 x i16> %x0, %y %sh2 = shl <8 x i16> %x1, %y @@ -474,13 +460,11 @@ define i64 @mix_logic_ashr(i64 %x0, i64 %x1, i64 %y, i64 %z) { define i8 @and_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) { ; CHECK-LABEL: and_lshr_commute0: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: and w8, w0, w1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: and w9, w1, #0xff +; CHECK-NEXT: and w8, w8, #0xff ; CHECK-NEXT: lsr w8, w8, w2 -; CHECK-NEXT: lsr w9, w9, w2 -; CHECK-NEXT: and w8, w8, w3 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: and w0, w8, w3 ; CHECK-NEXT: ret %sh1 = lshr i8 %x0, %y %sh2 = lshr i8 %x1, %y @@ -492,10 +476,9 @@ define i8 @and_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) { define i32 @and_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) { ; CHECK-LABEL: and_lshr_commute1: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w0, w2 -; CHECK-NEXT: lsr w9, w1, w2 -; CHECK-NEXT: and w8, w3, w8 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: and w8, w0, w1 +; CHECK-NEXT: lsr w8, w8, w2 +; CHECK-NEXT: and w0, w8, w3 ; CHECK-NEXT: ret %sh1 = lshr i32 %x0, %y %sh2 = lshr i32 %x1, %y @@ -508,10 +491,9 @@ define <8 x i16> @and_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, ; CHECK-LABEL: and_lshr_commute2: ; CHECK: // %bb.0: ; CHECK-NEXT: neg v2.8h, v2.8h +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ushl v1.8h, v1.8h, v2.8h ; CHECK-NEXT: and v0.16b, v0.16b, v3.16b -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %sh1 = lshr <8 x i16> %x0, %y %sh2 = lshr <8 x i16> %x1, %y @@ -524,10 +506,9 @@ define <2 x i64> @and_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, ; CHECK-LABEL: and_lshr_commute3: ; CHECK: // %bb.0: ; CHECK-NEXT: neg v2.2d, v2.2d +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ushl v1.2d, v1.2d, v2.2d -; CHECK-NEXT: and v0.16b, v3.16b, v0.16b -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %sh1 = lshr <2 x i64> %x0, %y %sh2 = lshr <2 x i64> %x1, %y @@ -539,13 +520,11 @@ define <2 x i64> @and_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, define i16 @and_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) { ; CHECK-LABEL: and_ashr_commute0: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: and w8, w0, w1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: sxth w9, w1 +; CHECK-NEXT: sxth w8, w8 ; CHECK-NEXT: asr w8, w8, w2 -; CHECK-NEXT: asr w9, w9, w2 -; CHECK-NEXT: and w8, w8, w3 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: and w0, w8, w3 ; CHECK-NEXT: ret %sh1 = ashr i16 %x0, %y %sh2 = ashr i16 %x1, %y @@ -557,10 +536,9 @@ define i16 @and_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) { define i64 @and_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) { ; CHECK-LABEL: and_ashr_commute1: ; CHECK: // %bb.0: -; CHECK-NEXT: asr x8, x0, x2 -; CHECK-NEXT: asr x9, x1, x2 -; CHECK-NEXT: and x8, x3, x8 -; CHECK-NEXT: and x0, x8, x9 +; CHECK-NEXT: and x8, x0, x1 +; CHECK-NEXT: asr x8, x8, x2 +; CHECK-NEXT: and x0, x8, x3 ; CHECK-NEXT: ret %sh1 = ashr i64 %x0, %y %sh2 = ashr i64 %x1, %y @@ -573,10 +551,9 @@ define <4 x i32> @and_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, ; CHECK-LABEL: and_ashr_commute2: ; CHECK: // %bb.0: ; CHECK-NEXT: neg v2.4s, v2.4s +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: sshl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: sshl v1.4s, v1.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v3.16b -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %sh1 = ashr <4 x i32> %x0, %y %sh2 = ashr <4 x i32> %x1, %y @@ -589,10 +566,9 @@ define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, ; CHECK-LABEL: and_ashr_commute3: ; CHECK: // %bb.0: ; CHECK-NEXT: neg v2.16b, v2.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: sshl v0.16b, v0.16b, v2.16b -; CHECK-NEXT: sshl v1.16b, v1.16b, v2.16b -; CHECK-NEXT: and v0.16b, v3.16b, v0.16b -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %sh1 = ashr <16 x i8> %x0, %y %sh2 = ashr <16 x i8> %x1, %y @@ -604,10 +580,9 @@ define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, define i32 @and_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) { ; CHECK-LABEL: and_shl_commute0: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, w2 -; CHECK-NEXT: lsl w9, w1, w2 -; CHECK-NEXT: and w8, w8, w3 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: and w8, w0, w1 +; CHECK-NEXT: lsl w8, w8, w2 +; CHECK-NEXT: and w0, w8, w3 ; CHECK-NEXT: ret %sh1 = shl i32 %x0, %y %sh2 = shl i32 %x1, %y @@ -619,11 +594,10 @@ define i32 @and_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) { define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) { ; CHECK-LABEL: and_shl_commute1: ; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, w1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: lsl w8, w0, w2 -; CHECK-NEXT: lsl w9, w1, w2 -; CHECK-NEXT: and w8, w3, w8 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: lsl w8, w8, w2 +; CHECK-NEXT: and w0, w8, w3 ; CHECK-NEXT: ret %sh1 = shl i8 %x0, %y %sh2 = shl i8 %x1, %y @@ -635,10 +609,9 @@ define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) { define <2 x i64> @and_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) { ; CHECK-LABEL: and_shl_commute2: ; CHECK: // %bb.0: +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ushl v1.2d, v1.2d, v2.2d ; CHECK-NEXT: and v0.16b, v0.16b, v3.16b -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %sh1 = shl <2 x i64> %x0, %y %sh2 = shl <2 x i64> %x1, %y @@ -650,10 +623,9 @@ define <2 x i64> @and_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, < define <8 x i16> @and_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) { ; CHECK-LABEL: and_shl_commute3: ; CHECK: // %bb.0: +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ushl v1.8h, v1.8h, v2.8h -; CHECK-NEXT: and v0.16b, v3.16b, v0.16b -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %sh1 = shl <8 x i16> %x0, %y %sh2 = shl <8 x i16> %x1, %y diff --git a/llvm/test/CodeGen/X86/logic-shift.ll b/llvm/test/CodeGen/X86/logic-shift.ll index 1a413a44f99a8d..829ed4f748dd3f 100644 --- a/llvm/test/CodeGen/X86/logic-shift.ll +++ b/llvm/test/CodeGen/X86/logic-shift.ll @@ -290,10 +290,9 @@ define i8 @xor_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrb %cl, %dil +; CHECK-NEXT: xorl %esi, %edi ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: shrb %cl, %sil -; CHECK-NEXT: xorb %sil, %al +; CHECK-NEXT: shrb %cl, %dil ; CHECK-NEXT: xorb %dil, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq @@ -309,11 +308,10 @@ define i32 @xor_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrl %cl, %edi +; CHECK-NEXT: xorl %esi, %edi ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: shrl %cl, %esi -; CHECK-NEXT: xorl %edi, %esi -; CHECK-NEXT: xorl %esi, %eax +; CHECK-NEXT: shrl %cl, %edi +; CHECK-NEXT: xorl %edi, %eax ; CHECK-NEXT: retq %sh1 = lshr i32 %x0, %y %sh2 = lshr i32 %x1, %y @@ -325,17 +323,13 @@ define i32 @xor_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) { define <8 x i16> @xor_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) { ; CHECK-LABEL: xor_lshr_commute2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; CHECK-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0 -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm4 -; CHECK-NEXT: vpackusdw %xmm4, %xmm0, %xmm0 -; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; CHECK-NEXT: vpsrlvd %ymm2, %ymm1, %ymm1 -; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpxor %xmm1, %xmm3, %xmm1 ; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero +; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1 +; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %sh1 = lshr <8 x i16> %x0, %y @@ -348,10 +342,9 @@ define <8 x i16> @xor_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, define <2 x i64> @xor_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) { ; CHECK-LABEL: xor_lshr_commute3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vpsrlvq %xmm2, %xmm1, %xmm1 ; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpxor %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: retq %sh1 = lshr <2 x i64> %x0, %y %sh2 = lshr <2 x i64> %x1, %y @@ -365,13 +358,11 @@ define i16 @xor_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: movl %ecx, %r8d ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: movswl %si, %eax -; CHECK-NEXT: movswl %di, %edx -; CHECK-NEXT: sarl %cl, %edx +; CHECK-NEXT: xorl %esi, %edi +; CHECK-NEXT: movswl %di, %eax ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: sarl %cl, %eax ; CHECK-NEXT: xorl %r8d, %eax -; CHECK-NEXT: xorl %edx, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %sh1 = ashr i16 %x0, %y @@ -386,11 +377,10 @@ define i64 @xor_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rcx, %rax ; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: sarq %cl, %rdi +; CHECK-NEXT: xorq %rsi, %rdi ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx -; CHECK-NEXT: sarq %cl, %rsi -; CHECK-NEXT: xorq %rdi, %rsi -; CHECK-NEXT: xorq %rsi, %rax +; CHECK-NEXT: sarq %cl, %rdi +; CHECK-NEXT: xorq %rdi, %rax ; CHECK-NEXT: retq %sh1 = ashr i64 %x0, %y %sh2 = ashr i64 %x1, %y @@ -402,10 +392,9 @@ define i64 @xor_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) { define <4 x i32> @xor_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: xor_ashr_commute2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsravd %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vpsravd %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpxor %xmm1, %xmm3, %xmm1 ; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpsravd %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: retq %sh1 = ashr <4 x i32> %x0, %y %sh2 = ashr <4 x i32> %x1, %y @@ -417,49 +406,32 @@ define <4 x i32> @xor_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) { ; CHECK-LABEL: xor_ashr_commute3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; CHECK-NEXT: vpsraw $4, %xmm4, %xmm5 ; CHECK-NEXT: vpsllw $5, %xmm2, %xmm2 -; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4 -; CHECK-NEXT: vpsraw $2, %xmm4, %xmm5 -; CHECK-NEXT: vpaddw %xmm6, %xmm6, %xmm7 -; CHECK-NEXT: vpblendvb %xmm7, %xmm5, %xmm4, %xmm4 -; CHECK-NEXT: vpsraw $1, %xmm4, %xmm5 -; CHECK-NEXT: vpaddw %xmm7, %xmm7, %xmm8 -; CHECK-NEXT: vpblendvb %xmm8, %xmm5, %xmm4, %xmm4 -; CHECK-NEXT: vpsrlw $8, %xmm4, %xmm9 -; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; CHECK-NEXT: vpsraw $4, %xmm0, %xmm5 -; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; CHECK-NEXT: vpblendvb %xmm2, %xmm5, %xmm0, %xmm0 -; CHECK-NEXT: vpsraw $2, %xmm0, %xmm5 -; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm4 -; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm0, %xmm0 -; CHECK-NEXT: vpsraw $1, %xmm0, %xmm5 -; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm10 -; CHECK-NEXT: vpblendvb %xmm10, %xmm5, %xmm0, %xmm0 -; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0 -; CHECK-NEXT: vpackuswb %xmm9, %xmm0, %xmm9 -; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; CHECK-NEXT: vpsraw $4, %xmm5, %xmm0 -; CHECK-NEXT: vpblendvb %xmm6, %xmm0, %xmm5, %xmm0 -; CHECK-NEXT: vpsraw $2, %xmm0, %xmm5 -; CHECK-NEXT: vpblendvb %xmm7, %xmm5, %xmm0, %xmm0 -; CHECK-NEXT: vpsraw $1, %xmm0, %xmm5 -; CHECK-NEXT: vpblendvb %xmm8, %xmm5, %xmm0, %xmm0 -; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0 -; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5 -; CHECK-NEXT: vpblendvb %xmm2, %xmm5, %xmm1, %xmm1 -; CHECK-NEXT: vpsraw $2, %xmm1, %xmm2 -; CHECK-NEXT: vpblendvb %xmm4, %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpsraw $1, %xmm1, %xmm2 -; CHECK-NEXT: vpblendvb %xmm10, %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 +; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5 +; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 +; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5 +; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 ; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1 -; CHECK-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpxor %xmm0, %xmm9, %xmm0 -; CHECK-NEXT: vpxor %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4 +; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 +; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4 +; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 +; CHECK-NEXT: vpsraw $1, %xmm0, %xmm4 +; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 +; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0 +; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: retq %sh1 = ashr <16 x i8> %x0, %y %sh2 = ashr <16 x i8> %x1, %y @@ -473,10 +445,9 @@ define i32 @xor_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shll %cl, %edi +; CHECK-NEXT: xorl %esi, %edi ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: shll %cl, %esi -; CHECK-NEXT: xorl %esi, %eax +; CHECK-NEXT: shll %cl, %edi ; CHECK-NEXT: xorl %edi, %eax ; CHECK-NEXT: retq %sh1 = shl i32 %x0, %y @@ -491,11 +462,10 @@ define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shlb %cl, %dil +; CHECK-NEXT: xorl %esi, %edi ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: shlb %cl, %sil -; CHECK-NEXT: xorb %dil, %sil -; CHECK-NEXT: xorb %sil, %al +; CHECK-NEXT: shlb %cl, %dil +; CHECK-NEXT: xorb %dil, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %sh1 = shl i8 %x0, %y @@ -508,10 +478,9 @@ define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) { define <2 x i64> @xor_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) { ; CHECK-LABEL: xor_shl_commute2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vpsllvq %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpxor %xmm1, %xmm3, %xmm1 ; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: retq %sh1 = shl <2 x i64> %x0, %y %sh2 = shl <2 x i64> %x1, %y @@ -523,18 +492,13 @@ define <2 x i64> @xor_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, < define <8 x i16> @xor_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) { ; CHECK-LABEL: xor_shl_commute3: ; CHECK: # %bb.0: +; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; CHECK-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 -; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] -; CHECK-NEXT: vpshufb %ymm4, %ymm0, %ymm0 +; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero +; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u] ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; CHECK-NEXT: vpsllvd %ymm2, %ymm1, %ymm1 -; CHECK-NEXT: vpshufb %ymm4, %ymm1, %ymm1 -; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] -; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpxor %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %sh1 = shl <8 x i16> %x0, %y @@ -610,10 +574,9 @@ define i8 @and_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrb %cl, %dil +; CHECK-NEXT: andl %esi, %edi ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: shrb %cl, %sil -; CHECK-NEXT: andb %sil, %al +; CHECK-NEXT: shrb %cl, %dil ; CHECK-NEXT: andb %dil, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq @@ -629,11 +592,10 @@ define i32 @and_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shrl %cl, %edi +; CHECK-NEXT: andl %esi, %edi ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: shrl %cl, %esi -; CHECK-NEXT: andl %edi, %esi -; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: shrl %cl, %edi +; CHECK-NEXT: andl %edi, %eax ; CHECK-NEXT: retq %sh1 = lshr i32 %x0, %y %sh2 = lshr i32 %x1, %y @@ -645,17 +607,13 @@ define i32 @and_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) { define <8 x i16> @and_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) { ; CHECK-LABEL: and_lshr_commute2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; CHECK-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0 -; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm4 -; CHECK-NEXT: vpackusdw %xmm4, %xmm0, %xmm0 -; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; CHECK-NEXT: vpsrlvd %ymm2, %ymm1, %ymm1 -; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpand %xmm1, %xmm3, %xmm1 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero +; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1 +; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpand %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %sh1 = lshr <8 x i16> %x0, %y @@ -668,10 +626,9 @@ define <8 x i16> @and_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, define <2 x i64> @and_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) { ; CHECK-LABEL: and_lshr_commute3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vpsrlvq %xmm2, %xmm1, %xmm1 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpand %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpand %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: retq %sh1 = lshr <2 x i64> %x0, %y %sh2 = lshr <2 x i64> %x1, %y @@ -685,13 +642,11 @@ define i16 @and_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: movl %ecx, %r8d ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: movswl %si, %eax -; CHECK-NEXT: movswl %di, %edx -; CHECK-NEXT: sarl %cl, %edx +; CHECK-NEXT: andl %esi, %edi +; CHECK-NEXT: movswl %di, %eax ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: sarl %cl, %eax ; CHECK-NEXT: andl %r8d, %eax -; CHECK-NEXT: andl %edx, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %sh1 = ashr i16 %x0, %y @@ -706,11 +661,10 @@ define i64 @and_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rcx, %rax ; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: sarq %cl, %rdi +; CHECK-NEXT: andq %rsi, %rdi ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx -; CHECK-NEXT: sarq %cl, %rsi -; CHECK-NEXT: andq %rdi, %rsi -; CHECK-NEXT: andq %rsi, %rax +; CHECK-NEXT: sarq %cl, %rdi +; CHECK-NEXT: andq %rdi, %rax ; CHECK-NEXT: retq %sh1 = ashr i64 %x0, %y %sh2 = ashr i64 %x1, %y @@ -722,10 +676,9 @@ define i64 @and_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) { define <4 x i32> @and_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: and_ashr_commute2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsravd %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vpsravd %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpand %xmm1, %xmm3, %xmm1 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpsravd %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpand %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: retq %sh1 = ashr <4 x i32> %x0, %y %sh2 = ashr <4 x i32> %x1, %y @@ -737,49 +690,32 @@ define <4 x i32> @and_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) { ; CHECK-LABEL: and_ashr_commute3: ; CHECK: # %bb.0: -; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; CHECK-NEXT: vpsraw $4, %xmm4, %xmm5 ; CHECK-NEXT: vpsllw $5, %xmm2, %xmm2 -; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4 -; CHECK-NEXT: vpsraw $2, %xmm4, %xmm5 -; CHECK-NEXT: vpaddw %xmm6, %xmm6, %xmm7 -; CHECK-NEXT: vpblendvb %xmm7, %xmm5, %xmm4, %xmm4 -; CHECK-NEXT: vpsraw $1, %xmm4, %xmm5 -; CHECK-NEXT: vpaddw %xmm7, %xmm7, %xmm8 -; CHECK-NEXT: vpblendvb %xmm8, %xmm5, %xmm4, %xmm4 -; CHECK-NEXT: vpsrlw $8, %xmm4, %xmm9 -; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; CHECK-NEXT: vpsraw $4, %xmm0, %xmm5 -; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; CHECK-NEXT: vpblendvb %xmm2, %xmm5, %xmm0, %xmm0 -; CHECK-NEXT: vpsraw $2, %xmm0, %xmm5 -; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm4 -; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm0, %xmm0 -; CHECK-NEXT: vpsraw $1, %xmm0, %xmm5 -; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm10 -; CHECK-NEXT: vpblendvb %xmm10, %xmm5, %xmm0, %xmm0 -; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0 -; CHECK-NEXT: vpackuswb %xmm9, %xmm0, %xmm9 -; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; CHECK-NEXT: vpsraw $4, %xmm5, %xmm0 -; CHECK-NEXT: vpblendvb %xmm6, %xmm0, %xmm5, %xmm0 -; CHECK-NEXT: vpsraw $2, %xmm0, %xmm5 -; CHECK-NEXT: vpblendvb %xmm7, %xmm5, %xmm0, %xmm0 -; CHECK-NEXT: vpsraw $1, %xmm0, %xmm5 -; CHECK-NEXT: vpblendvb %xmm8, %xmm5, %xmm0, %xmm0 -; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0 -; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5 -; CHECK-NEXT: vpblendvb %xmm2, %xmm5, %xmm1, %xmm1 -; CHECK-NEXT: vpsraw $2, %xmm1, %xmm2 -; CHECK-NEXT: vpblendvb %xmm4, %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpsraw $1, %xmm1, %xmm2 -; CHECK-NEXT: vpblendvb %xmm10, %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 +; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5 +; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 +; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5 +; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4 +; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1 ; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1 -; CHECK-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpand %xmm0, %xmm9, %xmm0 -; CHECK-NEXT: vpand %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4 +; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 +; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4 +; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 +; CHECK-NEXT: vpsraw $1, %xmm0, %xmm4 +; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0 +; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0 +; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpand %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: retq %sh1 = ashr <16 x i8> %x0, %y %sh2 = ashr <16 x i8> %x1, %y @@ -793,10 +729,9 @@ define i32 @and_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shll %cl, %edi +; CHECK-NEXT: andl %esi, %edi ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: shll %cl, %esi -; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: shll %cl, %edi ; CHECK-NEXT: andl %edi, %eax ; CHECK-NEXT: retq %sh1 = shl i32 %x0, %y @@ -811,11 +746,10 @@ define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: shlb %cl, %dil +; CHECK-NEXT: andl %esi, %edi ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: shlb %cl, %sil -; CHECK-NEXT: andb %dil, %sil -; CHECK-NEXT: andb %sil, %al +; CHECK-NEXT: shlb %cl, %dil +; CHECK-NEXT: andb %dil, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %sh1 = shl i8 %x0, %y @@ -828,10 +762,9 @@ define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) { define <2 x i64> @and_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) { ; CHECK-LABEL: and_shl_commute2: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vpsllvq %xmm2, %xmm1, %xmm1 -; CHECK-NEXT: vpand %xmm1, %xmm3, %xmm1 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: vpand %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: retq %sh1 = shl <2 x i64> %x0, %y %sh2 = shl <2 x i64> %x1, %y @@ -843,18 +776,13 @@ define <2 x i64> @and_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, < define <8 x i16> @and_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) { ; CHECK-LABEL: and_shl_commute3: ; CHECK: # %bb.0: +; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; CHECK-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 -; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] -; CHECK-NEXT: vpshufb %ymm4, %ymm0, %ymm0 +; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero +; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u] ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; CHECK-NEXT: vpsllvd %ymm2, %ymm1, %ymm1 -; CHECK-NEXT: vpshufb %ymm4, %ymm1, %ymm1 -; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] -; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpand %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: vpand %xmm3, %xmm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %sh1 = shl <8 x i16> %x0, %y