diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 0b69496d14f9fe..6619f1c42a8883 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -7443,13 +7443,13 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, Op, DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT)); - if (!IsNegative) { - SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift); - return DAG.getNode(ISD::XOR, dl, VT, Add, Shift); - } + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); + + // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y) + if (!IsNegative) + return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift); // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) - SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor); } diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 98c8133282a264..1735c0ddd11a50 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3464,40 +3464,39 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { return false; } -/// Target-specific DAG combining for ISD::XOR. +/// Target-specific DAG combining for ISD::SUB. /// Target-independent combining lowers SELECT_CC nodes of the form /// select_cc setg[ge] X, 0, X, -X /// select_cc setgt X, -1, X, -X /// select_cc setl[te] X, 0, -X, X /// select_cc setlt X, 1, -X, X /// which represent Integer ABS into: -/// Y = sra (X, size(X)-1); xor (add (X, Y), Y) +/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y) /// ARM instruction selection detects the latter and matches it to /// ARM::ABS or ARM::t2ABS machine node. 
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ - SDValue XORSrc0 = N->getOperand(0); - SDValue XORSrc1 = N->getOperand(1); + SDValue SUBSrc0 = N->getOperand(0); + SDValue SUBSrc1 = N->getOperand(1); EVT VT = N->getValueType(0); if (Subtarget->isThumb1Only()) return false; - if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) + if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA) return false; - SDValue ADDSrc0 = XORSrc0.getOperand(0); - SDValue ADDSrc1 = XORSrc0.getOperand(1); - SDValue SRASrc0 = XORSrc1.getOperand(0); - SDValue SRASrc1 = XORSrc1.getOperand(1); + SDValue XORSrc0 = SUBSrc0.getOperand(0); + SDValue XORSrc1 = SUBSrc0.getOperand(1); + SDValue SRASrc0 = SUBSrc1.getOperand(0); + SDValue SRASrc1 = SUBSrc1.getOperand(1); ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); EVT XType = SRASrc0.getValueType(); unsigned Size = XType.getSizeInBits() - 1; - if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && - XType.isInteger() && SRAConstant != nullptr && - Size == SRAConstant->getZExtValue()) { + if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() && + SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) { unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; - CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); + CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0); return true; } @@ -3673,8 +3672,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) { if (tryInlineAsm(N)) return; break; - case ISD::XOR: - // Select special operations if XOR node forms integer ABS pattern + case ISD::SUB: + // Select special operations if SUB node forms integer ABS pattern if (tryABSOp(N)) return; // Other cases are autogenerated. 
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll index 1dc8a7b99bc374..ec5e433b57cf74 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll @@ -99,10 +99,10 @@ define <2 x i64> @sub_absv_64(<2 x i64> %a, <2 x i64> %b) local_unnamed_addr { ; CHECK-PWR7-NEXT: sub r4, r5, r6 ; CHECK-PWR7-NEXT: sradi r5, r3, 63 ; CHECK-PWR7-NEXT: sradi r6, r4, 63 -; CHECK-PWR7-NEXT: add r3, r3, r5 -; CHECK-PWR7-NEXT: add r4, r4, r6 ; CHECK-PWR7-NEXT: xor r3, r3, r5 ; CHECK-PWR7-NEXT: xor r4, r4, r6 +; CHECK-PWR7-NEXT: sub r3, r3, r5 +; CHECK-PWR7-NEXT: sub r4, r4, r6 ; CHECK-PWR7-NEXT: std r3, -8(r1) ; CHECK-PWR7-NEXT: addi r3, r1, -16 ; CHECK-PWR7-NEXT: std r4, -16(r1) @@ -307,13 +307,13 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: sub r4, r7, r4 ; CHECK-PWR9-LE-NEXT: srawi r6, r3, 31 ; CHECK-PWR9-LE-NEXT: srawi r7, r4, 31 -; CHECK-PWR9-LE-NEXT: add r3, r3, r6 -; CHECK-PWR9-LE-NEXT: add r4, r4, r7 -; CHECK-PWR9-LE-NEXT: xor r6, r3, r6 -; CHECK-PWR9-LE-NEXT: srawi r3, r5, 31 +; CHECK-PWR9-LE-NEXT: xor r3, r3, r6 ; CHECK-PWR9-LE-NEXT: xor r4, r4, r7 -; CHECK-PWR9-LE-NEXT: add r5, r5, r3 -; CHECK-PWR9-LE-NEXT: xor r3, r5, r3 +; CHECK-PWR9-LE-NEXT: sub r6, r3, r6 +; CHECK-PWR9-LE-NEXT: srawi r3, r5, 31 +; CHECK-PWR9-LE-NEXT: sub r4, r4, r7 +; CHECK-PWR9-LE-NEXT: xor r5, r5, r3 +; CHECK-PWR9-LE-NEXT: sub r3, r5, r3 ; CHECK-PWR9-LE-NEXT: li r5, 3 ; CHECK-PWR9-LE-NEXT: vextubrx r7, r5, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r5, r5, v3 @@ -321,8 +321,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: clrlwi r5, r5, 24 ; CHECK-PWR9-LE-NEXT: sub r5, r7, r5 ; CHECK-PWR9-LE-NEXT: srawi r7, r5, 31 -; CHECK-PWR9-LE-NEXT: add r5, r5, r7 ; CHECK-PWR9-LE-NEXT: xor r5, r5, r7 +; CHECK-PWR9-LE-NEXT: sub r5, r5, r7 ; CHECK-PWR9-LE-NEXT: li r7, 4 ; CHECK-PWR9-LE-NEXT: vextubrx r8, r7, v2 ; 
CHECK-PWR9-LE-NEXT: vextubrx r7, r7, v3 @@ -331,8 +331,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: clrlwi r7, r7, 24 ; CHECK-PWR9-LE-NEXT: sub r7, r8, r7 ; CHECK-PWR9-LE-NEXT: srawi r8, r7, 31 -; CHECK-PWR9-LE-NEXT: add r7, r7, r8 ; CHECK-PWR9-LE-NEXT: xor r7, r7, r8 +; CHECK-PWR9-LE-NEXT: sub r7, r7, r8 ; CHECK-PWR9-LE-NEXT: li r8, 5 ; CHECK-PWR9-LE-NEXT: vextubrx r9, r8, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r8, r8, v3 @@ -340,8 +340,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: clrlwi r8, r8, 24 ; CHECK-PWR9-LE-NEXT: sub r8, r9, r8 ; CHECK-PWR9-LE-NEXT: srawi r9, r8, 31 -; CHECK-PWR9-LE-NEXT: add r8, r8, r9 ; CHECK-PWR9-LE-NEXT: xor r8, r8, r9 +; CHECK-PWR9-LE-NEXT: sub r8, r8, r9 ; CHECK-PWR9-LE-NEXT: li r9, 6 ; CHECK-PWR9-LE-NEXT: vextubrx r10, r9, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r9, r9, v3 @@ -349,8 +349,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: clrlwi r9, r9, 24 ; CHECK-PWR9-LE-NEXT: sub r9, r10, r9 ; CHECK-PWR9-LE-NEXT: srawi r10, r9, 31 -; CHECK-PWR9-LE-NEXT: add r9, r9, r10 ; CHECK-PWR9-LE-NEXT: xor r9, r9, r10 +; CHECK-PWR9-LE-NEXT: sub r9, r9, r10 ; CHECK-PWR9-LE-NEXT: li r10, 7 ; CHECK-PWR9-LE-NEXT: vextubrx r11, r10, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r10, r10, v3 @@ -358,8 +358,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: clrlwi r10, r10, 24 ; CHECK-PWR9-LE-NEXT: sub r10, r11, r10 ; CHECK-PWR9-LE-NEXT: srawi r11, r10, 31 -; CHECK-PWR9-LE-NEXT: add r10, r10, r11 ; CHECK-PWR9-LE-NEXT: xor r10, r10, r11 +; CHECK-PWR9-LE-NEXT: sub r10, r10, r11 ; CHECK-PWR9-LE-NEXT: li r11, 8 ; CHECK-PWR9-LE-NEXT: vextubrx r12, r11, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r11, r11, v3 @@ -368,8 +368,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: clrlwi r11, r11, 24 ; 
CHECK-PWR9-LE-NEXT: sub r11, r12, r11 ; CHECK-PWR9-LE-NEXT: srawi r12, r11, 31 -; CHECK-PWR9-LE-NEXT: add r11, r11, r12 ; CHECK-PWR9-LE-NEXT: xor r11, r11, r12 +; CHECK-PWR9-LE-NEXT: sub r11, r11, r12 ; CHECK-PWR9-LE-NEXT: li r12, 9 ; CHECK-PWR9-LE-NEXT: vextubrx r0, r12, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r12, r12, v3 @@ -377,8 +377,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: clrlwi r12, r12, 24 ; CHECK-PWR9-LE-NEXT: sub r12, r0, r12 ; CHECK-PWR9-LE-NEXT: srawi r0, r12, 31 -; CHECK-PWR9-LE-NEXT: add r12, r12, r0 ; CHECK-PWR9-LE-NEXT: xor r12, r12, r0 +; CHECK-PWR9-LE-NEXT: sub r12, r12, r0 ; CHECK-PWR9-LE-NEXT: li r0, 10 ; CHECK-PWR9-LE-NEXT: vextubrx r30, r0, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r0, r0, v3 @@ -386,8 +386,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: clrlwi r0, r0, 24 ; CHECK-PWR9-LE-NEXT: sub r0, r30, r0 ; CHECK-PWR9-LE-NEXT: srawi r30, r0, 31 -; CHECK-PWR9-LE-NEXT: add r0, r0, r30 ; CHECK-PWR9-LE-NEXT: xor r0, r0, r30 +; CHECK-PWR9-LE-NEXT: sub r0, r0, r30 ; CHECK-PWR9-LE-NEXT: li r30, 11 ; CHECK-PWR9-LE-NEXT: vextubrx r29, r30, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r30, r30, v3 @@ -395,8 +395,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: clrlwi r30, r30, 24 ; CHECK-PWR9-LE-NEXT: sub r30, r29, r30 ; CHECK-PWR9-LE-NEXT: srawi r29, r30, 31 -; CHECK-PWR9-LE-NEXT: add r30, r30, r29 ; CHECK-PWR9-LE-NEXT: xor r30, r30, r29 +; CHECK-PWR9-LE-NEXT: sub r30, r30, r29 ; CHECK-PWR9-LE-NEXT: li r29, 12 ; CHECK-PWR9-LE-NEXT: vextubrx r28, r29, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r29, r29, v3 @@ -404,8 +404,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: clrlwi r29, r29, 24 ; CHECK-PWR9-LE-NEXT: sub r29, r28, r29 ; CHECK-PWR9-LE-NEXT: srawi r28, r29, 31 -; CHECK-PWR9-LE-NEXT: add r29, r29, r28 ; CHECK-PWR9-LE-NEXT: xor r29, r29, 
r28 +; CHECK-PWR9-LE-NEXT: sub r29, r29, r28 ; CHECK-PWR9-LE-NEXT: li r28, 13 ; CHECK-PWR9-LE-NEXT: vextubrx r27, r28, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r28, r28, v3 @@ -413,8 +413,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: clrlwi r28, r28, 24 ; CHECK-PWR9-LE-NEXT: sub r28, r27, r28 ; CHECK-PWR9-LE-NEXT: srawi r27, r28, 31 -; CHECK-PWR9-LE-NEXT: add r28, r28, r27 ; CHECK-PWR9-LE-NEXT: xor r28, r28, r27 +; CHECK-PWR9-LE-NEXT: sub r28, r28, r27 ; CHECK-PWR9-LE-NEXT: li r27, 14 ; CHECK-PWR9-LE-NEXT: vextubrx r26, r27, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r27, r27, v3 @@ -422,8 +422,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: clrlwi r27, r27, 24 ; CHECK-PWR9-LE-NEXT: sub r27, r26, r27 ; CHECK-PWR9-LE-NEXT: srawi r26, r27, 31 -; CHECK-PWR9-LE-NEXT: add r27, r27, r26 ; CHECK-PWR9-LE-NEXT: xor r27, r27, r26 +; CHECK-PWR9-LE-NEXT: sub r27, r27, r26 ; CHECK-PWR9-LE-NEXT: li r26, 15 ; CHECK-PWR9-LE-NEXT: vextubrx r25, r26, v2 ; CHECK-PWR9-LE-NEXT: vextubrx r26, r26, v3 @@ -441,10 +441,10 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-LE-NEXT: srawi r25, r26, 31 ; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3 ; CHECK-PWR9-LE-NEXT: mtvsrd v4, r9 -; CHECK-PWR9-LE-NEXT: add r26, r26, r25 +; CHECK-PWR9-LE-NEXT: xor r26, r26, r25 ; CHECK-PWR9-LE-NEXT: vmrghb v4, v5, v4 ; CHECK-PWR9-LE-NEXT: mtvsrd v5, r30 -; CHECK-PWR9-LE-NEXT: xor r26, r26, r25 +; CHECK-PWR9-LE-NEXT: sub r26, r26, r25 ; CHECK-PWR9-LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-PWR9-LE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload ; CHECK-PWR9-LE-NEXT: mtvsrd v0, r26 @@ -499,13 +499,13 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: srawi r6, r3, 31 ; CHECK-PWR9-BE-NEXT: srawi r7, r4, 31 ; CHECK-PWR9-BE-NEXT: srawi r8, r5, 31 -; CHECK-PWR9-BE-NEXT: add r3, r3, r6 -; CHECK-PWR9-BE-NEXT: 
add r4, r4, r7 -; CHECK-PWR9-BE-NEXT: add r5, r5, r8 ; CHECK-PWR9-BE-NEXT: xor r3, r3, r6 -; CHECK-PWR9-BE-NEXT: li r6, 3 ; CHECK-PWR9-BE-NEXT: xor r4, r4, r7 ; CHECK-PWR9-BE-NEXT: xor r5, r5, r8 +; CHECK-PWR9-BE-NEXT: sub r3, r3, r6 +; CHECK-PWR9-BE-NEXT: li r6, 3 +; CHECK-PWR9-BE-NEXT: sub r4, r4, r7 +; CHECK-PWR9-BE-NEXT: sub r5, r5, r8 ; CHECK-PWR9-BE-NEXT: vextublx r7, r6, v2 ; CHECK-PWR9-BE-NEXT: vextublx r6, r6, v3 ; CHECK-PWR9-BE-NEXT: mtvsrwz v1, r3 @@ -513,8 +513,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: clrlwi r6, r6, 24 ; CHECK-PWR9-BE-NEXT: sub r6, r7, r6 ; CHECK-PWR9-BE-NEXT: srawi r7, r6, 31 -; CHECK-PWR9-BE-NEXT: add r6, r6, r7 ; CHECK-PWR9-BE-NEXT: xor r6, r6, r7 +; CHECK-PWR9-BE-NEXT: sub r6, r6, r7 ; CHECK-PWR9-BE-NEXT: li r7, 4 ; CHECK-PWR9-BE-NEXT: vextublx r8, r7, v2 ; CHECK-PWR9-BE-NEXT: vextublx r7, r7, v3 @@ -522,8 +522,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: clrlwi r7, r7, 24 ; CHECK-PWR9-BE-NEXT: sub r7, r8, r7 ; CHECK-PWR9-BE-NEXT: srawi r8, r7, 31 -; CHECK-PWR9-BE-NEXT: add r7, r7, r8 ; CHECK-PWR9-BE-NEXT: xor r7, r7, r8 +; CHECK-PWR9-BE-NEXT: sub r7, r7, r8 ; CHECK-PWR9-BE-NEXT: li r8, 5 ; CHECK-PWR9-BE-NEXT: vextublx r9, r8, v2 ; CHECK-PWR9-BE-NEXT: vextublx r8, r8, v3 @@ -531,8 +531,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: clrlwi r8, r8, 24 ; CHECK-PWR9-BE-NEXT: sub r8, r9, r8 ; CHECK-PWR9-BE-NEXT: srawi r9, r8, 31 -; CHECK-PWR9-BE-NEXT: add r8, r8, r9 ; CHECK-PWR9-BE-NEXT: xor r8, r8, r9 +; CHECK-PWR9-BE-NEXT: sub r8, r8, r9 ; CHECK-PWR9-BE-NEXT: li r9, 6 ; CHECK-PWR9-BE-NEXT: vextublx r10, r9, v2 ; CHECK-PWR9-BE-NEXT: vextublx r9, r9, v3 @@ -540,8 +540,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: clrlwi r9, r9, 24 ; CHECK-PWR9-BE-NEXT: sub r9, r10, r9 ; CHECK-PWR9-BE-NEXT: 
srawi r10, r9, 31 -; CHECK-PWR9-BE-NEXT: add r9, r9, r10 ; CHECK-PWR9-BE-NEXT: xor r9, r9, r10 +; CHECK-PWR9-BE-NEXT: sub r9, r9, r10 ; CHECK-PWR9-BE-NEXT: li r10, 7 ; CHECK-PWR9-BE-NEXT: vextublx r11, r10, v2 ; CHECK-PWR9-BE-NEXT: vextublx r10, r10, v3 @@ -549,8 +549,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: clrlwi r10, r10, 24 ; CHECK-PWR9-BE-NEXT: sub r10, r11, r10 ; CHECK-PWR9-BE-NEXT: srawi r11, r10, 31 -; CHECK-PWR9-BE-NEXT: add r10, r10, r11 ; CHECK-PWR9-BE-NEXT: xor r10, r10, r11 +; CHECK-PWR9-BE-NEXT: sub r10, r10, r11 ; CHECK-PWR9-BE-NEXT: li r11, 8 ; CHECK-PWR9-BE-NEXT: vextublx r12, r11, v2 ; CHECK-PWR9-BE-NEXT: vextublx r11, r11, v3 @@ -558,8 +558,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: clrlwi r11, r11, 24 ; CHECK-PWR9-BE-NEXT: sub r11, r12, r11 ; CHECK-PWR9-BE-NEXT: srawi r12, r11, 31 -; CHECK-PWR9-BE-NEXT: add r11, r11, r12 ; CHECK-PWR9-BE-NEXT: xor r11, r11, r12 +; CHECK-PWR9-BE-NEXT: sub r11, r11, r12 ; CHECK-PWR9-BE-NEXT: li r12, 9 ; CHECK-PWR9-BE-NEXT: vextublx r0, r12, v2 ; CHECK-PWR9-BE-NEXT: vextublx r12, r12, v3 @@ -568,8 +568,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: clrlwi r12, r12, 24 ; CHECK-PWR9-BE-NEXT: sub r12, r0, r12 ; CHECK-PWR9-BE-NEXT: srawi r0, r12, 31 -; CHECK-PWR9-BE-NEXT: add r12, r12, r0 ; CHECK-PWR9-BE-NEXT: xor r12, r12, r0 +; CHECK-PWR9-BE-NEXT: sub r12, r12, r0 ; CHECK-PWR9-BE-NEXT: li r0, 10 ; CHECK-PWR9-BE-NEXT: vextublx r30, r0, v2 ; CHECK-PWR9-BE-NEXT: vextublx r0, r0, v3 @@ -577,8 +577,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: clrlwi r0, r0, 24 ; CHECK-PWR9-BE-NEXT: sub r0, r30, r0 ; CHECK-PWR9-BE-NEXT: srawi r30, r0, 31 -; CHECK-PWR9-BE-NEXT: add r0, r0, r30 ; CHECK-PWR9-BE-NEXT: xor r0, r0, r30 +; CHECK-PWR9-BE-NEXT: sub r0, r0, r30 ; CHECK-PWR9-BE-NEXT: li 
r30, 11 ; CHECK-PWR9-BE-NEXT: vextublx r29, r30, v2 ; CHECK-PWR9-BE-NEXT: vextublx r30, r30, v3 @@ -586,8 +586,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: clrlwi r30, r30, 24 ; CHECK-PWR9-BE-NEXT: sub r30, r29, r30 ; CHECK-PWR9-BE-NEXT: srawi r29, r30, 31 -; CHECK-PWR9-BE-NEXT: add r30, r30, r29 ; CHECK-PWR9-BE-NEXT: xor r30, r30, r29 +; CHECK-PWR9-BE-NEXT: sub r30, r30, r29 ; CHECK-PWR9-BE-NEXT: li r29, 12 ; CHECK-PWR9-BE-NEXT: vextublx r28, r29, v2 ; CHECK-PWR9-BE-NEXT: vextublx r29, r29, v3 @@ -595,8 +595,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: clrlwi r29, r29, 24 ; CHECK-PWR9-BE-NEXT: sub r29, r28, r29 ; CHECK-PWR9-BE-NEXT: srawi r28, r29, 31 -; CHECK-PWR9-BE-NEXT: add r29, r29, r28 ; CHECK-PWR9-BE-NEXT: xor r29, r29, r28 +; CHECK-PWR9-BE-NEXT: sub r29, r29, r28 ; CHECK-PWR9-BE-NEXT: li r28, 13 ; CHECK-PWR9-BE-NEXT: vextublx r27, r28, v2 ; CHECK-PWR9-BE-NEXT: vextublx r28, r28, v3 @@ -606,8 +606,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: clrlwi r28, r28, 24 ; CHECK-PWR9-BE-NEXT: sub r28, r27, r28 ; CHECK-PWR9-BE-NEXT: srawi r27, r28, 31 -; CHECK-PWR9-BE-NEXT: add r28, r28, r27 ; CHECK-PWR9-BE-NEXT: xor r28, r28, r27 +; CHECK-PWR9-BE-NEXT: sub r28, r28, r27 ; CHECK-PWR9-BE-NEXT: li r27, 14 ; CHECK-PWR9-BE-NEXT: vextublx r26, r27, v2 ; CHECK-PWR9-BE-NEXT: vextublx r27, r27, v3 @@ -615,8 +615,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: clrlwi r27, r27, 24 ; CHECK-PWR9-BE-NEXT: sub r27, r26, r27 ; CHECK-PWR9-BE-NEXT: srawi r26, r27, 31 -; CHECK-PWR9-BE-NEXT: add r27, r27, r26 ; CHECK-PWR9-BE-NEXT: xor r27, r27, r26 +; CHECK-PWR9-BE-NEXT: sub r27, r27, r26 ; CHECK-PWR9-BE-NEXT: li r26, 15 ; CHECK-PWR9-BE-NEXT: vextublx r25, r26, v2 ; CHECK-PWR9-BE-NEXT: vextublx r26, r26, v3 @@ -629,8 +629,8 @@ define <16 x i8> 
@sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR9-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload ; CHECK-PWR9-BE-NEXT: sub r26, r25, r26 ; CHECK-PWR9-BE-NEXT: srawi r25, r26, 31 -; CHECK-PWR9-BE-NEXT: add r26, r26, r25 ; CHECK-PWR9-BE-NEXT: xor r26, r26, r25 +; CHECK-PWR9-BE-NEXT: sub r26, r26, r25 ; CHECK-PWR9-BE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload ; CHECK-PWR9-BE-NEXT: mtvsrwz v2, r26 ; CHECK-PWR9-BE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload @@ -707,25 +707,25 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR8-NEXT: srawi r12, r7, 31 ; CHECK-PWR8-NEXT: clrlwi r10, r0, 24 ; CHECK-PWR8-NEXT: clrlwi r0, r30, 24 -; CHECK-PWR8-NEXT: add r4, r4, r3 -; CHECK-PWR8-NEXT: add r7, r7, r12 +; CHECK-PWR8-NEXT: xor r4, r4, r3 +; CHECK-PWR8-NEXT: xor r7, r7, r12 ; CHECK-PWR8-NEXT: sub r10, r10, r0 ; CHECK-PWR8-NEXT: std r20, -96(r1) # 8-byte Folded Spill ; CHECK-PWR8-NEXT: std r21, -88(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: xor r3, r4, r3 +; CHECK-PWR8-NEXT: sub r3, r4, r3 ; CHECK-PWR8-NEXT: srawi r4, r9, 31 -; CHECK-PWR8-NEXT: xor r7, r7, r12 +; CHECK-PWR8-NEXT: sub r7, r7, r12 ; CHECK-PWR8-NEXT: std r22, -80(r1) # 8-byte Folded Spill ; CHECK-PWR8-NEXT: rldicl r29, r5, 24, 56 ; CHECK-PWR8-NEXT: rldicl r28, r6, 24, 56 -; CHECK-PWR8-NEXT: add r9, r9, r4 +; CHECK-PWR8-NEXT: xor r9, r9, r4 ; CHECK-PWR8-NEXT: mtvsrd v3, r7 ; CHECK-PWR8-NEXT: rldicl r27, r5, 16, 56 ; CHECK-PWR8-NEXT: rldicl r25, r6, 16, 56 ; CHECK-PWR8-NEXT: clrlwi r30, r29, 24 ; CHECK-PWR8-NEXT: clrlwi r29, r28, 24 ; CHECK-PWR8-NEXT: mtvsrd v2, r3 -; CHECK-PWR8-NEXT: xor r4, r9, r4 +; CHECK-PWR8-NEXT: sub r4, r9, r4 ; CHECK-PWR8-NEXT: srawi r7, r10, 31 ; CHECK-PWR8-NEXT: srawi r3, r11, 31 ; CHECK-PWR8-NEXT: clrlwi r9, r27, 24 @@ -733,15 +733,15 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR8-NEXT: sub r0, r30, r29 ; CHECK-PWR8-NEXT: mtvsrd v4, r4 ; CHECK-PWR8-NEXT: std r23, 
-72(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: add r10, r10, r7 -; CHECK-PWR8-NEXT: add r11, r11, r3 +; CHECK-PWR8-NEXT: xor r10, r10, r7 +; CHECK-PWR8-NEXT: xor r11, r11, r3 ; CHECK-PWR8-NEXT: sub r9, r9, r12 ; CHECK-PWR8-NEXT: std r18, -112(r1) # 8-byte Folded Spill ; CHECK-PWR8-NEXT: std r19, -104(r1) # 8-byte Folded Spill ; CHECK-PWR8-NEXT: vmrghb v2, v3, v2 -; CHECK-PWR8-NEXT: xor r7, r10, r7 +; CHECK-PWR8-NEXT: sub r7, r10, r7 ; CHECK-PWR8-NEXT: rldicl r5, r5, 8, 56 -; CHECK-PWR8-NEXT: xor r3, r11, r3 +; CHECK-PWR8-NEXT: sub r3, r11, r3 ; CHECK-PWR8-NEXT: rldicl r6, r6, 8, 56 ; CHECK-PWR8-NEXT: srawi r4, r0, 31 ; CHECK-PWR8-NEXT: mtvsrd v0, r7 @@ -754,13 +754,13 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR8-NEXT: clrlwi r5, r6, 24 ; CHECK-PWR8-NEXT: clrldi r22, r24, 56 ; CHECK-PWR8-NEXT: rldicl r21, r26, 56, 56 -; CHECK-PWR8-NEXT: add r10, r0, r4 -; CHECK-PWR8-NEXT: add r9, r9, r7 +; CHECK-PWR8-NEXT: xor r10, r0, r4 +; CHECK-PWR8-NEXT: xor r9, r9, r7 ; CHECK-PWR8-NEXT: rldicl r20, r24, 56, 56 ; CHECK-PWR8-NEXT: rldicl r19, r26, 48, 56 ; CHECK-PWR8-NEXT: sub r3, r3, r5 -; CHECK-PWR8-NEXT: xor r4, r10, r4 -; CHECK-PWR8-NEXT: xor r7, r9, r7 +; CHECK-PWR8-NEXT: sub r4, r10, r4 +; CHECK-PWR8-NEXT: sub r7, r9, r7 ; CHECK-PWR8-NEXT: clrlwi r9, r23, 24 ; CHECK-PWR8-NEXT: rldicl r18, r24, 48, 56 ; CHECK-PWR8-NEXT: clrlwi r10, r22, 24 @@ -779,7 +779,7 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR8-NEXT: clrlwi r12, r18, 24 ; CHECK-PWR8-NEXT: vmrghb v4, v5, v4 ; CHECK-PWR8-NEXT: std r31, -8(r1) # 8-byte Folded Spill -; CHECK-PWR8-NEXT: add r3, r3, r4 +; CHECK-PWR8-NEXT: xor r3, r3, r4 ; CHECK-PWR8-NEXT: sub r7, r11, r12 ; CHECK-PWR8-NEXT: clrlwi r11, r17, 24 ; CHECK-PWR8-NEXT: clrlwi r12, r16, 24 @@ -787,7 +787,7 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR8-NEXT: std r2, -152(r1) # 8-byte Folded Spill ; CHECK-PWR8-NEXT: 
rldicl r15, r26, 32, 56 ; CHECK-PWR8-NEXT: rldicl r14, r24, 32, 56 -; CHECK-PWR8-NEXT: xor r3, r3, r4 +; CHECK-PWR8-NEXT: sub r3, r3, r4 ; CHECK-PWR8-NEXT: sub r11, r11, r12 ; CHECK-PWR8-NEXT: srawi r4, r9, 31 ; CHECK-PWR8-NEXT: srawi r12, r10, 31 @@ -795,40 +795,40 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR8-NEXT: clrlwi r30, r14, 24 ; CHECK-PWR8-NEXT: mtvsrd v5, r3 ; CHECK-PWR8-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: add r9, r9, r4 -; CHECK-PWR8-NEXT: add r10, r10, r12 +; CHECK-PWR8-NEXT: xor r9, r9, r4 +; CHECK-PWR8-NEXT: xor r10, r10, r12 ; CHECK-PWR8-NEXT: sub r3, r0, r30 ; CHECK-PWR8-NEXT: ld r25, -56(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r23, -72(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r22, -80(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: srawi r28, r11, 31 -; CHECK-PWR8-NEXT: xor r4, r9, r4 -; CHECK-PWR8-NEXT: xor r10, r10, r12 +; CHECK-PWR8-NEXT: sub r4, r9, r4 +; CHECK-PWR8-NEXT: sub r10, r10, r12 ; CHECK-PWR8-NEXT: vmrghb v3, v5, v3 ; CHECK-PWR8-NEXT: ld r21, -88(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r20, -96(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: srawi r29, r7, 31 ; CHECK-PWR8-NEXT: srawi r9, r3, 31 ; CHECK-PWR8-NEXT: mtvsrd v5, r4 -; CHECK-PWR8-NEXT: add r4, r11, r28 +; CHECK-PWR8-NEXT: xor r4, r11, r28 ; CHECK-PWR8-NEXT: ld r19, -104(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r18, -112(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: mtvsrd v1, r10 ; CHECK-PWR8-NEXT: ld r10, -160(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: rldicl r31, r26, 24, 56 ; CHECK-PWR8-NEXT: rldicl r2, r24, 24, 56 -; CHECK-PWR8-NEXT: add r7, r7, r29 -; CHECK-PWR8-NEXT: add r3, r3, r9 +; CHECK-PWR8-NEXT: xor r7, r7, r29 +; CHECK-PWR8-NEXT: xor r3, r3, r9 ; CHECK-PWR8-NEXT: rldicl r8, r24, 16, 56 ; CHECK-PWR8-NEXT: rldicl r6, r26, 8, 56 -; CHECK-PWR8-NEXT: xor r4, r4, r28 +; CHECK-PWR8-NEXT: sub r4, r4, r28 ; CHECK-PWR8-NEXT: clrlwi r0, r31, 24 ; 
CHECK-PWR8-NEXT: clrlwi r30, r2, 24 -; CHECK-PWR8-NEXT: xor r7, r7, r29 +; CHECK-PWR8-NEXT: sub r7, r7, r29 ; CHECK-PWR8-NEXT: rldicl r5, r24, 8, 56 ; CHECK-PWR8-NEXT: clrlwi r10, r10, 24 ; CHECK-PWR8-NEXT: clrlwi r8, r8, 24 -; CHECK-PWR8-NEXT: xor r3, r3, r9 +; CHECK-PWR8-NEXT: sub r3, r3, r9 ; CHECK-PWR8-NEXT: mtvsrd v7, r4 ; CHECK-PWR8-NEXT: clrlwi r4, r6, 24 ; CHECK-PWR8-NEXT: clrlwi r5, r5, 24 @@ -845,18 +845,18 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: srawi r6, r7, 31 ; CHECK-PWR8-NEXT: srawi r5, r3, 31 -; CHECK-PWR8-NEXT: add r8, r0, r12 +; CHECK-PWR8-NEXT: xor r8, r0, r12 ; CHECK-PWR8-NEXT: vmrghb v5, v1, v5 ; CHECK-PWR8-NEXT: ld r26, -48(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r24, -64(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: add r4, r7, r6 -; CHECK-PWR8-NEXT: add r3, r3, r5 -; CHECK-PWR8-NEXT: xor r8, r8, r12 +; CHECK-PWR8-NEXT: xor r4, r7, r6 +; CHECK-PWR8-NEXT: xor r3, r3, r5 +; CHECK-PWR8-NEXT: sub r8, r8, r12 ; CHECK-PWR8-NEXT: vmrghb v6, v7, v6 ; CHECK-PWR8-NEXT: ld r17, -120(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r16, -128(r1) # 8-byte Folded Reload -; CHECK-PWR8-NEXT: xor r4, r4, r6 -; CHECK-PWR8-NEXT: xor r3, r3, r5 +; CHECK-PWR8-NEXT: sub r4, r4, r6 +; CHECK-PWR8-NEXT: sub r3, r3, r5 ; CHECK-PWR8-NEXT: mtvsrd v9, r8 ; CHECK-PWR8-NEXT: ld r15, -136(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r14, -144(r1) # 8-byte Folded Reload @@ -875,15 +875,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; ; CHECK-PWR7-LABEL: sub_absv_8_ext: ; CHECK-PWR7: # %bb.0: # %entry -; CHECK-PWR7-NEXT: stdu r1, -464(r1) -; CHECK-PWR7-NEXT: .cfi_def_cfa_offset 464 -; CHECK-PWR7-NEXT: .cfi_offset r16, -128 -; CHECK-PWR7-NEXT: .cfi_offset r17, -120 -; CHECK-PWR7-NEXT: .cfi_offset r18, -112 -; CHECK-PWR7-NEXT: .cfi_offset r19, -104 -; CHECK-PWR7-NEXT: .cfi_offset r20, -96 -; CHECK-PWR7-NEXT: 
.cfi_offset r21, -88 -; CHECK-PWR7-NEXT: .cfi_offset r22, -80 +; CHECK-PWR7-NEXT: stdu r1, -416(r1) +; CHECK-PWR7-NEXT: .cfi_def_cfa_offset 416 ; CHECK-PWR7-NEXT: .cfi_offset r23, -72 ; CHECK-PWR7-NEXT: .cfi_offset r24, -64 ; CHECK-PWR7-NEXT: .cfi_offset r25, -56 @@ -893,167 +886,156 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR7-NEXT: .cfi_offset r29, -24 ; CHECK-PWR7-NEXT: .cfi_offset r30, -16 ; CHECK-PWR7-NEXT: addi r3, r1, 304 -; CHECK-PWR7-NEXT: std r16, 336(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r23, 344(r1) # 8-byte Folded Spill ; CHECK-PWR7-NEXT: addi r4, r1, 320 -; CHECK-PWR7-NEXT: std r17, 344(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r18, 352(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r19, 360(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r20, 368(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r21, 376(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r22, 384(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r23, 392(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r24, 400(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r25, 408(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r26, 416(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r27, 424(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r28, 432(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r29, 440(r1) # 8-byte Folded Spill -; CHECK-PWR7-NEXT: std r30, 448(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r24, 352(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r25, 360(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r26, 368(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r27, 376(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r28, 384(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r29, 392(r1) # 8-byte Folded Spill +; CHECK-PWR7-NEXT: std r30, 400(r1) # 8-byte Folded Spill ; CHECK-PWR7-NEXT: stxvw4x v2, 0, r3 ; CHECK-PWR7-NEXT: lbz r3, 304(r1) ; CHECK-PWR7-NEXT: stxvw4x v3, 0, r4 -; 
CHECK-PWR7-NEXT: lbz r4, 320(r1) -; CHECK-PWR7-NEXT: lbz r5, 305(r1) -; CHECK-PWR7-NEXT: lbz r6, 321(r1) -; CHECK-PWR7-NEXT: lbz r7, 306(r1) -; CHECK-PWR7-NEXT: lbz r8, 322(r1) ; CHECK-PWR7-NEXT: lbz r9, 307(r1) -; CHECK-PWR7-NEXT: sub r3, r3, r4 ; CHECK-PWR7-NEXT: lbz r10, 323(r1) ; CHECK-PWR7-NEXT: lbz r11, 308(r1) -; CHECK-PWR7-NEXT: sub r5, r5, r6 ; CHECK-PWR7-NEXT: lbz r12, 324(r1) ; CHECK-PWR7-NEXT: lbz r0, 309(r1) -; CHECK-PWR7-NEXT: sub r6, r7, r8 ; CHECK-PWR7-NEXT: lbz r30, 325(r1) -; CHECK-PWR7-NEXT: lbz r29, 310(r1) ; CHECK-PWR7-NEXT: sub r9, r9, r10 +; CHECK-PWR7-NEXT: lbz r29, 310(r1) ; CHECK-PWR7-NEXT: lbz r28, 326(r1) -; CHECK-PWR7-NEXT: lbz r23, 313(r1) -; CHECK-PWR7-NEXT: sub r10, r11, r12 -; CHECK-PWR7-NEXT: lbz r22, 329(r1) -; CHECK-PWR7-NEXT: lbz r4, 314(r1) -; CHECK-PWR7-NEXT: sub r0, r0, r30 -; CHECK-PWR7-NEXT: lbz r21, 330(r1) -; CHECK-PWR7-NEXT: lbz r7, 315(r1) -; CHECK-PWR7-NEXT: sub r30, r29, r28 -; CHECK-PWR7-NEXT: srawi r20, r0, 31 -; CHECK-PWR7-NEXT: lbz r8, 331(r1) -; CHECK-PWR7-NEXT: lbz r11, 316(r1) -; CHECK-PWR7-NEXT: sub r23, r23, r22 -; CHECK-PWR7-NEXT: srawi r19, r30, 31 -; CHECK-PWR7-NEXT: lbz r12, 332(r1) -; CHECK-PWR7-NEXT: lbz r29, 317(r1) -; CHECK-PWR7-NEXT: sub r4, r4, r21 -; CHECK-PWR7-NEXT: add r0, r0, r20 -; CHECK-PWR7-NEXT: lbz r28, 333(r1) -; CHECK-PWR7-NEXT: lbz r22, 319(r1) -; CHECK-PWR7-NEXT: sub r7, r7, r8 -; CHECK-PWR7-NEXT: add r30, r30, r19 -; CHECK-PWR7-NEXT: lbz r21, 335(r1) +; CHECK-PWR7-NEXT: sub r11, r11, r12 ; CHECK-PWR7-NEXT: lbz r27, 311(r1) -; CHECK-PWR7-NEXT: sub r8, r11, r12 -; CHECK-PWR7-NEXT: xor r0, r0, r20 ; CHECK-PWR7-NEXT: lbz r26, 327(r1) +; CHECK-PWR7-NEXT: sub r0, r0, r30 ; CHECK-PWR7-NEXT: lbz r25, 312(r1) -; CHECK-PWR7-NEXT: sub r11, r29, r28 -; CHECK-PWR7-NEXT: srawi r28, r3, 31 ; CHECK-PWR7-NEXT: lbz r24, 328(r1) -; CHECK-PWR7-NEXT: sub r29, r22, r21 -; CHECK-PWR7-NEXT: add r3, r3, r28 -; CHECK-PWR7-NEXT: xor r30, r30, r19 +; CHECK-PWR7-NEXT: sub r29, r29, r28 +; CHECK-PWR7-NEXT: lbz r10, 
315(r1) +; CHECK-PWR7-NEXT: lbz r12, 331(r1) ; CHECK-PWR7-NEXT: sub r27, r27, r26 -; CHECK-PWR7-NEXT: srawi r17, r29, 31 +; CHECK-PWR7-NEXT: lbz r30, 316(r1) +; CHECK-PWR7-NEXT: lbz r28, 332(r1) +; CHECK-PWR7-NEXT: sub r25, r25, r24 +; CHECK-PWR7-NEXT: lbz r4, 320(r1) +; CHECK-PWR7-NEXT: lbz r5, 305(r1) +; CHECK-PWR7-NEXT: sub r10, r10, r12 +; CHECK-PWR7-NEXT: lbz r6, 321(r1) +; CHECK-PWR7-NEXT: lbz r26, 317(r1) +; CHECK-PWR7-NEXT: sub r30, r30, r28 +; CHECK-PWR7-NEXT: lbz r24, 333(r1) +; CHECK-PWR7-NEXT: lbz r12, 319(r1) +; CHECK-PWR7-NEXT: sub r3, r3, r4 +; CHECK-PWR7-NEXT: lbz r28, 335(r1) +; CHECK-PWR7-NEXT: lbz r7, 306(r1) +; CHECK-PWR7-NEXT: sub r5, r5, r6 +; CHECK-PWR7-NEXT: lbz r8, 322(r1) +; CHECK-PWR7-NEXT: sub r26, r26, r24 +; CHECK-PWR7-NEXT: srawi r24, r5, 31 +; CHECK-PWR7-NEXT: lbz r23, 313(r1) +; CHECK-PWR7-NEXT: sub r12, r12, r28 +; CHECK-PWR7-NEXT: srawi r28, r3, 31 +; CHECK-PWR7-NEXT: xor r5, r5, r24 +; CHECK-PWR7-NEXT: lbz r4, 329(r1) +; CHECK-PWR7-NEXT: sub r7, r7, r8 ; CHECK-PWR7-NEXT: xor r3, r3, r28 -; CHECK-PWR7-NEXT: ld r20, 368(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: sub r26, r25, r24 -; CHECK-PWR7-NEXT: lbz r25, 318(r1) -; CHECK-PWR7-NEXT: lbz r24, 334(r1) -; CHECK-PWR7-NEXT: add r29, r29, r17 -; CHECK-PWR7-NEXT: xor r29, r29, r17 -; CHECK-PWR7-NEXT: srawi r18, r27, 31 -; CHECK-PWR7-NEXT: ld r19, 360(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: sub r12, r25, r24 -; CHECK-PWR7-NEXT: stb r29, 288(r1) -; CHECK-PWR7-NEXT: add r28, r27, r18 -; CHECK-PWR7-NEXT: srawi r29, r12, 31 -; CHECK-PWR7-NEXT: srawi r16, r26, 31 -; CHECK-PWR7-NEXT: xor r28, r28, r18 -; CHECK-PWR7-NEXT: ld r18, 352(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: add r12, r12, r29 -; CHECK-PWR7-NEXT: add r27, r26, r16 -; CHECK-PWR7-NEXT: xor r12, r12, r29 -; CHECK-PWR7-NEXT: srawi r29, r7, 31 -; CHECK-PWR7-NEXT: xor r27, r27, r16 -; CHECK-PWR7-NEXT: ld r16, 336(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: srawi r26, r8, 31 -; CHECK-PWR7-NEXT: srawi r25, r5, 31 -; 
CHECK-PWR7-NEXT: add r7, r7, r29 -; CHECK-PWR7-NEXT: add r8, r8, r26 -; CHECK-PWR7-NEXT: srawi r24, r6, 31 -; CHECK-PWR7-NEXT: add r5, r5, r25 -; CHECK-PWR7-NEXT: xor r7, r7, r29 -; CHECK-PWR7-NEXT: srawi r22, r9, 31 -; CHECK-PWR7-NEXT: srawi r21, r10, 31 -; CHECK-PWR7-NEXT: xor r8, r8, r26 -; CHECK-PWR7-NEXT: xor r5, r5, r25 -; CHECK-PWR7-NEXT: srawi r17, r11, 31 -; CHECK-PWR7-NEXT: srawi r26, r23, 31 -; CHECK-PWR7-NEXT: add r6, r6, r24 -; CHECK-PWR7-NEXT: add r9, r9, r22 -; CHECK-PWR7-NEXT: srawi r29, r4, 31 -; CHECK-PWR7-NEXT: add r10, r10, r21 -; CHECK-PWR7-NEXT: add r11, r11, r17 -; CHECK-PWR7-NEXT: add r25, r23, r26 -; CHECK-PWR7-NEXT: add r4, r4, r29 -; CHECK-PWR7-NEXT: xor r6, r6, r24 -; CHECK-PWR7-NEXT: xor r9, r9, r22 -; CHECK-PWR7-NEXT: xor r10, r10, r21 -; CHECK-PWR7-NEXT: xor r11, r11, r17 -; CHECK-PWR7-NEXT: xor r4, r4, r29 -; CHECK-PWR7-NEXT: xor r26, r25, r26 -; CHECK-PWR7-NEXT: addi r29, r1, 224 -; CHECK-PWR7-NEXT: stb r12, 272(r1) +; CHECK-PWR7-NEXT: lbz r6, 314(r1) +; CHECK-PWR7-NEXT: lbz r8, 330(r1) +; CHECK-PWR7-NEXT: sub r3, r3, r28 +; CHECK-PWR7-NEXT: srawi r28, r7, 31 +; CHECK-PWR7-NEXT: sub r5, r5, r24 +; CHECK-PWR7-NEXT: srawi r24, r9, 31 +; CHECK-PWR7-NEXT: xor r7, r7, r28 +; CHECK-PWR7-NEXT: xor r9, r9, r24 +; CHECK-PWR7-NEXT: sub r7, r7, r28 +; CHECK-PWR7-NEXT: srawi r28, r11, 31 +; CHECK-PWR7-NEXT: sub r9, r9, r24 +; CHECK-PWR7-NEXT: srawi r24, r0, 31 +; CHECK-PWR7-NEXT: xor r11, r11, r28 +; CHECK-PWR7-NEXT: xor r0, r0, r24 +; CHECK-PWR7-NEXT: sub r11, r11, r28 +; CHECK-PWR7-NEXT: srawi r28, r29, 31 +; CHECK-PWR7-NEXT: sub r0, r0, r24 +; CHECK-PWR7-NEXT: srawi r24, r27, 31 +; CHECK-PWR7-NEXT: sub r4, r23, r4 +; CHECK-PWR7-NEXT: xor r29, r29, r28 +; CHECK-PWR7-NEXT: lbz r23, 318(r1) +; CHECK-PWR7-NEXT: xor r27, r27, r24 +; CHECK-PWR7-NEXT: sub r29, r29, r28 +; CHECK-PWR7-NEXT: srawi r28, r25, 31 +; CHECK-PWR7-NEXT: sub r27, r27, r24 +; CHECK-PWR7-NEXT: srawi r24, r4, 31 +; CHECK-PWR7-NEXT: sub r6, r6, r8 +; CHECK-PWR7-NEXT: xor r25, 
r25, r28 +; CHECK-PWR7-NEXT: lbz r8, 334(r1) +; CHECK-PWR7-NEXT: xor r4, r4, r24 +; CHECK-PWR7-NEXT: sub r28, r25, r28 +; CHECK-PWR7-NEXT: srawi r25, r6, 31 +; CHECK-PWR7-NEXT: sub r4, r4, r24 +; CHECK-PWR7-NEXT: srawi r24, r10, 31 +; CHECK-PWR7-NEXT: xor r6, r6, r25 +; CHECK-PWR7-NEXT: xor r10, r10, r24 +; CHECK-PWR7-NEXT: sub r6, r6, r25 +; CHECK-PWR7-NEXT: srawi r25, r30, 31 +; CHECK-PWR7-NEXT: sub r10, r10, r24 +; CHECK-PWR7-NEXT: srawi r24, r26, 31 +; CHECK-PWR7-NEXT: sub r8, r23, r8 +; CHECK-PWR7-NEXT: xor r30, r30, r25 +; CHECK-PWR7-NEXT: ld r23, 344(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: xor r26, r26, r24 +; CHECK-PWR7-NEXT: sub r30, r30, r25 +; CHECK-PWR7-NEXT: srawi r25, r12, 31 +; CHECK-PWR7-NEXT: sub r26, r26, r24 +; CHECK-PWR7-NEXT: srawi r24, r8, 31 +; CHECK-PWR7-NEXT: xor r12, r12, r25 +; CHECK-PWR7-NEXT: xor r8, r8, r24 +; CHECK-PWR7-NEXT: sub r12, r12, r25 +; CHECK-PWR7-NEXT: addi r25, r1, 272 +; CHECK-PWR7-NEXT: sub r8, r8, r24 +; CHECK-PWR7-NEXT: stb r12, 288(r1) ; CHECK-PWR7-NEXT: addi r12, r1, 288 -; CHECK-PWR7-NEXT: addi r25, r1, 208 -; CHECK-PWR7-NEXT: stb r11, 256(r1) -; CHECK-PWR7-NEXT: addi r11, r1, 272 -; CHECK-PWR7-NEXT: ld r24, 400(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: stb r8, 240(r1) -; CHECK-PWR7-NEXT: stb r7, 224(r1) -; CHECK-PWR7-NEXT: stb r4, 208(r1) -; CHECK-PWR7-NEXT: stb r26, 192(r1) -; CHECK-PWR7-NEXT: stb r27, 176(r1) -; CHECK-PWR7-NEXT: stb r28, 160(r1) -; CHECK-PWR7-NEXT: stb r30, 144(r1) +; CHECK-PWR7-NEXT: stb r8, 272(r1) +; CHECK-PWR7-NEXT: stb r26, 256(r1) +; CHECK-PWR7-NEXT: stb r30, 240(r1) +; CHECK-PWR7-NEXT: stb r10, 224(r1) +; CHECK-PWR7-NEXT: stb r6, 208(r1) +; CHECK-PWR7-NEXT: stb r4, 192(r1) +; CHECK-PWR7-NEXT: stb r28, 176(r1) +; CHECK-PWR7-NEXT: stb r27, 160(r1) +; CHECK-PWR7-NEXT: stb r29, 144(r1) ; CHECK-PWR7-NEXT: stb r0, 128(r1) -; CHECK-PWR7-NEXT: stb r10, 112(r1) +; CHECK-PWR7-NEXT: stb r11, 112(r1) ; CHECK-PWR7-NEXT: stb r9, 96(r1) -; CHECK-PWR7-NEXT: stb r6, 80(r1) +; CHECK-PWR7-NEXT: 
stb r7, 80(r1) ; CHECK-PWR7-NEXT: stb r5, 64(r1) ; CHECK-PWR7-NEXT: stb r3, 48(r1) ; CHECK-PWR7-NEXT: addi r8, r1, 256 -; CHECK-PWR7-NEXT: addi r7, r1, 240 +; CHECK-PWR7-NEXT: addi r26, r1, 240 ; CHECK-PWR7-NEXT: lxvw4x v2, 0, r12 -; CHECK-PWR7-NEXT: lxvw4x v3, 0, r11 +; CHECK-PWR7-NEXT: lxvw4x v3, 0, r25 +; CHECK-PWR7-NEXT: addi r10, r1, 224 +; CHECK-PWR7-NEXT: addi r30, r1, 208 ; CHECK-PWR7-NEXT: addi r3, r1, 192 ; CHECK-PWR7-NEXT: addi r4, r1, 176 ; CHECK-PWR7-NEXT: addi r5, r1, 160 ; CHECK-PWR7-NEXT: addi r6, r1, 144 ; CHECK-PWR7-NEXT: lxvw4x v4, 0, r8 -; CHECK-PWR7-NEXT: lxvw4x v5, 0, r7 -; CHECK-PWR7-NEXT: lxvw4x v0, 0, r29 -; CHECK-PWR7-NEXT: lxvw4x v1, 0, r25 +; CHECK-PWR7-NEXT: lxvw4x v5, 0, r26 ; CHECK-PWR7-NEXT: addi r7, r1, 128 ; CHECK-PWR7-NEXT: addi r8, r1, 112 -; CHECK-PWR7-NEXT: lxvw4x v6, 0, r3 -; CHECK-PWR7-NEXT: lxvw4x v7, 0, r4 +; CHECK-PWR7-NEXT: lxvw4x v0, 0, r10 +; CHECK-PWR7-NEXT: lxvw4x v1, 0, r30 ; CHECK-PWR7-NEXT: vmrghb v2, v3, v2 ; CHECK-PWR7-NEXT: addi r9, r1, 96 -; CHECK-PWR7-NEXT: lxvw4x v3, 0, r5 -; CHECK-PWR7-NEXT: lxvw4x v8, 0, r6 +; CHECK-PWR7-NEXT: lxvw4x v6, 0, r3 +; CHECK-PWR7-NEXT: lxvw4x v7, 0, r4 ; CHECK-PWR7-NEXT: addi r3, r1, 80 ; CHECK-PWR7-NEXT: addi r4, r1, 64 +; CHECK-PWR7-NEXT: lxvw4x v3, 0, r5 +; CHECK-PWR7-NEXT: lxvw4x v8, 0, r6 ; CHECK-PWR7-NEXT: addi r5, r1, 48 ; CHECK-PWR7-NEXT: vmrghb v4, v5, v4 ; CHECK-PWR7-NEXT: lxvw4x v5, 0, r7 @@ -1063,29 +1045,26 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr ; CHECK-PWR7-NEXT: lxvw4x v10, 0, r3 ; CHECK-PWR7-NEXT: vmrghb v6, v7, v6 ; CHECK-PWR7-NEXT: lxvw4x v7, 0, r4 -; CHECK-PWR7-NEXT: ld r30, 448(r1) # 8-byte Folded Reload ; CHECK-PWR7-NEXT: vmrghb v3, v8, v3 ; CHECK-PWR7-NEXT: lxvw4x v8, 0, r5 -; CHECK-PWR7-NEXT: ld r29, 440(r1) # 8-byte Folded Reload ; CHECK-PWR7-NEXT: vmrghb v5, v9, v5 -; CHECK-PWR7-NEXT: ld r28, 432(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: ld r27, 424(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: ld r30, 
400(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: ld r29, 392(r1) # 8-byte Folded Reload ; CHECK-PWR7-NEXT: vmrghb v1, v10, v1 -; CHECK-PWR7-NEXT: ld r26, 416(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: ld r25, 408(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: ld r28, 384(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: ld r27, 376(r1) # 8-byte Folded Reload ; CHECK-PWR7-NEXT: vmrghb v7, v8, v7 -; CHECK-PWR7-NEXT: ld r23, 392(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: ld r22, 384(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: ld r26, 368(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: ld r25, 360(r1) # 8-byte Folded Reload ; CHECK-PWR7-NEXT: vmrghh v2, v4, v2 -; CHECK-PWR7-NEXT: ld r21, 376(r1) # 8-byte Folded Reload -; CHECK-PWR7-NEXT: ld r17, 344(r1) # 8-byte Folded Reload +; CHECK-PWR7-NEXT: ld r24, 352(r1) # 8-byte Folded Reload ; CHECK-PWR7-NEXT: vmrghh v4, v6, v0 ; CHECK-PWR7-NEXT: vmrghh v3, v5, v3 ; CHECK-PWR7-NEXT: vmrghh v5, v7, v1 ; CHECK-PWR7-NEXT: vmrghw v2, v4, v2 ; CHECK-PWR7-NEXT: vmrghw v3, v5, v3 ; CHECK-PWR7-NEXT: xxmrghd v2, v3, v2 -; CHECK-PWR7-NEXT: addi r1, r1, 464 +; CHECK-PWR7-NEXT: addi r1, r1, 416 ; CHECK-PWR7-NEXT: blr entry: %vecext = extractelement <16 x i8> %a, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index 64c9e35146f633..29e481198246c7 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -734,8 +734,8 @@ define i32 @abs_i32(i32 %x) { ; RV32I-LABEL: abs_i32: ; RV32I: # %bb.0: ; RV32I-NEXT: srai a1, a0, 31 -; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: abs_i32: diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll index a5e3061f50953e..0127ac4d33a5b2 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -947,8 +947,8 @@ define i32 @abs_i32(i32 %x) { ; RV64I: # %bb.0: ; RV64I-NEXT: sext.w a0, a0 ; 
RV64I-NEXT: srai a1, a0, 63 -; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: abs_i32: @@ -961,14 +961,13 @@ define i32 @abs_i32(i32 %x) { ret i32 %abs } -; FIXME: We can remove the sext.w by using addw for RV64I and negw for RV64ZBB. +; FIXME: We can remove the sext.w on RV64ZBB by using negw. define signext i32 @abs_i32_sext(i32 signext %x) { ; RV64I-LABEL: abs_i32_sext: ; RV64I: # %bb.0: ; RV64I-NEXT: srai a1, a0, 63 -; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: subw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: abs_i32_sext: @@ -987,8 +986,8 @@ define i64 @abs_i64(i64 %x) { ; RV64I-LABEL: abs_i64: ; RV64I: # %bb.0: ; RV64I-NEXT: srai a1, a0, 63 -; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: abs_i64: diff --git a/llvm/test/CodeGen/Thumb/iabs.ll b/llvm/test/CodeGen/Thumb/iabs.ll index 2d51288b5242a5..6bebea67e265a3 100644 --- a/llvm/test/CodeGen/Thumb/iabs.ll +++ b/llvm/test/CodeGen/Thumb/iabs.ll @@ -6,8 +6,8 @@ define i8 @test_i8(i8 %a) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: sxtb r1, r0 ; CHECK-NEXT: asrs r1, r1, #7 -; CHECK-NEXT: adds r0, r0, r1 ; CHECK-NEXT: eors r0, r1 +; CHECK-NEXT: subs r0, r0, r1 ; CHECK-NEXT: bx lr %tmp1neg = sub i8 0, %a %b = icmp sgt i8 %a, -1 @@ -20,8 +20,8 @@ define i16 @test_i16(i16 %a) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: sxth r1, r0 ; CHECK-NEXT: asrs r1, r1, #15 -; CHECK-NEXT: adds r0, r0, r1 ; CHECK-NEXT: eors r0, r1 +; CHECK-NEXT: subs r0, r0, r1 ; CHECK-NEXT: bx lr %tmp1neg = sub i16 0, %a %b = icmp sgt i16 %a, -1 @@ -33,8 +33,8 @@ define i32 @test_i32(i32 %a) nounwind { ; CHECK-LABEL: test_i32: ; CHECK: @ %bb.0: ; CHECK-NEXT: asrs r1, r0, #31 -; CHECK-NEXT: adds r0, r0, r1 ; CHECK-NEXT: eors r0, r1 +; CHECK-NEXT: subs r0, r0, r1 ; CHECK-NEXT: bx lr %tmp1neg = sub i32 0, %a %b = icmp sgt i32 %a, -1 @@ -46,10 
+46,10 @@ define i64 @test_i64(i64 %a) nounwind { ; CHECK-LABEL: test_i64: ; CHECK: @ %bb.0: ; CHECK-NEXT: asrs r2, r1, #31 -; CHECK-NEXT: adds r0, r0, r2 -; CHECK-NEXT: adcs r1, r2 -; CHECK-NEXT: eors r0, r2 ; CHECK-NEXT: eors r1, r2 +; CHECK-NEXT: eors r0, r2 +; CHECK-NEXT: subs r0, r0, r2 +; CHECK-NEXT: sbcs r1, r2 ; CHECK-NEXT: bx lr %tmp1neg = sub i64 0, %a %b = icmp sgt i64 %a, -1 diff --git a/llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll b/llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll index bd091cf2b6f848..152ac0fa3f1684 100644 --- a/llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll +++ b/llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll @@ -1,7 +1,51 @@ -; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | FileCheck %s -; RUN: llc -mtriple=thumbv6-eabi %s -verify-machineinstrs -o - | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | FileCheck %s --check-prefix=THUMB +; RUN: llc -mtriple=thumbv6-eabi %s -verify-machineinstrs -o - | FileCheck %s --check-prefix=THUMBV6 + +; The scheduler used to ignore OptionalDefs, and could unwittingly insert +; a flag-setting instruction in between an ADDS and the corresponding ADC. + +; FIXME: The ABS lowering changed to XOR followed by SUB so this may no longer +; be testing what it used to. 
define i1 @test(i64 %arg) { +; THUMB-LABEL: test: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: .save {r4, lr} +; THUMB-NEXT: push {r4, lr} +; THUMB-NEXT: asrs r2, r1, #31 +; THUMB-NEXT: movs r3, r1 +; THUMB-NEXT: eors r3, r2 +; THUMB-NEXT: movs r4, r0 +; THUMB-NEXT: eors r4, r2 +; THUMB-NEXT: subs r4, r4, r2 +; THUMB-NEXT: sbcs r3, r2 +; THUMB-NEXT: eors r3, r1 +; THUMB-NEXT: eors r0, r4 +; THUMB-NEXT: orrs r0, r3 +; THUMB-NEXT: rsbs r1, r0, #0 +; THUMB-NEXT: adcs r0, r1 +; THUMB-NEXT: pop {r4} +; THUMB-NEXT: pop {r1} +; THUMB-NEXT: bx r1 +; +; THUMBV6-LABEL: test: +; THUMBV6: @ %bb.0: @ %entry +; THUMBV6-NEXT: .save {r4, lr} +; THUMBV6-NEXT: push {r4, lr} +; THUMBV6-NEXT: asrs r2, r1, #31 +; THUMBV6-NEXT: mov r3, r1 +; THUMBV6-NEXT: eors r3, r2 +; THUMBV6-NEXT: mov r4, r0 +; THUMBV6-NEXT: eors r4, r2 +; THUMBV6-NEXT: subs r4, r4, r2 +; THUMBV6-NEXT: sbcs r3, r2 +; THUMBV6-NEXT: eors r3, r1 +; THUMBV6-NEXT: eors r0, r4 +; THUMBV6-NEXT: orrs r0, r3 +; THUMBV6-NEXT: rsbs r1, r0, #0 +; THUMBV6-NEXT: adcs r0, r1 +; THUMBV6-NEXT: pop {r4, pc} entry: %ispos = icmp sgt i64 %arg, -1 %neg = sub i64 0, %arg @@ -9,10 +53,3 @@ entry: %cmp2 = icmp eq i64 %sel, %arg ret i1 %cmp2 } - -; The scheduler used to ignore OptionalDefs, and could unwittingly insert -; a flag-setting instruction in between an ADDS and the corresponding ADC. 
- -; CHECK: adds -; CHECK-NOT: eors -; CHECK: adcs diff --git a/llvm/test/CodeGen/Thumb2/abs.ll b/llvm/test/CodeGen/Thumb2/abs.ll index 02a2a14c2a5cc2..88259ba7588036 100644 --- a/llvm/test/CodeGen/Thumb2/abs.ll +++ b/llvm/test/CodeGen/Thumb2/abs.ll @@ -120,18 +120,18 @@ define i64 @abs64(i64 %x) { ; CHECKT1-LABEL: abs64: ; CHECKT1: @ %bb.0: ; CHECKT1-NEXT: asrs r2, r1, #31 -; CHECKT1-NEXT: adds r0, r0, r2 -; CHECKT1-NEXT: adcs r1, r2 -; CHECKT1-NEXT: eors r0, r2 ; CHECKT1-NEXT: eors r1, r2 +; CHECKT1-NEXT: eors r0, r2 +; CHECKT1-NEXT: subs r0, r0, r2 +; CHECKT1-NEXT: sbcs r1, r2 ; CHECKT1-NEXT: bx lr ; ; CHECKT2-LABEL: abs64: ; CHECKT2: @ %bb.0: -; CHECKT2-NEXT: adds.w r0, r0, r1, asr #31 -; CHECKT2-NEXT: adc.w r2, r1, r1, asr #31 ; CHECKT2-NEXT: eor.w r0, r0, r1, asr #31 -; CHECKT2-NEXT: eor.w r1, r2, r1, asr #31 +; CHECKT2-NEXT: eor.w r2, r1, r1, asr #31 +; CHECKT2-NEXT: subs.w r0, r0, r1, asr #31 +; CHECKT2-NEXT: sbc.w r1, r2, r1, asr #31 ; CHECKT2-NEXT: bx lr %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true) ret i64 %abs @@ -141,8 +141,8 @@ define i32 @abs32(i32 %x) { ; CHECKT1-LABEL: abs32: ; CHECKT1: @ %bb.0: ; CHECKT1-NEXT: asrs r1, r0, #31 -; CHECKT1-NEXT: adds r0, r0, r1 ; CHECKT1-NEXT: eors r0, r1 +; CHECKT1-NEXT: subs r0, r0, r1 ; CHECKT1-NEXT: bx lr ; ; CHECKT2-LABEL: abs32: @@ -160,15 +160,15 @@ define i16 @abs16(i16 %x) { ; CHECKT1: @ %bb.0: ; CHECKT1-NEXT: sxth r1, r0 ; CHECKT1-NEXT: asrs r1, r1, #15 -; CHECKT1-NEXT: adds r0, r0, r1 ; CHECKT1-NEXT: eors r0, r1 +; CHECKT1-NEXT: subs r0, r0, r1 ; CHECKT1-NEXT: bx lr ; ; CHECKT2-LABEL: abs16: ; CHECKT2: @ %bb.0: ; CHECKT2-NEXT: sxth r1, r0 -; CHECKT2-NEXT: add.w r0, r0, r1, asr #15 ; CHECKT2-NEXT: eor.w r0, r0, r1, asr #15 +; CHECKT2-NEXT: sub.w r0, r0, r1, asr #15 ; CHECKT2-NEXT: bx lr %abs = tail call i16 @llvm.abs.i16(i16 %x, i1 true) ret i16 %abs @@ -180,26 +180,26 @@ define i128 @abs128(i128 %x) { ; CHECKT1-NEXT: .save {r4, lr} ; CHECKT1-NEXT: push {r4, lr} ; CHECKT1-NEXT: asrs r4, r3, #31 -; 
CHECKT1-NEXT: adds r0, r0, r4 -; CHECKT1-NEXT: adcs r1, r4 -; CHECKT1-NEXT: adcs r2, r4 -; CHECKT1-NEXT: adcs r3, r4 -; CHECKT1-NEXT: eors r0, r4 -; CHECKT1-NEXT: eors r1, r4 -; CHECKT1-NEXT: eors r2, r4 ; CHECKT1-NEXT: eors r3, r4 +; CHECKT1-NEXT: eors r2, r4 +; CHECKT1-NEXT: eors r1, r4 +; CHECKT1-NEXT: eors r0, r4 +; CHECKT1-NEXT: subs r0, r0, r4 +; CHECKT1-NEXT: sbcs r1, r4 +; CHECKT1-NEXT: sbcs r2, r4 +; CHECKT1-NEXT: sbcs r3, r4 ; CHECKT1-NEXT: pop {r4, pc} ; ; CHECKT2-LABEL: abs128: ; CHECKT2: @ %bb.0: -; CHECKT2-NEXT: adds.w r0, r0, r3, asr #31 -; CHECKT2-NEXT: adcs.w r1, r1, r3, asr #31 ; CHECKT2-NEXT: eor.w r0, r0, r3, asr #31 -; CHECKT2-NEXT: adcs.w r2, r2, r3, asr #31 ; CHECKT2-NEXT: eor.w r1, r1, r3, asr #31 -; CHECKT2-NEXT: adc.w r12, r3, r3, asr #31 +; CHECKT2-NEXT: subs.w r0, r0, r3, asr #31 ; CHECKT2-NEXT: eor.w r2, r2, r3, asr #31 -; CHECKT2-NEXT: eor.w r3, r12, r3, asr #31 +; CHECKT2-NEXT: sbcs.w r1, r1, r3, asr #31 +; CHECKT2-NEXT: eor.w r12, r3, r3, asr #31 +; CHECKT2-NEXT: sbcs.w r2, r2, r3, asr #31 +; CHECKT2-NEXT: sbc.w r3, r12, r3, asr #31 ; CHECKT2-NEXT: bx lr %abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true) ret i128 %abs diff --git a/llvm/test/CodeGen/WebAssembly/PR41149.ll b/llvm/test/CodeGen/WebAssembly/PR41149.ll index 6a8dee1906f1bc..0913bf0eba220e 100644 --- a/llvm/test/CodeGen/WebAssembly/PR41149.ll +++ b/llvm/test/CodeGen/WebAssembly/PR41149.ll @@ -13,9 +13,9 @@ define void @mod() { ; CHECK-NEXT: i32.const 31 ; CHECK-NEXT: i32.shr_s ; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: i32.add -; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.xor +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.sub ; CHECK-NEXT: i32.store8 0 %tmp = load <4 x i8>, <4 x i8>* undef %tmp2 = icmp slt <4 x i8> %tmp, zeroinitializer diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll index c03923aa47ff53..df83381ababd34 100644 --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -25,11 +25,11 @@ declare <16 x i8> 
@llvm.abs.v16i8(<16 x i8>, i1) define i8 @test_i8(i8 %a) nounwind { ; X64-LABEL: test_i8: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: movl %edi, %ecx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx ; X64-NEXT: sarb $7, %cl -; X64-NEXT: leal (%rdi,%rcx), %eax ; X64-NEXT: xorb %cl, %al +; X64-NEXT: subb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; @@ -38,8 +38,8 @@ define i8 @test_i8(i8 %a) nounwind { ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: sarb $7, %cl -; X86-NEXT: addb %cl, %al ; X86-NEXT: xorb %cl, %al +; X86-NEXT: subb %cl, %al ; X86-NEXT: retl %r = call i8 @llvm.abs.i8(i8 %a, i1 false) ret i8 %r @@ -197,8 +197,8 @@ define <2 x i32> @test_v2i32(<2 x i32> %a) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: psrad $31, %xmm1 -; SSE-NEXT: paddd %xmm1, %xmm0 ; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2i32: @@ -226,8 +226,8 @@ define <3 x i32> @test_v3i32(<3 x i32> %a) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: psrad $31, %xmm1 -; SSE-NEXT: paddd %xmm1, %xmm0 ; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v3i32: @@ -261,8 +261,8 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: psrad $31, %xmm1 -; SSE-NEXT: paddd %xmm1, %xmm0 ; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: psubd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4i32: @@ -309,12 +309,12 @@ define <8 x i32> @test_v8i32(<8 x i32> %a) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm2 ; SSE-NEXT: psrad $31, %xmm2 -; SSE-NEXT: paddd %xmm2, %xmm0 ; SSE-NEXT: pxor %xmm2, %xmm0 +; SSE-NEXT: psubd %xmm2, %xmm0 ; SSE-NEXT: movdqa %xmm1, %xmm2 ; SSE-NEXT: psrad $31, %xmm2 -; SSE-NEXT: paddd %xmm2, %xmm1 ; SSE-NEXT: pxor %xmm2, %xmm1 +; SSE-NEXT: psubd %xmm2, %xmm1 ; 
SSE-NEXT: retq ; ; AVX1-LABEL: test_v8i32: @@ -496,86 +496,86 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind { ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movb %cl, %al ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %cl ; X86-NEXT: xorb %al, %cl +; X86-NEXT: subb %al, %cl ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: movb %dl, %al ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %dl ; X86-NEXT: xorb %al, %dl +; X86-NEXT: subb %al, %dl ; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: movb %ah, %al ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %ah ; X86-NEXT: xorb %al, %ah +; X86-NEXT: subb %al, %ah ; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: movb %ch, %al ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %ch ; X86-NEXT: xorb %al, %ch +; X86-NEXT: subb %al, %ch ; X86-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: movb %dh, %al ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %dh ; X86-NEXT: xorb %al, %dh +; X86-NEXT: subb %al, %dh ; X86-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: movl %ebx, %eax ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %bl ; X86-NEXT: xorb %al, %bl +; X86-NEXT: subb %al, %bl ; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: movb %bh, %al ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %bh ; X86-NEXT: xorb %al, %bh +; X86-NEXT: subb %al, %bh ; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %cl ; X86-NEXT: xorb %al, %cl +; X86-NEXT: subb %al, %cl ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %cl ; X86-NEXT: xorb %al, %cl +; X86-NEXT: subb %al, %cl ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: movb 
{{[0-9]+}}(%esp), %bh ; X86-NEXT: movb %bh, %al ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %bh ; X86-NEXT: xorb %al, %bh +; X86-NEXT: subb %al, %bh ; X86-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-NEXT: movl %ebx, %eax ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %bl ; X86-NEXT: xorb %al, %bl +; X86-NEXT: subb %al, %bl ; X86-NEXT: movb {{[0-9]+}}(%esp), %dh ; X86-NEXT: movb %dh, %al ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %dh ; X86-NEXT: xorb %al, %dh +; X86-NEXT: subb %al, %dh ; X86-NEXT: movb {{[0-9]+}}(%esp), %ch ; X86-NEXT: movb %ch, %al ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %ch ; X86-NEXT: xorb %al, %ch +; X86-NEXT: subb %al, %ch ; X86-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-NEXT: movl %edx, %eax ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %dl ; X86-NEXT: xorb %al, %dl +; X86-NEXT: subb %al, %dl ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sarb $7, %al -; X86-NEXT: addb %al, %cl ; X86-NEXT: xorb %al, %cl +; X86-NEXT: subb %al, %cl ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movb %al, %ah ; X86-NEXT: sarb $7, %ah -; X86-NEXT: addb %ah, %al ; X86-NEXT: xorb %ah, %al +; X86-NEXT: subb %ah, %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movb %al, 15(%esi) ; X86-NEXT: movb %cl, 14(%esi) diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll index fd5930217cb06e..de20b4dccb20a7 100644 --- a/llvm/test/CodeGen/X86/combine-abs.ll +++ b/llvm/test/CodeGen/X86/combine-abs.ll @@ -110,13 +110,13 @@ define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) { ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] -; SSE2-NEXT: paddq %xmm2, %xmm0 ; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: psubq %xmm2, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] -; SSE2-NEXT: paddq %xmm2, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: psubq %xmm2, %xmm1 ; 
SSE2-NEXT: retq ; ; SSE42-LABEL: combine_v4i64_abs_abs: diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll index a00ec41516c837..1cbb8360440d94 100644 --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -15,17 +15,17 @@ define i8 @test_i8(i8 %a) nounwind { ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: sarb $7, %cl -; X86-NEXT: addb %cl, %al ; X86-NEXT: xorb %cl, %al +; X86-NEXT: subb %cl, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: movl %edi, %ecx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx ; X64-NEXT: sarb $7, %cl -; X64-NEXT: leal (%rdi,%rcx), %eax ; X64-NEXT: xorb %cl, %al +; X64-NEXT: subb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %tmp1neg = sub i8 0, %a @@ -40,8 +40,8 @@ define i16 @test_i16(i16 %a) nounwind { ; X86-NO-CMOV-NEXT: movswl {{[0-9]+}}(%esp), %eax ; X86-NO-CMOV-NEXT: movl %eax, %ecx ; X86-NO-CMOV-NEXT: sarl $15, %ecx -; X86-NO-CMOV-NEXT: addl %ecx, %eax ; X86-NO-CMOV-NEXT: xorl %ecx, %eax +; X86-NO-CMOV-NEXT: subl %ecx, %eax ; X86-NO-CMOV-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NO-CMOV-NEXT: retl ; @@ -71,8 +71,8 @@ define i32 @test_i32(i32 %a) nounwind { ; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NO-CMOV-NEXT: movl %eax, %ecx ; X86-NO-CMOV-NEXT: sarl $31, %ecx -; X86-NO-CMOV-NEXT: addl %ecx, %eax ; X86-NO-CMOV-NEXT: xorl %ecx, %eax +; X86-NO-CMOV-NEXT: subl %ecx, %eax ; X86-NO-CMOV-NEXT: retl ; ; X86-CMOV-LABEL: test_i32: diff --git a/llvm/test/CodeGen/X86/neg-abs.ll b/llvm/test/CodeGen/X86/neg-abs.ll index f34f683db078e8..ee2564660a0668 100644 --- a/llvm/test/CodeGen/X86/neg-abs.ll +++ b/llvm/test/CodeGen/X86/neg-abs.ll @@ -154,24 +154,21 @@ define i128 @neg_abs_i128(i128 %x) nounwind { define i8 @sub_abs_i8(i8 %x, i8 %y) nounwind { ; X86-LABEL: sub_abs_i8: ; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; 
X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: sarb $7, %dl -; X86-NEXT: addb %dl, %cl -; X86-NEXT: xorb %dl, %cl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sarb $7, %al +; X86-NEXT: xorb %al, %cl ; X86-NEXT: subb %cl, %al +; X86-NEXT: addb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl ; ; X64-LABEL: sub_abs_i8: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: sarb $7, %cl -; X64-NEXT: addb %cl, %dil -; X64-NEXT: xorb %cl, %dil +; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarb $7, %al +; X64-NEXT: xorb %al, %dil ; X64-NEXT: subb %dil, %al -; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: addb %sil, %al ; X64-NEXT: retq %abs = tail call i8 @llvm.abs.i8(i8 %x, i1 false) %neg = sub nsw i8 %y, %abs @@ -181,13 +178,12 @@ define i8 @sub_abs_i8(i8 %x, i8 %y) nounwind { define i16 @sub_abs_i16(i16 %x, i16 %y) nounwind { ; X86-LABEL: sub_abs_i16: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: sarl $15, %edx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sarl $15, %eax +; X86-NEXT: xorl %eax, %ecx ; X86-NEXT: subl %ecx, %eax +; X86-NEXT: addl {{[0-9]+}}(%esp), %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; @@ -207,13 +203,12 @@ define i16 @sub_abs_i16(i16 %x, i16 %y) nounwind { define i32 @sub_abs_i32(i32 %x, i32 %y) nounwind { ; X86-LABEL: sub_abs_i32: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: addl %edx, %ecx -; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sarl $31, %eax +; X86-NEXT: xorl %eax, %ecx ; X86-NEXT: subl %ecx, %eax +; X86-NEXT: addl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; ; X64-LABEL: sub_abs_i32: diff --git a/llvm/test/CodeGen/X86/viabs.ll 
b/llvm/test/CodeGen/X86/viabs.ll index 405d9eaa2c8340..d892297d81ea64 100644 --- a/llvm/test/CodeGen/X86/viabs.ll +++ b/llvm/test/CodeGen/X86/viabs.ll @@ -12,8 +12,8 @@ define <4 x i32> @test_abs_gt_v4i32(<4 x i32> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrad $31, %xmm1 -; SSE2-NEXT: paddd %xmm1, %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: psubd %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test_abs_gt_v4i32: @@ -51,8 +51,8 @@ define <4 x i32> @test_abs_ge_v4i32(<4 x i32> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrad $31, %xmm1 -; SSE2-NEXT: paddd %xmm1, %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: psubd %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test_abs_ge_v4i32: @@ -176,8 +176,8 @@ define <4 x i32> @test_abs_le_v4i32(<4 x i32> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrad $31, %xmm1 -; SSE2-NEXT: paddd %xmm1, %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: psubd %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test_abs_le_v4i32: @@ -215,12 +215,12 @@ define <8 x i32> @test_abs_gt_v8i32(<8 x i32> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 -; SSE2-NEXT: paddd %xmm2, %xmm0 ; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: psubd %xmm2, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 -; SSE2-NEXT: paddd %xmm2, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: psubd %xmm2, %xmm1 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test_abs_gt_v8i32: @@ -263,12 +263,12 @@ define <8 x i32> @test_abs_ge_v8i32(<8 x i32> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 -; SSE2-NEXT: paddd %xmm2, %xmm0 ; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: psubd %xmm2, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 -; SSE2-NEXT: paddd %xmm2, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: psubd %xmm2, %xmm1 ; SSE2-NEXT: retq ; ; 
SSSE3-LABEL: test_abs_ge_v8i32: @@ -413,12 +413,12 @@ define <8 x i32> @test_abs_le_v8i32(<8 x i32> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 -; SSE2-NEXT: paddd %xmm2, %xmm0 ; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: psubd %xmm2, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 -; SSE2-NEXT: paddd %xmm2, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: psubd %xmm2, %xmm1 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test_abs_le_v8i32: @@ -461,20 +461,20 @@ define <16 x i32> @test_abs_le_16i32(<16 x i32> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 -; SSE2-NEXT: paddd %xmm4, %xmm0 ; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: psubd %xmm4, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 -; SSE2-NEXT: paddd %xmm4, %xmm1 ; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: psubd %xmm4, %xmm1 ; SSE2-NEXT: movdqa %xmm2, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 -; SSE2-NEXT: paddd %xmm4, %xmm2 ; SSE2-NEXT: pxor %xmm4, %xmm2 +; SSE2-NEXT: psubd %xmm4, %xmm2 ; SSE2-NEXT: movdqa %xmm3, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 -; SSE2-NEXT: paddd %xmm4, %xmm3 ; SSE2-NEXT: pxor %xmm4, %xmm3 +; SSE2-NEXT: psubd %xmm4, %xmm3 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test_abs_le_16i32: @@ -527,8 +527,8 @@ define <2 x i64> @test_abs_ge_v2i64(<2 x i64> %a) nounwind { ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrad $31, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE2-NEXT: paddq %xmm1, %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: psubq %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test_abs_ge_v2i64: @@ -536,8 +536,8 @@ define <2 x i64> @test_abs_ge_v2i64(<2 x i64> %a) nounwind { ; SSSE3-NEXT: movdqa %xmm0, %xmm1 ; SSSE3-NEXT: psrad $31, %xmm1 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSSE3-NEXT: paddq %xmm1, %xmm0 ; SSSE3-NEXT: pxor %xmm1, %xmm0 +; SSSE3-NEXT: psubq %xmm1, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: test_abs_ge_v2i64: 
@@ -577,13 +577,13 @@ define <4 x i64> @test_abs_gt_v4i64(<4 x i64> %a) nounwind { ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] -; SSE2-NEXT: paddq %xmm2, %xmm0 ; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: psubq %xmm2, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] -; SSE2-NEXT: paddq %xmm2, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: psubq %xmm2, %xmm1 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test_abs_gt_v4i64: @@ -591,13 +591,13 @@ define <4 x i64> @test_abs_gt_v4i64(<4 x i64> %a) nounwind { ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: psrad $31, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] -; SSSE3-NEXT: paddq %xmm2, %xmm0 ; SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSSE3-NEXT: psubq %xmm2, %xmm0 ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: psrad $31, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] -; SSSE3-NEXT: paddq %xmm2, %xmm1 ; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: psubq %xmm2, %xmm1 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: test_abs_gt_v4i64: @@ -646,23 +646,23 @@ define <8 x i64> @test_abs_le_v8i64(<8 x i64> %a) nounwind { ; SSE2-NEXT: movdqa %xmm0, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSE2-NEXT: paddq %xmm4, %xmm0 ; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: psubq %xmm4, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSE2-NEXT: paddq %xmm4, %xmm1 ; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: psubq %xmm4, %xmm1 ; SSE2-NEXT: movdqa %xmm2, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSE2-NEXT: paddq %xmm4, %xmm2 ; SSE2-NEXT: pxor %xmm4, %xmm2 +; SSE2-NEXT: psubq %xmm4, %xmm2 ; SSE2-NEXT: movdqa %xmm3, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSE2-NEXT: paddq %xmm4, %xmm3 ; SSE2-NEXT: 
pxor %xmm4, %xmm3 +; SSE2-NEXT: psubq %xmm4, %xmm3 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test_abs_le_v8i64: @@ -670,23 +670,23 @@ define <8 x i64> @test_abs_le_v8i64(<8 x i64> %a) nounwind { ; SSSE3-NEXT: movdqa %xmm0, %xmm4 ; SSSE3-NEXT: psrad $31, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSSE3-NEXT: paddq %xmm4, %xmm0 ; SSSE3-NEXT: pxor %xmm4, %xmm0 +; SSSE3-NEXT: psubq %xmm4, %xmm0 ; SSSE3-NEXT: movdqa %xmm1, %xmm4 ; SSSE3-NEXT: psrad $31, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSSE3-NEXT: paddq %xmm4, %xmm1 ; SSSE3-NEXT: pxor %xmm4, %xmm1 +; SSSE3-NEXT: psubq %xmm4, %xmm1 ; SSSE3-NEXT: movdqa %xmm2, %xmm4 ; SSSE3-NEXT: psrad $31, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSSE3-NEXT: paddq %xmm4, %xmm2 ; SSSE3-NEXT: pxor %xmm4, %xmm2 +; SSSE3-NEXT: psubq %xmm4, %xmm2 ; SSSE3-NEXT: movdqa %xmm3, %xmm4 ; SSSE3-NEXT: psrad $31, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSSE3-NEXT: paddq %xmm4, %xmm3 ; SSSE3-NEXT: pxor %xmm4, %xmm3 +; SSSE3-NEXT: psubq %xmm4, %xmm3 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: test_abs_le_v8i64: @@ -754,23 +754,23 @@ define <8 x i64> @test_abs_le_v8i64_fold(<8 x i64>* %a.ptr) nounwind { ; SSE2-NEXT: movdqa %xmm0, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSE2-NEXT: paddq %xmm4, %xmm0 ; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: psubq %xmm4, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSE2-NEXT: paddq %xmm4, %xmm1 ; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: psubq %xmm4, %xmm1 ; SSE2-NEXT: movdqa %xmm2, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSE2-NEXT: paddq %xmm4, %xmm2 ; SSE2-NEXT: pxor %xmm4, %xmm2 +; SSE2-NEXT: psubq %xmm4, %xmm2 ; SSE2-NEXT: movdqa %xmm3, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSE2-NEXT: paddq %xmm4, %xmm3 ; SSE2-NEXT: pxor 
%xmm4, %xmm3 +; SSE2-NEXT: psubq %xmm4, %xmm3 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: test_abs_le_v8i64_fold: @@ -782,23 +782,23 @@ define <8 x i64> @test_abs_le_v8i64_fold(<8 x i64>* %a.ptr) nounwind { ; SSSE3-NEXT: movdqa %xmm0, %xmm4 ; SSSE3-NEXT: psrad $31, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSSE3-NEXT: paddq %xmm4, %xmm0 ; SSSE3-NEXT: pxor %xmm4, %xmm0 +; SSSE3-NEXT: psubq %xmm4, %xmm0 ; SSSE3-NEXT: movdqa %xmm1, %xmm4 ; SSSE3-NEXT: psrad $31, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSSE3-NEXT: paddq %xmm4, %xmm1 ; SSSE3-NEXT: pxor %xmm4, %xmm1 +; SSSE3-NEXT: psubq %xmm4, %xmm1 ; SSSE3-NEXT: movdqa %xmm2, %xmm4 ; SSSE3-NEXT: psrad $31, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSSE3-NEXT: paddq %xmm4, %xmm2 ; SSSE3-NEXT: pxor %xmm4, %xmm2 +; SSSE3-NEXT: psubq %xmm4, %xmm2 ; SSSE3-NEXT: movdqa %xmm3, %xmm4 ; SSSE3-NEXT: psrad $31, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSSE3-NEXT: paddq %xmm4, %xmm3 ; SSSE3-NEXT: pxor %xmm4, %xmm3 +; SSSE3-NEXT: psubq %xmm4, %xmm3 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: test_abs_le_v8i64_fold: diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected index 78abaf51680689..9ae01c167b8da3 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected @@ -9,17 +9,17 @@ define i8 @test_i8(i8 %a) nounwind { ; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: sarb $7, %cl -; X86-NEXT: addb %cl, %al ; X86-NEXT: xorb %cl, %al +; X86-NEXT: subb %cl, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: movl %edi, %ecx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movl %eax, %ecx ; X64-NEXT: sarb $7, %cl -; X64-NEXT: leal (%rdi,%rcx), %eax 
; X64-NEXT: xorb %cl, %al +; X64-NEXT: subb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %tmp1neg = sub i8 0, %a @@ -34,8 +34,8 @@ define i16 @test_i16(i16 %a) nounwind { ; X86-NO-CMOV-NEXT: movswl {{[0-9]+}}(%esp), %eax ; X86-NO-CMOV-NEXT: movl %eax, %ecx ; X86-NO-CMOV-NEXT: sarl $15, %ecx -; X86-NO-CMOV-NEXT: addl %ecx, %eax ; X86-NO-CMOV-NEXT: xorl %ecx, %eax +; X86-NO-CMOV-NEXT: subl %ecx, %eax ; X86-NO-CMOV-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NO-CMOV-NEXT: retl ; @@ -65,8 +65,8 @@ define i32 @test_i32(i32 %a) nounwind { ; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NO-CMOV-NEXT: movl %eax, %ecx ; X86-NO-CMOV-NEXT: sarl $31, %ecx -; X86-NO-CMOV-NEXT: addl %ecx, %eax ; X86-NO-CMOV-NEXT: xorl %ecx, %eax +; X86-NO-CMOV-NEXT: subl %ecx, %eax ; X86-NO-CMOV-NEXT: retl ; ; X86-CMOV-LABEL: test_i32: