diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 5c35b3327c16d..1556b63852280 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2790,25 +2790,40 @@ static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) { [[fallthrough]]; case ARM::RSBrr: case ARM::RSBri: + case ARM::RSBrsi: + case ARM::RSBrsr: case ARM::RSCrr: case ARM::RSCri: + case ARM::RSCrsr: + case ARM::RSCrsi: case ARM::ADDrr: case ARM::ADDri: + case ARM::ADDrsi: + case ARM::ADDrsr: case ARM::ADCrr: case ARM::ADCri: case ARM::SUBrr: case ARM::SUBri: + case ARM::SUBrsr: + case ARM::SUBrsi: case ARM::SBCrr: case ARM::SBCri: + case ARM::SBCrsi: + case ARM::SBCrsr: case ARM::t2RSBri: + case ARM::t2RSBrr: + case ARM::t2RSBrs: case ARM::t2ADDrr: case ARM::t2ADDri: + case ARM::t2ADDrs: case ARM::t2ADCrr: case ARM::t2ADCri: case ARM::t2SUBrr: case ARM::t2SUBri: + case ARM::t2SUBrs: case ARM::t2SBCrr: case ARM::t2SBCri: + case ARM::t2SBCrs: case ARM::ANDrr: case ARM::ANDri: case ARM::ANDrsr: diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 847b7af5a9b11..89095ed4ed17c 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -182,7 +182,10 @@ class ARMDAGToDAGISel : public SelectionDAGISel { return ARM_AM::getT2SOImmVal(~Imm) != -1; } - // Include the pieces autogenerated from the target description. + // Preference helper: for SUB with encodable immediate LHS, select RSBri + // and materialize any RHS shift first when needed. Returns true if handled. 
+ bool tryPreferRSBForSUB(SDNode *N); + + #include "ARMGenDAGISel.inc" private: @@ -3492,6 +3495,81 @@ getContiguousRangeOfSetBits(const APInt &A) { return std::make_pair(FirstOne, LastOne); } +bool ARMDAGToDAGISel::tryPreferRSBForSUB(SDNode *N) { + if (Subtarget->isThumb1Only()) + return false; + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + const auto *CI = dyn_cast<ConstantSDNode>(LHS); + if (!CI) + return false; + + unsigned Imm = (unsigned)CI->getZExtValue(); + bool Encodable = Subtarget->isThumb() ? is_t2_so_imm(Imm) : is_so_imm(Imm); + if (!Encodable) + return false; + + SDLoc dl(N); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue ImmOp = CurDAG->getTargetConstant(Imm, dl, MVT::i32); + + // If RHS is a constant shift, emit the shift as its own instruction first. + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(RHS.getOpcode()); + SDValue Rn = RHS; + if (ShOpcVal != ARM_AM::no_shift) { + const ConstantSDNode *ShC = dyn_cast<ConstantSDNode>(RHS.getOperand(1)); + if (!ShC || ShC->getZExtValue() == 0 || ShC->getZExtValue() >= 32) + return false; // variable or out-of-range shift: leave to default isel + unsigned ShAmt = ShC->getZExtValue(); + if (Subtarget->isThumb()) { + unsigned ShOpc = 0; + switch (ShOpcVal) { + default: + ShOpc = 0; + break; + case ARM_AM::lsl: + ShOpc = ARM::t2LSLri; + break; + case ARM_AM::lsr: + ShOpc = ARM::t2LSRri; + break; + case ARM_AM::asr: + ShOpc = ARM::t2ASRri; + break; + case ARM_AM::ror: + ShOpc = ARM::t2RORri; + break; + } + if (!ShOpc) + return false; + SDValue ShAmtOp = CurDAG->getTargetConstant(ShAmt, dl, MVT::i32); + SDValue OpsShift[] = {RHS.getOperand(0), ShAmtOp, getAL(CurDAG, dl), Reg0, + Reg0}; + MachineSDNode *ShN = + CurDAG->getMachineNode(ShOpc, dl, MVT::i32, OpsShift); + Rn = SDValue(ShN, 0); + } else { + unsigned SOpc = ARM_AM::getSORegOpc(ShOpcVal, ShAmt); + SDValue ShImmOp = CurDAG->getTargetConstant(SOpc, dl, MVT::i32); + SDValue OpsShift[] = {RHS.getOperand(0), ShImmOp, getAL(CurDAG, dl), Reg0, + Reg0}; + MachineSDNode *ShN = + CurDAG->getMachineNode(ARM::MOVsi, dl, MVT::i32, OpsShift); + Rn = 
SDValue(ShN, 0); + } + } + + if (Subtarget->isThumb()) { + SDValue Ops[] = {Rn, ImmOp, getAL(CurDAG, dl), Reg0, Reg0}; + CurDAG->SelectNodeTo(N, ARM::t2RSBri, MVT::i32, Ops); + } else { + SDValue Ops[] = {Rn, ImmOp, getAL(CurDAG, dl), Reg0, Reg0}; + CurDAG->SelectNodeTo(N, ARM::RSBri, MVT::i32, Ops); + } + return true; +} + void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { assert(N->getOpcode() == ARMISD::CMPZ); SwitchEQNEToPLMI = false; @@ -3643,6 +3721,10 @@ void ARMDAGToDAGISel::Select(SDNode *N) { if (tryInlineAsm(N)) return; break; + case ISD::SUB: + if (tryPreferRSBForSUB(N)) + return; + break; case ISD::Constant: { unsigned Val = N->getAsZExtVal(); // If we can't materialize the constant we need to use a literal pool diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 9052cbfa89deb..079ac82d93463 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -647,8 +647,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, if (!Subtarget->hasV8_1MMainlineOps()) setOperationAction(ISD::UCMP, MVT::i32, Custom); - if (!Subtarget->isThumb1Only()) + if (!Subtarget->isThumb1Only()) { setOperationAction(ISD::ABS, MVT::i32, Custom); + setOperationAction(ISD::ABDS, MVT::i32, Custom); + } + setOperationAction(ISD::ABDU, MVT::i32, Custom); setOperationAction(ISD::ConstantFP, MVT::f32, Custom); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); @@ -5093,6 +5096,28 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } + // Canonicalise absolute difference patterns in SELECT before converting to + // SELECT_CC: + // select(setcc LHS, RHS, cc), sub(LHS, RHS), sub(RHS, LHS) -> + // select(setcc LHS, RHS, cc), sub(LHS, RHS), neg(sub(LHS, RHS)) + if (Cond.getOpcode() == ISD::SETCC && SelectTrue.getOpcode() == ISD::SUB && + SelectFalse.getOpcode() == ISD::SUB) { + SDValue LHS = Cond.getOperand(0); + SDValue RHS = 
Cond.getOperand(1); + + if (SelectTrue.getOperand(0) == LHS && SelectTrue.getOperand(1) == RHS && + SelectFalse.getOperand(0) == RHS && SelectFalse.getOperand(1) == LHS) { + SelectTrue->dropFlags(SDNodeFlags::PoisonGeneratingFlags); + SelectFalse = DAG.getNegative(SelectTrue, dl, SelectTrue.getValueType()); + } else if (SelectTrue.getOperand(0) == RHS && + SelectTrue.getOperand(1) == LHS && + SelectFalse.getOperand(0) == LHS && + SelectFalse.getOperand(1) == RHS) { + SelectFalse->dropFlags(SDNodeFlags::PoisonGeneratingFlags); + SelectTrue = DAG.getNegative(SelectFalse, dl, SelectFalse.getValueType()); + } + } + // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the // undefined bits before doing a full-word comparison with zero. Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, @@ -5383,6 +5408,28 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::AND, dl, VT, LHS, Shift); } + + // Canonicalise absolute difference patterns: + // select_cc LHS, RHS, sub(LHS, RHS), sub(RHS, LHS), cc -> + // select_cc LHS, RHS, sub(LHS, RHS), neg(sub(LHS, RHS)), cc + // + // select_cc LHS, RHS, sub(RHS, LHS), sub(LHS, RHS), cc -> + // select_cc LHS, RHS, neg(sub(LHS, RHS)), sub(LHS, RHS), cc + // The second forms can be matched into subs+cmov with negation. + // NOTE: Drop poison generating flags from the negated operand to avoid + // inadvertently propagating poison after the canonicalisation. 
+ if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::SUB) { + if (TrueVal.getOperand(0) == LHS && TrueVal.getOperand(1) == RHS && + FalseVal.getOperand(0) == RHS && FalseVal.getOperand(1) == LHS) { + TrueVal->dropFlags(SDNodeFlags::PoisonGeneratingFlags); + FalseVal = DAG.getNegative(TrueVal, dl, TrueVal.getValueType()); + } else if (TrueVal.getOperand(0) == RHS && TrueVal.getOperand(1) == LHS && + FalseVal.getOperand(0) == LHS && + FalseVal.getOperand(1) == RHS) { + FalseVal->dropFlags(SDNodeFlags::PoisonGeneratingFlags); + TrueVal = DAG.getNegative(FalseVal, dl, FalseVal.getValueType()); + } + } } if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal && @@ -5509,6 +5556,62 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { return Result; } +SDValue ARMTargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + // If the subtract doesn't overflow then just use abs(sub()) + bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS); + bool IsSigned = Op.getOpcode() == ISD::ABDS; + if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS)) + return DAG.getNode(ISD::ABS, DL, VT, + DAG.getNode(ISD::SUB, DL, VT, LHS, RHS)); + + if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS)) + return DAG.getNode(ISD::ABS, DL, VT, + DAG.getNode(ISD::SUB, DL, VT, RHS, LHS)); + + if (Subtarget->isThumb1Only()) { + assert(!IsSigned && "Signed ABS not supported on Thumb1"); + // abdu: subs; sbcs r1,r1,r1(mask from borrow); eors; subs + + // First subtraction: LHS - RHS + SDValue Sub1WithFlags = + DAG.getNode(ARMISD::SUBC, DL, DAG.getVTList(VT, FlagsVT), LHS, RHS); + SDValue Sub1Result = Sub1WithFlags.getValue(0); + SDValue Flags1 = Sub1WithFlags.getValue(1); + + // sbcs r1,r1,r1 (mask from borrow) + SDValue Sbc1 = DAG.getNode(ARMISD::SUBE, DL, DAG.getVTList(VT, FlagsVT), + RHS, RHS, 
Flags1); + + // eors (XOR) + SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, Sub1Result, Sbc1.getValue(0)); + + // subs (final subtraction) + return DAG.getNode(ISD::SUB, DL, VT, Xor, Sbc1.getValue(0)); + } + + // Generate SUBS and CMOV for absolute difference (like LowerABS) + // Compute a - b with flags + SDValue Cmp = + DAG.getNode(ARMISD::SUBC, DL, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS); + + // Compute b - a (negative of a - b) + SDValue Neg = DAG.getNode(ISD::SUB, DL, MVT::i32, + DAG.getConstant(0, DL, MVT::i32), Cmp.getValue(0)); + + // For unsigned: LO (a < b) selects b-a, otherwise a-b is kept + // For signed: LT (a < b) selects b-a, otherwise a-b is kept + ARMCC::CondCodes CC = IsSigned ? ARMCC::LT : ARMCC::LO; + + // CMOV: if CC holds (a < b), select b-a (Neg), otherwise a-b + return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Cmp.getValue(0), Neg, + DAG.getConstant(CC, DL, MVT::i32), Cmp.getValue(1)); +} + /// canChangeToInt - Given the fp compare operand, return true if it is suitable /// to morph to an integer compare sequence. 
static bool canChangeToInt(SDValue Op, bool &SeenZero, @@ -10599,6 +10702,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::ABDS: + case ISD::ABDU: + return LowerABD(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 8e417ac3e1a7b..3b5bf3a3d9db2 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -871,6 +871,7 @@ class VectorType; SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/ARM/abds-neg.ll b/llvm/test/CodeGen/ARM/abds-neg.ll new file mode 100644 index 0000000000000..5715b1d4f71f4 --- /dev/null +++ b/llvm/test/CodeGen/ARM/abds-neg.ll @@ -0,0 +1,1361 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ARM +; RUN: llc -mtriple=thumbv6m-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-THUMB + +; +; trunc(nabs(sub(sext(a),sext(b)))) -> nabds(a,b) +; + +define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr 
#24 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i8 %a to i64 + %bext = sext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i8 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; 
CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i8 %a to i64 + %bext = sext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i8 + ret i8 %trunc +} + +define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i16 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: rsbs r0, r1, r0, asr #16 +; CHECK-ARM-NEXT: rsbge r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: blt .LBB4_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: .LBB4_2: +; CHECK-THUMB-NEXT: bx lr + %aext = sext i16 %a to i64 + %bext = sext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_undef: +; 
CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i16 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i16 + ret i16 %trunc +} + +define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbge r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: blt .LBB6_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: .LBB6_2: +; CHECK-THUMB-NEXT: bx lr + %aext = sext i32 %a to i64 + %bext = sext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbge r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: blt .LBB7_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: .LBB7_2: +; CHECK-THUMB-NEXT: bx lr + %aext 
= sext i32 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbge r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: blt .LBB8_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: .LBB8_2: +; CHECK-THUMB-NEXT: bx lr + %aext = sext i32 %a to i64 + %bext = sext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i32 + ret i32 %trunc +} + +define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rsc r1, r1, #0 +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: subs r5, r0, r2 +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: subs r0, r2, r0 +; CHECK-THUMB-NEXT: sbcs r3, r1 +; CHECK-THUMB-NEXT: blt .LBB9_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: .LBB9_2: +; CHECK-THUMB-NEXT: blt .LBB9_4 +; CHECK-THUMB-NEXT: @ %bb.3: +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: .LBB9_4: +; 
CHECK-THUMB-NEXT: movs r1, #0 +; CHECK-THUMB-NEXT: rsbs r0, r5, #0 +; CHECK-THUMB-NEXT: sbcs r1, r4 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %aext = sext i64 %a to i128 + %bext = sext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false) + %nabs = sub i128 0, %abs + %trunc = trunc i128 %nabs to i64 + ret i64 %trunc +} + +define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rsc r1, r1, #0 +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: subs r5, r0, r2 +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: subs r0, r2, r0 +; CHECK-THUMB-NEXT: sbcs r3, r1 +; CHECK-THUMB-NEXT: blt .LBB10_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: .LBB10_2: +; CHECK-THUMB-NEXT: blt .LBB10_4 +; CHECK-THUMB-NEXT: @ %bb.3: +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: .LBB10_4: +; CHECK-THUMB-NEXT: movs r1, #0 +; CHECK-THUMB-NEXT: rsbs r0, r5, #0 +; CHECK-THUMB-NEXT: sbcs r1, r4 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %aext = sext i64 %a to i128 + %bext = sext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true) + %nabs = sub i128 0, %abs + %trunc = trunc i128 %nabs to i64 + ret i64 %trunc +} + +define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, 
r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr +; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs r2, lr, r2 +; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rscs r1, r1, #0 +; CHECK-ARM-NEXT: rscs r2, r2, #0 +; CHECK-ARM-NEXT: rsc r3, r3, #0 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: mov r3, r1 +; CHECK-THUMB-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r4, [sp, #60] +; CHECK-THUMB-NEXT: ldr r5, [sp, #56] +; CHECK-THUMB-NEXT: ldr r1, [sp, #52] +; CHECK-THUMB-NEXT: ldr r7, [sp, #48] +; CHECK-THUMB-NEXT: subs r2, r0, r7 +; CHECK-THUMB-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r3, r1 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r2 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: subs r0, r7, r0 +; CHECK-THUMB-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r1, r3 
+; CHECK-THUMB-NEXT: sbcs r5, r6 +; CHECK-THUMB-NEXT: sbcs r4, r2 +; CHECK-THUMB-NEXT: blt .LBB11_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: str r4, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: .LBB11_2: +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB11_4 +; CHECK-THUMB-NEXT: @ %bb.3: +; CHECK-THUMB-NEXT: mov r4, r5 +; CHECK-THUMB-NEXT: .LBB11_4: +; CHECK-THUMB-NEXT: ldr r5, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB11_6 +; CHECK-THUMB-NEXT: @ %bb.5: +; CHECK-THUMB-NEXT: mov r2, r1 +; CHECK-THUMB-NEXT: .LBB11_6: +; CHECK-THUMB-NEXT: blt .LBB11_8 +; CHECK-THUMB-NEXT: @ %bb.7: +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: .LBB11_8: +; CHECK-THUMB-NEXT: movs r3, #0 +; CHECK-THUMB-NEXT: rsbs r0, r5, #0 +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: mov r2, r3 +; CHECK-THUMB-NEXT: sbcs r2, r4 +; CHECK-THUMB-NEXT: ldr r4, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %aext = sext i128 %a to i256 + %bext = sext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false) + %nabs = sub i256 0, %abs + %trunc = trunc i256 %nabs to i128 + ret i128 %trunc +} + +define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr +; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs r2, lr, r2 
+; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rscs r1, r1, #0 +; CHECK-ARM-NEXT: rscs r2, r2, #0 +; CHECK-ARM-NEXT: rsc r3, r3, #0 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: mov r3, r1 +; CHECK-THUMB-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r4, [sp, #60] +; CHECK-THUMB-NEXT: ldr r5, [sp, #56] +; CHECK-THUMB-NEXT: ldr r1, [sp, #52] +; CHECK-THUMB-NEXT: ldr r7, [sp, #48] +; CHECK-THUMB-NEXT: subs r2, r0, r7 +; CHECK-THUMB-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r3, r1 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r2 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: subs r0, r7, r0 +; CHECK-THUMB-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: sbcs r5, r6 +; CHECK-THUMB-NEXT: sbcs r4, r2 +; CHECK-THUMB-NEXT: blt .LBB12_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: str r4, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: .LBB12_2: +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB12_4 +; CHECK-THUMB-NEXT: @ %bb.3: +; CHECK-THUMB-NEXT: mov r4, r5 +; CHECK-THUMB-NEXT: .LBB12_4: +; CHECK-THUMB-NEXT: ldr r5, [sp, #16] @ 4-byte Reload +; 
CHECK-THUMB-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB12_6 +; CHECK-THUMB-NEXT: @ %bb.5: +; CHECK-THUMB-NEXT: mov r2, r1 +; CHECK-THUMB-NEXT: .LBB12_6: +; CHECK-THUMB-NEXT: blt .LBB12_8 +; CHECK-THUMB-NEXT: @ %bb.7: +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: .LBB12_8: +; CHECK-THUMB-NEXT: movs r3, #0 +; CHECK-THUMB-NEXT: rsbs r0, r5, #0 +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: mov r2, r3 +; CHECK-THUMB-NEXT: sbcs r2, r4 +; CHECK-THUMB-NEXT: ldr r4, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %aext = sext i128 %a to i256 + %bext = sext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true) + %nabs = sub i256 0, %abs + %trunc = trunc i256 %nabs to i128 + ret i128 %trunc +} + +; +; sub(smin(a,b),smax(a,b)) -> nabds(a,b) +; + +define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: cmp r0, r1 +; CHECK-THUMB-NEXT: mov r2, r0 +; CHECK-THUMB-NEXT: ble .LBB13_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB13_4 +; CHECK-THUMB-NEXT: .LBB13_2: +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: bx lr +; CHECK-THUMB-NEXT: .LBB13_3: +; CHECK-THUMB-NEXT: mov r2, r1 +; CHECK-THUMB-NEXT: blt .LBB13_2 +; CHECK-THUMB-NEXT: .LBB13_4: +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: bx lr + %min = call i8 @llvm.smin.i8(i8 %a, i8 %b) + %max = call i8 @llvm.smax.i8(i8 %a, i8 %b) + %sub = sub i8 %min, 
%max + ret i8 %sub +} + +define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: cmp r0, r1 +; CHECK-THUMB-NEXT: mov r2, r0 +; CHECK-THUMB-NEXT: ble .LBB14_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB14_4 +; CHECK-THUMB-NEXT: .LBB14_2: +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: bx lr +; CHECK-THUMB-NEXT: .LBB14_3: +; CHECK-THUMB-NEXT: mov r2, r1 +; CHECK-THUMB-NEXT: blt .LBB14_2 +; CHECK-THUMB-NEXT: .LBB14_4: +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: bx lr + %min = call i16 @llvm.smin.i16(i16 %a, i16 %b) + %max = call i16 @llvm.smax.i16(i16 %a, i16 %b) + %sub = sub i16 %min, %max + ret i16 %sub +} + +define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbge r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: cmp r0, r1 +; CHECK-THUMB-NEXT: mov r2, r0 +; CHECK-THUMB-NEXT: ble .LBB15_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB15_4 +; CHECK-THUMB-NEXT: .LBB15_2: +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: bx lr +; CHECK-THUMB-NEXT: .LBB15_3: +; CHECK-THUMB-NEXT: mov r2, r1 +; CHECK-THUMB-NEXT: blt .LBB15_2 +; CHECK-THUMB-NEXT: .LBB15_4: +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: bx lr + %min = call i32 @llvm.smin.i32(i32 %a, i32 %b) + %max = call i32 @llvm.smax.i32(i32 %a, i32 %b) + %sub = sub i32 %min, %max + ret i32 
%sub +} + +define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, lr} +; CHECK-ARM-NEXT: push {r4, lr} +; CHECK-ARM-NEXT: subs r12, r2, r0 +; CHECK-ARM-NEXT: mov lr, r2 +; CHECK-ARM-NEXT: sbcs r12, r3, r1 +; CHECK-ARM-NEXT: mov r12, r3 +; CHECK-ARM-NEXT: movlt lr, r0 +; CHECK-ARM-NEXT: movlt r12, r1 +; CHECK-ARM-NEXT: subs r4, r0, r2 +; CHECK-ARM-NEXT: sbcs r4, r1, r3 +; CHECK-ARM-NEXT: movlt r3, r1 +; CHECK-ARM-NEXT: movlt r2, r0 +; CHECK-ARM-NEXT: subs r0, r2, lr +; CHECK-ARM-NEXT: sbc r1, r3, r12 +; CHECK-ARM-NEXT: pop {r4, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, lr} +; CHECK-THUMB-NEXT: subs r4, r2, r0 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: blt .LBB16_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: .LBB16_2: +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: blt .LBB16_4 +; CHECK-THUMB-NEXT: @ %bb.3: +; CHECK-THUMB-NEXT: mov r5, r2 +; CHECK-THUMB-NEXT: .LBB16_4: +; CHECK-THUMB-NEXT: subs r6, r0, r2 +; CHECK-THUMB-NEXT: mov r6, r1 +; CHECK-THUMB-NEXT: sbcs r6, r3 +; CHECK-THUMB-NEXT: blt .LBB16_6 +; CHECK-THUMB-NEXT: @ %bb.5: +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: .LBB16_6: +; CHECK-THUMB-NEXT: blt .LBB16_8 +; CHECK-THUMB-NEXT: @ %bb.7: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: .LBB16_8: +; CHECK-THUMB-NEXT: subs r0, r0, r5 +; CHECK-THUMB-NEXT: sbcs r1, r4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, pc} + %min = call i64 @llvm.smin.i64(i64 %a, i64 %b) + %max = call i64 @llvm.smax.i64(i64 %a, i64 %b) + %sub = sub i64 %min, %max + ret i64 %sub +} + +define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, 
r8, r9, r10, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #32] +; CHECK-ARM-NEXT: ldr r4, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r5, r0 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r6, r4, r1 +; CHECK-ARM-NEXT: mov r10, r4 +; CHECK-ARM-NEXT: mov r7, r5 +; CHECK-ARM-NEXT: sbcs r6, lr, r2 +; CHECK-ARM-NEXT: mov r8, r12 +; CHECK-ARM-NEXT: sbcs r6, r12, r3 +; CHECK-ARM-NEXT: mov r9, lr +; CHECK-ARM-NEXT: movlt r8, r3 +; CHECK-ARM-NEXT: movlt r9, r2 +; CHECK-ARM-NEXT: movlt r10, r1 +; CHECK-ARM-NEXT: movlt r7, r0 +; CHECK-ARM-NEXT: subs r6, r0, r5 +; CHECK-ARM-NEXT: sbcs r6, r1, r4 +; CHECK-ARM-NEXT: sbcs r6, r2, lr +; CHECK-ARM-NEXT: sbcs r6, r3, r12 +; CHECK-ARM-NEXT: movlt r12, r3 +; CHECK-ARM-NEXT: movlt lr, r2 +; CHECK-ARM-NEXT: movlt r4, r1 +; CHECK-ARM-NEXT: movlt r5, r0 +; CHECK-ARM-NEXT: subs r0, r5, r7 +; CHECK-ARM-NEXT: sbcs r1, r4, r10 +; CHECK-ARM-NEXT: sbcs r2, lr, r9 +; CHECK-ARM-NEXT: sbc r3, r12, r8 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #20 +; CHECK-THUMB-NEXT: sub sp, #20 +; CHECK-THUMB-NEXT: ldr r5, [sp, #52] +; CHECK-THUMB-NEXT: add r7, sp, #40 +; CHECK-THUMB-NEXT: ldm r7, {r4, r6, r7} +; CHECK-THUMB-NEXT: str r4, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: subs r4, r4, r0 +; CHECK-THUMB-NEXT: mov r4, r6 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: mov r4, r7 +; CHECK-THUMB-NEXT: sbcs r4, r2 +; CHECK-THUMB-NEXT: mov r4, r5 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: bge .LBB17_12 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: bge .LBB17_13 +; CHECK-THUMB-NEXT: .LBB17_2: +; CHECK-THUMB-NEXT: str r1, [sp, #8] @ 
4-byte Spill +; CHECK-THUMB-NEXT: blt .LBB17_4 +; CHECK-THUMB-NEXT: .LBB17_3: +; CHECK-THUMB-NEXT: str r6, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: .LBB17_4: +; CHECK-THUMB-NEXT: str r4, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: blt .LBB17_6 +; CHECK-THUMB-NEXT: @ %bb.5: +; CHECK-THUMB-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: .LBB17_6: +; CHECK-THUMB-NEXT: str r4, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r4, r0, r4 +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: sbcs r4, r6 +; CHECK-THUMB-NEXT: mov r4, r2 +; CHECK-THUMB-NEXT: sbcs r4, r7 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: sbcs r4, r5 +; CHECK-THUMB-NEXT: bge .LBB17_14 +; CHECK-THUMB-NEXT: @ %bb.7: +; CHECK-THUMB-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB17_15 +; CHECK-THUMB-NEXT: .LBB17_8: +; CHECK-THUMB-NEXT: bge .LBB17_16 +; CHECK-THUMB-NEXT: .LBB17_9: +; CHECK-THUMB-NEXT: blt .LBB17_11 +; CHECK-THUMB-NEXT: .LBB17_10: +; CHECK-THUMB-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: .LBB17_11: +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r0, r0, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: add sp, #20 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB17_12: +; CHECK-THUMB-NEXT: mov r4, r5 +; CHECK-THUMB-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: blt .LBB17_2 +; CHECK-THUMB-NEXT: .LBB17_13: +; CHECK-THUMB-NEXT: str r7, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: bge .LBB17_3 +; CHECK-THUMB-NEXT: b .LBB17_4 +; CHECK-THUMB-NEXT: .LBB17_14: +; CHECK-THUMB-NEXT: mov r3, r5 +; CHECK-THUMB-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; 
CHECK-THUMB-NEXT: blt .LBB17_8 +; CHECK-THUMB-NEXT: .LBB17_15: +; CHECK-THUMB-NEXT: mov r2, r7 +; CHECK-THUMB-NEXT: blt .LBB17_9 +; CHECK-THUMB-NEXT: .LBB17_16: +; CHECK-THUMB-NEXT: mov r1, r6 +; CHECK-THUMB-NEXT: bge .LBB17_10 +; CHECK-THUMB-NEXT: b .LBB17_11 + %min = call i128 @llvm.smin.i128(i128 %a, i128 %b) + %max = call i128 @llvm.smax.i128(i128 %a, i128 %b) + %sub = sub i128 %min, %max + ret i128 %sub +} + +; +; select(icmp(a,b),sub(a,b),sub(b,a)) -> nabds(a,b) +; + +define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r3, r0, #24 +; CHECK-ARM-NEXT: lsl r12, r1, #24 +; CHECK-ARM-NEXT: asr r3, r3, #24 +; CHECK-ARM-NEXT: sub r2, r1, r0 +; CHECK-ARM-NEXT: cmp r3, r12, asr #24 +; CHECK-ARM-NEXT: suble r2, r0, r1 +; CHECK-ARM-NEXT: mov r0, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r2, r1 +; CHECK-THUMB-NEXT: sxtb r3, r0 +; CHECK-THUMB-NEXT: cmp r3, r2 +; CHECK-THUMB-NEXT: ble .LBB18_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr +; CHECK-THUMB-NEXT: .LBB18_2: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp sle i8 %a, %b + %ab = sub i8 %a, %b + %ba = sub i8 %b, %a + %sel = select i1 %cmp, i8 %ab, i8 %ba + ret i8 %sel +} + +define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r3, r0, #16 +; CHECK-ARM-NEXT: lsl r12, r1, #16 +; CHECK-ARM-NEXT: asr r3, r3, #16 +; CHECK-ARM-NEXT: sub r2, r1, r0 +; CHECK-ARM-NEXT: cmp r3, r12, asr #16 +; CHECK-ARM-NEXT: sublt r2, r0, r1 +; CHECK-ARM-NEXT: mov r0, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r2, r1 +; CHECK-THUMB-NEXT: sxth r3, r0 +; CHECK-THUMB-NEXT: cmp r3, r2 +; CHECK-THUMB-NEXT: blt .LBB19_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: subs r0, 
r1, r0 +; CHECK-THUMB-NEXT: bx lr +; CHECK-THUMB-NEXT: .LBB19_2: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp slt i16 %a, %b + %ab = sub i16 %a, %b + %ba = sub i16 %b, %a + %sel = select i1 %cmp, i16 %ab, i16 %ba + ret i16 %sel +} + +define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbge r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: blt .LBB20_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: .LBB20_2: +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp sge i32 %a, %b + %ab = sub i32 %a, %b + %ba = sub i32 %b, %a + %sel = select i1 %cmp, i32 %ba, i32 %ab + ret i32 %sel +} + +define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r2, r0 +; CHECK-ARM-NEXT: sbc lr, r3, r1 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: movge r0, r12 +; CHECK-ARM-NEXT: movge r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: subs r5, r2, r0 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: bge .LBB21_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB21_4 +; CHECK-THUMB-NEXT: .LBB21_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB21_3: +; CHECK-THUMB-NEXT: mov r0, r5 +; CHECK-THUMB-NEXT: blt .LBB21_2 +; CHECK-THUMB-NEXT: .LBB21_4: +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, 
pc} + %cmp = icmp slt i64 %a, %b + %ab = sub i64 %a, %b + %ba = sub i64 %b, %a + %sel = select i1 %cmp, i64 %ab, i64 %ba + ret i64 %sel +} + +define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r9, r0 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r8, r1 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, lr, r2 +; CHECK-ARM-NEXT: sbc r5, r12, r3 +; CHECK-ARM-NEXT: subs r0, r0, r9 +; CHECK-ARM-NEXT: sbcs r1, r1, r8 +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: movge r0, r6 +; CHECK-ARM-NEXT: movge r1, r7 +; CHECK-ARM-NEXT: movge r2, r4 +; CHECK-ARM-NEXT: movge r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r2, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: add r6, sp, #52 +; CHECK-THUMB-NEXT: ldm r6, {r0, r5, r6} +; CHECK-THUMB-NEXT: ldr r2, [sp, #48] +; CHECK-THUMB-NEXT: subs r7, r2, r1 +; CHECK-THUMB-NEXT: str r7, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r7, r0 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: str r7, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r7, r5 +; CHECK-THUMB-NEXT: ldr r4, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: str r7, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r7, r6 +; CHECK-THUMB-NEXT: sbcs r7, r3 +; CHECK-THUMB-NEXT: str r7, 
[sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: subs r2, r1, r2 +; CHECK-THUMB-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r1, r0 +; CHECK-THUMB-NEXT: sbcs r4, r5 +; CHECK-THUMB-NEXT: sbcs r3, r6 +; CHECK-THUMB-NEXT: bge .LBB22_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB22_7 +; CHECK-THUMB-NEXT: .LBB22_2: +; CHECK-THUMB-NEXT: bge .LBB22_8 +; CHECK-THUMB-NEXT: .LBB22_3: +; CHECK-THUMB-NEXT: blt .LBB22_5 +; CHECK-THUMB-NEXT: .LBB22_4: +; CHECK-THUMB-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: .LBB22_5: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB22_6: +; CHECK-THUMB-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB22_2 +; CHECK-THUMB-NEXT: .LBB22_7: +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB22_3 +; CHECK-THUMB-NEXT: .LBB22_8: +; CHECK-THUMB-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB22_4 +; CHECK-THUMB-NEXT: b .LBB22_5 + %cmp = icmp slt i128 %a, %b + %ab = sub i128 %a, %b + %ba = sub i128 %b, %a + %sel = select i1 %cmp, i128 %ab, i128 %ba + ret i128 %sel +} + +; +; nabs(sub_nsw(x, y)) -> nabds(a,b) +; + +define i8 @abd_subnsw_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: lsl r1, r0, #24 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: rsb r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxtb r1, r0 +; CHECK-THUMB-NEXT: asrs r1, r1, #7 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i8 %a, %b + %abs = call i8 @llvm.abs.i8(i8 %sub, i1 false) + %nabs = sub i8 0, %abs + ret i8 %nabs +} + +define i8 @abd_subnsw_i8_undef(i8 %a, 
i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i8_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: lsl r1, r0, #24 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: rsb r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i8_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxtb r1, r0 +; CHECK-THUMB-NEXT: asrs r1, r1, #7 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i8 %a, %b + %abs = call i8 @llvm.abs.i8(i8 %sub, i1 true) + %nabs = sub i8 0, %abs + ret i8 %nabs +} + +define i16 @abd_subnsw_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: lsl r1, r0, #16 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: rsb r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxth r1, r0 +; CHECK-THUMB-NEXT: asrs r1, r1, #15 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i16 %a, %b + %abs = call i16 @llvm.abs.i16(i16 %sub, i1 false) + %nabs = sub i16 0, %abs + ret i16 %nabs +} + +define i16 @abd_subnsw_i16_undef(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i16_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: lsl r1, r0, #16 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: rsb r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i16_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxth r1, r0 +; CHECK-THUMB-NEXT: asrs r1, r1, #15 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i16 %a, %b + %abs = call i16 @llvm.abs.i16(i16 
%sub, i1 true) + %nabs = sub i16 0, %abs + ret i16 %nabs +} + +define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbge r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i32 %a, %b + %abs = call i32 @llvm.abs.i32(i32 %sub, i1 false) + %nabs = sub i32 0, %abs + ret i32 %nabs +} + +define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i32_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbge r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i32_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i32 %a, %b + %abs = call i32 @llvm.abs.i32(i32 %sub, i1 true) + %nabs = sub i32 0, %abs + ret i32 %nabs +} + +define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r3 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: eor r2, r1, r1, asr #31 +; CHECK-ARM-NEXT: rsbs r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: rsc r1, r2, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: asrs r2, r1, #31 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r2, r0 +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: mov r1, r2 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i64 
%a, %b + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + ret i64 %nabs +} + +define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i64_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r3 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: eor r2, r1, r1, asr #31 +; CHECK-ARM-NEXT: rsbs r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: rsc r1, r2, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i64_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: asrs r2, r1, #31 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r2, r0 +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: mov r1, r2 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i64 %a, %b + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + ret i64 %nabs +} + +define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r11, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbc r3, r3, r12 +; CHECK-ARM-NEXT: eor r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: rsbs r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: rscs r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: eor r5, r3, r3, asr #31 +; CHECK-ARM-NEXT: rscs r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: rsc r3, r5, r3, asr #31 +; CHECK-ARM-NEXT: pop {r4, r5, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save 
{r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: ldr r4, [sp, #36] +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: subs r0, r0, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: asrs r4, r3, #31 +; CHECK-THUMB-NEXT: eors r3, r4 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: eors r1, r4 +; CHECK-THUMB-NEXT: eors r0, r4 +; CHECK-THUMB-NEXT: subs r0, r4, r0 +; CHECK-THUMB-NEXT: mov r5, r4 +; CHECK-THUMB-NEXT: sbcs r5, r1 +; CHECK-THUMB-NEXT: mov r6, r4 +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: mov r1, r5 +; CHECK-THUMB-NEXT: mov r2, r6 +; CHECK-THUMB-NEXT: mov r3, r4 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %sub = sub nsw i128 %a, %b + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false) + %nabs = sub i128 0, %abs + ret i128 %nabs +} + +define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i128_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r11, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbc r3, r3, r12 +; CHECK-ARM-NEXT: eor r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: rsbs r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: rscs r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: eor r5, r3, r3, asr #31 +; CHECK-ARM-NEXT: rscs r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: rsc r3, r5, r3, asr #31 +; CHECK-ARM-NEXT: pop {r4, r5, r11, lr} +; 
CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i128_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: ldr r4, [sp, #36] +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: subs r0, r0, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: asrs r4, r3, #31 +; CHECK-THUMB-NEXT: eors r3, r4 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: eors r1, r4 +; CHECK-THUMB-NEXT: eors r0, r4 +; CHECK-THUMB-NEXT: subs r0, r4, r0 +; CHECK-THUMB-NEXT: mov r5, r4 +; CHECK-THUMB-NEXT: sbcs r5, r1 +; CHECK-THUMB-NEXT: mov r6, r4 +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: mov r1, r5 +; CHECK-THUMB-NEXT: mov r2, r6 +; CHECK-THUMB-NEXT: mov r3, r4 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %sub = sub nsw i128 %a, %b + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true) + %nabs = sub i128 0, %abs + ret i128 %nabs +} + +declare i8 @llvm.abs.i8(i8, i1) +declare i16 @llvm.abs.i16(i16, i1) +declare i32 @llvm.abs.i32(i32, i1) +declare i64 @llvm.abs.i64(i64, i1) +declare i128 @llvm.abs.i128(i128, i1) + +declare i8 @llvm.smax.i8(i8, i8) +declare i16 @llvm.smax.i16(i16, i16) +declare i32 @llvm.smax.i32(i32, i32) +declare i64 @llvm.smax.i64(i64, i64) + +declare i8 @llvm.smin.i8(i8, i8) +declare i16 @llvm.smin.i16(i16, i16) +declare i32 @llvm.smin.i32(i32, i32) +declare i64 @llvm.smin.i64(i64, i64) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/ARM/abds.ll b/llvm/test/CodeGen/ARM/abds.ll new file mode 100644 index 0000000000000..fbf3c41c0a4ba --- /dev/null +++ b/llvm/test/CodeGen/ARM/abds.ll @@ -0,0 +1,1496 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s --check-prefixes=CHECK-ARM +; RUN: llc -mtriple=thumbv6m-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-THUMB + +; +; trunc(abs(sub(sext(a),sext(b)))) -> abds(a,b) +; + +define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i8 %a to i64 + %bext = sext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx 
lr + %aext = sext i8 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i8 %a to i64 + %bext = sext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %trunc = trunc i64 %abs to i8 + ret i8 %trunc +} + +define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i16 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: 
rsbs r0, r1, r0, asr #16 +; CHECK-ARM-NEXT: rsblt r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bgt .LBB4_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: .LBB4_2: +; CHECK-THUMB-NEXT: bx lr + %aext = sext i16 %a to i64 + %bext = sext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i16 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %trunc = trunc i64 %abs to i16 + ret i16 %trunc +} + +define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblt r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bgt .LBB6_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: .LBB6_2: +; CHECK-THUMB-NEXT: bx lr + %aext = sext i32 %a to i64 + %bext = sext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 
false) + %trunc = trunc i64 %abs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsblt r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bgt .LBB7_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: .LBB7_2: +; CHECK-THUMB-NEXT: bx lr + %aext = sext i32 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblt r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bgt .LBB8_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: .LBB8_2: +; CHECK-THUMB-NEXT: bx lr + %aext = sext i32 %a to i64 + %bext = sext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %trunc = trunc i64 %abs to i32 + ret i32 %trunc +} + +define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64: +; CHECK-THUMB: @ %bb.0: +; 
CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: subs r2, r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: bge .LBB9_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB9_4 +; CHECK-THUMB-NEXT: .LBB9_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB9_3: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: blt .LBB9_2 +; CHECK-THUMB-NEXT: .LBB9_4: +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %aext = sext i64 %a to i128 + %bext = sext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false) + %trunc = trunc i128 %abs to i64 + ret i64 %trunc +} + +define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: subs r2, r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: bge .LBB10_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB10_4 +; CHECK-THUMB-NEXT: .LBB10_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB10_3: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: blt .LBB10_2 +; CHECK-THUMB-NEXT: .LBB10_4: +; CHECK-THUMB-NEXT: mov r1, r3 
+; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %aext = sext i64 %a to i128 + %bext = sext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true) + %trunc = trunc i128 %abs to i64 + ret i64 %trunc +} + +define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr +; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs r2, lr, r2 +; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB-NEXT: ldr r2, [sp, #56] +; CHECK-THUMB-NEXT: ldr r7, [sp, #52] +; CHECK-THUMB-NEXT: ldr r5, [sp, #48] +; CHECK-THUMB-NEXT: subs r3, r1, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r6, r2 +; 
CHECK-THUMB-NEXT: str r6, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r0 +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r5, r5, r1 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: sbcs r0, r6 +; CHECK-THUMB-NEXT: bge .LBB11_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB11_7 +; CHECK-THUMB-NEXT: .LBB11_2: +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB11_8 +; CHECK-THUMB-NEXT: .LBB11_3: +; CHECK-THUMB-NEXT: blt .LBB11_5 +; CHECK-THUMB-NEXT: .LBB11_4: +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: .LBB11_5: +; CHECK-THUMB-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB11_6: +; CHECK-THUMB-NEXT: str r5, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB11_2 +; CHECK-THUMB-NEXT: .LBB11_7: +; CHECK-THUMB-NEXT: mov r1, r7 +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB11_3 +; CHECK-THUMB-NEXT: .LBB11_8: +; CHECK-THUMB-NEXT: mov r4, r2 +; CHECK-THUMB-NEXT: bge .LBB11_4 +; CHECK-THUMB-NEXT: b .LBB11_5 + %aext = sext i128 %a to i256 + %bext = sext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false) + %trunc = trunc i256 %abs to i128 + ret i128 %trunc +} + +define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; 
CHECK-ARM-NEXT: subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr +; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs r2, lr, r2 +; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB-NEXT: ldr r2, [sp, #56] +; CHECK-THUMB-NEXT: ldr r7, [sp, #52] +; CHECK-THUMB-NEXT: ldr r5, [sp, #48] +; CHECK-THUMB-NEXT: subs r3, r1, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: str r6, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r0 +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r5, r5, r1 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: sbcs r0, r6 +; CHECK-THUMB-NEXT: bge .LBB12_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; 
CHECK-THUMB-NEXT: bge .LBB12_7 +; CHECK-THUMB-NEXT: .LBB12_2: +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB12_8 +; CHECK-THUMB-NEXT: .LBB12_3: +; CHECK-THUMB-NEXT: blt .LBB12_5 +; CHECK-THUMB-NEXT: .LBB12_4: +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: .LBB12_5: +; CHECK-THUMB-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB12_6: +; CHECK-THUMB-NEXT: str r5, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB12_2 +; CHECK-THUMB-NEXT: .LBB12_7: +; CHECK-THUMB-NEXT: mov r1, r7 +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB12_3 +; CHECK-THUMB-NEXT: .LBB12_8: +; CHECK-THUMB-NEXT: mov r4, r2 +; CHECK-THUMB-NEXT: bge .LBB12_4 +; CHECK-THUMB-NEXT: b .LBB12_5 + %aext = sext i128 %a to i256 + %bext = sext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true) + %trunc = trunc i256 %abs to i128 + ret i128 %trunc +} + +; +; sub(smax(a,b),smin(a,b)) -> abds(a,b) +; + +define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %min = call i8 @llvm.smin.i8(i8 %a, i8 %b) + %max = call i8 @llvm.smax.i8(i8 %a, i8 %b) + %sub = sub i8 %max, %min + ret i8 %sub +} + +define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { 
+; CHECK-ARM-LABEL: abd_minmax_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %min = call i16 @llvm.smin.i16(i16 %a, i16 %b) + %max = call i16 @llvm.smax.i16(i16 %a, i16 %b) + %sub = sub i16 %max, %min + ret i16 %sub +} + +define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblt r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bgt .LBB15_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: .LBB15_2: +; CHECK-THUMB-NEXT: bx lr + %min = call i32 @llvm.smin.i32(i32 %a, i32 %b) + %max = call i32 @llvm.smax.i32(i32 %a, i32 %b) + %sub = sub i32 %max, %min + ret i32 %sub +} + +define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r5, r0 
+; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: subs r2, r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: bge .LBB16_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB16_4 +; CHECK-THUMB-NEXT: .LBB16_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB16_3: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: blt .LBB16_2 +; CHECK-THUMB-NEXT: .LBB16_4: +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %min = call i64 @llvm.smin.i64(i64 %a, i64 %b) + %max = call i64 @llvm.smax.i64(i64 %a, i64 %b) + %sub = sub i64 %max, %min + ret i64 %sub +} + +define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr +; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs r2, lr, r2 +; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: str r0, [sp, 
#8] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB-NEXT: ldr r2, [sp, #56] +; CHECK-THUMB-NEXT: ldr r7, [sp, #52] +; CHECK-THUMB-NEXT: ldr r5, [sp, #48] +; CHECK-THUMB-NEXT: subs r3, r1, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: str r6, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r0 +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r5, r5, r1 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: sbcs r0, r6 +; CHECK-THUMB-NEXT: bge .LBB17_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB17_7 +; CHECK-THUMB-NEXT: .LBB17_2: +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB17_8 +; CHECK-THUMB-NEXT: .LBB17_3: +; CHECK-THUMB-NEXT: blt .LBB17_5 +; CHECK-THUMB-NEXT: .LBB17_4: +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: .LBB17_5: +; CHECK-THUMB-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB17_6: +; CHECK-THUMB-NEXT: str r5, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB17_2 +; CHECK-THUMB-NEXT: .LBB17_7: +; CHECK-THUMB-NEXT: mov r1, r7 +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB17_3 +; CHECK-THUMB-NEXT: .LBB17_8: +; CHECK-THUMB-NEXT: mov r4, r2 +; CHECK-THUMB-NEXT: bge .LBB17_4 +; CHECK-THUMB-NEXT: b .LBB17_5 + %min = call i128 @llvm.smin.i128(i128 %a, i128 %b) + %max = call i128 @llvm.smax.i128(i128 %a, i128 %b) + 
%sub = sub i128 %max, %min + ret i128 %sub +} + +; +; select(icmp(a,b),sub(a,b),sub(b,a)) -> abds(a,b) +; + +define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp sgt i8 %a, %b + %ab = sub i8 %a, %b + %ba = sub i8 %b, %a + %sel = select i1 %cmp, i8 %ab, i8 %ba + ret i8 %sel +} + +define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp sge i16 %a, %b + %ab = sub i16 %a, %b + %ba = sub i16 %b, %a + %sel = select i1 %cmp, i16 %ab, i16 %ba + ret i16 %sel +} + +define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblt r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bgt .LBB20_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: 
rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: .LBB20_2: +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp slt i32 %a, %b + %ab = sub i32 %a, %b + %ba = sub i32 %b, %a + %sel = select i1 %cmp, i32 %ba, i32 %ab + ret i32 %sel +} + +define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: subs r2, r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: bge .LBB21_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB21_4 +; CHECK-THUMB-NEXT: .LBB21_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB21_3: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: blt .LBB21_2 +; CHECK-THUMB-NEXT: .LBB21_4: +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %cmp = icmp sge i64 %a, %b + %ab = sub i64 %a, %b + %ba = sub i64 %b, %a + %sel = select i1 %cmp, i64 %ab, i64 %ba + ret i64 %sel +} + +define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr 
+; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs r2, lr, r2 +; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB-NEXT: ldr r2, [sp, #56] +; CHECK-THUMB-NEXT: ldr r7, [sp, #52] +; CHECK-THUMB-NEXT: ldr r5, [sp, #48] +; CHECK-THUMB-NEXT: subs r3, r1, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: str r6, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r0 +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r5, r5, r1 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: sbcs r0, r6 +; CHECK-THUMB-NEXT: bge .LBB22_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB22_7 +; CHECK-THUMB-NEXT: .LBB22_2: +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB22_8 +; CHECK-THUMB-NEXT: .LBB22_3: +; 
CHECK-THUMB-NEXT: blt .LBB22_5 +; CHECK-THUMB-NEXT: .LBB22_4: +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: .LBB22_5: +; CHECK-THUMB-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB22_6: +; CHECK-THUMB-NEXT: str r5, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB22_2 +; CHECK-THUMB-NEXT: .LBB22_7: +; CHECK-THUMB-NEXT: mov r1, r7 +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB22_3 +; CHECK-THUMB-NEXT: .LBB22_8: +; CHECK-THUMB-NEXT: mov r4, r2 +; CHECK-THUMB-NEXT: bge .LBB22_4 +; CHECK-THUMB-NEXT: b .LBB22_5 + %cmp = icmp sge i128 %a, %b + %ab = sub i128 %a, %b + %ba = sub i128 %b, %a + %sel = select i1 %cmp, i128 %ab, i128 %ba + ret i128 %sel +} + +; +; abs(sub_nsw(x, y)) -> abds(a,b) +; + +define i8 @abd_subnsw_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: asrs r0, r0, #24 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxtb r1, r0 +; CHECK-THUMB-NEXT: asrs r1, r1, #7 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i8 %a, %b + %abs = call i8 @llvm.abs.i8(i8 %sub, i1 false) + ret i8 %abs +} + +define i8 @abd_subnsw_i8_undef(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i8_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: asrs r0, r0, #24 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i8_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxtb r1, r0 
+; CHECK-THUMB-NEXT: asrs r1, r1, #7 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i8 %a, %b + %abs = call i8 @llvm.abs.i8(i8 %sub, i1 true) + ret i8 %abs +} + +define i16 @abd_subnsw_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: asrs r0, r0, #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxth r1, r0 +; CHECK-THUMB-NEXT: asrs r1, r1, #15 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i16 %a, %b + %abs = call i16 @llvm.abs.i16(i16 %sub, i1 false) + ret i16 %abs +} + +define i16 @abd_subnsw_i16_undef(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i16_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: asrs r0, r0, #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i16_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxth r1, r0 +; CHECK-THUMB-NEXT: asrs r1, r1, #15 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i16 %a, %b + %abs = call i16 @llvm.abs.i16(i16 %sub, i1 true) + ret i16 %abs +} + +define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblt r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + 
%sub = sub nsw i32 %a, %b + %abs = call i32 @llvm.abs.i32(i32 %sub, i1 false) + ret i32 %abs +} + +define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i32_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblt r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i32_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i32 %a, %b + %abs = call i32 @llvm.abs.i32(i32 %sub, i1 true) + ret i32 %abs +} + +define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r3 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: eor r2, r1, r1, asr #31 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: sbc r1, r2, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: asrs r2, r1, #31 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i64 %a, %b + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + ret i64 %abs +} + +define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i64_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r3 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: eor r2, r1, r1, asr #31 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: sbc r1, r2, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i64_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; 
CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: asrs r2, r1, #31 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i64 %a, %b + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + ret i64 %abs +} + +define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r11, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbc r3, r3, r12 +; CHECK-ARM-NEXT: eor r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: subs r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: sbcs r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: eor r5, r3, r3, asr #31 +; CHECK-ARM-NEXT: sbcs r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: sbc r3, r5, r3, asr #31 +; CHECK-ARM-NEXT: pop {r4, r5, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: ldr r4, [sp, #36] +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: subs r0, r0, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: asrs r4, r3, #31 +; CHECK-THUMB-NEXT: eors r3, r4 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: eors r1, r4 +; CHECK-THUMB-NEXT: eors r0, r4 +; CHECK-THUMB-NEXT: subs r0, r0, r4 +; CHECK-THUMB-NEXT: sbcs r1, r4 +; 
CHECK-THUMB-NEXT: sbcs r2, r4 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %sub = sub nsw i128 %a, %b + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false) + ret i128 %abs +} + +define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i128_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r11, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbc r3, r3, r12 +; CHECK-ARM-NEXT: eor r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: subs r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: sbcs r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: eor r5, r3, r3, asr #31 +; CHECK-ARM-NEXT: sbcs r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: sbc r3, r5, r3, asr #31 +; CHECK-ARM-NEXT: pop {r4, r5, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i128_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: ldr r4, [sp, #36] +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: subs r0, r0, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: asrs r4, r3, #31 +; CHECK-THUMB-NEXT: eors r3, r4 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: eors r1, r4 +; CHECK-THUMB-NEXT: eors r0, r4 +; CHECK-THUMB-NEXT: subs r0, r0, r4 +; CHECK-THUMB-NEXT: sbcs r1, r4 +; CHECK-THUMB-NEXT: sbcs r2, r4 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; 
CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %sub = sub nsw i128 %a, %b + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true) + ret i128 %abs +} + +; +; negative tests +; + +define i32 @abd_sub_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_sub_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_sub_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %sub = sub i32 %a, %b + %abs = call i32 @llvm.abs.i32(i32 %sub, i1 false) + ret i32 %abs +} + +define i64 @vector_legalized(i16 %a, i16 %b) { +; CHECK-ARM-LABEL: vector_legalized: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: mov r1, #0 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: vector_legalized: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: movs r1, #0 +; CHECK-THUMB-NEXT: bx lr + %ea = sext i16 %a to i32 + %eb = sext i16 %b to i32 + %s = sub i32 %ea, %eb + %ab = call i32 @llvm.abs.i32(i32 %s, i1 false) + %e = zext i32 %ab to i64 + %red = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> zeroinitializer) + %z = add i64 %red, %e + ret i64 %z +} + +; +; sub(select(icmp(a,b),a,b),select(icmp(a,b),b,a)) -> abds(a,b) +; + +define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; 
CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp slt i8 %a, %b + %ab = select i1 %cmp, i8 %a, i8 %b + %ba = select i1 %cmp, i8 %b, i8 %a + %sub = sub i8 %ba, %ab + ret i8 %sub +} + +define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp sle i16 %a, %b + %ab = select i1 %cmp, i16 %a, i16 %b + %ba = select i1 %cmp, i16 %b, i16 %a + %sub = sub i16 %ba, %ab + ret i16 %sub +} + +define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblt r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bgt .LBB37_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: .LBB37_2: +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp sgt i32 %a, %b + %ab = select i1 %cmp, i32 %a, i32 %b + %ba = select i1 %cmp, i32 %b, i32 %a + %sub = sub i32 %ab, %ba + ret i32 %sub +} + +define i64 @abd_select_i64(i64 %a, i64 
%b) nounwind { +; CHECK-ARM-LABEL: abd_select_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: subs r2, r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: bge .LBB38_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB38_4 +; CHECK-THUMB-NEXT: .LBB38_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB38_3: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: blt .LBB38_2 +; CHECK-THUMB-NEXT: .LBB38_4: +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %cmp = icmp sge i64 %a, %b + %ab = select i1 %cmp, i64 %a, i64 %b + %ba = select i1 %cmp, i64 %b, i64 %a + %sub = sub i64 %ab, %ba + ret i64 %sub +} + +define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr +; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs r2, lr, r2 +; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; 
CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB-NEXT: ldr r2, [sp, #56] +; CHECK-THUMB-NEXT: ldr r7, [sp, #52] +; CHECK-THUMB-NEXT: ldr r5, [sp, #48] +; CHECK-THUMB-NEXT: subs r3, r1, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: str r6, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r0 +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r5, r5, r1 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: sbcs r0, r6 +; CHECK-THUMB-NEXT: bge .LBB39_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB39_7 +; CHECK-THUMB-NEXT: .LBB39_2: +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB39_8 +; CHECK-THUMB-NEXT: .LBB39_3: +; CHECK-THUMB-NEXT: blt .LBB39_5 +; CHECK-THUMB-NEXT: .LBB39_4: +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: .LBB39_5: +; CHECK-THUMB-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r2, r4 +; 
CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB39_6: +; CHECK-THUMB-NEXT: str r5, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB39_2 +; CHECK-THUMB-NEXT: .LBB39_7: +; CHECK-THUMB-NEXT: mov r1, r7 +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB39_3 +; CHECK-THUMB-NEXT: .LBB39_8: +; CHECK-THUMB-NEXT: mov r4, r2 +; CHECK-THUMB-NEXT: bge .LBB39_4 +; CHECK-THUMB-NEXT: b .LBB39_5 + %cmp = icmp slt i128 %a, %b + %ab = select i1 %cmp, i128 %a, i128 %b + %ba = select i1 %cmp, i128 %b, i128 %a + %sub = sub i128 %ba, %ab + ret i128 %sub +} + +declare i8 @llvm.abs.i8(i8, i1) +declare i16 @llvm.abs.i16(i16, i1) +declare i32 @llvm.abs.i32(i32, i1) +declare i64 @llvm.abs.i64(i64, i1) +declare i128 @llvm.abs.i128(i128, i1) + +declare i8 @llvm.smax.i8(i8, i8) +declare i16 @llvm.smax.i16(i16, i16) +declare i32 @llvm.smax.i32(i32, i32) +declare i64 @llvm.smax.i64(i64, i64) + +declare i8 @llvm.smin.i8(i8, i8) +declare i16 @llvm.smin.i16(i16, i16) +declare i32 @llvm.smin.i32(i32, i32) +declare i64 @llvm.smin.i64(i64, i64) diff --git a/llvm/test/CodeGen/ARM/abdu-neg.ll b/llvm/test/CodeGen/ARM/abdu-neg.ll new file mode 100644 index 0000000000000..01b97cd11216f --- /dev/null +++ b/llvm/test/CodeGen/ARM/abdu-neg.ll @@ -0,0 +1,996 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ARM +; RUN: llc -mtriple=thumbv6m-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-THUMB + +; +; trunc(nabs(sub(zext(a),zext(b)))) -> nabds(a,b) +; + +define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; 
CHECK-THUMB-LABEL: abd_ext_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i8 %a to i64 + %bext = zext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i8 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i8 %a to i64 
+ %bext = zext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i8 + ret i8 %trunc +} + +define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i16 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i16 %a to i64 + %bext = zext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: 
mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i16 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i16 + ret i16 %trunc +} + +define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i32 %a to i64 + %bext = zext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext 
i32 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i32 %a to i64 + %bext = zext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i32 + ret i32 %trunc +} + +define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rsc r1, r1, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: adcs r2, r4 +; CHECK-THUMB-NEXT: movs r3, #1 +; CHECK-THUMB-NEXT: eors r3, r2 +; CHECK-THUMB-NEXT: rsbs r2, r3, #0 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: rsbs r0, r0, 
#0 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: pop {r4, pc} + %aext = zext i64 %a to i128 + %bext = zext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false) + %nabs = sub i128 0, %abs + %trunc = trunc i128 %nabs to i64 + ret i64 %trunc +} + +define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rsc r1, r1, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: adcs r2, r4 +; CHECK-THUMB-NEXT: movs r3, #1 +; CHECK-THUMB-NEXT: eors r3, r2 +; CHECK-THUMB-NEXT: rsbs r2, r3, #0 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: pop {r4, pc} + %aext = zext i64 %a to i128 + %bext = zext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true) + %nabs = sub i128 0, %abs + %trunc = trunc i128 %nabs to i64 + ret i64 %trunc +} + +define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; 
CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rscs r1, r1, #0 +; CHECK-ARM-NEXT: rscs r2, r2, #0 +; CHECK-ARM-NEXT: rsc r3, r3, #0 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, [sp, #32] +; CHECK-THUMB-NEXT: ldr r7, [sp, #28] +; CHECK-THUMB-NEXT: ldr r0, [sp, #24] +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r5, r0 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: sbcs r2, r6 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: mov r5, r4 +; CHECK-THUMB-NEXT: adcs r5, r4 +; CHECK-THUMB-NEXT: movs r6, #1 +; CHECK-THUMB-NEXT: eors r6, r5 +; CHECK-THUMB-NEXT: rsbs r5, r6, #0 +; CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r0, r5 +; CHECK-THUMB-NEXT: subs r0, r0, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: 
sbcs r3, r5 +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: mov r5, r4 +; CHECK-THUMB-NEXT: sbcs r5, r1 +; CHECK-THUMB-NEXT: mov r6, r4 +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: mov r1, r5 +; CHECK-THUMB-NEXT: mov r2, r6 +; CHECK-THUMB-NEXT: mov r3, r4 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %aext = zext i128 %a to i256 + %bext = zext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false) + %nabs = sub i256 0, %abs + %trunc = trunc i256 %nabs to i128 + ret i128 %trunc +} + +define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rscs r1, r1, #0 +; CHECK-ARM-NEXT: rscs r2, r2, #0 +; CHECK-ARM-NEXT: rsc r3, r3, #0 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: mov r5, r0 +; 
CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, [sp, #32] +; CHECK-THUMB-NEXT: ldr r7, [sp, #28] +; CHECK-THUMB-NEXT: ldr r0, [sp, #24] +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r5, r0 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: sbcs r2, r6 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: mov r5, r4 +; CHECK-THUMB-NEXT: adcs r5, r4 +; CHECK-THUMB-NEXT: movs r6, #1 +; CHECK-THUMB-NEXT: eors r6, r5 +; CHECK-THUMB-NEXT: rsbs r5, r6, #0 +; CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r0, r5 +; CHECK-THUMB-NEXT: subs r0, r0, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: mov r5, r4 +; CHECK-THUMB-NEXT: sbcs r5, r1 +; CHECK-THUMB-NEXT: mov r6, r4 +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: mov r1, r5 +; CHECK-THUMB-NEXT: mov r2, r6 +; CHECK-THUMB-NEXT: mov r3, r4 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %aext = zext i128 %a to i256 + %bext = zext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true) + %nabs = sub i256 0, %abs + %trunc = trunc i256 %nabs to i128 + ret i128 %trunc +} + +; +; sub(umin(a,b),umax(a,b)) -> nabds(a,b) +; + +define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; 
CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) + %max = call i8 @llvm.umax.i8(i8 %a, i8 %b) + %sub = sub i8 %min, %max + ret i8 %sub +} + +define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) + %max = call i16 @llvm.umax.i16(i16 %a, i16 %b) + %sub = sub i16 %min, %max + ret i16 %sub +} + +define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %min = call i32 @llvm.umin.i32(i32 %a, i32 %b) + %max = call i32 @llvm.umax.i32(i32 %a, i32 %b) + %sub = sub i32 %min, %max + ret i32 %sub +} + +define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, lr} +; CHECK-ARM-NEXT: push {r4, lr} +; CHECK-ARM-NEXT: subs r12, r2, r0 +; CHECK-ARM-NEXT: mov lr, r2 +; CHECK-ARM-NEXT: sbcs r12, r3, r1 +; CHECK-ARM-NEXT: mov r12, r3 +; CHECK-ARM-NEXT: movlo lr, r0 +; CHECK-ARM-NEXT: movlo r12, r1 +; 
CHECK-ARM-NEXT: subs r4, r0, r2 +; CHECK-ARM-NEXT: sbcs r4, r1, r3 +; CHECK-ARM-NEXT: movlo r3, r1 +; CHECK-ARM-NEXT: movlo r2, r0 +; CHECK-ARM-NEXT: subs r0, r2, lr +; CHECK-ARM-NEXT: sbc r1, r3, r12 +; CHECK-ARM-NEXT: pop {r4, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, lr} +; CHECK-THUMB-NEXT: subs r4, r2, r0 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: blo .LBB16_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: .LBB16_2: +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: blo .LBB16_4 +; CHECK-THUMB-NEXT: @ %bb.3: +; CHECK-THUMB-NEXT: mov r5, r2 +; CHECK-THUMB-NEXT: .LBB16_4: +; CHECK-THUMB-NEXT: subs r6, r0, r2 +; CHECK-THUMB-NEXT: mov r6, r1 +; CHECK-THUMB-NEXT: sbcs r6, r3 +; CHECK-THUMB-NEXT: blo .LBB16_6 +; CHECK-THUMB-NEXT: @ %bb.5: +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: .LBB16_6: +; CHECK-THUMB-NEXT: blo .LBB16_8 +; CHECK-THUMB-NEXT: @ %bb.7: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: .LBB16_8: +; CHECK-THUMB-NEXT: subs r0, r0, r5 +; CHECK-THUMB-NEXT: sbcs r1, r4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, pc} + %min = call i64 @llvm.umin.i64(i64 %a, i64 %b) + %max = call i64 @llvm.umax.i64(i64 %a, i64 %b) + %sub = sub i64 %min, %max + ret i64 %sub +} + +define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #32] +; CHECK-ARM-NEXT: ldr r4, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r5, r0 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r6, r4, r1 +; CHECK-ARM-NEXT: mov r10, r4 +; CHECK-ARM-NEXT: mov r7, r5 +; CHECK-ARM-NEXT: sbcs r6, lr, r2 +; 
CHECK-ARM-NEXT: mov r8, r12 +; CHECK-ARM-NEXT: sbcs r6, r12, r3 +; CHECK-ARM-NEXT: mov r9, lr +; CHECK-ARM-NEXT: movlo r8, r3 +; CHECK-ARM-NEXT: movlo r9, r2 +; CHECK-ARM-NEXT: movlo r10, r1 +; CHECK-ARM-NEXT: movlo r7, r0 +; CHECK-ARM-NEXT: subs r6, r0, r5 +; CHECK-ARM-NEXT: sbcs r6, r1, r4 +; CHECK-ARM-NEXT: sbcs r6, r2, lr +; CHECK-ARM-NEXT: sbcs r6, r3, r12 +; CHECK-ARM-NEXT: movlo r12, r3 +; CHECK-ARM-NEXT: movlo lr, r2 +; CHECK-ARM-NEXT: movlo r4, r1 +; CHECK-ARM-NEXT: movlo r5, r0 +; CHECK-ARM-NEXT: subs r0, r5, r7 +; CHECK-ARM-NEXT: sbcs r1, r4, r10 +; CHECK-ARM-NEXT: sbcs r2, lr, r9 +; CHECK-ARM-NEXT: sbc r3, r12, r8 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #20 +; CHECK-THUMB-NEXT: sub sp, #20 +; CHECK-THUMB-NEXT: ldr r5, [sp, #52] +; CHECK-THUMB-NEXT: add r7, sp, #40 +; CHECK-THUMB-NEXT: ldm r7, {r4, r6, r7} +; CHECK-THUMB-NEXT: str r4, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: subs r4, r4, r0 +; CHECK-THUMB-NEXT: mov r4, r6 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: mov r4, r7 +; CHECK-THUMB-NEXT: sbcs r4, r2 +; CHECK-THUMB-NEXT: mov r4, r5 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: bhs .LBB17_12 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: bhs .LBB17_13 +; CHECK-THUMB-NEXT: .LBB17_2: +; CHECK-THUMB-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: blo .LBB17_4 +; CHECK-THUMB-NEXT: .LBB17_3: +; CHECK-THUMB-NEXT: str r6, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: .LBB17_4: +; CHECK-THUMB-NEXT: str r4, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: blo .LBB17_6 +; CHECK-THUMB-NEXT: @ %bb.5: +; CHECK-THUMB-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: .LBB17_6: 
+; CHECK-THUMB-NEXT: str r4, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r4, r0, r4 +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: sbcs r4, r6 +; CHECK-THUMB-NEXT: mov r4, r2 +; CHECK-THUMB-NEXT: sbcs r4, r7 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: sbcs r4, r5 +; CHECK-THUMB-NEXT: bhs .LBB17_14 +; CHECK-THUMB-NEXT: @ %bb.7: +; CHECK-THUMB-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: bhs .LBB17_15 +; CHECK-THUMB-NEXT: .LBB17_8: +; CHECK-THUMB-NEXT: bhs .LBB17_16 +; CHECK-THUMB-NEXT: .LBB17_9: +; CHECK-THUMB-NEXT: blo .LBB17_11 +; CHECK-THUMB-NEXT: .LBB17_10: +; CHECK-THUMB-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: .LBB17_11: +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r0, r0, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: add sp, #20 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB17_12: +; CHECK-THUMB-NEXT: mov r4, r5 +; CHECK-THUMB-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: blo .LBB17_2 +; CHECK-THUMB-NEXT: .LBB17_13: +; CHECK-THUMB-NEXT: str r7, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: bhs .LBB17_3 +; CHECK-THUMB-NEXT: b .LBB17_4 +; CHECK-THUMB-NEXT: .LBB17_14: +; CHECK-THUMB-NEXT: mov r3, r5 +; CHECK-THUMB-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: blo .LBB17_8 +; CHECK-THUMB-NEXT: .LBB17_15: +; CHECK-THUMB-NEXT: mov r2, r7 +; CHECK-THUMB-NEXT: blo .LBB17_9 +; CHECK-THUMB-NEXT: .LBB17_16: +; CHECK-THUMB-NEXT: mov r1, r6 +; CHECK-THUMB-NEXT: bhs .LBB17_10 +; CHECK-THUMB-NEXT: b .LBB17_11 + %min = call i128 @llvm.umin.i128(i128 %a, i128 %b) + %max = call i128 @llvm.umax.i128(i128 %a, i128 %b) + %sub = sub i128 %min, %max + ret i128 
%sub +} + +; +; select(icmp(a,b),sub(a,b),sub(b,a)) -> nabds(a,b) +; + +define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r12, r1, #255 +; CHECK-ARM-NEXT: and r3, r0, #255 +; CHECK-ARM-NEXT: sub r2, r1, r0 +; CHECK-ARM-NEXT: cmp r3, r12 +; CHECK-ARM-NEXT: subls r2, r0, r1 +; CHECK-ARM-NEXT: mov r0, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r2, r1 +; CHECK-THUMB-NEXT: uxtb r3, r0 +; CHECK-THUMB-NEXT: cmp r3, r2 +; CHECK-THUMB-NEXT: bls .LBB18_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr +; CHECK-THUMB-NEXT: .LBB18_2: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ule i8 %a, %b + %ab = sub i8 %a, %b + %ba = sub i8 %b, %a + %sel = select i1 %cmp, i8 %ab, i8 %ba + ret i8 %sel +} + +define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: mov r12, #255 +; CHECK-ARM-NEXT: sub r2, r1, r0 +; CHECK-ARM-NEXT: orr r12, r12, #65280 +; CHECK-ARM-NEXT: and lr, r1, r12 +; CHECK-ARM-NEXT: and r3, r0, r12 +; CHECK-ARM-NEXT: cmp r3, lr +; CHECK-ARM-NEXT: sublo r2, r0, r1 +; CHECK-ARM-NEXT: mov r0, r2 +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r2, r1 +; CHECK-THUMB-NEXT: uxth r3, r0 +; CHECK-THUMB-NEXT: cmp r3, r2 +; CHECK-THUMB-NEXT: blo .LBB19_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr +; CHECK-THUMB-NEXT: .LBB19_2: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ult i16 %a, %b + %ab = sub i16 %a, %b + %ba = sub i16 %b, %a + %sel = select i1 %cmp, i16 %ab, i16 %ba + ret i16 %sel +} + +define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { 
+; CHECK-ARM-LABEL: abd_cmp_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp uge i32 %a, %b + %ab = sub i32 %a, %b + %ba = sub i32 %b, %a + %sel = select i1 %cmp, i32 %ba, i32 %ab + ret i32 %sel +} + +define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r2, r0 +; CHECK-ARM-NEXT: sbc lr, r3, r1 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: movhs r0, r12 +; CHECK-ARM-NEXT: movhs r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: subs r5, r2, r0 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: bhs .LBB21_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bhs .LBB21_4 +; CHECK-THUMB-NEXT: .LBB21_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB21_3: +; CHECK-THUMB-NEXT: mov r0, r5 +; CHECK-THUMB-NEXT: blo .LBB21_2 +; CHECK-THUMB-NEXT: .LBB21_4: +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %cmp = icmp ult i64 %a, %b + %ab = sub i64 %a, %b + %ba = sub i64 %b, %a + %sel = select i1 %cmp, i64 %ab, i64 %ba + ret i64 %sel +} + +define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, 
r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r9, r0 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r8, r1 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, lr, r2 +; CHECK-ARM-NEXT: sbc r5, r12, r3 +; CHECK-ARM-NEXT: subs r0, r0, r9 +; CHECK-ARM-NEXT: sbcs r1, r1, r8 +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: movhs r0, r6 +; CHECK-ARM-NEXT: movhs r1, r7 +; CHECK-ARM-NEXT: movhs r2, r4 +; CHECK-ARM-NEXT: movhs r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r2, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: add r6, sp, #52 +; CHECK-THUMB-NEXT: ldm r6, {r0, r5, r6} +; CHECK-THUMB-NEXT: ldr r2, [sp, #48] +; CHECK-THUMB-NEXT: subs r7, r2, r1 +; CHECK-THUMB-NEXT: str r7, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r7, r0 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: str r7, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r7, r5 +; CHECK-THUMB-NEXT: ldr r4, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: str r7, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r7, r6 +; CHECK-THUMB-NEXT: sbcs r7, r3 +; CHECK-THUMB-NEXT: str r7, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: subs r2, r1, r2 +; CHECK-THUMB-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r1, r0 +; CHECK-THUMB-NEXT: sbcs r4, r5 +; CHECK-THUMB-NEXT: sbcs r3, r6 +; CHECK-THUMB-NEXT: bhs .LBB22_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bhs .LBB22_7 +; CHECK-THUMB-NEXT: .LBB22_2: +; 
CHECK-THUMB-NEXT: bhs .LBB22_8 +; CHECK-THUMB-NEXT: .LBB22_3: +; CHECK-THUMB-NEXT: blo .LBB22_5 +; CHECK-THUMB-NEXT: .LBB22_4: +; CHECK-THUMB-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: .LBB22_5: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB22_6: +; CHECK-THUMB-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: blo .LBB22_2 +; CHECK-THUMB-NEXT: .LBB22_7: +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: blo .LBB22_3 +; CHECK-THUMB-NEXT: .LBB22_8: +; CHECK-THUMB-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: bhs .LBB22_4 +; CHECK-THUMB-NEXT: b .LBB22_5 + %cmp = icmp ult i128 %a, %b + %ab = sub i128 %a, %b + %ba = sub i128 %b, %a + %sel = select i1 %cmp, i128 %ab, i128 %ba + ret i128 %sel +} + +declare i8 @llvm.abs.i8(i8, i1) +declare i16 @llvm.abs.i16(i16, i1) +declare i32 @llvm.abs.i32(i32, i1) +declare i64 @llvm.abs.i64(i64, i1) +declare i128 @llvm.abs.i128(i128, i1) + +declare i8 @llvm.umax.i8(i8, i8) +declare i16 @llvm.umax.i16(i16, i16) +declare i32 @llvm.umax.i32(i32, i32) +declare i64 @llvm.umax.i64(i64, i64) + +declare i8 @llvm.umin.i8(i8, i8) +declare i16 @llvm.umin.i16(i16, i16) +declare i32 @llvm.umin.i32(i32, i32) +declare i64 @llvm.umin.i64(i64, i64) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/ARM/abdu.ll b/llvm/test/CodeGen/ARM/abdu.ll new file mode 100644 index 0000000000000..6b727ca5017f7 --- /dev/null +++ b/llvm/test/CodeGen/ARM/abdu.ll @@ -0,0 +1,1048 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ARM +; RUN: llc -mtriple=thumbv6m-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-THUMB + +; +; trunc(abs(sub(zext(a),zext(b)))) -> abdu(a,b) +; + +define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i8 %a to i64 + %bext = zext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr 
+ %aext = zext i8 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i8 %a to i64 + %bext = zext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %trunc = trunc i64 %abs to i8 + ret i8 %trunc +} + +define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i16 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; 
CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i16 %a to i64 + %bext = zext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i16 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %trunc = trunc i64 %abs to i16 + ret i16 %trunc +} + +define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i32 %a to i64 + %bext = zext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 
@llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i32 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i32 %a to i64 + %bext = zext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %trunc = trunc i64 %abs to i32 + ret i32 %trunc +} + +define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: bx lr +; +; 
CHECK-THUMB-LABEL: abd_ext_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: adcs r4, r4 +; CHECK-THUMB-NEXT: movs r2, #1 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: rsbs r2, r2, #0 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: pop {r4, pc} + %aext = zext i64 %a to i128 + %bext = zext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false) + %trunc = trunc i128 %abs to i64 + ret i64 %trunc +} + +define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: adcs r4, r4 +; CHECK-THUMB-NEXT: movs r2, #1 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: rsbs r2, r2, #0 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: pop {r4, pc} + %aext = zext i64 %a to i128 + %bext = zext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true) + %trunc = trunc i128 %abs to i64 + ret i64 %trunc +} + +define i128 
@abd_ext_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: movs r0, #0 +; CHECK-THUMB-NEXT: subs r4, r4, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: adcs r0, r0 +; CHECK-THUMB-NEXT: movs r5, #1 +; CHECK-THUMB-NEXT: eors r5, r0 +; CHECK-THUMB-NEXT: rsbs r5, r5, #0 +; CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r4, r5 +; CHECK-THUMB-NEXT: subs r0, r4, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; 
CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %aext = zext i128 %a to i256 + %bext = zext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false) + %trunc = trunc i256 %abs to i128 + ret i128 %trunc +} + +define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: movs r0, #0 +; CHECK-THUMB-NEXT: subs r4, r4, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: adcs r0, r0 
+; CHECK-THUMB-NEXT: movs r5, #1 +; CHECK-THUMB-NEXT: eors r5, r0 +; CHECK-THUMB-NEXT: rsbs r5, r5, #0 +; CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r4, r5 +; CHECK-THUMB-NEXT: subs r0, r4, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %aext = zext i128 %a to i256 + %bext = zext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true) + %trunc = trunc i256 %abs to i128 + ret i128 %trunc +} + +; +; sub(umax(a,b),umin(a,b)) -> abdu(a,b) +; + +define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) + %max = call i8 @llvm.umax.i8(i8 %a, i8 %b) + %sub = sub i8 %max, %min + ret i8 %sub +} + +define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors 
r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) + %max = call i16 @llvm.umax.i16(i16 %a, i16 %b) + %sub = sub i16 %max, %min + ret i16 %sub +} + +define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %min = call i32 @llvm.umin.i32(i32 %a, i32 %b) + %max = call i32 @llvm.umax.i32(i32 %a, i32 %b) + %sub = sub i32 %max, %min + ret i32 %sub +} + +define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: adcs r4, r4 +; CHECK-THUMB-NEXT: movs r2, #1 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: rsbs r2, r2, #0 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: pop {r4, pc} + %min = call i64 @llvm.umin.i64(i64 %a, i64 %b) + %max = call i64 @llvm.umax.i64(i64 %a, i64 %b) + %sub = sub i64 %max, %min + ret i64 %sub +} + +define i128 
@abd_minmax_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: movs r0, #0 +; CHECK-THUMB-NEXT: subs r4, r4, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: adcs r0, r0 +; CHECK-THUMB-NEXT: movs r5, #1 +; CHECK-THUMB-NEXT: eors r5, r0 +; CHECK-THUMB-NEXT: rsbs r5, r5, #0 +; CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r4, r5 +; CHECK-THUMB-NEXT: subs r0, r4, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, 
r5 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %min = call i128 @llvm.umin.i128(i128 %a, i128 %b) + %max = call i128 @llvm.umax.i128(i128 %a, i128 %b) + %sub = sub i128 %max, %min + ret i128 %sub +} + +; +; select(icmp(a,b),sub(a,b),sub(b,a)) -> abdu(a,b) +; + +define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ugt i8 %a, %b + %ab = sub i8 %a, %b + %ba = sub i8 %b, %a + %sel = select i1 %cmp, i8 %ab, i8 %ba + ret i8 %sel +} + +define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp uge i16 %a, %b + %ab = sub i16 %a, %b + %ba = sub i16 %b, %a + %sel = select i1 %cmp, i16 %ab, i16 %ba + ret i16 %sel +} + +define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; 
CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ult i32 %a, %b + %ab = sub i32 %a, %b + %ba = sub i32 %b, %a + %sel = select i1 %cmp, i32 %ba, i32 %ab + ret i32 %sel +} + +define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: adcs r4, r4 +; CHECK-THUMB-NEXT: movs r2, #1 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: rsbs r2, r2, #0 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: pop {r4, pc} + %cmp = icmp uge i64 %a, %b + %ab = sub i64 %a, %b + %ba = sub i64 %b, %a + %sel = select i1 %cmp, i64 %ab, i64 %ba + ret i64 %sel +} + +define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; 
CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: movs r0, #0 +; CHECK-THUMB-NEXT: subs r4, r4, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: adcs r0, r0 +; CHECK-THUMB-NEXT: movs r5, #1 +; CHECK-THUMB-NEXT: eors r5, r0 +; CHECK-THUMB-NEXT: rsbs r5, r5, #0 +; CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r4, r5 +; CHECK-THUMB-NEXT: subs r0, r4, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %cmp = icmp uge i128 %a, %b + %ab = sub i128 %a, %b + %ba = sub i128 %b, %a + %sel = select i1 %cmp, i128 %ab, i128 %ba + ret i128 %sel +} + +; +; negative tests +; + +define i64 @vector_legalized(i16 %a, i16 %b) { +; CHECK-ARM-LABEL: vector_legalized: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: 
orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: mov r1, #0 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: vector_legalized: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: movs r1, #0 +; CHECK-THUMB-NEXT: bx lr + %ea = zext i16 %a to i32 + %eb = zext i16 %b to i32 + %s = sub i32 %ea, %eb + %ab = call i32 @llvm.abs.i32(i32 %s, i1 false) + %e = zext i32 %ab to i64 + %red = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> zeroinitializer) + %z = add i64 %red, %e + ret i64 %z +} + +; +; sub(select(icmp(a,b),a,b),select(icmp(a,b),b,a)) -> abdu(a,b) +; + +define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ult i8 %a, %b + %ab = select i1 %cmp, i8 %a, i8 %b + %ba = select i1 %cmp, i8 %b, i8 %a + %sub = sub i8 %ba, %ab + ret i8 %sub +} + +define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: 
abd_select_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ule i16 %a, %b + %ab = select i1 %cmp, i16 %a, i16 %b + %ba = select i1 %cmp, i16 %b, i16 %a + %sub = sub i16 %ba, %ab + ret i16 %sub +} + +define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ugt i32 %a, %b + %ab = select i1 %cmp, i32 %a, i32 %b + %ba = select i1 %cmp, i32 %b, i32 %a + %sub = sub i32 %ab, %ba + ret i32 %sub +} + +define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: adcs r4, r4 +; CHECK-THUMB-NEXT: movs r2, #1 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: rsbs r2, r2, #0 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; 
CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: pop {r4, pc} + %cmp = icmp uge i64 %a, %b + %ab = select i1 %cmp, i64 %a, i64 %b + %ba = select i1 %cmp, i64 %b, i64 %a + %sub = sub i64 %ab, %ba + ret i64 %sub +} + +define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: movs r0, #0 +; CHECK-THUMB-NEXT: subs r4, r4, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: adcs r0, r0 +; CHECK-THUMB-NEXT: movs r5, #1 +; CHECK-THUMB-NEXT: eors r5, r0 +; CHECK-THUMB-NEXT: rsbs r5, r5, #0 +; 
CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r4, r5 +; CHECK-THUMB-NEXT: subs r0, r4, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %cmp = icmp ult i128 %a, %b + %ab = select i1 %cmp, i128 %a, i128 %b + %ba = select i1 %cmp, i128 %b, i128 %a + %sub = sub i128 %ba, %ab + ret i128 %sub +} + +declare i8 @llvm.abs.i8(i8, i1) +declare i16 @llvm.abs.i16(i16, i1) +declare i32 @llvm.abs.i32(i32, i1) +declare i64 @llvm.abs.i64(i64, i1) +declare i128 @llvm.abs.i128(i128, i1) + +declare i8 @llvm.umax.i8(i8, i8) +declare i16 @llvm.umax.i16(i16, i16) +declare i32 @llvm.umax.i32(i32, i32) +declare i64 @llvm.umax.i64(i64, i64) + +declare i8 @llvm.umin.i8(i8, i8) +declare i16 @llvm.umin.i16(i16, i16) +declare i32 @llvm.umin.i32(i32, i32) +declare i64 @llvm.umin.i64(i64, i64) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/ARM/cmse-harden-entry-arguments.ll b/llvm/test/CodeGen/ARM/cmse-harden-entry-arguments.ll index c66ab00566ddf..08ff7192b6ad3 100644 --- a/llvm/test/CodeGen/ARM/cmse-harden-entry-arguments.ll +++ b/llvm/test/CodeGen/ARM/cmse-harden-entry-arguments.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc %s -mtriple=thumbv8m.main -o - | FileCheck %s --check-prefixes V8M-COMMON,V8M-LE ; RUN: llc %s -mtriple=thumbebv8m.main -o - | FileCheck %s --check-prefixes V8M-COMMON,V8M-BE ; RUN: llc %s -mtriple=thumbv8.1m.main -o - | FileCheck %s --check-prefixes V81M-COMMON,V81M-LE @@ -234,28 +235,47 @@ entry: } define i32 @access_i33(i33 %arg) "cmse_nonsecure_entry" { -; V8M-COMMON-LABEL: access_i33: -; V8M-COMMON: @ %bb.0: @ %entry -; V8M-LE-NEXT: and r0, r1, #1 -; V8M-BE-NEXT: and r0, r0, #1 -; V8M-COMMON-NEXT: mov r1, lr -; V8M-COMMON-NEXT: rsbs r0, r0, #0 -; V8M-COMMON-NEXT: mov r2, lr -; V8M-COMMON-NEXT: mov r3, lr -; V8M-COMMON-NEXT: mov r12, lr -; V8M-COMMON-NEXT: msr apsr_nzcvq, lr -; V8M-COMMON-NEXT: bxns lr +; V8M-LE-LABEL: access_i33: +; V8M-LE: @ %bb.0: @ %entry +; V8M-LE-NEXT: and r0, r1, #1 +; V8M-LE-NEXT: mov r1, lr +; V8M-LE-NEXT: rsbs r0, r0, #0 +; V8M-LE-NEXT: mov r2, lr +; V8M-LE-NEXT: mov r3, lr +; V8M-LE-NEXT: mov r12, lr +; V8M-LE-NEXT: msr apsr_nzcvq, lr +; V8M-LE-NEXT: bxns lr ; -; V81M-COMMON-LABEL: access_i33: -; V81M-COMMON: @ %bb.0: @ %entry -; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! 
-; V81M-LE-NEXT: and r0, r1, #1 -; V81M-BE-NEXT: and r0, r0, #1 -; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} -; V81M-COMMON-NEXT: rsbs r0, r0, #0 -; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 -; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} -; V81M-COMMON-NEXT: bxns lr +; V8M-BE-LABEL: access_i33: +; V8M-BE: @ %bb.0: @ %entry +; V8M-BE-NEXT: and r0, r0, #1 +; V8M-BE-NEXT: mov r1, lr +; V8M-BE-NEXT: rsbs r0, r0, #0 +; V8M-BE-NEXT: mov r2, lr +; V8M-BE-NEXT: mov r3, lr +; V8M-BE-NEXT: mov r12, lr +; V8M-BE-NEXT: msr apsr_nzcvq, lr +; V8M-BE-NEXT: bxns lr +; +; V81M-LE-LABEL: access_i33: +; V81M-LE: @ %bb.0: @ %entry +; V81M-LE-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-LE-NEXT: and r0, r1, #1 +; V81M-LE-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-LE-NEXT: rsbs r0, r0, #0 +; V81M-LE-NEXT: vldr fpcxtns, [sp], #4 +; V81M-LE-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-LE-NEXT: bxns lr +; +; V81M-BE-LABEL: access_i33: +; V81M-BE: @ %bb.0: @ %entry +; V81M-BE-NEXT: vstr fpcxtns, [sp, #-4]! 
+; V81M-BE-NEXT: and r0, r0, #1 +; V81M-BE-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-BE-NEXT: rsbs r0, r0, #0 +; V81M-BE-NEXT: vldr fpcxtns, [sp], #4 +; V81M-BE-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-BE-NEXT: bxns lr entry: %shr = ashr i33 %arg, 32 %conv = trunc nsw i33 %shr to i32 @@ -263,26 +283,43 @@ entry: } define i32 @access_u33(i33 %arg) "cmse_nonsecure_entry" { -; V8M-COMMON-LABEL: access_u33: -; V8M-COMMON: @ %bb.0: @ %entry -; V8M-LE-NEXT: and r0, r1, #1 -; V8M-BE-NEXT: and r0, r0, #1 -; V8M-COMMON-NEXT: mov r1, lr -; V8M-COMMON-NEXT: mov r2, lr -; V8M-COMMON-NEXT: mov r3, lr -; V8M-COMMON-NEXT: mov r12, lr -; V8M-COMMON-NEXT: msr apsr_nzcvq, lr -; V8M-COMMON-NEXT: bxns lr +; V8M-LE-LABEL: access_u33: +; V8M-LE: @ %bb.0: @ %entry +; V8M-LE-NEXT: and r0, r1, #1 +; V8M-LE-NEXT: mov r1, lr +; V8M-LE-NEXT: mov r2, lr +; V8M-LE-NEXT: mov r3, lr +; V8M-LE-NEXT: mov r12, lr +; V8M-LE-NEXT: msr apsr_nzcvq, lr +; V8M-LE-NEXT: bxns lr ; -; V81M-COMMON-LABEL: access_u33: -; V81M-COMMON: @ %bb.0: @ %entry -; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! -; V81M-LE-NEXT: and r0, r1, #1 -; V81M-BE-NEXT: and r0, r0, #1 -; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} -; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 -; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} -; V81M-COMMON-NEXT: bxns lr +; V8M-BE-LABEL: access_u33: +; V8M-BE: @ %bb.0: @ %entry +; V8M-BE-NEXT: and r0, r0, #1 +; V8M-BE-NEXT: mov r1, lr +; V8M-BE-NEXT: mov r2, lr +; V8M-BE-NEXT: mov r3, lr +; V8M-BE-NEXT: mov r12, lr +; V8M-BE-NEXT: msr apsr_nzcvq, lr +; V8M-BE-NEXT: bxns lr +; +; V81M-LE-LABEL: access_u33: +; V81M-LE: @ %bb.0: @ %entry +; V81M-LE-NEXT: vstr fpcxtns, [sp, #-4]! 
+; V81M-LE-NEXT: and r0, r1, #1 +; V81M-LE-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-LE-NEXT: vldr fpcxtns, [sp], #4 +; V81M-LE-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-LE-NEXT: bxns lr +; +; V81M-BE-LABEL: access_u33: +; V81M-BE: @ %bb.0: @ %entry +; V81M-BE-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-BE-NEXT: and r0, r0, #1 +; V81M-BE-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-BE-NEXT: vldr fpcxtns, [sp], #4 +; V81M-BE-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-BE-NEXT: bxns lr entry: %shr = lshr i33 %arg, 32 %conv = trunc nuw nsw i33 %shr to i32 @@ -290,40 +327,65 @@ entry: } define i32 @access_i65(ptr byval(i65) %0) "cmse_nonsecure_entry" { -; V8M-COMMON-LABEL: access_i65: -; V8M-COMMON: @ %bb.0: @ %entry -; V8M-COMMON-NEXT: sub sp, #16 -; V8M-COMMON-NEXT: stm.w sp, {r0, r1, r2, r3} -; V8M-LE-NEXT: ldrb.w r0, [sp, #8] -; V8M-LE-NEXT: and r0, r0, #1 -; V8M-LE-NEXT: rsbs r0, r0, #0 -; V8M-BE-NEXT: movs r1, #0 -; V8M-BE-NEXT: sub.w r0, r1, r0, lsr #24 -; V8M-COMMON-NEXT: add sp, #16 -; V8M-COMMON-NEXT: mov r1, lr -; V8M-COMMON-NEXT: mov r2, lr -; V8M-COMMON-NEXT: mov r3, lr -; V8M-COMMON-NEXT: mov r12, lr -; V8M-COMMON-NEXT: msr apsr_nzcvq, lr -; V8M-COMMON-NEXT: bxns lr +; V8M-LE-LABEL: access_i65: +; V8M-LE: @ %bb.0: @ %entry +; V8M-LE-NEXT: sub sp, #16 +; V8M-LE-NEXT: stm.w sp, {r0, r1, r2, r3} +; V8M-LE-NEXT: ldrb.w r0, [sp, #8] +; V8M-LE-NEXT: and r0, r0, #1 +; V8M-LE-NEXT: rsbs r0, r0, #0 +; V8M-LE-NEXT: add sp, #16 +; V8M-LE-NEXT: mov r1, lr +; V8M-LE-NEXT: mov r2, lr +; V8M-LE-NEXT: mov r3, lr +; V8M-LE-NEXT: mov r12, lr +; V8M-LE-NEXT: msr apsr_nzcvq, lr +; V8M-LE-NEXT: bxns lr ; -; V81M-COMMON-LABEL: access_i65: -; V81M-COMMON: @ %bb.0: @ %entry -; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! 
-; V81M-COMMON-NEXT: sub sp, #16 -; V81M-COMMON-NEXT: add sp, #4 -; V81M-COMMON-NEXT: stm.w sp, {r0, r1, r2, r3} -; V81M-LE-NEXT: ldrb.w r0, [sp, #8] -; V81M-LE-NEXT: and r0, r0, #1 -; V81M-LE-NEXT: rsbs r0, r0, #0 -; V81M-BE-NEXT: movs r1, #0 -; V81M-BE-NEXT: sub.w r0, r1, r0, lsr #24 -; V81M-COMMON-NEXT: sub sp, #4 -; V81M-COMMON-NEXT: add sp, #16 -; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} -; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 -; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} -; V81M-COMMON-NEXT: bxns lr +; V8M-BE-LABEL: access_i65: +; V8M-BE: @ %bb.0: @ %entry +; V8M-BE-NEXT: sub sp, #16 +; V8M-BE-NEXT: stm.w sp, {r0, r1, r2, r3} +; V8M-BE-NEXT: lsrs r0, r0, #24 +; V8M-BE-NEXT: rsbs r0, r0, #0 +; V8M-BE-NEXT: add sp, #16 +; V8M-BE-NEXT: mov r1, lr +; V8M-BE-NEXT: mov r2, lr +; V8M-BE-NEXT: mov r3, lr +; V8M-BE-NEXT: mov r12, lr +; V8M-BE-NEXT: msr apsr_nzcvq, lr +; V8M-BE-NEXT: bxns lr +; +; V81M-LE-LABEL: access_i65: +; V81M-LE: @ %bb.0: @ %entry +; V81M-LE-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-LE-NEXT: sub sp, #16 +; V81M-LE-NEXT: add sp, #4 +; V81M-LE-NEXT: stm.w sp, {r0, r1, r2, r3} +; V81M-LE-NEXT: ldrb.w r0, [sp, #8] +; V81M-LE-NEXT: and r0, r0, #1 +; V81M-LE-NEXT: rsbs r0, r0, #0 +; V81M-LE-NEXT: sub sp, #4 +; V81M-LE-NEXT: add sp, #16 +; V81M-LE-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-LE-NEXT: vldr fpcxtns, [sp], #4 +; V81M-LE-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-LE-NEXT: bxns lr +; +; V81M-BE-LABEL: access_i65: +; V81M-BE: @ %bb.0: @ %entry +; V81M-BE-NEXT: vstr fpcxtns, [sp, #-4]! 
+; V81M-BE-NEXT: sub sp, #16 +; V81M-BE-NEXT: add sp, #4 +; V81M-BE-NEXT: stm.w sp, {r0, r1, r2, r3} +; V81M-BE-NEXT: lsrs r0, r0, #24 +; V81M-BE-NEXT: rsbs r0, r0, #0 +; V81M-BE-NEXT: sub sp, #4 +; V81M-BE-NEXT: add sp, #16 +; V81M-BE-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-BE-NEXT: vldr fpcxtns, [sp], #4 +; V81M-BE-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-BE-NEXT: bxns lr entry: %arg = load i65, ptr %0, align 8 %shr = ashr i65 %arg, 64 @@ -332,34 +394,59 @@ entry: } define i32 @access_u65(ptr byval(i65) %0) "cmse_nonsecure_entry" { -; V8M-COMMON-LABEL: access_u65: -; V8M-COMMON: @ %bb.0: @ %entry -; V8M-COMMON-NEXT: sub sp, #16 -; V8M-COMMON-NEXT: stm.w sp, {r0, r1, r2, r3} -; V8M-LE-NEXT: ldrb.w r0, [sp, #8] -; V8M-BE-NEXT: lsrs r0, r0, #24 -; V8M-COMMON-NEXT: add sp, #16 -; V8M-COMMON-NEXT: mov r1, lr -; V8M-COMMON-NEXT: mov r2, lr -; V8M-COMMON-NEXT: mov r3, lr -; V8M-COMMON-NEXT: mov r12, lr -; V8M-COMMON-NEXT: msr apsr_nzcvq, lr -; V8M-COMMON-NEXT: bxns lr +; V8M-LE-LABEL: access_u65: +; V8M-LE: @ %bb.0: @ %entry +; V8M-LE-NEXT: sub sp, #16 +; V8M-LE-NEXT: stm.w sp, {r0, r1, r2, r3} +; V8M-LE-NEXT: ldrb.w r0, [sp, #8] +; V8M-LE-NEXT: add sp, #16 +; V8M-LE-NEXT: mov r1, lr +; V8M-LE-NEXT: mov r2, lr +; V8M-LE-NEXT: mov r3, lr +; V8M-LE-NEXT: mov r12, lr +; V8M-LE-NEXT: msr apsr_nzcvq, lr +; V8M-LE-NEXT: bxns lr ; -; V81M-COMMON-LABEL: access_u65: -; V81M-COMMON: @ %bb.0: @ %entry -; V81M-COMMON-NEXT: vstr fpcxtns, [sp, #-4]! 
-; V81M-COMMON-NEXT: sub sp, #16 -; V81M-COMMON-NEXT: add sp, #4 -; V81M-COMMON-NEXT: stm.w sp, {r0, r1, r2, r3} -; V81M-LE-NEXT: ldrb.w r0, [sp, #8] -; V81M-BE-NEXT: lsrs r0, r0, #24 -; V81M-COMMON-NEXT: sub sp, #4 -; V81M-COMMON-NEXT: add sp, #16 -; V81M-COMMON-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} -; V81M-COMMON-NEXT: vldr fpcxtns, [sp], #4 -; V81M-COMMON-NEXT: clrm {r1, r2, r3, r12, apsr} -; V81M-COMMON-NEXT: bxns lr +; V8M-BE-LABEL: access_u65: +; V8M-BE: @ %bb.0: @ %entry +; V8M-BE-NEXT: sub sp, #16 +; V8M-BE-NEXT: stm.w sp, {r0, r1, r2, r3} +; V8M-BE-NEXT: lsrs r0, r0, #24 +; V8M-BE-NEXT: add sp, #16 +; V8M-BE-NEXT: mov r1, lr +; V8M-BE-NEXT: mov r2, lr +; V8M-BE-NEXT: mov r3, lr +; V8M-BE-NEXT: mov r12, lr +; V8M-BE-NEXT: msr apsr_nzcvq, lr +; V8M-BE-NEXT: bxns lr +; +; V81M-LE-LABEL: access_u65: +; V81M-LE: @ %bb.0: @ %entry +; V81M-LE-NEXT: vstr fpcxtns, [sp, #-4]! +; V81M-LE-NEXT: sub sp, #16 +; V81M-LE-NEXT: add sp, #4 +; V81M-LE-NEXT: stm.w sp, {r0, r1, r2, r3} +; V81M-LE-NEXT: ldrb.w r0, [sp, #8] +; V81M-LE-NEXT: sub sp, #4 +; V81M-LE-NEXT: add sp, #16 +; V81M-LE-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-LE-NEXT: vldr fpcxtns, [sp], #4 +; V81M-LE-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-LE-NEXT: bxns lr +; +; V81M-BE-LABEL: access_u65: +; V81M-BE: @ %bb.0: @ %entry +; V81M-BE-NEXT: vstr fpcxtns, [sp, #-4]! 
+; V81M-BE-NEXT: sub sp, #16 +; V81M-BE-NEXT: add sp, #4 +; V81M-BE-NEXT: stm.w sp, {r0, r1, r2, r3} +; V81M-BE-NEXT: lsrs r0, r0, #24 +; V81M-BE-NEXT: sub sp, #4 +; V81M-BE-NEXT: add sp, #16 +; V81M-BE-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; V81M-BE-NEXT: vldr fpcxtns, [sp], #4 +; V81M-BE-NEXT: clrm {r1, r2, r3, r12, apsr} +; V81M-BE-NEXT: bxns lr entry: %arg = load i65, ptr %0, align 8 %shr = lshr i65 %arg, 64 diff --git a/llvm/test/CodeGen/ARM/iabs.ll b/llvm/test/CodeGen/ARM/iabs.ll index 758fe7507c0b2..22a86a1563ee2 100644 --- a/llvm/test/CodeGen/ARM/iabs.ll +++ b/llvm/test/CodeGen/ARM/iabs.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ARM +; RUN: llc -mtriple=thumbv6m-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-THUMB ;; Integer absolute value, should produce something as good as: ARM: ;; movs r0, r0 @@ -7,11 +8,18 @@ ;; bx lr define i32 @test(i32 %a) { -; CHECK-LABEL: test: -; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: rsbmi r0, r0, #0 -; CHECK-NEXT: bx lr +; CHECK-ARM-LABEL: test: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: cmp r0, #0 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr %tmp1neg = sub i32 0, %a %b = icmp sgt i32 %a, -1 %abs = select i1 %b, i32 %a, i32 %tmp1neg @@ -24,11 +32,19 @@ define i32 @test(i32 %a) { ;; rsbmi ;; bx define i32 @test2(i32 %a, i32 %b) nounwind readnone ssp { -; CHECK-LABEL: test2: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: subs r0, r0, r1 -; CHECK-NEXT: rsbmi r0, r0, #0 -; CHECK-NEXT: bx lr +; CHECK-ARM-LABEL: test2: 
+; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblt r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test2: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr entry: %sub = sub nsw i32 %a, %b %cmp = icmp sgt i32 %sub, -1 @@ -38,13 +54,22 @@ entry: } define i64 @test3(i64 %a) { -; CHECK-LABEL: test3: -; CHECK: @ %bb.0: -; CHECK-NEXT: eor r0, r0, r1, asr #31 -; CHECK-NEXT: eor r2, r1, r1, asr #31 -; CHECK-NEXT: subs r0, r0, r1, asr #31 -; CHECK-NEXT: sbc r1, r2, r1, asr #31 -; CHECK-NEXT: bx lr +; CHECK-ARM-LABEL: test3: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: eor r2, r1, r1, asr #31 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: sbc r1, r2, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test3: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: asrs r2, r1, #31 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: bx lr %tmp1neg = sub i64 0, %a %b = icmp sgt i64 %a, -1 %abs = select i1 %b, i64 %a, i64 %tmp1neg @@ -54,24 +79,43 @@ define i64 @test3(i64 %a) { declare void @callee(...) 
define void @testcallframe(i32 %a) { -; CHECK-LABEL: testcallframe: -; CHECK: @ %bb.0: @ %bb -; CHECK-NEXT: .save {r11, lr} -; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, sp, #8 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: rsbmi r0, r0, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: str r1, [sp] -; CHECK-NEXT: bl callee -; CHECK-NEXT: add sp, sp, #8 -; CHECK-NEXT: pop {r11, lr} -; CHECK-NEXT: bx lr +; CHECK-ARM-LABEL: testcallframe: +; CHECK-ARM: @ %bb.0: @ %bb +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: .pad #8 +; CHECK-ARM-NEXT: sub sp, sp, #8 +; CHECK-ARM-NEXT: cmp r0, #0 +; CHECK-ARM-NEXT: mov r1, #0 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: mov r2, #0 +; CHECK-ARM-NEXT: mov r3, #0 +; CHECK-ARM-NEXT: str r1, [sp] +; CHECK-ARM-NEXT: bl callee +; CHECK-ARM-NEXT: add sp, sp, #8 +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: testcallframe: +; CHECK-THUMB: @ %bb.0: @ %bb +; CHECK-THUMB-NEXT: .save {r7, lr} +; CHECK-THUMB-NEXT: push {r7, lr} +; CHECK-THUMB-NEXT: .pad #8 +; CHECK-THUMB-NEXT: sub sp, #8 +; CHECK-THUMB-NEXT: movs r1, #0 +; CHECK-THUMB-NEXT: str r1, [sp] +; CHECK-THUMB-NEXT: asrs r2, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: mov r2, r1 +; CHECK-THUMB-NEXT: mov r3, r1 +; CHECK-THUMB-NEXT: bl callee +; CHECK-THUMB-NEXT: add sp, #8 +; CHECK-THUMB-NEXT: pop {r7, pc} bb: %i = tail call i32 @llvm.abs.i32(i32 %a, i1 false) tail call void @callee(i32 %i, i32 0, i32 0, i32 0, i32 0) ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll index eb076776ee743..660d233e66926 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll @@ -299,10 +299,10 @@ define hidden void @testNeon(ptr %ref_data, i32 %ref_stride, i32 %limit, ptr noc ; A9-NEXT: cmp r2, #1 ; A9-NEXT: blt .LBB4_4 ; A9-NEXT: @ %bb.1: @ %.lr.ph -; A9-NEXT: movs r5, #0 ; A9-NEXT: movw r4, #64464 -; A9-NEXT: sub.w r12, r5, r2, lsl #6 -; A9-NEXT: sub.w lr, r1, r1, lsl #4 +; A9-NEXT: lsls r5, r2, #6 +; A9-NEXT: sub.w r12, r1, r1, lsl #4 +; A9-NEXT: rsb.w lr, r5, #0 ; A9-NEXT: movt r4, #65535 ; A9-NEXT: mov r5, r3 ; A9-NEXT: .LBB4_2: @ =>This Inner Loop Header: Depth=1 @@ -319,13 +319,13 @@ define hidden void @testNeon(ptr %ref_data, i32 %ref_stride, i32 %limit, ptr noc ; A9-NEXT: vst1.8 {d22, d23}, [r5]! ; A9-NEXT: vld1.64 {d20}, [r0], r1 ; A9-NEXT: vadd.i8 q9, q9, q11 -; A9-NEXT: vld1.64 {d21}, [r0], lr +; A9-NEXT: vld1.64 {d21}, [r0], r12 ; A9-NEXT: vadd.i8 q9, q9, q10 ; A9-NEXT: vadd.i8 q8, q8, q9 ; A9-NEXT: vst1.8 {d20, d21}, [r5], r4 ; A9-NEXT: bne .LBB4_2 ; A9-NEXT: @ %bb.3: @ %._crit_edge -; A9-NEXT: add.w r3, r3, r12, lsl #4 +; A9-NEXT: add.w r3, r3, lr, lsl #4 ; A9-NEXT: .LBB4_4: ; A9-NEXT: vst1.32 {d16, d17}, [r3] ; A9-NEXT: pop {r4, r5, r7, pc}