diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c80bac02f41af..6fe48616312a2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -698,6 +698,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::ABS, MVT::i64, Custom); } + setOperationAction(ISD::ABDS, MVT::i32, Custom); + setOperationAction(ISD::ABDS, MVT::i64, Custom); + setOperationAction(ISD::ABDU, MVT::i32, Custom); + setOperationAction(ISD::ABDU, MVT::i64, Custom); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); for (MVT VT : MVT::fixedlen_vector_valuetypes()) { @@ -7200,6 +7205,40 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { getCondCode(DAG, AArch64CC::PL), Cmp.getValue(1)); } +// Generate SUBS and CSEL (selected as CNEG) for scalar absolute difference. +SDValue AArch64TargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const { + MVT VT = Op.getSimpleValueType(); + + if (VT.isVector()) { + if (Op.getOpcode() == ISD::ABDS) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED); + else + return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED); + } + + SDLoc DL(Op); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + // Unlike LowerABS (compare against 0), a - b can overflow here, so the + // condition must compare a with b rather than test the sign of a - b. + SDValue Cmp = + DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), LHS, RHS); + + // Compute b - a (negative of a - b) + SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), + Cmp.getValue(0)); + + // For unsigned: use HS (a >= b) to select a-b, otherwise b-a + // For signed: use GE (N == V, overflow-safe a >= b) to select a-b, else b-a + AArch64CC::CondCode CC = + (Op.getOpcode() == ISD::ABDS) ? 
AArch64CC::GE : AArch64CC::HS; + + // CSEL: if a >= b, select a-b, otherwise b-a + return DAG.getNode(AArch64ISD::CSEL, DL, VT, Cmp.getValue(0), Neg, + getCondCode(DAG, CC), Cmp.getValue(1)); +} + static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) { SDValue Chain = Op.getOperand(0); SDValue Cond = Op.getOperand(1); @@ -7649,9 +7688,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::ABS: return LowerABS(Op, DAG); case ISD::ABDS: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED); case ISD::ABDU: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED); + return LowerABD(Op, DAG); case ISD::AVGFLOORS: return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED); case ISD::AVGFLOORU: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 6c6ae782f779f..20405653bb5fa 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -598,6 +598,7 @@ class AArch64TargetLowering : public TargetLowering { SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const; SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 4a158ef5bcae0..70ece55b65383 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -647,6 +647,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, if (!Subtarget->hasV8_1MMainlineOps()) setOperationAction(ISD::UCMP, MVT::i32, Custom); + setOperationAction(ISD::ABS, MVT::i32, Custom); + setOperationAction(ISD::ABDS, MVT::i32, Custom); + setOperationAction(ISD::ABDU, MVT::i32, Custom); + setOperationAction(ISD::ConstantFP, MVT::f32, 
Custom); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); @@ -5621,6 +5625,79 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } +// Generate CMP and CMOV (asr/eor/sub on Thumb1) for integer abs. +SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + // Thumb1-only branchless sequence, with S = x >> 31 as an all-ones/zero mask: + // asrs r1, r0, #31; eors r0, r1; subs r0, r0, r1 + if (Subtarget->isThumb1Only()) { + SDValue X = Op.getOperand(0); + SDValue ShiftAmt = DAG.getConstant(31, DL, MVT::i32); + SDValue S = DAG.getNode(ISD::SRA, DL, MVT::i32, X, ShiftAmt); + SDValue T = DAG.getNode(ISD::XOR, DL, MVT::i32, X, S); + return DAG.getNode(ISD::SUB, DL, MVT::i32, T, S); + } + SDValue Neg = DAG.getNode(ISD::SUB, DL, MVT::i32, + DAG.getConstant(0, DL, MVT::i32), Op.getOperand(0)); + // Generate CMP & CMOV: pick the negated value when x is negative (MI). + SDValue Cmp = DAG.getNode(ARMISD::CMP, DL, FlagsVT, Op.getOperand(0), + DAG.getConstant(0, DL, MVT::i32)); + return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Op.getOperand(0), Neg, + DAG.getConstant(ARMCC::MI, DL, MVT::i32), Cmp); +} + +// Generate SUBS and CMOV (or a Thumb1 mask sequence) for absolute difference. 
+SDValue ARMTargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + // Thumb1: only the unsigned case has a cheap branchless i32 sequence + if (Subtarget->isThumb1Only()) { + if (Op.getOpcode() == ISD::ABDS) { + // abs(a - b) is only correct when a - b cannot overflow; for inputs + // such as INT_MIN - 1 the wrapped difference has the wrong sign. There + // is no carry-style mask trick for the signed case, so return an + // empty SDValue and let the legalizer fall back to the generic ABDS + // expansion. + return SDValue(); + } else { + // abdu: subs; sbcs r1,r1,r1(mask from borrow); eors; subs + // First subtraction: LHS - RHS + SDValue Sub1WithFlags = DAG.getNode( + ARMISD::SUBC, DL, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS); + SDValue Sub1Result = Sub1WithFlags.getValue(0); + SDValue Flags1 = Sub1WithFlags.getValue(1); + + SDValue Sbc1 = DAG.getNode( + ARMISD::SUBE, DL, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, Flags1); + + SDValue Xor = + DAG.getNode(ISD::XOR, DL, MVT::i32, Sub1Result, Sbc1.getValue(0)); + + return DAG.getNode(ISD::SUB, DL, MVT::i32, Xor, Sbc1.getValue(0)); + } + } + + // Generate SUBS and CMOV for absolute difference (like LowerABS) + // Compute a - b with flags + SDValue Cmp = + DAG.getNode(ARMISD::SUBC, DL, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS); + + // Compute b - a (negative of a - b) + SDValue Neg = DAG.getNode(ISD::SUB, DL, MVT::i32, + DAG.getConstant(0, DL, MVT::i32), Cmp.getValue(0)); + + // For unsigned: use LO (a < b) to select -(a-b), which is the same as b-a in + // two's complement, otherwise a-b. For signed: use LT (a < b; N != V, so + // overflow-safe, unlike MI) to select -(a-b), otherwise a-b. + ARMCC::CondCodes CC = (Op.getOpcode() == ISD::ABDS) ? 
ARMCC::LT : ARMCC::LO; + + // CMOV: if a >= b, select a-b, otherwise its negation + return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Cmp.getValue(0), Neg, + DAG.getConstant(CC, DL, MVT::i32), Cmp.getValue(1)); +} + SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Cond = Op.getOperand(1); @@ -10670,6 +10747,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerSTORE(Op, DAG, Subtarget); case ISD::MLOAD: return LowerMLOAD(Op, DAG); + case ISD::ABS: + return LowerABS(Op, DAG); + case ISD::ABDS: + case ISD::ABDU: + return LowerABD(Op, DAG); case ISD::VECREDUCE_MUL: case ISD::VECREDUCE_AND: case ISD::VECREDUCE_OR: @@ -14087,6 +14169,48 @@ static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) { CSINC.getOperand(3)); } +static bool isNegatedInteger(SDValue Op) { + return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)); +} + +static SDValue getNegatedInteger(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Zero = DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SUB, DL, VT, Zero, Op); +} + +// Try to fold +// +// (neg (cmov X, Y)) -> (cmov (neg X), (neg Y)) +// +// The folding helps cmov to be matched with csneg without generating +// a redundant neg instruction. +static SDValue performNegCMovCombine(SDNode *N, SelectionDAG &DAG) { + if (!isNegatedInteger(SDValue(N, 0))) + return SDValue(); + + SDValue CSel = N->getOperand(1); + if (CSel.getOpcode() != ARMISD::CMOV || !CSel->hasOneUse()) + return SDValue(); + + SDValue N0 = CSel.getOperand(0); + SDValue N1 = CSel.getOperand(1); + + // If neither of them is a negation, it's not worth the folding as it + // introduces two additional negations while reducing one negation. 
+ if (!isNegatedInteger(N0) && !isNegatedInteger(N1)) + return SDValue(); + + SDValue N0N = getNegatedInteger(N0, DAG); + SDValue N1N = getNegatedInteger(N1, DAG); + + SDLoc DL(N); + EVT VT = CSel.getValueType(); + return DAG.getNode(ARMISD::CMOV, DL, VT, N0N, N1N, CSel.getOperand(2), + CSel.getOperand(3)); +} + /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. /// static SDValue PerformSUBCombine(SDNode *N, @@ -14103,6 +14227,9 @@ static SDValue PerformSUBCombine(SDNode *N, if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG)) return R; + if (SDValue Val = performNegCMovCombine(N, DCI.DAG)) + return Val; + if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector()) return SDValue(); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 0185c8ddd4928..878d1ade096dc 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -888,6 +888,9 @@ class VectorType; SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const; + + SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const; void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed, SmallVectorImpl<SDValue> &Results) const; SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG, diff --git a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll index 254c191569f8b..42563e8e34a94 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll @@ -31,22 +31,22 @@ declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1) define i32 @abs(i32 %arg) { ; MVE-LABEL: 'abs' -; MVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:5 SizeLat:5 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) +; MVE-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 
@llvm.abs.i64(i64 undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:74 CodeSize:55 Lat:74 SizeLat:74 for: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:148 CodeSize:110 Lat:148 SizeLat:148 for: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:296 CodeSize:220 Lat:296 SizeLat:296 for: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) -; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false) +; MVE-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) -; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) +; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) ; MVE-NEXT: 
Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) -; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false) +; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false) ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false) diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll index 02c76ba7343a0..7f879413cf449 100644 --- a/llvm/test/CodeGen/AArch64/abds-neg.ll +++ b/llvm/test/CodeGen/AArch64/abds-neg.ll @@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w1, w8 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: subs w8, w8, w1 +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i16 %a to i64 %bext = sext i32 %b to i64 @@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w1, w0 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -119,9 +119,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { define i32 @abd_ext_i32_i16(i32 %a, i16 %b) 
nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: subs w8, w8, w0 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: subs w8, w0, w1, sxth +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i16 %b to i64 @@ -135,8 +134,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w1, w0 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -150,8 +149,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x1, x0 -; CHECK-NEXT: cneg x0, x8, ge +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, pl ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -165,8 +164,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x1, x0 -; CHECK-NEXT: cneg x0, x8, ge +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, pl ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -226,12 +225,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w1 -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w10, w9, w8, lt -; CHECK-NEXT: csel w8, w9, w8, gt -; CHECK-NEXT: sub w0, w10, w8 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxtb +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %min = call i8 @llvm.smin.i8(i8 %a, i8 %b) %max = call i8 @llvm.smax.i8(i8 %a, i8 %b) @@ -242,12 
+238,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w10, w9, w8, lt -; CHECK-NEXT: csel w8, w9, w8, gt -; CHECK-NEXT: sub w0, w10, w8 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %min = call i16 @llvm.smin.i16(i16 %a, i16 %b) %max = call i16 @llvm.smax.i16(i16 %a, i16 %b) @@ -258,10 +251,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_minmax_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, w1 -; CHECK-NEXT: csel w8, w0, w1, lt -; CHECK-NEXT: csel w9, w0, w1, gt -; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %min = call i32 @llvm.smin.i32(i32 %a, i32 %b) %max = call i32 @llvm.smax.i32(i32 %a, i32 %b) @@ -272,10 +263,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_minmax_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x1 -; CHECK-NEXT: csel x8, x0, x1, lt -; CHECK-NEXT: csel x9, x0, x1, gt -; CHECK-NEXT: sub x0, x8, x9 +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, pl ; CHECK-NEXT: ret %min = call i64 @llvm.smin.i64(i64 %a, i64 %b) %max = call i64 @llvm.smax.i64(i64 %a, i64 %b) @@ -343,7 +332,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_cmp_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %cmp = icmp sge i32 %a, %b %ab = sub i32 %a, %b @@ -356,7 +345,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_cmp_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, ge +; CHECK-NEXT: cneg x0, x8, pl ; 
CHECK-NEXT: ret %cmp = icmp slt i64 %a, %b %ab = sub i64 %a, %b diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll index bf52e71ec21fe..e461a747243a4 100644 --- a/llvm/test/CodeGen/AArch64/abds.ll +++ b/llvm/test/CodeGen/AArch64/abds.ll @@ -70,7 +70,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 ; CHECK-NEXT: subs w8, w8, w1 -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i16 %a to i64 %bext = sext i32 %b to i64 @@ -99,7 +99,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -112,9 +112,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: subs w8, w0, w8 -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: subs w8, w0, w1, sxth +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i16 %b to i64 @@ -128,7 +127,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -142,7 +141,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, le +; CHECK-NEXT: cneg x0, x8, mi ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -156,7 +155,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, 
x8, le +; CHECK-NEXT: cneg x0, x8, mi ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -236,7 +235,7 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_minmax_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i32 @llvm.smin.i32(i32 %a, i32 %b) %max = call i32 @llvm.smax.i32(i32 %a, i32 %b) @@ -248,7 +247,7 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_minmax_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, le +; CHECK-NEXT: cneg x0, x8, mi ; CHECK-NEXT: ret %min = call i64 @llvm.smin.i64(i64 %a, i64 %b) %max = call i64 @llvm.smax.i64(i64 %a, i64 %b) @@ -308,7 +307,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_cmp_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp slt i32 %a, %b %ab = sub i32 %a, %b @@ -321,7 +320,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_cmp_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, le +; CHECK-NEXT: cneg x0, x8, mi ; CHECK-NEXT: ret %cmp = icmp sge i64 %a, %b %ab = sub i64 %a, %b @@ -551,7 +550,7 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_select_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sgt i32 %a, %b %ab = select i1 %cmp, i32 %a, i32 %b @@ -564,7 +563,7 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_select_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, le +; CHECK-NEXT: cneg x0, x8, mi ; CHECK-NEXT: ret %cmp = icmp sge i64 %a, %b %ab = select i1 %cmp, i64 %a, i64 %b diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll 
b/llvm/test/CodeGen/AArch64/abdu-neg.ll index 400031b64cb84..cc24bdc9e5c2d 100644 --- a/llvm/test/CodeGen/AArch64/abdu-neg.ll +++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll @@ -10,7 +10,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: subs w8, w8, w1, uxtb -; CHECK-NEXT: cneg w0, w8, pl +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i8 %a to i64 %bext = zext i8 %b to i64 @@ -26,7 +26,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: subs w8, w8, w1, uxth -; CHECK-NEXT: cneg w0, w8, pl +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i8 %a to i64 %bext = zext i16 %b to i64 @@ -42,7 +42,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: subs w8, w8, w1, uxtb -; CHECK-NEXT: cneg w0, w8, pl +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i8 %a to i64 %bext = zext i8 %b to i64 @@ -58,7 +58,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: subs w8, w8, w1, uxth -; CHECK-NEXT: cneg w0, w8, pl +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i16 %a to i64 %bext = zext i16 %b to i64 @@ -73,7 +73,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w1, w8 +; CHECK-NEXT: subs w8, w8, w1 ; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -90,7 +90,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: subs w8, w8, w1, uxth -; CHECK-NEXT: cneg w0, w8, pl +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i16 %a to i64 %bext = zext i16 %b to i64 @@ -104,7 +104,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { 
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w1, w0 +; CHECK-NEXT: subs w8, w0, w1 ; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i32 %a to i64 @@ -119,8 +119,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: subs w8, w8, w0 +; CHECK-NEXT: subs w8, w0, w1, uxth ; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i32 %a to i64 @@ -135,7 +134,7 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs w8, w1, w0 +; CHECK-NEXT: subs w8, w0, w1 ; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i32 %a to i64 @@ -150,7 +149,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: subs x8, x0, x1 ; CHECK-NEXT: cneg x0, x8, hs ; CHECK-NEXT: ret %aext = zext i64 %a to i128 @@ -165,7 +164,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: subs x8, x0, x1 ; CHECK-NEXT: cneg x0, x8, hs ; CHECK-NEXT: ret %aext = zext i64 %a to i128 @@ -230,12 +229,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xff -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w10, w9, w8, lo -; CHECK-NEXT: csel w8, w9, w8, hi -; CHECK-NEXT: sub w0, w10, w8 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: subs w8, w8, w1, uxtb +; 
CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) %max = call i8 @llvm.umax.i8(i8 %a, i8 %b) @@ -246,12 +242,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w10, w9, w8, lo -; CHECK-NEXT: csel w8, w9, w8, hi -; CHECK-NEXT: sub w0, w10, w8 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: subs w8, w8, w1, uxth +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) %max = call i16 @llvm.umax.i16(i16 %a, i16 %b) @@ -262,10 +255,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_minmax_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, w1 -; CHECK-NEXT: csel w8, w0, w1, lo -; CHECK-NEXT: csel w9, w0, w1, hi -; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %min = call i32 @llvm.umin.i32(i32 %a, i32 %b) %max = call i32 @llvm.umax.i32(i32 %a, i32 %b) @@ -276,10 +267,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_minmax_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x1 -; CHECK-NEXT: csel x8, x0, x1, lo -; CHECK-NEXT: csel x9, x0, x1, hi -; CHECK-NEXT: sub x0, x8, x9 +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, hs ; CHECK-NEXT: ret %min = call i64 @llvm.umin.i64(i64 %a, i64 %b) %max = call i64 @llvm.umax.i64(i64 %a, i64 %b) diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll index 8d2b0b0742d7d..075a03a55d147 100644 --- a/llvm/test/CodeGen/AArch64/abdu.ll +++ b/llvm/test/CodeGen/AArch64/abdu.ll @@ -10,7 +10,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff 
; CHECK-NEXT: subs w8, w8, w1, uxtb -; CHECK-NEXT: cneg w0, w8, mi +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %aext = zext i8 %a to i64 %bext = zext i8 %b to i64 @@ -25,7 +25,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: subs w8, w8, w1, uxth -; CHECK-NEXT: cneg w0, w8, mi +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %aext = zext i8 %a to i64 %bext = zext i16 %b to i64 @@ -40,7 +40,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: subs w8, w8, w1, uxtb -; CHECK-NEXT: cneg w0, w8, mi +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %aext = zext i8 %a to i64 %bext = zext i8 %b to i64 @@ -55,7 +55,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: subs w8, w8, w1, uxth -; CHECK-NEXT: cneg w0, w8, mi +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %aext = zext i16 %a to i64 %bext = zext i16 %b to i64 @@ -70,7 +70,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: subs w8, w8, w1 -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %aext = zext i16 %a to i64 %bext = zext i32 %b to i64 @@ -85,7 +85,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: subs w8, w8, w1, uxth -; CHECK-NEXT: cneg w0, w8, mi +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %aext = zext i16 %a to i64 %bext = zext i16 %b to i64 @@ -99,7 +99,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -112,9 +112,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { define i32 
@abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: subs w8, w0, w8 -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: subs w8, w0, w1, uxth +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i16 %b to i64 @@ -128,7 +127,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -142,7 +141,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, ls +; CHECK-NEXT: cneg x0, x8, lo ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 @@ -156,7 +155,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, ls +; CHECK-NEXT: cneg x0, x8, lo ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 @@ -215,7 +214,7 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: subs w8, w8, w1, uxtb -; CHECK-NEXT: cneg w0, w8, mi +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) %max = call i8 @llvm.umax.i8(i8 %a, i8 %b) @@ -228,7 +227,7 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: subs w8, w8, w1, uxth -; CHECK-NEXT: cneg w0, w8, mi +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) %max = call i16 @llvm.umax.i16(i16 %a, i16 %b) @@ -240,7 +239,7 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_minmax_i32: ; CHECK: // %bb.0: ; 
CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %min = call i32 @llvm.umin.i32(i32 %a, i32 %b) %max = call i32 @llvm.umax.i32(i32 %a, i32 %b) @@ -252,7 +251,7 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_minmax_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, ls +; CHECK-NEXT: cneg x0, x8, lo ; CHECK-NEXT: ret %min = call i64 @llvm.umin.i64(i64 %a, i64 %b) %max = call i64 @llvm.umax.i64(i64 %a, i64 %b) @@ -287,7 +286,7 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: subs w8, w8, w1, uxtb -; CHECK-NEXT: cneg w0, w8, mi +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %cmp = icmp ugt i8 %a, %b %ab = sub i8 %a, %b @@ -301,7 +300,7 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: subs w8, w8, w1, uxth -; CHECK-NEXT: cneg w0, w8, mi +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %cmp = icmp uge i16 %a, %b %ab = sub i16 %a, %b @@ -314,7 +313,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_cmp_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %cmp = icmp ult i32 %a, %b %ab = sub i32 %a, %b @@ -327,7 +326,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_cmp_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, ls +; CHECK-NEXT: cneg x0, x8, lo ; CHECK-NEXT: ret %cmp = icmp uge i64 %a, %b %ab = sub i64 %a, %b @@ -365,7 +364,7 @@ define i64 @vector_legalized(i16 %a, i16 %b) { ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: subs w8, w8, w1, uxth -; CHECK-NEXT: cneg w8, w8, mi +; CHECK-NEXT: cneg w8, w8, lo ; CHECK-NEXT: addp d0, v0.2d ; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: add x0, x9, x8 @@ -389,7 +388,7 @@ 
define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: subs w8, w8, w1, uxtb -; CHECK-NEXT: cneg w0, w8, mi +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %cmp = icmp ult i8 %a, %b %ab = select i1 %cmp, i8 %a, i8 %b @@ -403,7 +402,7 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: subs w8, w8, w1, uxth -; CHECK-NEXT: cneg w0, w8, mi +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %cmp = icmp ule i16 %a, %b %ab = select i1 %cmp, i16 %a, i16 %b @@ -416,7 +415,7 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_select_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %cmp = icmp ugt i32 %a, %b %ab = select i1 %cmp, i32 %a, i32 %b @@ -429,7 +428,7 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_select_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, ls +; CHECK-NEXT: cneg x0, x8, lo ; CHECK-NEXT: ret %cmp = icmp uge i64 %a, %b %ab = select i1 %cmp, i64 %a, i64 %b diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll index 79bba5363188b..d014362dc1b84 100644 --- a/llvm/test/CodeGen/AArch64/midpoint-int.ll +++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll @@ -15,7 +15,7 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: subs w9, w0, w1 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w9, w9, le +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: cneg w8, w8, le ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 @@ -36,7 +36,7 @@ define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: subs w9, w0, w1 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w9, w9, ls +; CHECK-NEXT: cneg w9, w9, lo ; 
CHECK-NEXT: cneg w8, w8, ls ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 @@ -60,7 +60,7 @@ define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind { ; CHECK-NEXT: ldr w9, [x0] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: subs w10, w9, w1 -; CHECK-NEXT: cneg w10, w10, le +; CHECK-NEXT: cneg w10, w10, mi ; CHECK-NEXT: cneg w8, w8, le ; CHECK-NEXT: lsr w10, w10, #1 ; CHECK-NEXT: madd w0, w10, w8, w9 @@ -83,7 +83,7 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldr w9, [x1] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: subs w9, w0, w9 -; CHECK-NEXT: cneg w9, w9, le +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: cneg w8, w8, le ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 @@ -107,7 +107,7 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldr w10, [x1] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: subs w10, w9, w10 -; CHECK-NEXT: cneg w10, w10, le +; CHECK-NEXT: cneg w10, w10, mi ; CHECK-NEXT: cneg w8, w8, le ; CHECK-NEXT: lsr w10, w10, #1 ; CHECK-NEXT: madd w0, w10, w8, w9 @@ -136,7 +136,7 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: subs x9, x0, x1 ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: cneg x9, x9, le +; CHECK-NEXT: cneg x9, x9, mi ; CHECK-NEXT: cneg x8, x8, le ; CHECK-NEXT: lsr x9, x9, #1 ; CHECK-NEXT: madd x0, x9, x8, x0 @@ -157,7 +157,7 @@ define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: subs x9, x0, x1 ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: cneg x9, x9, ls +; CHECK-NEXT: cneg x9, x9, lo ; CHECK-NEXT: cneg x8, x8, ls ; CHECK-NEXT: lsr x9, x9, #1 ; CHECK-NEXT: madd x0, x9, x8, x0 @@ -181,7 +181,7 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind { ; CHECK-NEXT: ldr x9, [x0] ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; 
CHECK-NEXT: subs x10, x9, x1 -; CHECK-NEXT: cneg x10, x10, le +; CHECK-NEXT: cneg x10, x10, mi ; CHECK-NEXT: cneg x8, x8, le ; CHECK-NEXT: lsr x10, x10, #1 ; CHECK-NEXT: madd x0, x10, x8, x9 @@ -204,7 +204,7 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldr x9, [x1] ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; CHECK-NEXT: subs x9, x0, x9 -; CHECK-NEXT: cneg x9, x9, le +; CHECK-NEXT: cneg x9, x9, mi ; CHECK-NEXT: cneg x8, x8, le ; CHECK-NEXT: lsr x9, x9, #1 ; CHECK-NEXT: madd x0, x9, x8, x0 @@ -228,7 +228,7 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldr x10, [x1] ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; CHECK-NEXT: subs x10, x9, x10 -; CHECK-NEXT: cneg x10, x10, le +; CHECK-NEXT: cneg x10, x10, mi ; CHECK-NEXT: cneg x8, x8, le ; CHECK-NEXT: lsr x10, x10, #1 ; CHECK-NEXT: madd x0, x10, x8, x9 @@ -281,7 +281,7 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { ; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: subs w9, w9, w1, uxth ; CHECK-NEXT: cneg w8, w8, ls -; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: cneg w9, w9, lo ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret @@ -405,7 +405,7 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { ; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: subs w9, w9, w1, uxtb ; CHECK-NEXT: cneg w8, w8, ls -; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: cneg w9, w9, lo ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/ARM/abds-neg.ll b/llvm/test/CodeGen/ARM/abds-neg.ll new file mode 100644 index 0000000000000..18ddc6780c965 --- /dev/null +++ b/llvm/test/CodeGen/ARM/abds-neg.ll @@ -0,0 +1,1326 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ARM +; RUN: llc 
-mtriple=thumbv6m-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-THUMB + +; +; trunc(nabs(sub(sext(a),sext(b)))) -> nabds(a,b) +; + +define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i8 %a to i64 + %bext = sext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i8 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, 
r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i8 %a to i64 + %bext = sext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i8 + ret i8 %trunc +} + +define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i16 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: rsbs r0, r1, r0, asr #16 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: 
subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i16 %a to i64 + %bext = sext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i16 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i16 + ret i16 %trunc +} + +define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i32 %a to i64 + %bext = sext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: 
rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i32 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i32 %a to i64 + %bext = sext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i32 + ret i32 %trunc +} + +define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rsc r1, r1, #0 +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: subs r5, r0, r2 +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: 
subs r0, r2, r0 +; CHECK-THUMB-NEXT: sbcs r3, r1 +; CHECK-THUMB-NEXT: blt .LBB9_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: .LBB9_2: +; CHECK-THUMB-NEXT: blt .LBB9_4 +; CHECK-THUMB-NEXT: @ %bb.3: +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: .LBB9_4: +; CHECK-THUMB-NEXT: movs r1, #0 +; CHECK-THUMB-NEXT: rsbs r0, r5, #0 +; CHECK-THUMB-NEXT: sbcs r1, r4 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %aext = sext i64 %a to i128 + %bext = sext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false) + %nabs = sub i128 0, %abs + %trunc = trunc i128 %nabs to i64 + ret i64 %trunc +} + +define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rsc r1, r1, #0 +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: subs r5, r0, r2 +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: subs r0, r2, r0 +; CHECK-THUMB-NEXT: sbcs r3, r1 +; CHECK-THUMB-NEXT: blt .LBB10_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: .LBB10_2: +; CHECK-THUMB-NEXT: blt .LBB10_4 +; CHECK-THUMB-NEXT: @ %bb.3: +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: .LBB10_4: +; CHECK-THUMB-NEXT: movs r1, #0 +; CHECK-THUMB-NEXT: rsbs r0, r5, #0 +; CHECK-THUMB-NEXT: sbcs r1, r4 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %aext = sext i64 %a to i128 + %bext = sext i64 %b to i128 + %sub = sub i128 
%aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true) + %nabs = sub i128 0, %abs + %trunc = trunc i128 %nabs to i64 + ret i64 %trunc +} + +define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr +; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs r2, lr, r2 +; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rscs r1, r1, #0 +; CHECK-ARM-NEXT: rscs r2, r2, #0 +; CHECK-ARM-NEXT: rsc r3, r3, #0 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: mov r3, r1 +; CHECK-THUMB-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r4, [sp, #60] +; CHECK-THUMB-NEXT: ldr r5, [sp, #56] +; CHECK-THUMB-NEXT: ldr r1, [sp, #52] +; CHECK-THUMB-NEXT: ldr r7, [sp, #48] +; CHECK-THUMB-NEXT: subs r2, r0, r7 +; CHECK-THUMB-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r3, r1 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #20] @ 4-byte 
Spill +; CHECK-THUMB-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r2 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: subs r0, r7, r0 +; CHECK-THUMB-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: sbcs r5, r6 +; CHECK-THUMB-NEXT: sbcs r4, r2 +; CHECK-THUMB-NEXT: blt .LBB11_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: str r4, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: .LBB11_2: +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB11_4 +; CHECK-THUMB-NEXT: @ %bb.3: +; CHECK-THUMB-NEXT: mov r4, r5 +; CHECK-THUMB-NEXT: .LBB11_4: +; CHECK-THUMB-NEXT: ldr r5, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB11_6 +; CHECK-THUMB-NEXT: @ %bb.5: +; CHECK-THUMB-NEXT: mov r2, r1 +; CHECK-THUMB-NEXT: .LBB11_6: +; CHECK-THUMB-NEXT: blt .LBB11_8 +; CHECK-THUMB-NEXT: @ %bb.7: +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: .LBB11_8: +; CHECK-THUMB-NEXT: movs r3, #0 +; CHECK-THUMB-NEXT: rsbs r0, r5, #0 +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: mov r2, r3 +; CHECK-THUMB-NEXT: sbcs r2, r4 +; CHECK-THUMB-NEXT: ldr r4, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %aext = sext i128 %a to i256 + %bext = sext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false) + %nabs = sub i256 0, %abs + %trunc = trunc i256 %nabs to i128 + ret i128 %trunc +} + +define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: 
subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr +; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs r2, lr, r2 +; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rscs r1, r1, #0 +; CHECK-ARM-NEXT: rscs r2, r2, #0 +; CHECK-ARM-NEXT: rsc r3, r3, #0 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: mov r3, r1 +; CHECK-THUMB-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r4, [sp, #60] +; CHECK-THUMB-NEXT: ldr r5, [sp, #56] +; CHECK-THUMB-NEXT: ldr r1, [sp, #52] +; CHECK-THUMB-NEXT: ldr r7, [sp, #48] +; CHECK-THUMB-NEXT: subs r2, r0, r7 +; CHECK-THUMB-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r3, r1 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r2 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: subs r0, r7, r0 +; CHECK-THUMB-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: sbcs r5, r6 +; CHECK-THUMB-NEXT: sbcs r4, r2 +; CHECK-THUMB-NEXT: blt .LBB12_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: str r4, [sp, 
#24] @ 4-byte Spill +; CHECK-THUMB-NEXT: .LBB12_2: +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB12_4 +; CHECK-THUMB-NEXT: @ %bb.3: +; CHECK-THUMB-NEXT: mov r4, r5 +; CHECK-THUMB-NEXT: .LBB12_4: +; CHECK-THUMB-NEXT: ldr r5, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB12_6 +; CHECK-THUMB-NEXT: @ %bb.5: +; CHECK-THUMB-NEXT: mov r2, r1 +; CHECK-THUMB-NEXT: .LBB12_6: +; CHECK-THUMB-NEXT: blt .LBB12_8 +; CHECK-THUMB-NEXT: @ %bb.7: +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: .LBB12_8: +; CHECK-THUMB-NEXT: movs r3, #0 +; CHECK-THUMB-NEXT: rsbs r0, r5, #0 +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: mov r2, r3 +; CHECK-THUMB-NEXT: sbcs r2, r4 +; CHECK-THUMB-NEXT: ldr r4, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %aext = sext i128 %a to i256 + %bext = sext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true) + %nabs = sub i256 0, %abs + %trunc = trunc i256 %nabs to i128 + ret i128 %trunc +} + +; +; sub(smin(a,b),smax(a,b)) -> nabds(a,b) +; + +define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %min = call i8 @llvm.smin.i8(i8 %a, i8 %b) + %max = call i8 @llvm.smax.i8(i8 %a, i8 %b) + %sub = sub i8 %min, %max + ret i8 %sub 
+} + +define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %min = call i16 @llvm.smin.i16(i16 %a, i16 %b) + %max = call i16 @llvm.smax.i16(i16 %a, i16 %b) + %sub = sub i16 %min, %max + ret i16 %sub +} + +define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %min = call i32 @llvm.smin.i32(i32 %a, i32 %b) + %max = call i32 @llvm.smax.i32(i32 %a, i32 %b) + %sub = sub i32 %min, %max + ret i32 %sub +} + +define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, lr} +; CHECK-ARM-NEXT: push {r4, lr} +; CHECK-ARM-NEXT: subs r12, r2, r0 +; CHECK-ARM-NEXT: mov lr, r2 +; CHECK-ARM-NEXT: sbcs r12, r3, r1 +; CHECK-ARM-NEXT: mov r12, r3 +; CHECK-ARM-NEXT: movlt lr, r0 +; CHECK-ARM-NEXT: movlt r12, r1 +; CHECK-ARM-NEXT: subs r4, r0, r2 +; CHECK-ARM-NEXT: sbcs r4, r1, r3 +; CHECK-ARM-NEXT: movlt r3, r1 +; CHECK-ARM-NEXT: movlt r2, r0 +; CHECK-ARM-NEXT: subs r0, r2, lr +; CHECK-ARM-NEXT: sbc r1, r3, r12 +; CHECK-ARM-NEXT: pop {r4, lr} +; 
CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, lr} +; CHECK-THUMB-NEXT: subs r4, r2, r0 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: blt .LBB16_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: .LBB16_2: +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: blt .LBB16_4 +; CHECK-THUMB-NEXT: @ %bb.3: +; CHECK-THUMB-NEXT: mov r5, r2 +; CHECK-THUMB-NEXT: .LBB16_4: +; CHECK-THUMB-NEXT: subs r6, r0, r2 +; CHECK-THUMB-NEXT: mov r6, r1 +; CHECK-THUMB-NEXT: sbcs r6, r3 +; CHECK-THUMB-NEXT: blt .LBB16_6 +; CHECK-THUMB-NEXT: @ %bb.5: +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: .LBB16_6: +; CHECK-THUMB-NEXT: blt .LBB16_8 +; CHECK-THUMB-NEXT: @ %bb.7: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: .LBB16_8: +; CHECK-THUMB-NEXT: subs r0, r0, r5 +; CHECK-THUMB-NEXT: sbcs r1, r4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, pc} + %min = call i64 @llvm.smin.i64(i64 %a, i64 %b) + %max = call i64 @llvm.smax.i64(i64 %a, i64 %b) + %sub = sub i64 %min, %max + ret i64 %sub +} + +define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #32] +; CHECK-ARM-NEXT: ldr r4, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r5, r0 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r6, r4, r1 +; CHECK-ARM-NEXT: mov r10, r4 +; CHECK-ARM-NEXT: mov r7, r5 +; CHECK-ARM-NEXT: sbcs r6, lr, r2 +; CHECK-ARM-NEXT: mov r8, r12 +; CHECK-ARM-NEXT: sbcs r6, r12, r3 +; CHECK-ARM-NEXT: mov r9, lr +; CHECK-ARM-NEXT: movlt r8, r3 +; CHECK-ARM-NEXT: movlt r9, r2 +; CHECK-ARM-NEXT: movlt r10, r1 +; CHECK-ARM-NEXT: movlt r7, r0 +; 
CHECK-ARM-NEXT: subs r6, r0, r5 +; CHECK-ARM-NEXT: sbcs r6, r1, r4 +; CHECK-ARM-NEXT: sbcs r6, r2, lr +; CHECK-ARM-NEXT: sbcs r6, r3, r12 +; CHECK-ARM-NEXT: movlt r12, r3 +; CHECK-ARM-NEXT: movlt lr, r2 +; CHECK-ARM-NEXT: movlt r4, r1 +; CHECK-ARM-NEXT: movlt r5, r0 +; CHECK-ARM-NEXT: subs r0, r5, r7 +; CHECK-ARM-NEXT: sbcs r1, r4, r10 +; CHECK-ARM-NEXT: sbcs r2, lr, r9 +; CHECK-ARM-NEXT: sbc r3, r12, r8 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #20 +; CHECK-THUMB-NEXT: sub sp, #20 +; CHECK-THUMB-NEXT: ldr r5, [sp, #52] +; CHECK-THUMB-NEXT: add r7, sp, #40 +; CHECK-THUMB-NEXT: ldm r7, {r4, r6, r7} +; CHECK-THUMB-NEXT: str r4, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: subs r4, r4, r0 +; CHECK-THUMB-NEXT: mov r4, r6 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: mov r4, r7 +; CHECK-THUMB-NEXT: sbcs r4, r2 +; CHECK-THUMB-NEXT: mov r4, r5 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: bge .LBB17_12 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: bge .LBB17_13 +; CHECK-THUMB-NEXT: .LBB17_2: +; CHECK-THUMB-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: blt .LBB17_4 +; CHECK-THUMB-NEXT: .LBB17_3: +; CHECK-THUMB-NEXT: str r6, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: .LBB17_4: +; CHECK-THUMB-NEXT: str r4, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: blt .LBB17_6 +; CHECK-THUMB-NEXT: @ %bb.5: +; CHECK-THUMB-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: .LBB17_6: +; CHECK-THUMB-NEXT: str r4, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r4, r0, r4 +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: sbcs r4, r6 +; CHECK-THUMB-NEXT: 
mov r4, r2 +; CHECK-THUMB-NEXT: sbcs r4, r7 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: sbcs r4, r5 +; CHECK-THUMB-NEXT: bge .LBB17_14 +; CHECK-THUMB-NEXT: @ %bb.7: +; CHECK-THUMB-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB17_15 +; CHECK-THUMB-NEXT: .LBB17_8: +; CHECK-THUMB-NEXT: bge .LBB17_16 +; CHECK-THUMB-NEXT: .LBB17_9: +; CHECK-THUMB-NEXT: blt .LBB17_11 +; CHECK-THUMB-NEXT: .LBB17_10: +; CHECK-THUMB-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: .LBB17_11: +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r0, r0, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: add sp, #20 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB17_12: +; CHECK-THUMB-NEXT: mov r4, r5 +; CHECK-THUMB-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: blt .LBB17_2 +; CHECK-THUMB-NEXT: .LBB17_13: +; CHECK-THUMB-NEXT: str r7, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: bge .LBB17_3 +; CHECK-THUMB-NEXT: b .LBB17_4 +; CHECK-THUMB-NEXT: .LBB17_14: +; CHECK-THUMB-NEXT: mov r3, r5 +; CHECK-THUMB-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB17_8 +; CHECK-THUMB-NEXT: .LBB17_15: +; CHECK-THUMB-NEXT: mov r2, r7 +; CHECK-THUMB-NEXT: blt .LBB17_9 +; CHECK-THUMB-NEXT: .LBB17_16: +; CHECK-THUMB-NEXT: mov r1, r6 +; CHECK-THUMB-NEXT: bge .LBB17_10 +; CHECK-THUMB-NEXT: b .LBB17_11 + %min = call i128 @llvm.smin.i128(i128 %a, i128 %b) + %max = call i128 @llvm.smax.i128(i128 %a, i128 %b) + %sub = sub i128 %min, %max + ret i128 %sub +} + +; +; select(icmp(a,b),sub(a,b),sub(b,a)) -> nabds(a,b) +; + +define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r3, r0, #24 +; 
CHECK-ARM-NEXT: lsl r12, r1, #24 +; CHECK-ARM-NEXT: asr r3, r3, #24 +; CHECK-ARM-NEXT: sub r2, r1, r0 +; CHECK-ARM-NEXT: cmp r3, r12, asr #24 +; CHECK-ARM-NEXT: suble r2, r0, r1 +; CHECK-ARM-NEXT: mov r0, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r2, r1 +; CHECK-THUMB-NEXT: sxtb r3, r0 +; CHECK-THUMB-NEXT: cmp r3, r2 +; CHECK-THUMB-NEXT: ble .LBB18_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr +; CHECK-THUMB-NEXT: .LBB18_2: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp sle i8 %a, %b + %ab = sub i8 %a, %b + %ba = sub i8 %b, %a + %sel = select i1 %cmp, i8 %ab, i8 %ba + ret i8 %sel +} + +define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r3, r0, #16 +; CHECK-ARM-NEXT: lsl r12, r1, #16 +; CHECK-ARM-NEXT: asr r3, r3, #16 +; CHECK-ARM-NEXT: sub r2, r1, r0 +; CHECK-ARM-NEXT: cmp r3, r12, asr #16 +; CHECK-ARM-NEXT: sublt r2, r0, r1 +; CHECK-ARM-NEXT: mov r0, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r2, r1 +; CHECK-THUMB-NEXT: sxth r3, r0 +; CHECK-THUMB-NEXT: cmp r3, r2 +; CHECK-THUMB-NEXT: blt .LBB19_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr +; CHECK-THUMB-NEXT: .LBB19_2: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp slt i16 %a, %b + %ab = sub i16 %a, %b + %ba = sub i16 %b, %a + %sel = select i1 %cmp, i16 %ab, i16 %ba + ret i16 %sel +} + +define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; 
CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp sge i32 %a, %b + %ab = sub i32 %a, %b + %ba = sub i32 %b, %a + %sel = select i1 %cmp, i32 %ba, i32 %ab + ret i32 %sel +} + +define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r2, r0 +; CHECK-ARM-NEXT: sbc lr, r3, r1 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: movge r0, r12 +; CHECK-ARM-NEXT: movge r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: subs r5, r2, r0 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: bge .LBB21_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB21_4 +; CHECK-THUMB-NEXT: .LBB21_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB21_3: +; CHECK-THUMB-NEXT: mov r0, r5 +; CHECK-THUMB-NEXT: blt .LBB21_2 +; CHECK-THUMB-NEXT: .LBB21_4: +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %cmp = icmp slt i64 %a, %b + %ab = sub i64 %a, %b + %ba = sub i64 %b, %a + %sel = select i1 %cmp, i64 %ab, i64 %ba + ret i64 %sel +} + +define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r9, r0 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r8, r1 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, lr, r2 +; 
CHECK-ARM-NEXT: sbc r5, r12, r3 +; CHECK-ARM-NEXT: subs r0, r0, r9 +; CHECK-ARM-NEXT: sbcs r1, r1, r8 +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: movge r0, r6 +; CHECK-ARM-NEXT: movge r1, r7 +; CHECK-ARM-NEXT: movge r2, r4 +; CHECK-ARM-NEXT: movge r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r2, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: add r6, sp, #52 +; CHECK-THUMB-NEXT: ldm r6, {r0, r5, r6} +; CHECK-THUMB-NEXT: ldr r2, [sp, #48] +; CHECK-THUMB-NEXT: subs r7, r2, r1 +; CHECK-THUMB-NEXT: str r7, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r7, r0 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: str r7, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r7, r5 +; CHECK-THUMB-NEXT: ldr r4, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: str r7, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r7, r6 +; CHECK-THUMB-NEXT: sbcs r7, r3 +; CHECK-THUMB-NEXT: str r7, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: subs r2, r1, r2 +; CHECK-THUMB-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r1, r0 +; CHECK-THUMB-NEXT: sbcs r4, r5 +; CHECK-THUMB-NEXT: sbcs r3, r6 +; CHECK-THUMB-NEXT: bge .LBB22_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB22_7 +; CHECK-THUMB-NEXT: .LBB22_2: +; CHECK-THUMB-NEXT: bge .LBB22_8 +; CHECK-THUMB-NEXT: .LBB22_3: +; CHECK-THUMB-NEXT: blt .LBB22_5 +; CHECK-THUMB-NEXT: .LBB22_4: +; CHECK-THUMB-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: .LBB22_5: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: mov r2, r4 +; 
CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB22_6: +; CHECK-THUMB-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB22_2 +; CHECK-THUMB-NEXT: .LBB22_7: +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB22_3 +; CHECK-THUMB-NEXT: .LBB22_8: +; CHECK-THUMB-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB22_4 +; CHECK-THUMB-NEXT: b .LBB22_5 + %cmp = icmp slt i128 %a, %b + %ab = sub i128 %a, %b + %ba = sub i128 %b, %a + %sel = select i1 %cmp, i128 %ab, i128 %ba + ret i128 %sel +} + +; +; nabs(sub_nsw(x, y)) -> nabds(a,b) +; + +define i8 @abd_subnsw_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: lsl r1, r0, #24 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: rsb r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxtb r1, r0 +; CHECK-THUMB-NEXT: asrs r1, r1, #7 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i8 %a, %b + %abs = call i8 @llvm.abs.i8(i8 %sub, i1 false) + %nabs = sub i8 0, %abs + ret i8 %nabs +} + +define i8 @abd_subnsw_i8_undef(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i8_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: lsl r1, r0, #24 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: rsb r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i8_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxtb r1, r0 +; CHECK-THUMB-NEXT: asrs r1, r1, #7 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i8 %a, %b + %abs = call i8 @llvm.abs.i8(i8 %sub, i1 true) + %nabs = sub 
i8 0, %abs + ret i8 %nabs +} + +define i16 @abd_subnsw_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: lsl r1, r0, #16 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: rsb r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxth r1, r0 +; CHECK-THUMB-NEXT: asrs r1, r1, #15 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i16 %a, %b + %abs = call i16 @llvm.abs.i16(i16 %sub, i1 false) + %nabs = sub i16 0, %abs + ret i16 %nabs +} + +define i16 @abd_subnsw_i16_undef(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i16_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: lsl r1, r0, #16 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: rsb r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i16_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxth r1, r0 +; CHECK-THUMB-NEXT: asrs r1, r1, #15 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i16 %a, %b + %abs = call i16 @llvm.abs.i16(i16 %sub, i1 true) + %nabs = sub i16 0, %abs + ret i16 %nabs +} + +define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i32 %a, %b + %abs = call i32 @llvm.abs.i32(i32 %sub, i1 false) + %nabs = sub i32 0, %abs + ret i32 
%nabs +} + +define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i32_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbpl r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i32_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i32 %a, %b + %abs = call i32 @llvm.abs.i32(i32 %sub, i1 true) + %nabs = sub i32 0, %abs + ret i32 %nabs +} + +define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r3 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: eor r2, r1, r1, asr #31 +; CHECK-ARM-NEXT: rsbs r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: rsc r1, r2, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: asrs r2, r1, #31 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r2, r0 +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: mov r1, r2 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i64 %a, %b + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + ret i64 %nabs +} + +define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i64_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r3 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: eor r2, r1, r1, asr #31 +; CHECK-ARM-NEXT: rsbs r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: rsc r1, r2, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i64_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; 
CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: asrs r2, r1, #31 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r2, r0 +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: mov r1, r2 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i64 %a, %b + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + ret i64 %nabs +} + +define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r11, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbc r3, r3, r12 +; CHECK-ARM-NEXT: eor r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: rsbs r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: rscs r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: eor r5, r3, r3, asr #31 +; CHECK-ARM-NEXT: rscs r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: rsc r3, r5, r3, asr #31 +; CHECK-ARM-NEXT: pop {r4, r5, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: ldr r4, [sp, #36] +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: subs r0, r0, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: asrs r4, r3, #31 +; CHECK-THUMB-NEXT: eors r3, r4 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: eors r1, r4 +; CHECK-THUMB-NEXT: eors r0, r4 +; 
CHECK-THUMB-NEXT: subs r0, r4, r0 +; CHECK-THUMB-NEXT: mov r5, r4 +; CHECK-THUMB-NEXT: sbcs r5, r1 +; CHECK-THUMB-NEXT: mov r6, r4 +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: mov r1, r5 +; CHECK-THUMB-NEXT: mov r2, r6 +; CHECK-THUMB-NEXT: mov r3, r4 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %sub = sub nsw i128 %a, %b + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false) + %nabs = sub i128 0, %abs + ret i128 %nabs +} + +define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i128_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r11, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbc r3, r3, r12 +; CHECK-ARM-NEXT: eor r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: rsbs r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: rscs r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: eor r5, r3, r3, asr #31 +; CHECK-ARM-NEXT: rscs r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: rsc r3, r5, r3, asr #31 +; CHECK-ARM-NEXT: pop {r4, r5, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i128_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: ldr r4, [sp, #36] +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: subs r0, r0, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: asrs r4, r3, #31 +; 
CHECK-THUMB-NEXT: eors r3, r4 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: eors r1, r4 +; CHECK-THUMB-NEXT: eors r0, r4 +; CHECK-THUMB-NEXT: subs r0, r4, r0 +; CHECK-THUMB-NEXT: mov r5, r4 +; CHECK-THUMB-NEXT: sbcs r5, r1 +; CHECK-THUMB-NEXT: mov r6, r4 +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: mov r1, r5 +; CHECK-THUMB-NEXT: mov r2, r6 +; CHECK-THUMB-NEXT: mov r3, r4 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %sub = sub nsw i128 %a, %b + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true) + %nabs = sub i128 0, %abs + ret i128 %nabs +} + +declare i8 @llvm.abs.i8(i8, i1) +declare i16 @llvm.abs.i16(i16, i1) +declare i32 @llvm.abs.i32(i32, i1) +declare i64 @llvm.abs.i64(i64, i1) +declare i128 @llvm.abs.i128(i128, i1) + +declare i8 @llvm.smax.i8(i8, i8) +declare i16 @llvm.smax.i16(i16, i16) +declare i32 @llvm.smax.i32(i32, i32) +declare i64 @llvm.smax.i64(i64, i64) + +declare i8 @llvm.smin.i8(i8, i8) +declare i16 @llvm.smin.i16(i16, i16) +declare i32 @llvm.smin.i32(i32, i32) +declare i64 @llvm.smin.i64(i64, i64) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/ARM/abds.ll b/llvm/test/CodeGen/ARM/abds.ll new file mode 100644 index 0000000000000..93f97b3db09e5 --- /dev/null +++ b/llvm/test/CodeGen/ARM/abds.ll @@ -0,0 +1,1495 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ARM +; RUN: llc -mtriple=thumbv6m-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-THUMB + +; +; trunc(abs(sub(sext(a),sext(b)))) -> abds(a,b) +; + +define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i8 %a to i64 + %bext = sext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; 
CHECK-THUMB-NEXT: bx lr + %aext = sext i8 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i8 %a to i64 + %bext = sext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %trunc = trunc i64 %abs to i8 + ret i8 %trunc +} + +define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i16 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, 
#16 +; CHECK-ARM-NEXT: rsbs r0, r1, r0, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i16 %a to i64 + %bext = sext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i16 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %trunc = trunc i64 %abs to i16 + ret i16 %trunc +} + +define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i32 %a to i64 + %bext = sext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = 
trunc i64 %abs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i32 %a to i64 + %bext = sext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = sext i32 %a to i64 + %bext = sext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %trunc = trunc i64 %abs to i32 + ret i32 %trunc +} + +define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, 
r7, lr} +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: subs r2, r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: bge .LBB9_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB9_4 +; CHECK-THUMB-NEXT: .LBB9_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB9_3: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: blt .LBB9_2 +; CHECK-THUMB-NEXT: .LBB9_4: +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %aext = sext i64 %a to i128 + %bext = sext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false) + %trunc = trunc i128 %abs to i64 + ret i64 %trunc +} + +define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: subs r2, r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: bge .LBB10_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB10_4 +; CHECK-THUMB-NEXT: .LBB10_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB10_3: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: blt .LBB10_2 +; CHECK-THUMB-NEXT: .LBB10_4: +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %aext = sext i64 %a to i128 + 
%bext = sext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true) + %trunc = trunc i128 %abs to i64 + ret i64 %trunc +} + +define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr +; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs r2, lr, r2 +; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB-NEXT: ldr r2, [sp, #56] +; CHECK-THUMB-NEXT: ldr r7, [sp, #52] +; CHECK-THUMB-NEXT: ldr r5, [sp, #48] +; CHECK-THUMB-NEXT: subs r3, r1, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: str r6, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, 
[sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r0 +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r5, r5, r1 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: sbcs r0, r6 +; CHECK-THUMB-NEXT: bge .LBB11_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB11_7 +; CHECK-THUMB-NEXT: .LBB11_2: +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB11_8 +; CHECK-THUMB-NEXT: .LBB11_3: +; CHECK-THUMB-NEXT: blt .LBB11_5 +; CHECK-THUMB-NEXT: .LBB11_4: +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: .LBB11_5: +; CHECK-THUMB-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB11_6: +; CHECK-THUMB-NEXT: str r5, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB11_2 +; CHECK-THUMB-NEXT: .LBB11_7: +; CHECK-THUMB-NEXT: mov r1, r7 +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB11_3 +; CHECK-THUMB-NEXT: .LBB11_8: +; CHECK-THUMB-NEXT: mov r4, r2 +; CHECK-THUMB-NEXT: bge .LBB11_4 +; CHECK-THUMB-NEXT: b .LBB11_5 + %aext = sext i128 %a to i256 + %bext = sext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false) + %trunc = trunc i256 %abs to i128 + ret i128 %trunc +} + +define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; 
CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr +; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs r2, lr, r2 +; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB-NEXT: ldr r2, [sp, #56] +; CHECK-THUMB-NEXT: ldr r7, [sp, #52] +; CHECK-THUMB-NEXT: ldr r5, [sp, #48] +; CHECK-THUMB-NEXT: subs r3, r1, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: str r6, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r0 +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r5, r5, r1 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: sbcs r0, r6 +; CHECK-THUMB-NEXT: bge .LBB12_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB12_7 +; CHECK-THUMB-NEXT: .LBB12_2: +; CHECK-THUMB-NEXT: 
ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB12_8 +; CHECK-THUMB-NEXT: .LBB12_3: +; CHECK-THUMB-NEXT: blt .LBB12_5 +; CHECK-THUMB-NEXT: .LBB12_4: +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: .LBB12_5: +; CHECK-THUMB-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB12_6: +; CHECK-THUMB-NEXT: str r5, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB12_2 +; CHECK-THUMB-NEXT: .LBB12_7: +; CHECK-THUMB-NEXT: mov r1, r7 +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB12_3 +; CHECK-THUMB-NEXT: .LBB12_8: +; CHECK-THUMB-NEXT: mov r4, r2 +; CHECK-THUMB-NEXT: bge .LBB12_4 +; CHECK-THUMB-NEXT: b .LBB12_5 + %aext = sext i128 %a to i256 + %bext = sext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true) + %trunc = trunc i256 %abs to i128 + ret i128 %trunc +} + +; +; sub(smax(a,b),smin(a,b)) -> abds(a,b) +; + +define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %min = call i8 @llvm.smin.i8(i8 %a, i8 %b) + %max = call i8 @llvm.smax.i8(i8 %a, i8 %b) + %sub = sub i8 %max, %min + ret i8 %sub +} + +define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl 
r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %min = call i16 @llvm.smin.i16(i16 %a, i16 %b) + %max = call i16 @llvm.smax.i16(i16 %a, i16 %b) + %sub = sub i16 %max, %min + ret i16 %sub +} + +define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %min = call i32 @llvm.smin.i32(i32 %a, i32 %b) + %max = call i32 @llvm.smax.i32(i32 %a, i32 %b) + %sub = sub i32 %max, %min + ret i32 %sub +} + +define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: subs r2, r2, 
r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: bge .LBB16_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB16_4 +; CHECK-THUMB-NEXT: .LBB16_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB16_3: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: blt .LBB16_2 +; CHECK-THUMB-NEXT: .LBB16_4: +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %min = call i64 @llvm.smin.i64(i64 %a, i64 %b) + %max = call i64 @llvm.smax.i64(i64 %a, i64 %b) + %sub = sub i64 %max, %min + ret i64 %sub +} + +define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr +; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs r2, lr, r2 +; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB-NEXT: ldr r2, [sp, #56] +; 
CHECK-THUMB-NEXT: ldr r7, [sp, #52] +; CHECK-THUMB-NEXT: ldr r5, [sp, #48] +; CHECK-THUMB-NEXT: subs r3, r1, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: str r6, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r0 +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r5, r5, r1 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: sbcs r0, r6 +; CHECK-THUMB-NEXT: bge .LBB17_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB17_7 +; CHECK-THUMB-NEXT: .LBB17_2: +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB17_8 +; CHECK-THUMB-NEXT: .LBB17_3: +; CHECK-THUMB-NEXT: blt .LBB17_5 +; CHECK-THUMB-NEXT: .LBB17_4: +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: .LBB17_5: +; CHECK-THUMB-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB17_6: +; CHECK-THUMB-NEXT: str r5, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB17_2 +; CHECK-THUMB-NEXT: .LBB17_7: +; CHECK-THUMB-NEXT: mov r1, r7 +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB17_3 +; CHECK-THUMB-NEXT: .LBB17_8: +; CHECK-THUMB-NEXT: mov r4, r2 +; CHECK-THUMB-NEXT: bge .LBB17_4 +; CHECK-THUMB-NEXT: b .LBB17_5 + %min = call i128 @llvm.smin.i128(i128 %a, i128 %b) + %max = call i128 @llvm.smax.i128(i128 %a, i128 %b) + %sub = sub i128 %max, %min + ret i128 %sub +} + +; +; select(icmp(a,b),sub(a,b),sub(b,a)) -> abds(a,b) 
+; + +define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp sgt i8 %a, %b + %ab = sub i8 %a, %b + %ba = sub i8 %b, %a + %sel = select i1 %cmp, i8 %ab, i8 %ba + ret i8 %sel +} + +define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp sge i16 %a, %b + %ab = sub i16 %a, %b + %ba = sub i16 %b, %a + %sel = select i1 %cmp, i16 %ab, i16 %ba + ret i16 %sel +} + +define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp slt i32 %a, %b + %ab = sub i32 %a, %b + 
%ba = sub i32 %b, %a + %sel = select i1 %cmp, i32 %ba, i32 %ab + ret i32 %sel +} + +define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: subs r2, r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: bge .LBB21_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB21_4 +; CHECK-THUMB-NEXT: .LBB21_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB21_3: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: blt .LBB21_2 +; CHECK-THUMB-NEXT: .LBB21_4: +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %cmp = icmp sge i64 %a, %b + %ab = sub i64 %a, %b + %ba = sub i64 %b, %a + %sel = select i1 %cmp, i64 %ab, i64 %ba + ret i64 %sel +} + +define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr +; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs 
r2, lr, r2 +; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB-NEXT: ldr r2, [sp, #56] +; CHECK-THUMB-NEXT: ldr r7, [sp, #52] +; CHECK-THUMB-NEXT: ldr r5, [sp, #48] +; CHECK-THUMB-NEXT: subs r3, r1, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: str r6, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r0 +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r5, r5, r1 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: sbcs r0, r6 +; CHECK-THUMB-NEXT: bge .LBB22_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB22_7 +; CHECK-THUMB-NEXT: .LBB22_2: +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB22_8 +; CHECK-THUMB-NEXT: .LBB22_3: +; CHECK-THUMB-NEXT: blt .LBB22_5 +; CHECK-THUMB-NEXT: .LBB22_4: +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: .LBB22_5: +; 
CHECK-THUMB-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB22_6: +; CHECK-THUMB-NEXT: str r5, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB22_2 +; CHECK-THUMB-NEXT: .LBB22_7: +; CHECK-THUMB-NEXT: mov r1, r7 +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB22_3 +; CHECK-THUMB-NEXT: .LBB22_8: +; CHECK-THUMB-NEXT: mov r4, r2 +; CHECK-THUMB-NEXT: bge .LBB22_4 +; CHECK-THUMB-NEXT: b .LBB22_5 + %cmp = icmp sge i128 %a, %b + %ab = sub i128 %a, %b + %ba = sub i128 %b, %a + %sel = select i1 %cmp, i128 %ab, i128 %ba + ret i128 %sel +} + +; +; abs(sub_nsw(x, y)) -> abds(a,b) +; + +define i8 @abd_subnsw_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: mov r2, #0 +; CHECK-ARM-NEXT: lsl r1, r0, #24 +; CHECK-ARM-NEXT: asrs r0, r1, #24 +; CHECK-ARM-NEXT: submi r0, r2, r1, asr #24 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i8 %a, %b + %abs = call i8 @llvm.abs.i8(i8 %sub, i1 false) + ret i8 %abs +} + +define i8 @abd_subnsw_i8_undef(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i8_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: mov r2, #0 +; CHECK-ARM-NEXT: lsl r1, r0, #24 +; CHECK-ARM-NEXT: asrs r0, r1, #24 +; CHECK-ARM-NEXT: submi r0, r2, r1, asr #24 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i8_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; 
CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i8 %a, %b + %abs = call i8 @llvm.abs.i8(i8 %sub, i1 true) + ret i8 %abs +} + +define i16 @abd_subnsw_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: mov r2, #0 +; CHECK-ARM-NEXT: lsl r1, r0, #16 +; CHECK-ARM-NEXT: asrs r0, r1, #16 +; CHECK-ARM-NEXT: submi r0, r2, r1, asr #16 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i16 %a, %b + %abs = call i16 @llvm.abs.i16(i16 %sub, i1 false) + ret i16 %abs +} + +define i16 @abd_subnsw_i16_undef(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i16_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: sub r0, r0, r1 +; CHECK-ARM-NEXT: mov r2, #0 +; CHECK-ARM-NEXT: lsl r1, r0, #16 +; CHECK-ARM-NEXT: asrs r0, r1, #16 +; CHECK-ARM-NEXT: submi r0, r2, r1, asr #16 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i16_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i16 %a, %b + %abs = call i16 @llvm.abs.i16(i16 %sub, i1 true) + ret i16 %abs +} + +define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, 
r0, r1 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i32 %a, %b + %abs = call i32 @llvm.abs.i32(i32 %sub, i1 false) + ret i32 %abs +} + +define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i32_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i32_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i32 %a, %b + %abs = call i32 @llvm.abs.i32(i32 %sub, i1 true) + ret i32 %abs +} + +define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r3 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: eor r2, r1, r1, asr #31 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: sbc r1, r2, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: asrs r2, r1, #31 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i64 %a, %b + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + ret i64 %abs +} + +define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i64_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r3 +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: eor r2, r1, r1, asr #31 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: sbc r1, r2, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i64_undef: +; CHECK-THUMB: @ %bb.0: +; 
CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: asrs r2, r1, #31 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: bx lr + %sub = sub nsw i64 %a, %b + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + ret i64 %abs +} + +define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r11, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbc r3, r3, r12 +; CHECK-ARM-NEXT: eor r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: subs r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: sbcs r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: eor r5, r3, r3, asr #31 +; CHECK-ARM-NEXT: sbcs r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: sbc r3, r5, r3, asr #31 +; CHECK-ARM-NEXT: pop {r4, r5, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: ldr r4, [sp, #36] +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: subs r0, r0, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: asrs r4, r3, #31 +; CHECK-THUMB-NEXT: eors r3, r4 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: eors r1, r4 +; CHECK-THUMB-NEXT: eors r0, r4 +; CHECK-THUMB-NEXT: subs r0, r0, r4 +; 
CHECK-THUMB-NEXT: sbcs r1, r4 +; CHECK-THUMB-NEXT: sbcs r2, r4 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %sub = sub nsw i128 %a, %b + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false) + ret i128 %abs +} + +define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_subnsw_i128_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r11, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbc r3, r3, r12 +; CHECK-ARM-NEXT: eor r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: subs r0, r0, r3, asr #31 +; CHECK-ARM-NEXT: eor r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: sbcs r1, r1, r3, asr #31 +; CHECK-ARM-NEXT: eor r5, r3, r3, asr #31 +; CHECK-ARM-NEXT: sbcs r2, r2, r3, asr #31 +; CHECK-ARM-NEXT: sbc r3, r5, r3, asr #31 +; CHECK-ARM-NEXT: pop {r4, r5, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_subnsw_i128_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: ldr r4, [sp, #36] +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: subs r0, r0, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: asrs r4, r3, #31 +; CHECK-THUMB-NEXT: eors r3, r4 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: eors r1, r4 +; CHECK-THUMB-NEXT: eors r0, r4 +; CHECK-THUMB-NEXT: subs r0, r0, r4 +; CHECK-THUMB-NEXT: sbcs r1, r4 +; CHECK-THUMB-NEXT: sbcs r2, r4 +; 
CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %sub = sub nsw i128 %a, %b + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true) + ret i128 %abs +} + +; +; negative tests +; + +define i32 @abd_sub_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_sub_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_sub_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %sub = sub i32 %a, %b + %abs = call i32 @llvm.abs.i32(i32 %sub, i1 false) + ret i32 %abs +} + +define i64 @vector_legalized(i16 %a, i16 %b) { +; CHECK-ARM-LABEL: vector_legalized: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: mov r1, #0 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: vector_legalized: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: movs r1, #0 +; CHECK-THUMB-NEXT: bx lr + %ea = sext i16 %a to i32 + %eb = sext i16 %b to i32 + %s = sub i32 %ea, %eb + %ab = call i32 @llvm.abs.i32(i32 %s, i1 false) + %e = zext i32 %ab to i64 + %red = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> zeroinitializer) + %z = add i64 %red, %e + ret i64 %z +} + +; +; sub(select(icmp(a,b),a,b),select(icmp(a,b),b,a)) -> abds(a,b) +; + +define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #24 +; CHECK-ARM-NEXT: lsl r1, r1, #24 +; 
CHECK-ARM-NEXT: asr r0, r0, #24 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #24 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxtb r1, r1 +; CHECK-THUMB-NEXT: sxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp slt i8 %a, %b + %ab = select i1 %cmp, i8 %a, i8 %b + %ba = select i1 %cmp, i8 %b, i8 %a + %sub = sub i8 %ba, %ab + ret i8 %sub +} + +define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: lsl r0, r0, #16 +; CHECK-ARM-NEXT: lsl r1, r1, #16 +; CHECK-ARM-NEXT: asr r0, r0, #16 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #16 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: sxth r1, r1 +; CHECK-THUMB-NEXT: sxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp sle i16 %a, %b + %ab = select i1 %cmp, i16 %a, i16 %b + %ba = select i1 %cmp, i16 %b, i16 %a + %sub = sub i16 %ba, %ab + ret i16 %sub +} + +define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp sgt i32 %a, %b + %ab = select i1 %cmp, i32 %a, i32 %b + %ba = select i1 %cmp, i32 %b, i32 %a + %sub = sub i32 %ab, %ba + ret i32 %sub +} + +define i64 @abd_select_i64(i64 
%a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r0, r2 +; CHECK-ARM-NEXT: sbc lr, r1, r3 +; CHECK-ARM-NEXT: subs r0, r2, r0 +; CHECK-ARM-NEXT: sbcs r1, r3, r1 +; CHECK-ARM-NEXT: movlt r0, r12 +; CHECK-ARM-NEXT: movlt r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: subs r2, r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: bge .LBB38_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bge .LBB38_4 +; CHECK-THUMB-NEXT: .LBB38_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB38_3: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: blt .LBB38_2 +; CHECK-THUMB-NEXT: .LBB38_4: +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %cmp = icmp sge i64 %a, %b + %ab = select i1 %cmp, i64 %a, i64 %b + %ba = select i1 %cmp, i64 %b, i64 %a + %sub = sub i64 %ab, %ba + ret i64 %sub +} + +define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r0, r9 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r1, r8 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, r2, lr +; CHECK-ARM-NEXT: sbc r5, r3, r12 +; CHECK-ARM-NEXT: subs r0, r9, r0 +; CHECK-ARM-NEXT: sbcs r1, r8, r1 +; CHECK-ARM-NEXT: sbcs r2, lr, r2 +; CHECK-ARM-NEXT: sbcs r3, r12, r3 +; CHECK-ARM-NEXT: movlt r0, r6 +; 
CHECK-ARM-NEXT: movlt r1, r7 +; CHECK-ARM-NEXT: movlt r2, r4 +; CHECK-ARM-NEXT: movlt r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r6, r2 +; CHECK-THUMB-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB-NEXT: ldr r2, [sp, #56] +; CHECK-THUMB-NEXT: ldr r7, [sp, #52] +; CHECK-THUMB-NEXT: ldr r5, [sp, #48] +; CHECK-THUMB-NEXT: subs r3, r1, r5 +; CHECK-THUMB-NEXT: str r3, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: str r6, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r3, r6 +; CHECK-THUMB-NEXT: sbcs r3, r0 +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r5, r5, r1 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r1 +; CHECK-THUMB-NEXT: sbcs r0, r6 +; CHECK-THUMB-NEXT: bge .LBB39_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB39_7 +; CHECK-THUMB-NEXT: .LBB39_2: +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: bge .LBB39_8 +; CHECK-THUMB-NEXT: .LBB39_3: +; CHECK-THUMB-NEXT: blt .LBB39_5 +; CHECK-THUMB-NEXT: .LBB39_4: +; CHECK-THUMB-NEXT: mov r3, r0 +; CHECK-THUMB-NEXT: .LBB39_5: +; CHECK-THUMB-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: mov r2, r4 +; 
CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB39_6: +; CHECK-THUMB-NEXT: str r5, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB39_2 +; CHECK-THUMB-NEXT: .LBB39_7: +; CHECK-THUMB-NEXT: mov r1, r7 +; CHECK-THUMB-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: blt .LBB39_3 +; CHECK-THUMB-NEXT: .LBB39_8: +; CHECK-THUMB-NEXT: mov r4, r2 +; CHECK-THUMB-NEXT: bge .LBB39_4 +; CHECK-THUMB-NEXT: b .LBB39_5 + %cmp = icmp slt i128 %a, %b + %ab = select i1 %cmp, i128 %a, i128 %b + %ba = select i1 %cmp, i128 %b, i128 %a + %sub = sub i128 %ba, %ab + ret i128 %sub +} + +declare i8 @llvm.abs.i8(i8, i1) +declare i16 @llvm.abs.i16(i16, i1) +declare i32 @llvm.abs.i32(i32, i1) +declare i64 @llvm.abs.i64(i64, i1) +declare i128 @llvm.abs.i128(i128, i1) + +declare i8 @llvm.smax.i8(i8, i8) +declare i16 @llvm.smax.i16(i16, i16) +declare i32 @llvm.smax.i32(i32, i32) +declare i64 @llvm.smax.i64(i64, i64) + +declare i8 @llvm.smin.i8(i8, i8) +declare i16 @llvm.smin.i16(i16, i16) +declare i32 @llvm.smin.i32(i32, i32) +declare i64 @llvm.smin.i64(i64, i64) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/ARM/abdu-neg.ll b/llvm/test/CodeGen/ARM/abdu-neg.ll new file mode 100644 index 0000000000000..94e966bc686d1 --- /dev/null +++ b/llvm/test/CodeGen/ARM/abdu-neg.ll @@ -0,0 +1,996 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ARM +; RUN: llc -mtriple=thumbv6m-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-THUMB + +; +; trunc(nabs(sub(zext(a),zext(b)))) -> nabdu(a,b) +; + +define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i8 %a to i64 + %bext = zext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, 
r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i8 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i8 %a to i64 + %bext = zext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i8 + ret i8 %trunc +} + +define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i16 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: 
abd_ext_i16_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i16 %a to i64 + %bext = zext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i16 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i16 + ret i16 %trunc +} + +define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: 
subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i32 %a to i64 + %bext = zext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i32 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i32 %a to i64 + %bext = zext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %nabs = sub i64 0, %abs + %trunc = trunc i64 %nabs to i32 + ret i32 %trunc +} + +define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; 
CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rsc r1, r1, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: adcs r2, r4 +; CHECK-THUMB-NEXT: movs r3, #1 +; CHECK-THUMB-NEXT: eors r3, r2 +; CHECK-THUMB-NEXT: rsbs r2, r3, #0 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: pop {r4, pc} + %aext = zext i64 %a to i128 + %bext = zext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false) + %nabs = sub i128 0, %abs + %trunc = trunc i128 %nabs to i64 + ret i64 %trunc +} + +define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rsc r1, r1, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, 
r3 +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: adcs r2, r4 +; CHECK-THUMB-NEXT: movs r3, #1 +; CHECK-THUMB-NEXT: eors r3, r2 +; CHECK-THUMB-NEXT: rsbs r2, r3, #0 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: pop {r4, pc} + %aext = zext i64 %a to i128 + %bext = zext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true) + %nabs = sub i128 0, %abs + %trunc = trunc i128 %nabs to i64 + ret i64 %trunc +} + +define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rscs r1, r1, #0 +; CHECK-ARM-NEXT: rscs r2, r2, #0 +; CHECK-ARM-NEXT: rsc r3, r3, #0 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; 
CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, [sp, #32] +; CHECK-THUMB-NEXT: ldr r7, [sp, #28] +; CHECK-THUMB-NEXT: ldr r0, [sp, #24] +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r5, r0 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: sbcs r2, r6 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: mov r5, r4 +; CHECK-THUMB-NEXT: adcs r5, r4 +; CHECK-THUMB-NEXT: movs r6, #1 +; CHECK-THUMB-NEXT: eors r6, r5 +; CHECK-THUMB-NEXT: rsbs r5, r6, #0 +; CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r0, r5 +; CHECK-THUMB-NEXT: subs r0, r0, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: mov r5, r4 +; CHECK-THUMB-NEXT: sbcs r5, r1 +; CHECK-THUMB-NEXT: mov r6, r4 +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: mov r1, r5 +; CHECK-THUMB-NEXT: mov r2, r6 +; CHECK-THUMB-NEXT: mov r3, r4 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %aext = zext i128 %a to i256 + %bext = zext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false) + %nabs = sub i256 0, %abs + %trunc = trunc i256 %nabs to i128 + ret i128 %trunc +} + +define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; 
CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: rsbs r0, r0, #0 +; CHECK-ARM-NEXT: rscs r1, r1, #0 +; CHECK-ARM-NEXT: rscs r2, r2, #0 +; CHECK-ARM-NEXT: rsc r3, r3, #0 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r6, [sp, #32] +; CHECK-THUMB-NEXT: ldr r7, [sp, #28] +; CHECK-THUMB-NEXT: ldr r0, [sp, #24] +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r5, r0 +; CHECK-THUMB-NEXT: sbcs r1, r7 +; CHECK-THUMB-NEXT: sbcs r2, r6 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: mov r5, r4 +; CHECK-THUMB-NEXT: adcs r5, r4 +; CHECK-THUMB-NEXT: movs r6, #1 +; CHECK-THUMB-NEXT: eors r6, r5 +; CHECK-THUMB-NEXT: rsbs r5, r6, #0 +; CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r0, r5 +; CHECK-THUMB-NEXT: subs r0, r0, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: rsbs r0, r0, #0 +; CHECK-THUMB-NEXT: mov r5, r4 +; CHECK-THUMB-NEXT: sbcs r5, r1 +; CHECK-THUMB-NEXT: mov r6, r4 +; CHECK-THUMB-NEXT: sbcs r6, r2 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: mov r1, r5 +; CHECK-THUMB-NEXT: mov r2, r6 +; 
CHECK-THUMB-NEXT: mov r3, r4 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %aext = zext i128 %a to i256 + %bext = zext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true) + %nabs = sub i256 0, %abs + %trunc = trunc i256 %nabs to i128 + ret i128 %trunc +} + +; +; sub(umin(a,b),umax(a,b)) -> nabdu(a,b) +; + +define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) + %max = call i8 @llvm.umax.i8(i8 %a, i8 %b) + %sub = sub i8 %min, %max + ret i8 %sub +} + +define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) + %max = call i16 @llvm.umax.i16(i16 %a, i16 %b) + %sub = sub i16 %min, %max + ret i16 %sub +} + +define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i32: +; CHECK-ARM: 
@ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %min = call i32 @llvm.umin.i32(i32 %a, i32 %b) + %max = call i32 @llvm.umax.i32(i32 %a, i32 %b) + %sub = sub i32 %min, %max + ret i32 %sub +} + +define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, lr} +; CHECK-ARM-NEXT: push {r4, lr} +; CHECK-ARM-NEXT: subs r12, r2, r0 +; CHECK-ARM-NEXT: mov lr, r2 +; CHECK-ARM-NEXT: sbcs r12, r3, r1 +; CHECK-ARM-NEXT: mov r12, r3 +; CHECK-ARM-NEXT: movlo lr, r0 +; CHECK-ARM-NEXT: movlo r12, r1 +; CHECK-ARM-NEXT: subs r4, r0, r2 +; CHECK-ARM-NEXT: sbcs r4, r1, r3 +; CHECK-ARM-NEXT: movlo r3, r1 +; CHECK-ARM-NEXT: movlo r2, r0 +; CHECK-ARM-NEXT: subs r0, r2, lr +; CHECK-ARM-NEXT: sbc r1, r3, r12 +; CHECK-ARM-NEXT: pop {r4, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, lr} +; CHECK-THUMB-NEXT: subs r4, r2, r0 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: blo .LBB16_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: .LBB16_2: +; CHECK-THUMB-NEXT: mov r5, r0 +; CHECK-THUMB-NEXT: blo .LBB16_4 +; CHECK-THUMB-NEXT: @ %bb.3: +; CHECK-THUMB-NEXT: mov r5, r2 +; CHECK-THUMB-NEXT: .LBB16_4: +; CHECK-THUMB-NEXT: subs r6, r0, r2 +; CHECK-THUMB-NEXT: mov r6, r1 +; CHECK-THUMB-NEXT: sbcs r6, r3 +; CHECK-THUMB-NEXT: blo .LBB16_6 +; CHECK-THUMB-NEXT: @ %bb.5: +; CHECK-THUMB-NEXT: mov r1, r3 +; CHECK-THUMB-NEXT: .LBB16_6: +; CHECK-THUMB-NEXT: blo .LBB16_8 +; CHECK-THUMB-NEXT: @ %bb.7: +; 
CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: .LBB16_8: +; CHECK-THUMB-NEXT: subs r0, r0, r5 +; CHECK-THUMB-NEXT: sbcs r1, r4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, pc} + %min = call i64 @llvm.umin.i64(i64 %a, i64 %b) + %max = call i64 @llvm.umax.i64(i64 %a, i64 %b) + %sub = sub i64 %min, %max + ret i64 %sub +} + +define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #32] +; CHECK-ARM-NEXT: ldr r4, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r5, r0 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r6, r4, r1 +; CHECK-ARM-NEXT: mov r10, r4 +; CHECK-ARM-NEXT: mov r7, r5 +; CHECK-ARM-NEXT: sbcs r6, lr, r2 +; CHECK-ARM-NEXT: mov r8, r12 +; CHECK-ARM-NEXT: sbcs r6, r12, r3 +; CHECK-ARM-NEXT: mov r9, lr +; CHECK-ARM-NEXT: movlo r8, r3 +; CHECK-ARM-NEXT: movlo r9, r2 +; CHECK-ARM-NEXT: movlo r10, r1 +; CHECK-ARM-NEXT: movlo r7, r0 +; CHECK-ARM-NEXT: subs r6, r0, r5 +; CHECK-ARM-NEXT: sbcs r6, r1, r4 +; CHECK-ARM-NEXT: sbcs r6, r2, lr +; CHECK-ARM-NEXT: sbcs r6, r3, r12 +; CHECK-ARM-NEXT: movlo r12, r3 +; CHECK-ARM-NEXT: movlo lr, r2 +; CHECK-ARM-NEXT: movlo r4, r1 +; CHECK-ARM-NEXT: movlo r5, r0 +; CHECK-ARM-NEXT: subs r0, r5, r7 +; CHECK-ARM-NEXT: sbcs r1, r4, r10 +; CHECK-ARM-NEXT: sbcs r2, lr, r9 +; CHECK-ARM-NEXT: sbc r3, r12, r8 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #20 +; CHECK-THUMB-NEXT: sub sp, #20 +; CHECK-THUMB-NEXT: ldr r5, [sp, #52] +; CHECK-THUMB-NEXT: add r7, sp, #40 +; CHECK-THUMB-NEXT: ldm r7, {r4, r6, r7} +; CHECK-THUMB-NEXT: str r4, [sp, #16] @ 4-byte Spill +; 
CHECK-THUMB-NEXT: subs r4, r4, r0 +; CHECK-THUMB-NEXT: mov r4, r6 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: mov r4, r7 +; CHECK-THUMB-NEXT: sbcs r4, r2 +; CHECK-THUMB-NEXT: mov r4, r5 +; CHECK-THUMB-NEXT: sbcs r4, r3 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: bhs .LBB17_12 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: bhs .LBB17_13 +; CHECK-THUMB-NEXT: .LBB17_2: +; CHECK-THUMB-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: blo .LBB17_4 +; CHECK-THUMB-NEXT: .LBB17_3: +; CHECK-THUMB-NEXT: str r6, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: .LBB17_4: +; CHECK-THUMB-NEXT: str r4, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: blo .LBB17_6 +; CHECK-THUMB-NEXT: @ %bb.5: +; CHECK-THUMB-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: .LBB17_6: +; CHECK-THUMB-NEXT: str r4, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r4, r0, r4 +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: sbcs r4, r6 +; CHECK-THUMB-NEXT: mov r4, r2 +; CHECK-THUMB-NEXT: sbcs r4, r7 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: sbcs r4, r5 +; CHECK-THUMB-NEXT: bhs .LBB17_14 +; CHECK-THUMB-NEXT: @ %bb.7: +; CHECK-THUMB-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: bhs .LBB17_15 +; CHECK-THUMB-NEXT: .LBB17_8: +; CHECK-THUMB-NEXT: bhs .LBB17_16 +; CHECK-THUMB-NEXT: .LBB17_9: +; CHECK-THUMB-NEXT: blo .LBB17_11 +; CHECK-THUMB-NEXT: .LBB17_10: +; CHECK-THUMB-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: .LBB17_11: +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: subs r0, r0, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r4 +; CHECK-THUMB-NEXT: add sp, #20 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} 
+; CHECK-THUMB-NEXT: .LBB17_12: +; CHECK-THUMB-NEXT: mov r4, r5 +; CHECK-THUMB-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: blo .LBB17_2 +; CHECK-THUMB-NEXT: .LBB17_13: +; CHECK-THUMB-NEXT: str r7, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: bhs .LBB17_3 +; CHECK-THUMB-NEXT: b .LBB17_4 +; CHECK-THUMB-NEXT: .LBB17_14: +; CHECK-THUMB-NEXT: mov r3, r5 +; CHECK-THUMB-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: blo .LBB17_8 +; CHECK-THUMB-NEXT: .LBB17_15: +; CHECK-THUMB-NEXT: mov r2, r7 +; CHECK-THUMB-NEXT: blo .LBB17_9 +; CHECK-THUMB-NEXT: .LBB17_16: +; CHECK-THUMB-NEXT: mov r1, r6 +; CHECK-THUMB-NEXT: bhs .LBB17_10 +; CHECK-THUMB-NEXT: b .LBB17_11 + %min = call i128 @llvm.umin.i128(i128 %a, i128 %b) + %max = call i128 @llvm.umax.i128(i128 %a, i128 %b) + %sub = sub i128 %min, %max + ret i128 %sub +} + +; +; select(icmp(a,b),sub(a,b),sub(b,a)) -> nabdu(a,b) +; + +define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r12, r1, #255 +; CHECK-ARM-NEXT: and r3, r0, #255 +; CHECK-ARM-NEXT: sub r2, r1, r0 +; CHECK-ARM-NEXT: cmp r3, r12 +; CHECK-ARM-NEXT: subls r2, r0, r1 +; CHECK-ARM-NEXT: mov r0, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r2, r1 +; CHECK-THUMB-NEXT: uxtb r3, r0 +; CHECK-THUMB-NEXT: cmp r3, r2 +; CHECK-THUMB-NEXT: bls .LBB18_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr +; CHECK-THUMB-NEXT: .LBB18_2: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ule i8 %a, %b + %ab = sub i8 %a, %b + %ba = sub i8 %b, %a + %sel = select i1 %cmp, i8 %ab, i8 %ba + ret i8 %sel +} + +define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: 
mov r12, #255 +; CHECK-ARM-NEXT: sub r2, r1, r0 +; CHECK-ARM-NEXT: orr r12, r12, #65280 +; CHECK-ARM-NEXT: and lr, r1, r12 +; CHECK-ARM-NEXT: and r3, r0, r12 +; CHECK-ARM-NEXT: cmp r3, lr +; CHECK-ARM-NEXT: sublo r2, r0, r1 +; CHECK-ARM-NEXT: mov r0, r2 +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r2, r1 +; CHECK-THUMB-NEXT: uxth r3, r0 +; CHECK-THUMB-NEXT: cmp r3, r2 +; CHECK-THUMB-NEXT: blo .LBB19_2 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr +; CHECK-THUMB-NEXT: .LBB19_2: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ult i16 %a, %b + %ab = sub i16 %a, %b + %ba = sub i16 %b, %a + %sel = select i1 %cmp, i16 %ab, i16 %ba + ret i16 %sel +} + +define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbhs r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r1, r0 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp uge i32 %a, %b + %ab = sub i32 %a, %b + %ba = sub i32 %b, %a + %sel = select i1 %cmp, i32 %ba, i32 %ab + ret i32 %sel +} + +define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: subs r12, r2, r0 +; CHECK-ARM-NEXT: sbc lr, r3, r1 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: movhs r0, r12 +; CHECK-ARM-NEXT: movhs r1, lr +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r7, lr} +; 
CHECK-THUMB-NEXT: subs r5, r2, r0 +; CHECK-THUMB-NEXT: mov r4, r3 +; CHECK-THUMB-NEXT: sbcs r4, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: bhs .LBB21_3 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bhs .LBB21_4 +; CHECK-THUMB-NEXT: .LBB21_2: +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} +; CHECK-THUMB-NEXT: .LBB21_3: +; CHECK-THUMB-NEXT: mov r0, r5 +; CHECK-THUMB-NEXT: blo .LBB21_2 +; CHECK-THUMB-NEXT: .LBB21_4: +; CHECK-THUMB-NEXT: mov r1, r4 +; CHECK-THUMB-NEXT: pop {r4, r5, r7, pc} + %cmp = icmp ult i64 %a, %b + %ab = sub i64 %a, %b + %ba = sub i64 %b, %a + %sel = select i1 %cmp, i64 %ab, i64 %ba + ret i64 %sel +} + +define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: ldr r9, [sp, #32] +; CHECK-ARM-NEXT: ldr r8, [sp, #36] +; CHECK-ARM-NEXT: subs r6, r9, r0 +; CHECK-ARM-NEXT: ldr lr, [sp, #40] +; CHECK-ARM-NEXT: sbcs r7, r8, r1 +; CHECK-ARM-NEXT: ldr r12, [sp, #44] +; CHECK-ARM-NEXT: sbcs r4, lr, r2 +; CHECK-ARM-NEXT: sbc r5, r12, r3 +; CHECK-ARM-NEXT: subs r0, r0, r9 +; CHECK-ARM-NEXT: sbcs r1, r1, r8 +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: movhs r0, r6 +; CHECK-ARM-NEXT: movhs r1, r7 +; CHECK-ARM-NEXT: movhs r2, r4 +; CHECK-ARM-NEXT: movhs r3, r5 +; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #28 +; CHECK-THUMB-NEXT: sub sp, #28 +; CHECK-THUMB-NEXT: str r2, [sp, #24] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r4, r1 +; CHECK-THUMB-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r1, r0 +; CHECK-THUMB-NEXT: add r6, sp, #52 +; 
CHECK-THUMB-NEXT: ldm r6, {r0, r5, r6} +; CHECK-THUMB-NEXT: ldr r2, [sp, #48] +; CHECK-THUMB-NEXT: subs r7, r2, r1 +; CHECK-THUMB-NEXT: str r7, [sp, #4] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r7, r0 +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: str r7, [sp, #8] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r7, r5 +; CHECK-THUMB-NEXT: ldr r4, [sp, #24] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r7, r4 +; CHECK-THUMB-NEXT: str r7, [sp, #12] @ 4-byte Spill +; CHECK-THUMB-NEXT: mov r7, r6 +; CHECK-THUMB-NEXT: sbcs r7, r3 +; CHECK-THUMB-NEXT: str r7, [sp, #16] @ 4-byte Spill +; CHECK-THUMB-NEXT: subs r2, r1, r2 +; CHECK-THUMB-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r1, r0 +; CHECK-THUMB-NEXT: sbcs r4, r5 +; CHECK-THUMB-NEXT: sbcs r3, r6 +; CHECK-THUMB-NEXT: bhs .LBB22_6 +; CHECK-THUMB-NEXT: @ %bb.1: +; CHECK-THUMB-NEXT: bhs .LBB22_7 +; CHECK-THUMB-NEXT: .LBB22_2: +; CHECK-THUMB-NEXT: bhs .LBB22_8 +; CHECK-THUMB-NEXT: .LBB22_3: +; CHECK-THUMB-NEXT: blo .LBB22_5 +; CHECK-THUMB-NEXT: .LBB22_4: +; CHECK-THUMB-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-THUMB-NEXT: .LBB22_5: +; CHECK-THUMB-NEXT: mov r0, r2 +; CHECK-THUMB-NEXT: mov r2, r4 +; CHECK-THUMB-NEXT: add sp, #28 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB-NEXT: .LBB22_6: +; CHECK-THUMB-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-THUMB-NEXT: blo .LBB22_2 +; CHECK-THUMB-NEXT: .LBB22_7: +; CHECK-THUMB-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB-NEXT: blo .LBB22_3 +; CHECK-THUMB-NEXT: .LBB22_8: +; CHECK-THUMB-NEXT: ldr r4, [sp, #12] @ 4-byte Reload +; CHECK-THUMB-NEXT: bhs .LBB22_4 +; CHECK-THUMB-NEXT: b .LBB22_5 + %cmp = icmp ult i128 %a, %b + %ab = sub i128 %a, %b + %ba = sub i128 %b, %a + %sel = select i1 %cmp, i128 %ab, i128 %ba + ret i128 %sel +} + +declare i8 @llvm.abs.i8(i8, i1) +declare i16 @llvm.abs.i16(i16, i1) +declare i32 @llvm.abs.i32(i32, i1) +declare i64 @llvm.abs.i64(i64, i1) +declare i128 @llvm.abs.i128(i128, i1) + +declare i8 
@llvm.umax.i8(i8, i8) +declare i16 @llvm.umax.i16(i16, i16) +declare i32 @llvm.umax.i32(i32, i32) +declare i64 @llvm.umax.i64(i64, i64) + +declare i8 @llvm.umin.i8(i8, i8) +declare i16 @llvm.umin.i16(i16, i16) +declare i32 @llvm.umin.i32(i32, i32) +declare i64 @llvm.umin.i64(i64, i64) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/ARM/abdu.ll b/llvm/test/CodeGen/ARM/abdu.ll new file mode 100644 index 0000000000000..51a13f9a9299b --- /dev/null +++ b/llvm/test/CodeGen/ARM/abdu.ll @@ -0,0 +1,1048 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ARM +; RUN: llc -mtriple=thumbv6m-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-THUMB + +; +; trunc(abs(sub(zext(a),zext(b)))) -> abdu(a,b) +; + +define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i8 %a to i64 + %bext = zext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; 
CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i8 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i8 + ret i8 %trunc +} + +define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i8_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i8_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i8 %a to i64 + %bext = zext i8 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %trunc = trunc i64 %abs to i8 + ret i8 %trunc +} + +define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i16 %a to i64 + %bext = 
zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i16 %a to i64 + %bext = zext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i16 + ret i16 %trunc +} + +define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i16_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i16_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i16 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %trunc = trunc i64 %abs to i16 + ret i16 %trunc +} + +define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; 
CHECK-THUMB-LABEL: abd_ext_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i32 %a to i64 + %bext = zext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i32 %a to i64 + %bext = zext i16 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) + %trunc = trunc i64 %abs to i32 + ret i32 %trunc +} + +define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i32_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i32_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %aext = zext i32 %a to i64 + %bext = zext i32 %b to i64 + %sub = sub i64 %aext, %bext + %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) + %trunc = trunc i64 %abs to i32 + ret i32 %trunc +} + +define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; 
CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: adcs r4, r4 +; CHECK-THUMB-NEXT: movs r2, #1 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: rsbs r2, r2, #0 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: pop {r4, pc} + %aext = zext i64 %a to i128 + %bext = zext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 false) + %trunc = trunc i128 %abs to i64 + ret i64 %trunc +} + +define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i64_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i64_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: adcs r4, r4 +; CHECK-THUMB-NEXT: movs r2, #1 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: rsbs r2, r2, #0 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors 
r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: pop {r4, pc} + %aext = zext i64 %a to i128 + %bext = zext i64 %b to i128 + %sub = sub i128 %aext, %bext + %abs = call i128 @llvm.abs.i128(i128 %sub, i1 true) + %trunc = trunc i128 %abs to i64 + ret i64 %trunc +} + +define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: movs r0, #0 +; CHECK-THUMB-NEXT: subs r4, r4, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: adcs r0, r0 +; CHECK-THUMB-NEXT: 
movs r5, #1 +; CHECK-THUMB-NEXT: eors r5, r0 +; CHECK-THUMB-NEXT: rsbs r5, r5, #0 +; CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r4, r5 +; CHECK-THUMB-NEXT: subs r0, r4, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %aext = zext i128 %a to i256 + %bext = zext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 false) + %trunc = trunc i256 %abs to i128 + ret i128 %trunc +} + +define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_ext_i128_undef: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_ext_i128_undef: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; 
CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: movs r0, #0 +; CHECK-THUMB-NEXT: subs r4, r4, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: adcs r0, r0 +; CHECK-THUMB-NEXT: movs r5, #1 +; CHECK-THUMB-NEXT: eors r5, r0 +; CHECK-THUMB-NEXT: rsbs r5, r5, #0 +; CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r4, r5 +; CHECK-THUMB-NEXT: subs r0, r4, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %aext = zext i128 %a to i256 + %bext = zext i128 %b to i256 + %sub = sub i256 %aext, %bext + %abs = call i256 @llvm.abs.i256(i256 %sub, i1 true) + %trunc = trunc i256 %abs to i128 + ret i128 %trunc +} + +; +; sub(umax(a,b),umin(a,b)) -> abdu(a,b) +; + +define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) + %max = call i8 @llvm.umax.i8(i8 %a, i8 %b) + %sub = sub i8 %max, %min + ret i8 %sub +} + +define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; 
CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) + %max = call i16 @llvm.umax.i16(i16 %a, i16 %b) + %sub = sub i16 %max, %min + ret i16 %sub +} + +define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %min = call i32 @llvm.umin.i32(i32 %a, i32 %b) + %max = call i32 @llvm.umax.i32(i32 %a, i32 %b) + %sub = sub i32 %max, %min + ret i32 %sub +} + +define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: adcs r4, r4 +; CHECK-THUMB-NEXT: movs r2, #1 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: rsbs r2, r2, #0 +; 
CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: pop {r4, pc} + %min = call i64 @llvm.umin.i64(i64 %a, i64 %b) + %max = call i64 @llvm.umax.i64(i64 %a, i64 %b) + %sub = sub i64 %max, %min + ret i64 %sub +} + +define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_minmax_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_minmax_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: movs r0, #0 +; CHECK-THUMB-NEXT: subs r4, r4, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: adcs r0, r0 +; 
CHECK-THUMB-NEXT: movs r5, #1 +; CHECK-THUMB-NEXT: eors r5, r0 +; CHECK-THUMB-NEXT: rsbs r5, r5, #0 +; CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r4, r5 +; CHECK-THUMB-NEXT: subs r0, r4, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %min = call i128 @llvm.umin.i128(i128 %a, i128 %b) + %max = call i128 @llvm.umax.i128(i128 %a, i128 %b) + %sub = sub i128 %max, %min + ret i128 %sub +} + +; +; select(icmp(a,b),sub(a,b),sub(b,a)) -> abdu(a,b) +; + +define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ugt i8 %a, %b + %ab = sub i8 %a, %b + %ba = sub i8 %b, %a + %sel = select i1 %cmp, i8 %ab, i8 %ba + ret i8 %sel +} + +define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + 
%cmp = icmp uge i16 %a, %b + %ab = sub i16 %a, %b + %ba = sub i16 %b, %a + %sel = select i1 %cmp, i16 %ab, i16 %ba + ret i16 %sel +} + +define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ult i32 %a, %b + %ab = sub i32 %a, %b + %ba = sub i32 %b, %a + %sel = select i1 %cmp, i32 %ba, i32 %ab + ret i32 %sel +} + +define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: adcs r4, r4 +; CHECK-THUMB-NEXT: movs r2, #1 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: rsbs r2, r2, #0 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: pop {r4, pc} + %cmp = icmp uge i64 %a, %b + %ab = sub i64 %a, %b + %ba = sub i64 %b, %a + %sel = select i1 %cmp, i64 %ab, i64 %ba + ret i64 %sel +} + +define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_cmp_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save 
{r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_cmp_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: movs r0, #0 +; CHECK-THUMB-NEXT: subs r4, r4, r7 +; CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: adcs r0, r0 +; CHECK-THUMB-NEXT: movs r5, #1 +; CHECK-THUMB-NEXT: eors r5, r0 +; CHECK-THUMB-NEXT: rsbs r5, r5, #0 +; CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r4, r5 +; CHECK-THUMB-NEXT: subs r0, r4, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %cmp = icmp uge 
i128 %a, %b + %ab = sub i128 %a, %b + %ba = sub i128 %b, %a + %sel = select i1 %cmp, i128 %ab, i128 %ba + ret i128 %sel +} + +; +; negative tests +; + +define i64 @vector_legalized(i16 %a, i16 %b) { +; CHECK-ARM-LABEL: vector_legalized: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: mov r1, #0 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: vector_legalized: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: movs r1, #0 +; CHECK-THUMB-NEXT: bx lr + %ea = zext i16 %a to i32 + %eb = zext i16 %b to i32 + %s = sub i32 %ea, %eb + %ab = call i32 @llvm.abs.i32(i32 %s, i1 false) + %e = zext i32 %ab to i64 + %red = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> zeroinitializer) + %z = add i64 %red, %e + ret i64 %z +} + +; +; sub(select(icmp(a,b),a,b),select(icmp(a,b),b,a)) -> abdu(a,b) +; + +define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i8: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: and r1, r1, #255 +; CHECK-ARM-NEXT: and r0, r0, #255 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i8: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxtb r1, r1 +; CHECK-THUMB-NEXT: uxtb r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ult i8 %a, %b + %ab = select i1 %cmp, i8 %a, i8 %b + %ba = select i1 %cmp, i8 %b, i8 %a + %sub = sub i8 %ba, %ab + ret i8 %sub +} + +define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { +; CHECK-ARM-LABEL: 
abd_select_i16: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: mov r2, #255 +; CHECK-ARM-NEXT: orr r2, r2, #65280 +; CHECK-ARM-NEXT: and r1, r1, r2 +; CHECK-ARM-NEXT: and r0, r0, r2 +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i16: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: uxth r1, r1 +; CHECK-THUMB-NEXT: uxth r0, r0 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ule i16 %a, %b + %ab = select i1 %cmp, i16 %a, i16 %b + %ba = select i1 %cmp, i16 %b, i16 %a + %sub = sub i16 %ba, %ab + ret i16 %sub +} + +define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i32: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsblo r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i32: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: sbcs r1, r1 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr + %cmp = icmp ugt i32 %a, %b + %ab = select i1 %cmp, i32 %a, i32 %b + %ba = select i1 %cmp, i32 %b, i32 %a + %sub = sub i32 %ab, %ba + ret i32 %sub +} + +define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i64: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: mov r12, #0 +; CHECK-ARM-NEXT: sbcs r1, r1, r3 +; CHECK-ARM-NEXT: adc r2, r12, #0 +; CHECK-ARM-NEXT: eor r2, r2, #1 +; CHECK-ARM-NEXT: rsb r2, r2, #0 +; CHECK-ARM-NEXT: eor r0, r0, r2 +; CHECK-ARM-NEXT: eor r1, r1, r2 +; CHECK-ARM-NEXT: subs r0, r0, r2 +; CHECK-ARM-NEXT: sbc r1, r1, r2 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i64: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, lr} +; CHECK-THUMB-NEXT: push {r4, lr} +; CHECK-THUMB-NEXT: movs r4, #0 +; CHECK-THUMB-NEXT: subs 
r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r3 +; CHECK-THUMB-NEXT: adcs r4, r4 +; CHECK-THUMB-NEXT: movs r2, #1 +; CHECK-THUMB-NEXT: eors r2, r4 +; CHECK-THUMB-NEXT: rsbs r2, r2, #0 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: pop {r4, pc} + %cmp = icmp uge i64 %a, %b + %ab = select i1 %cmp, i64 %a, i64 %b + %ba = select i1 %cmp, i64 %b, i64 %a + %sub = sub i64 %ab, %ba + ret i64 %sub +} + +define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { +; CHECK-ARM-LABEL: abd_select_i128: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: .save {r4, r5, r6, lr} +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r5, [sp, #16] +; CHECK-ARM-NEXT: mov r6, #0 +; CHECK-ARM-NEXT: ldr r4, [sp, #20] +; CHECK-ARM-NEXT: subs r0, r0, r5 +; CHECK-ARM-NEXT: ldr lr, [sp, #24] +; CHECK-ARM-NEXT: sbcs r1, r1, r4 +; CHECK-ARM-NEXT: ldr r12, [sp, #28] +; CHECK-ARM-NEXT: sbcs r2, r2, lr +; CHECK-ARM-NEXT: sbcs r3, r3, r12 +; CHECK-ARM-NEXT: adc r6, r6, #0 +; CHECK-ARM-NEXT: eor r6, r6, #1 +; CHECK-ARM-NEXT: rsb r6, r6, #0 +; CHECK-ARM-NEXT: eor r0, r0, r6 +; CHECK-ARM-NEXT: eor r1, r1, r6 +; CHECK-ARM-NEXT: subs r0, r0, r6 +; CHECK-ARM-NEXT: eor r2, r2, r6 +; CHECK-ARM-NEXT: sbcs r1, r1, r6 +; CHECK-ARM-NEXT: eor r3, r3, r6 +; CHECK-ARM-NEXT: sbcs r2, r2, r6 +; CHECK-ARM-NEXT: sbc r3, r3, r6 +; CHECK-ARM-NEXT: pop {r4, r5, r6, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: abd_select_i128: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-THUMB-NEXT: .pad #4 +; CHECK-THUMB-NEXT: sub sp, #4 +; CHECK-THUMB-NEXT: mov r4, r0 +; CHECK-THUMB-NEXT: ldr r0, [sp, #36] +; CHECK-THUMB-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-THUMB-NEXT: ldr r5, [sp, #32] +; CHECK-THUMB-NEXT: ldr r6, [sp, #28] +; CHECK-THUMB-NEXT: ldr r7, [sp, #24] +; CHECK-THUMB-NEXT: movs r0, #0 +; CHECK-THUMB-NEXT: subs r4, r4, r7 +; 
CHECK-THUMB-NEXT: sbcs r1, r6 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: adcs r0, r0 +; CHECK-THUMB-NEXT: movs r5, #1 +; CHECK-THUMB-NEXT: eors r5, r0 +; CHECK-THUMB-NEXT: rsbs r5, r5, #0 +; CHECK-THUMB-NEXT: eors r3, r5 +; CHECK-THUMB-NEXT: eors r2, r5 +; CHECK-THUMB-NEXT: eors r1, r5 +; CHECK-THUMB-NEXT: eors r4, r5 +; CHECK-THUMB-NEXT: subs r0, r4, r5 +; CHECK-THUMB-NEXT: sbcs r1, r5 +; CHECK-THUMB-NEXT: sbcs r2, r5 +; CHECK-THUMB-NEXT: sbcs r3, r5 +; CHECK-THUMB-NEXT: add sp, #4 +; CHECK-THUMB-NEXT: pop {r4, r5, r6, r7, pc} + %cmp = icmp ult i128 %a, %b + %ab = select i1 %cmp, i128 %a, i128 %b + %ba = select i1 %cmp, i128 %b, i128 %a + %sub = sub i128 %ba, %ab + ret i128 %sub +} + +declare i8 @llvm.abs.i8(i8, i1) +declare i16 @llvm.abs.i16(i16, i1) +declare i32 @llvm.abs.i32(i32, i1) +declare i64 @llvm.abs.i64(i64, i1) +declare i128 @llvm.abs.i128(i128, i1) + +declare i8 @llvm.umax.i8(i8, i8) +declare i16 @llvm.umax.i16(i16, i16) +declare i32 @llvm.umax.i32(i32, i32) +declare i64 @llvm.umax.i64(i64, i64) + +declare i8 @llvm.umin.i8(i8, i8) +declare i16 @llvm.umin.i16(i16, i16) +declare i32 @llvm.umin.i32(i32, i32) +declare i64 @llvm.umin.i64(i64, i64) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/ARM/iabs.ll b/llvm/test/CodeGen/ARM/iabs.ll index 758fe7507c0b2..eaa47d9274c3f 100644 --- a/llvm/test/CodeGen/ARM/iabs.ll +++ b/llvm/test/CodeGen/ARM/iabs.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ARM +; RUN: llc -mtriple=thumbv6m-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-THUMB ;; Integer absolute value, should produce something as good as: ARM: ;; movs r0, r0 @@ -7,11 +8,18 @@ ;; bx lr define i32 @test(i32 %a) { -; CHECK-LABEL: test: -; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: rsbmi r0, r0, #0 -; CHECK-NEXT: bx lr +; CHECK-ARM-LABEL: test: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: cmp r0, #0 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr %tmp1neg = sub i32 0, %a %b = icmp sgt i32 %a, -1 %abs = select i1 %b, i32 %a, i32 %tmp1neg @@ -24,11 +32,19 @@ define i32 @test(i32 %a) { ;; rsbmi ;; bx define i32 @test2(i32 %a, i32 %b) nounwind readnone ssp { -; CHECK-LABEL: test2: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: subs r0, r0, r1 -; CHECK-NEXT: rsbmi r0, r0, #0 -; CHECK-NEXT: bx lr +; CHECK-ARM-LABEL: test2: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: subs r0, r0, r1 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test2: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: asrs r1, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r1 +; CHECK-THUMB-NEXT: subs r0, r0, r1 +; CHECK-THUMB-NEXT: bx lr entry: %sub = sub nsw i32 %a, %b %cmp = icmp sgt i32 
%sub, -1 @@ -38,13 +54,22 @@ entry: } define i64 @test3(i64 %a) { -; CHECK-LABEL: test3: -; CHECK: @ %bb.0: -; CHECK-NEXT: eor r0, r0, r1, asr #31 -; CHECK-NEXT: eor r2, r1, r1, asr #31 -; CHECK-NEXT: subs r0, r0, r1, asr #31 -; CHECK-NEXT: sbc r1, r2, r1, asr #31 -; CHECK-NEXT: bx lr +; CHECK-ARM-LABEL: test3: +; CHECK-ARM: @ %bb.0: +; CHECK-ARM-NEXT: eor r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: eor r2, r1, r1, asr #31 +; CHECK-ARM-NEXT: subs r0, r0, r1, asr #31 +; CHECK-ARM-NEXT: sbc r1, r2, r1, asr #31 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test3: +; CHECK-THUMB: @ %bb.0: +; CHECK-THUMB-NEXT: asrs r2, r1, #31 +; CHECK-THUMB-NEXT: eors r1, r2 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: sbcs r1, r2 +; CHECK-THUMB-NEXT: bx lr %tmp1neg = sub i64 0, %a %b = icmp sgt i64 %a, -1 %abs = select i1 %b, i64 %a, i64 %tmp1neg @@ -54,24 +79,43 @@ define i64 @test3(i64 %a) { declare void @callee(...) define void @testcallframe(i32 %a) { -; CHECK-LABEL: testcallframe: -; CHECK: @ %bb.0: @ %bb -; CHECK-NEXT: .save {r11, lr} -; CHECK-NEXT: push {r11, lr} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, sp, #8 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: rsbmi r0, r0, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: str r1, [sp] -; CHECK-NEXT: bl callee -; CHECK-NEXT: add sp, sp, #8 -; CHECK-NEXT: pop {r11, lr} -; CHECK-NEXT: bx lr +; CHECK-ARM-LABEL: testcallframe: +; CHECK-ARM: @ %bb.0: @ %bb +; CHECK-ARM-NEXT: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: .pad #8 +; CHECK-ARM-NEXT: sub sp, sp, #8 +; CHECK-ARM-NEXT: cmp r0, #0 +; CHECK-ARM-NEXT: mov r1, #0 +; CHECK-ARM-NEXT: rsbmi r0, r0, #0 +; CHECK-ARM-NEXT: mov r2, #0 +; CHECK-ARM-NEXT: mov r3, #0 +; CHECK-ARM-NEXT: str r1, [sp] +; CHECK-ARM-NEXT: bl callee +; CHECK-ARM-NEXT: add sp, sp, #8 +; CHECK-ARM-NEXT: pop {r11, lr} +; CHECK-ARM-NEXT: bx lr +; +; CHECK-THUMB-LABEL: testcallframe: +; CHECK-THUMB: @ 
%bb.0: @ %bb +; CHECK-THUMB-NEXT: .save {r7, lr} +; CHECK-THUMB-NEXT: push {r7, lr} +; CHECK-THUMB-NEXT: .pad #8 +; CHECK-THUMB-NEXT: sub sp, #8 +; CHECK-THUMB-NEXT: movs r1, #0 +; CHECK-THUMB-NEXT: str r1, [sp] +; CHECK-THUMB-NEXT: asrs r2, r0, #31 +; CHECK-THUMB-NEXT: eors r0, r2 +; CHECK-THUMB-NEXT: subs r0, r0, r2 +; CHECK-THUMB-NEXT: mov r2, r1 +; CHECK-THUMB-NEXT: mov r3, r1 +; CHECK-THUMB-NEXT: bl callee +; CHECK-THUMB-NEXT: add sp, #8 +; CHECK-THUMB-NEXT: pop {r7, pc} bb: %i = tail call i32 @llvm.abs.i32(i32 %a, i1 false) tail call void @callee(i32 %i, i32 0, i32 0, i32 0, i32 0) ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/Thumb/iabs.ll b/llvm/test/CodeGen/Thumb/iabs.ll index 6bebea67e265a..ffa72a927749d 100644 --- a/llvm/test/CodeGen/Thumb/iabs.ll +++ b/llvm/test/CodeGen/Thumb/iabs.ll @@ -4,8 +4,8 @@ define i8 @test_i8(i8 %a) nounwind { ; CHECK-LABEL: test_i8: ; CHECK: @ %bb.0: -; CHECK-NEXT: sxtb r1, r0 -; CHECK-NEXT: asrs r1, r1, #7 +; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: asrs r1, r0, #31 ; CHECK-NEXT: eors r0, r1 ; CHECK-NEXT: subs r0, r0, r1 ; CHECK-NEXT: bx lr @@ -18,8 +18,8 @@ define i8 @test_i8(i8 %a) nounwind { define i16 @test_i16(i16 %a) nounwind { ; CHECK-LABEL: test_i16: ; CHECK: @ %bb.0: -; CHECK-NEXT: sxth r1, r0 -; CHECK-NEXT: asrs r1, r1, #15 +; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: asrs r1, r0, #31 ; CHECK-NEXT: eors r0, r1 ; CHECK-NEXT: subs r0, r0, r1 ; CHECK-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/abds-crash.ll b/llvm/test/CodeGen/Thumb2/abds-crash.ll index 52dda72dffa0e..2c872df178e97 100644 --- a/llvm/test/CodeGen/Thumb2/abds-crash.ll +++ b/llvm/test/CodeGen/Thumb2/abds-crash.ll @@ -9,13 +9,13 @@ define void @vp8_rd_pick_inter_mode() { ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: ldr r1, [r4] -; CHECK-NEXT: movs r2, #180 -; CHECK-NEXT: str r0, [r2] -; 
CHECK-NEXT: movs r2, #188 +; CHECK-NEXT: ldr r2, [r4] +; CHECK-NEXT: movs r1, #180 +; CHECK-NEXT: str r0, [r1] +; CHECK-NEXT: movs r1, #188 ; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: str r1, [r2] -; CHECK-NEXT: sxth r1, r1 +; CHECK-NEXT: str r2, [r1] +; CHECK-NEXT: sxth r1, r2 ; CHECK-NEXT: subs r0, r0, r1 ; CHECK-NEXT: it mi ; CHECK-NEXT: rsbmi r0, r0, #0 diff --git a/llvm/test/CodeGen/Thumb2/abs.ll b/llvm/test/CodeGen/Thumb2/abs.ll index 88259ba758803..a0c278d1d1744 100644 --- a/llvm/test/CodeGen/Thumb2/abs.ll +++ b/llvm/test/CodeGen/Thumb2/abs.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECKT1 ; RUN: llc -verify-machineinstrs -mtriple=thumbv7m-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2 -; RUN: llc -verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2 +; RUN: llc -verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2V8 declare i64 @llvm.abs.i64(i64, i1 immarg) @@ -24,6 +24,15 @@ define i64 @neg_abs64(i64 %x) { ; CHECKT2-NEXT: rsbs r0, r0, r1, asr #31 ; CHECKT2-NEXT: sbc.w r1, r3, r2 ; CHECKT2-NEXT: bx lr +; +; CHECKT2V8-LABEL: neg_abs64: +; CHECKT2V8: @ %bb.0: +; CHECKT2V8-NEXT: eor.w r0, r0, r1, asr #31 +; CHECKT2V8-NEXT: eor.w r2, r1, r1, asr #31 +; CHECKT2V8-NEXT: asrs r3, r1, #31 +; CHECKT2V8-NEXT: rsbs r0, r0, r1, asr #31 +; CHECKT2V8-NEXT: sbc.w r1, r3, r2 +; CHECKT2V8-NEXT: bx lr %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true) %neg = sub nsw i64 0, %abs ret i64 %neg @@ -41,9 +50,16 @@ define i32 @neg_abs32(i32 %x) { ; ; CHECKT2-LABEL: neg_abs32: ; CHECKT2: @ %bb.0: -; CHECKT2-NEXT: eor.w r1, r0, r0, asr #31 -; CHECKT2-NEXT: rsb r0, r1, r0, asr #31 +; CHECKT2-NEXT: cmp r0, #0 +; CHECKT2-NEXT: it pl +; CHECKT2-NEXT: rsbpl r0, r0, #0 ; CHECKT2-NEXT: bx lr +; +; CHECKT2V8-LABEL: neg_abs32: +; CHECKT2V8: @ 
%bb.0: +; CHECKT2V8-NEXT: cmp r0, #0 +; CHECKT2V8-NEXT: cneg r0, r0, pl +; CHECKT2V8-NEXT: bx lr %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true) %neg = sub nsw i32 0, %abs ret i32 %neg @@ -66,6 +82,13 @@ define i16 @neg_abs16(i16 %x) { ; CHECKT2-NEXT: eor.w r0, r0, r1, asr #15 ; CHECKT2-NEXT: rsb r0, r0, r1, asr #15 ; CHECKT2-NEXT: bx lr +; +; CHECKT2V8-LABEL: neg_abs16: +; CHECKT2V8: @ %bb.0: +; CHECKT2V8-NEXT: sxth r1, r0 +; CHECKT2V8-NEXT: eor.w r0, r0, r1, asr #15 +; CHECKT2V8-NEXT: rsb r0, r0, r1, asr #15 +; CHECKT2V8-NEXT: bx lr %abs = tail call i16 @llvm.abs.i16(i16 %x, i1 true) %neg = sub nsw i16 0, %abs ret i16 %neg @@ -109,6 +132,21 @@ define i128 @neg_abs128(i128 %x) { ; CHECKT2-NEXT: sbcs.w r2, lr, r2 ; CHECKT2-NEXT: sbc.w r3, lr, r12 ; CHECKT2-NEXT: pop {r7, pc} +; +; CHECKT2V8-LABEL: neg_abs128: +; CHECKT2V8: @ %bb.0: +; CHECKT2V8-NEXT: .save {r7, lr} +; CHECKT2V8-NEXT: push {r7, lr} +; CHECKT2V8-NEXT: eor.w r0, r0, r3, asr #31 +; CHECKT2V8-NEXT: eor.w r1, r1, r3, asr #31 +; CHECKT2V8-NEXT: eor.w r2, r2, r3, asr #31 +; CHECKT2V8-NEXT: asr.w lr, r3, #31 +; CHECKT2V8-NEXT: rsbs r0, r0, r3, asr #31 +; CHECKT2V8-NEXT: eor.w r12, r3, r3, asr #31 +; CHECKT2V8-NEXT: sbcs.w r1, lr, r1 +; CHECKT2V8-NEXT: sbcs.w r2, lr, r2 +; CHECKT2V8-NEXT: sbc.w r3, lr, r12 +; CHECKT2V8-NEXT: pop {r7, pc} %abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true) %neg = sub nsw i128 0, %abs ret i128 %neg @@ -133,6 +171,14 @@ define i64 @abs64(i64 %x) { ; CHECKT2-NEXT: subs.w r0, r0, r1, asr #31 ; CHECKT2-NEXT: sbc.w r1, r2, r1, asr #31 ; CHECKT2-NEXT: bx lr +; +; CHECKT2V8-LABEL: abs64: +; CHECKT2V8: @ %bb.0: +; CHECKT2V8-NEXT: eor.w r0, r0, r1, asr #31 +; CHECKT2V8-NEXT: eor.w r2, r1, r1, asr #31 +; CHECKT2V8-NEXT: subs.w r0, r0, r1, asr #31 +; CHECKT2V8-NEXT: sbc.w r1, r2, r1, asr #31 +; CHECKT2V8-NEXT: bx lr %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true) ret i64 %abs } @@ -151,6 +197,12 @@ define i32 @abs32(i32 %x) { ; CHECKT2-NEXT: it mi ; CHECKT2-NEXT: rsbmi r0, r0, 
#0 ; CHECKT2-NEXT: bx lr +; +; CHECKT2V8-LABEL: abs32: +; CHECKT2V8: @ %bb.0: +; CHECKT2V8-NEXT: cmp r0, #0 +; CHECKT2V8-NEXT: cneg r0, r0, mi +; CHECKT2V8-NEXT: bx lr %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true) ret i32 %abs } @@ -158,18 +210,26 @@ define i32 @abs32(i32 %x) { define i16 @abs16(i16 %x) { ; CHECKT1-LABEL: abs16: ; CHECKT1: @ %bb.0: -; CHECKT1-NEXT: sxth r1, r0 -; CHECKT1-NEXT: asrs r1, r1, #15 +; CHECKT1-NEXT: sxth r0, r0 +; CHECKT1-NEXT: asrs r1, r0, #31 ; CHECKT1-NEXT: eors r0, r1 ; CHECKT1-NEXT: subs r0, r0, r1 ; CHECKT1-NEXT: bx lr ; ; CHECKT2-LABEL: abs16: ; CHECKT2: @ %bb.0: -; CHECKT2-NEXT: sxth r1, r0 -; CHECKT2-NEXT: eor.w r0, r0, r1, asr #15 -; CHECKT2-NEXT: sub.w r0, r0, r1, asr #15 +; CHECKT2-NEXT: sxth r0, r0 +; CHECKT2-NEXT: cmp r0, #0 +; CHECKT2-NEXT: it mi +; CHECKT2-NEXT: rsbmi r0, r0, #0 ; CHECKT2-NEXT: bx lr +; +; CHECKT2V8-LABEL: abs16: +; CHECKT2V8: @ %bb.0: +; CHECKT2V8-NEXT: sxth r0, r0 +; CHECKT2V8-NEXT: cmp r0, #0 +; CHECKT2V8-NEXT: cneg r0, r0, mi +; CHECKT2V8-NEXT: bx lr %abs = tail call i16 @llvm.abs.i16(i16 %x, i1 true) ret i16 %abs } @@ -201,6 +261,18 @@ define i128 @abs128(i128 %x) { ; CHECKT2-NEXT: sbcs.w r2, r2, r3, asr #31 ; CHECKT2-NEXT: sbc.w r3, r12, r3, asr #31 ; CHECKT2-NEXT: bx lr +; +; CHECKT2V8-LABEL: abs128: +; CHECKT2V8: @ %bb.0: +; CHECKT2V8-NEXT: eor.w r0, r0, r3, asr #31 +; CHECKT2V8-NEXT: eor.w r1, r1, r3, asr #31 +; CHECKT2V8-NEXT: subs.w r0, r0, r3, asr #31 +; CHECKT2V8-NEXT: eor.w r2, r2, r3, asr #31 +; CHECKT2V8-NEXT: sbcs.w r1, r1, r3, asr #31 +; CHECKT2V8-NEXT: eor.w r12, r3, r3, asr #31 +; CHECKT2V8-NEXT: sbcs.w r2, r2, r3, asr #31 +; CHECKT2V8-NEXT: sbc.w r3, r12, r3, asr #31 +; CHECKT2V8-NEXT: bx lr %abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true) ret i128 %abs }