diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index cd7f0e719ad0c..9dbcab630d5a5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -698,6 +698,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::ABS, MVT::i64, Custom); } + setOperationAction(ISD::ABDS, MVT::i32, Custom); + setOperationAction(ISD::ABDS, MVT::i64, Custom); + setOperationAction(ISD::ABDU, MVT::i32, Custom); + setOperationAction(ISD::ABDU, MVT::i64, Custom); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); for (MVT VT : MVT::fixedlen_vector_valuetypes()) { @@ -3712,7 +3717,8 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL, } static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, - const SDLoc &DL, SelectionDAG &DAG) { + const SDLoc &DL, SelectionDAG &DAG, + bool MIOrPLSupported = false) { EVT VT = LHS.getValueType(); const bool FullFP16 = DAG.getSubtarget().hasFullFP16(); @@ -3755,6 +3761,33 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, } else if (LHS.getOpcode() == AArch64ISD::ANDS) { // Use result of ANDS return LHS.getValue(1); + } else if (MIOrPLSupported) { + // For MIOrPLSupported, optimize SUB/ADD operations with zero comparison + if (LHS.getOpcode() == ISD::SUB && CC == ISD::SETLT) { + // SUB(x, y) < 0 -> SUBS(x, y) + return DAG + .getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), + LHS.getOperand(0), LHS.getOperand(1)) + .getValue(1); + } else if (LHS.getOpcode() == ISD::ADD && CC == ISD::SETGE) { + // ADD(x, y) >= 0 -> ADDS(x, y) + return DAG + .getNode(AArch64ISD::ADDS, DL, DAG.getVTList(VT, FlagsVT), + LHS.getOperand(0), LHS.getOperand(1)) + .getValue(1); + } else if (LHS.getOpcode() == ISD::ADD && CC == ISD::SETLT) { + // ADD(x, y) < 0 -> ADDS(x, y) + return DAG + 
.getNode(AArch64ISD::ADDS, DL, DAG.getVTList(VT, FlagsVT), + LHS.getOperand(0), LHS.getOperand(1)) + .getValue(1); + } else if (LHS.getOpcode() == ISD::SUB && CC == ISD::SETGE) { + // SUB(x, y) >= 0 -> SUBS(x, y) + return DAG + .getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), + LHS.getOperand(0), LHS.getOperand(1)) + .getValue(1); + } } } @@ -3819,7 +3852,8 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, - const SDLoc &DL, SelectionDAG &DAG) { + const SDLoc &DL, SelectionDAG &DAG, + bool MIOrPLSupported = false) { unsigned Opcode = 0; const bool FullFP16 = DAG.getSubtarget().hasFullFP16(); @@ -3846,6 +3880,30 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, // we combine a (CCMP (sub 0, op1), op2) into a CCMN instruction ? Opcode = AArch64ISD::CCMN; LHS = LHS.getOperand(1); + } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC) && + MIOrPLSupported) { + // For MIOrPLSupported, optimize SUB/ADD operations with zero comparison + if (LHS.getOpcode() == ISD::SUB && CC == ISD::SETLT) { + // SUB(x, y) < 0 -> CCMP(x, y) with appropriate condition + Opcode = AArch64ISD::CCMP; + RHS = LHS.getOperand(1); + LHS = LHS.getOperand(0); + } else if (LHS.getOpcode() == ISD::ADD && CC == ISD::SETGE) { + // ADD(x, y) >= 0 -> CCMN(x, y) with appropriate condition + Opcode = AArch64ISD::CCMN; + RHS = LHS.getOperand(1); + LHS = LHS.getOperand(0); + } else if (LHS.getOpcode() == ISD::ADD && CC == ISD::SETLT) { + // ADD(x, y) < 0 -> CCMN(x, y) with appropriate condition + Opcode = AArch64ISD::CCMN; + RHS = LHS.getOperand(1); + LHS = LHS.getOperand(0); + } else if (LHS.getOpcode() == ISD::SUB && CC == ISD::SETGE) { + // SUB(x, y) >= 0 -> CCMP(x, y) with appropriate condition + Opcode = AArch64ISD::CCMP; + RHS = LHS.getOperand(1); + LHS = LHS.getOperand(0); + } } if (Opcode == 0) Opcode = AArch64ISD::CCMP; @@ -3972,7 +4030,7 @@ static SDValue 
emitConjunctionRec(SelectionDAG &DAG, SDValue Val, return emitComparison(LHS, RHS, CC, DL, DAG); // Otherwise produce a ccmp. return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL, - DAG); + DAG, true); } assert(Val->hasOneUse() && "Valid conjunction/disjunction tree"); @@ -4251,7 +4309,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } if (!Cmp) { - Cmp = emitComparison(LHS, RHS, CC, DL, DAG); + Cmp = emitComparison(LHS, RHS, CC, DL, DAG, true); AArch64CC = changeIntCCToAArch64CC(CC, RHS); } AArch64cc = getCondCode(DAG, AArch64CC); @@ -7371,13 +7429,100 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU); SDLoc DL(Op); - SDValue Neg = DAG.getNegative(Op.getOperand(0), DL, VT); - // Generate SUBS & CSEL. - SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), - Op.getOperand(0), DAG.getConstant(0, DL, VT)); + // Generate CMP & CSEL. + SDValue Cmp = emitComparison(Op.getOperand(0), DAG.getConstant(0, DL, VT), + ISD::SETGE, DL, DAG, true); + SDValue Neg = DAG.getNegative(Op.getOperand(0), DL, VT); return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg, - getCondCode(DAG, AArch64CC::PL), Cmp.getValue(1)); + getCondCode(DAG, AArch64CC::PL), Cmp); +} + +// Generate SUBS and CNEG for absolute difference. +SDValue AArch64TargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const { + MVT VT = Op.getSimpleValueType(); + + bool IsSigned = Op.getOpcode() == ISD::ABDS; + if (VT.isVector()) { + if (IsSigned) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED); + else + return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED); + } + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDLoc DL(Op); + + if (!isa<ConstantSDNode>(RHS) || !isLegalCmpImmed(RHS->getAsAPIntVal())) { + SDValue TheLHS = isCMN(LHS, IsSigned ? ISD::SETGE : ISD::SETUGE, DAG) + ? 
LHS.getOperand(1) + : LHS; + SDValue TheRHS = isCMN(RHS, IsSigned ? ISD::SETGE : ISD::SETUGE, DAG) + ? RHS.getOperand(1) + : RHS; + if (getCmpOperandFoldingProfit(TheLHS) > + getCmpOperandFoldingProfit(TheRHS)) { + std::swap(LHS, RHS); + } + } + + // If the subtract doesn't overflow then just use abs(sub()) + bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS); + + if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS)) + return DAG.getNode(ISD::ABS, DL, VT, + DAG.getNode(ISD::SUB, DL, VT, LHS, RHS)); + + if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS)) + return DAG.getNode(ISD::ABS, DL, VT, + DAG.getNode(ISD::SUB, DL, VT, RHS, LHS)); + + unsigned Opcode = AArch64ISD::SUBS; + // Check if RHS is a subtraction against 0: (0 - X) + if (RHS.getOpcode() == ISD::SUB) { + SDValue SubLHS = RHS.getOperand(0); + SDValue SubRHS = RHS.getOperand(1); + + // Check if it's 0 - X + if (isNullConstant(SubLHS)) { + bool CanUseAdd = false; + if (IsSigned) { + // For SCMP: only if X is known to never be INT_MIN (to avoid overflow) + if (RHS->getFlags().hasNoSignedWrap() || !DAG.computeKnownBits(SubRHS) + .getSignedMinValue() + .isMinSignedValue()) { + CanUseAdd = true; + } + } else { + // For UCMP: only if X is known to never be zero + if (DAG.isKnownNeverZero(SubRHS)) { + CanUseAdd = true; + } + } + + if (CanUseAdd) { + Opcode = AArch64ISD::ADDS; + RHS = SubRHS; // Replace RHS with X, so we do LHS + X instead of + // LHS - (0 - X) + } + } + } + + // Generate SUBS and CSEL for absolute difference (like LowerABS) + // Compute a - b with flags + SDValue Cmp = DAG.getNode(Opcode, DL, DAG.getVTList(VT, FlagsVT), LHS, RHS); + + // Compute b - a (negative of a - b) + SDValue Neg = DAG.getNegative(Cmp.getValue(0), DL, VT); + + // For unsigned: use HS (a >= b) to select a-b, otherwise b-a + // For signed: use GE (a >= b) to select a-b, otherwise b-a + AArch64CC::CondCode CC = IsSigned ? 
AArch64CC::GE : AArch64CC::HS; + + // CSEL: if a > b, select a-b, otherwise b-a + return DAG.getNode(AArch64ISD::CSEL, DL, VT, Cmp.getValue(0), Neg, + getCondCode(DAG, CC), Cmp.getValue(1)); } static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) { @@ -7832,9 +7977,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::ABS: return LowerABS(Op, DAG); case ISD::ABDS: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED); case ISD::ABDU: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED); + return LowerABD(Op, DAG); case ISD::AVGFLOORS: return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED); case ISD::AVGFLOORU: @@ -25815,29 +25959,6 @@ static SDValue performCSELCombine(SDNode *N, } } - // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't - // use overflow flags, to avoid the comparison with zero. In case of success, - // this also replaces the original SUB(x,y) with the newly created SUBS(x,y). - // NOTE: Perhaps in the future use performFlagSettingCombine to replace SUB - // nodes with their SUBS equivalent as is already done for other flag-setting - // operators, in which case doing the replacement here becomes redundant. 
- if (Cond.getOpcode() == AArch64ISD::SUBS && Cond->hasNUsesOfValue(1, 1) && - isNullConstant(Cond.getOperand(1))) { - SDValue Sub = Cond.getOperand(0); - AArch64CC::CondCode CC = - static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2)); - if (Sub.getOpcode() == ISD::SUB && - (CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI || - CC == AArch64CC::PL)) { - SDLoc DL(N); - SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(), - Sub.getOperand(0), Sub.getOperand(1)); - DCI.CombineTo(Sub.getNode(), Subs); - DCI.CombineTo(Cond.getNode(), Subs, Subs.getValue(1)); - return SDValue(N, 0); - } - } - // CSEL (LASTB P, Z), X, NE(ANY P) -> CLASTB P, X, Z if (SDValue CondLast = foldCSELofLASTB(N, DAG)) return CondLast; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index ff073d3eafb1f..4de8985a44ff8 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -604,6 +604,7 @@ class AArch64TargetLowering : public TargetLowering { SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const; SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 5a51c812732e6..6f469d73761ae 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1730,12 +1730,20 @@ static unsigned sForm(MachineInstr &Instr) { case AArch64::ADDSWrr: case AArch64::ADDSWri: + case AArch64::ADDSWrx: case AArch64::ADDSXrr: case AArch64::ADDSXri: + case AArch64::ADDSXrx: case AArch64::SUBSWrr: case AArch64::SUBSWri: + case AArch64::SUBSWrx: case AArch64::SUBSXrr: case AArch64::SUBSXri: + case 
AArch64::SUBSXrx: + case AArch64::ADCSWr: + case AArch64::ADCSXr: + case AArch64::SBCSWr: + case AArch64::SBCSXr: return Instr.getOpcode(); case AArch64::ADDWrr: @@ -1746,6 +1754,10 @@ static unsigned sForm(MachineInstr &Instr) { return AArch64::ADDSXrr; case AArch64::ADDXri: return AArch64::ADDSXri; + case AArch64::ADDWrx: + return AArch64::ADDSWrx; + case AArch64::ADDXrx: + return AArch64::ADDSXrx; case AArch64::ADCWr: return AArch64::ADCSWr; case AArch64::ADCXr: @@ -1758,6 +1770,10 @@ static unsigned sForm(MachineInstr &Instr) { return AArch64::SUBSXrr; case AArch64::SUBXri: return AArch64::SUBSXri; + case AArch64::SUBWrx: + return AArch64::SUBSWrx; + case AArch64::SUBXrx: + return AArch64::SUBSXrx; case AArch64::SBCWr: return AArch64::SBCSWr; case AArch64::SBCXr: diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll index 37319642f5b34..11bfcb92593cb 100644 --- a/llvm/test/CodeGen/AArch64/abds-neg.ll +++ b/llvm/test/CodeGen/AArch64/abds-neg.ll @@ -72,9 +72,8 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w8, w1 -; CHECK-NEXT: cneg w0, w8, gt +; CHECK-NEXT: subs w8, w1, w0, sxth +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %aext = sext i16 %a to i64 %bext = sext i32 %b to i64 @@ -105,7 +104,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, gt +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -120,7 +119,7 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1, sxth -; CHECK-NEXT: cneg w0, w8, gt +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i16 %b to i64 @@ -135,7 +134,7 
@@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, gt +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -150,7 +149,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, gt +; CHECK-NEXT: cneg x0, x8, ge ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -165,7 +164,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, gt +; CHECK-NEXT: cneg x0, x8, ge ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -225,12 +224,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w1 -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w10, w9, w8, lt -; CHECK-NEXT: csel w8, w9, w8, gt -; CHECK-NEXT: sub w0, w10, w8 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxtb +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %min = call i8 @llvm.smin.i8(i8 %a, i8 %b) %max = call i8 @llvm.smax.i8(i8 %a, i8 %b) @@ -241,12 +237,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w10, w9, w8, lt -; CHECK-NEXT: csel w8, w9, w8, gt -; CHECK-NEXT: sub w0, w10, w8 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %min = call i16 @llvm.smin.i16(i16 %a, i16 %b) %max = call i16 @llvm.smax.i16(i16 %a, i16 %b) @@ -257,10 
+250,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_minmax_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, w1 -; CHECK-NEXT: csel w8, w0, w1, lt -; CHECK-NEXT: csel w9, w0, w1, gt -; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %min = call i32 @llvm.smin.i32(i32 %a, i32 %b) %max = call i32 @llvm.smax.i32(i32 %a, i32 %b) @@ -271,10 +262,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_minmax_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x1 -; CHECK-NEXT: csel x8, x0, x1, lt -; CHECK-NEXT: csel x9, x0, x1, gt -; CHECK-NEXT: sub x0, x8, x9 +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, ge ; CHECK-NEXT: ret %min = call i64 @llvm.smin.i64(i64 %a, i64 %b) %max = call i64 @llvm.smax.i64(i64 %a, i64 %b) @@ -445,7 +434,7 @@ define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_subnsw_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, pl +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %sub = sub nsw i32 %a, %b %abs = call i32 @llvm.abs.i32(i32 %sub, i1 false) @@ -457,7 +446,7 @@ define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_subnsw_i32_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, pl +; CHECK-NEXT: cneg w0, w8, ge ; CHECK-NEXT: ret %sub = sub nsw i32 %a, %b %abs = call i32 @llvm.abs.i32(i32 %sub, i1 true) @@ -469,7 +458,7 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_subnsw_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, pl +; CHECK-NEXT: cneg x0, x8, ge ; CHECK-NEXT: ret %sub = sub nsw i64 %a, %b %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) @@ -481,7 +470,7 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: 
abd_subnsw_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, pl +; CHECK-NEXT: cneg x0, x8, ge ; CHECK-NEXT: ret %sub = sub nsw i64 %a, %b %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll index 1ef1c1c68c7bb..42618ac0cf68d 100644 --- a/llvm/test/CodeGen/AArch64/abds.ll +++ b/llvm/test/CodeGen/AArch64/abds.ll @@ -68,9 +68,8 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w8, w1 -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: subs w8, w1, w0, sxth +; CHECK-NEXT: cneg w0, w8, lt ; CHECK-NEXT: ret %aext = sext i16 %a to i64 %bext = sext i32 %b to i64 @@ -99,7 +98,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: cneg w0, w8, lt ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -113,7 +112,7 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1, sxth -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: cneg w0, w8, lt ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i16 %b to i64 @@ -127,7 +126,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: cneg w0, w8, lt ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -141,7 +140,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, le +; CHECK-NEXT: cneg x0, x8, lt ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -155,7 
+154,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, le +; CHECK-NEXT: cneg x0, x8, lt ; CHECK-NEXT: ret %aext = sext i64 %a to i128 %bext = sext i64 %b to i128 @@ -235,7 +234,7 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_minmax_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: cneg w0, w8, lt ; CHECK-NEXT: ret %min = call i32 @llvm.smin.i32(i32 %a, i32 %b) %max = call i32 @llvm.smax.i32(i32 %a, i32 %b) @@ -247,7 +246,7 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_minmax_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, le +; CHECK-NEXT: cneg x0, x8, lt ; CHECK-NEXT: ret %min = call i64 @llvm.smin.i64(i64 %a, i64 %b) %max = call i64 @llvm.smax.i64(i64 %a, i64 %b) @@ -307,7 +306,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_cmp_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: cneg w0, w8, lt ; CHECK-NEXT: ret %cmp = icmp slt i32 %a, %b %ab = sub i32 %a, %b @@ -320,7 +319,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_cmp_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, le +; CHECK-NEXT: cneg x0, x8, lt ; CHECK-NEXT: ret %cmp = icmp sge i64 %a, %b %ab = sub i64 %a, %b @@ -406,7 +405,7 @@ define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_subnsw_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, mi +; CHECK-NEXT: cneg w0, w8, lt ; CHECK-NEXT: ret %sub = sub nsw i32 %a, %b %abs = call i32 @llvm.abs.i32(i32 %sub, i1 false) @@ -417,7 +416,7 @@ define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_subnsw_i32_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, 
mi +; CHECK-NEXT: cneg w0, w8, lt ; CHECK-NEXT: ret %sub = sub nsw i32 %a, %b %abs = call i32 @llvm.abs.i32(i32 %sub, i1 true) @@ -428,7 +427,7 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_subnsw_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, mi +; CHECK-NEXT: cneg x0, x8, lt ; CHECK-NEXT: ret %sub = sub nsw i64 %a, %b %abs = call i64 @llvm.abs.i64(i64 %sub, i1 false) @@ -439,7 +438,7 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_subnsw_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, mi +; CHECK-NEXT: cneg x0, x8, lt ; CHECK-NEXT: ret %sub = sub nsw i64 %a, %b %abs = call i64 @llvm.abs.i64(i64 %sub, i1 true) @@ -546,7 +545,7 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_select_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, le +; CHECK-NEXT: cneg w0, w8, lt ; CHECK-NEXT: ret %cmp = icmp sgt i32 %a, %b %ab = select i1 %cmp, i32 %a, i32 %b @@ -559,7 +558,7 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_select_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, le +; CHECK-NEXT: cneg x0, x8, lt ; CHECK-NEXT: ret %cmp = icmp sge i64 %a, %b %ab = select i1 %cmp, i64 %a, i64 %b diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll index 79fc12ea76f63..8da576121d881 100644 --- a/llvm/test/CodeGen/AArch64/abdu-neg.ll +++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll @@ -72,9 +72,8 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w8, w1 -; CHECK-NEXT: cneg w0, w8, hi +; CHECK-NEXT: subs w8, w1, w0, uxth +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i16 %a to i64 %bext = zext i32 %b to i64 @@ -105,7 
+104,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, hi +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -120,7 +119,7 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1, uxth -; CHECK-NEXT: cneg w0, w8, hi +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i16 %b to i64 @@ -135,7 +134,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, hi +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -150,7 +149,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, hi +; CHECK-NEXT: cneg x0, x8, hs ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 @@ -165,7 +164,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, hi +; CHECK-NEXT: cneg x0, x8, hs ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 @@ -229,12 +228,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xff -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w10, w9, w8, lo -; CHECK-NEXT: csel w8, w9, w8, hi -; CHECK-NEXT: sub w0, w10, w8 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: subs w8, w8, w1, uxtb +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) %max = call i8 
@llvm.umax.i8(i8 %a, i8 %b) @@ -245,12 +241,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w10, w9, w8, lo -; CHECK-NEXT: csel w8, w9, w8, hi -; CHECK-NEXT: sub w0, w10, w8 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: subs w8, w8, w1, uxth +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) %max = call i16 @llvm.umax.i16(i16 %a, i16 %b) @@ -261,10 +254,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_minmax_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, w1 -; CHECK-NEXT: csel w8, w0, w1, lo -; CHECK-NEXT: csel w9, w0, w1, hi -; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: cneg w0, w8, hs ; CHECK-NEXT: ret %min = call i32 @llvm.umin.i32(i32 %a, i32 %b) %max = call i32 @llvm.umax.i32(i32 %a, i32 %b) @@ -275,10 +266,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_minmax_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x1 -; CHECK-NEXT: csel x8, x0, x1, lo -; CHECK-NEXT: csel x9, x0, x1, hi -; CHECK-NEXT: sub x0, x8, x9 +; CHECK-NEXT: subs x8, x0, x1 +; CHECK-NEXT: cneg x0, x8, hs ; CHECK-NEXT: ret %min = call i64 @llvm.umin.i64(i64 %a, i64 %b) %max = call i64 @llvm.umax.i64(i64 %a, i64 %b) diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll index 6db7693fb3a1c..e7e38b6cd5d99 100644 --- a/llvm/test/CodeGen/AArch64/abdu.ll +++ b/llvm/test/CodeGen/AArch64/abdu.ll @@ -68,9 +68,8 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; 
CHECK-NEXT: subs w8, w8, w1 -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: subs w8, w1, w0, uxth +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %aext = zext i16 %a to i64 %bext = zext i32 %b to i64 @@ -99,7 +98,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -113,7 +112,7 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1, uxth -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i16 %b to i64 @@ -127,7 +126,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_ext_i32_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -141,7 +140,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, ls +; CHECK-NEXT: cneg x0, x8, lo ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 @@ -155,7 +154,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_ext_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, ls +; CHECK-NEXT: cneg x0, x8, lo ; CHECK-NEXT: ret %aext = zext i64 %a to i128 %bext = zext i64 %b to i128 @@ -239,7 +238,7 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_minmax_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %min = call i32 @llvm.umin.i32(i32 %a, i32 %b) %max = call i32 @llvm.umax.i32(i32 %a, i32 %b) @@ -251,7 +250,7 @@ 
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_minmax_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, ls +; CHECK-NEXT: cneg x0, x8, lo ; CHECK-NEXT: ret %min = call i64 @llvm.umin.i64(i64 %a, i64 %b) %max = call i64 @llvm.umax.i64(i64 %a, i64 %b) @@ -313,7 +312,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_cmp_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %cmp = icmp ult i32 %a, %b %ab = sub i32 %a, %b @@ -326,7 +325,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_cmp_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, ls +; CHECK-NEXT: cneg x0, x8, lo ; CHECK-NEXT: ret %cmp = icmp uge i64 %a, %b %ab = sub i64 %a, %b @@ -411,7 +410,7 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_select_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, ls +; CHECK-NEXT: cneg w0, w8, lo ; CHECK-NEXT: ret %cmp = icmp ugt i32 %a, %b %ab = select i1 %cmp, i32 %a, i32 %b @@ -424,7 +423,7 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_select_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, ls +; CHECK-NEXT: cneg x0, x8, lo ; CHECK-NEXT: ret %cmp = icmp uge i64 %a, %b %ab = select i1 %cmp, i64 %a, i64 %b diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll index 0f56d25a47b2a..92d8ba242e0de 100644 --- a/llvm/test/CodeGen/AArch64/abs.ll +++ b/llvm/test/CodeGen/AArch64/abs.ll @@ -388,3 +388,33 @@ entry: ret <3 x i32> %res } declare <3 x i32> @llvm.abs.v3i32(<3 x i32>, i1) + +define i32 @combine_subs_multiple_sub_uses(i32 %a, i32 %b) { +; CHECK-LABEL: combine_subs_multiple_sub_uses: +; CHECK: // %bb.0: +; CHECK-NEXT: subs w8, w0, w1 +; CHECK-NEXT: csel w9, w0, w1, ne +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: 
ret + %sub = sub i32 %a, %b + %cc = icmp ne i32 %sub, 0 + %sel = select i1 %cc, i32 %a, i32 %b + %add = add i32 %sel, %sub + ret i32 %add +} + +define i32 @do_not_combine_subs_multiple_flag_uses(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: do_not_combine_subs_multiple_flag_uses: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: csel w8, w0, w1, ne +; CHECK-NEXT: csel w9, w2, w3, ne +; CHECK-NEXT: add w0, w8, w9 +; CHECK-NEXT: ret + %sub = sub i32 %a, %b + %cc = icmp ne i32 %sub, 0 + %sel = select i1 %cc, i32 %a, i32 %b + %other = select i1 %cc, i32 %c, i32 %d + %add = add i32 %sel, %other + ret i32 %add +} diff --git a/llvm/test/CodeGen/AArch64/alias_mask.ll b/llvm/test/CodeGen/AArch64/alias_mask.ll index 9b9c020016bab..48c66ad5bac1c 100644 --- a/llvm/test/CodeGen/AArch64/alias_mask.ll +++ b/llvm/test/CodeGen/AArch64/alias_mask.ll @@ -393,70 +393,70 @@ entry: define <32 x i1> @whilewr_32_expand3(ptr %a, ptr %b) { ; CHECK-LABEL: whilewr_32_expand3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x9, x1, x0 +; CHECK-NEXT: subs x10, x1, x0 ; CHECK-NEXT: index z0.d, #0, #1 -; CHECK-NEXT: add x10, x9, #3 -; CHECK-NEXT: sub x11, x9, #61 -; CHECK-NEXT: csel x10, x10, x9, mi -; CHECK-NEXT: subs x9, x9, #64 -; CHECK-NEXT: csel x9, x11, x9, mi -; CHECK-NEXT: asr x10, x10, #2 -; CHECK-NEXT: asr x9, x9, #2 -; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: add x9, x10, #3 +; CHECK-NEXT: sub x12, x10, #61 +; CHECK-NEXT: csel x9, x9, x10, mi +; CHECK-NEXT: asr x11, x9, #2 ; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z3.d, z0.d ; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: cmp x11, #1 +; CHECK-NEXT: dup v1.2d, x11 ; CHECK-NEXT: mov z5.d, z0.d +; CHECK-NEXT: cset w9, lt +; CHECK-NEXT: subs x10, x10, #64 ; CHECK-NEXT: mov z6.d, z0.d -; CHECK-NEXT: dup v7.2d, x10 -; CHECK-NEXT: dup v16.2d, x9 -; CHECK-NEXT: add z1.d, z1.d, #12 // =0xc -; CHECK-NEXT: add z2.d, z2.d, #10 // =0xa -; CHECK-NEXT: cmp x9, #1 -; CHECK-NEXT: add z3.d, z3.d, #8 // =0x8 -; CHECK-NEXT: add z4.d, 
z4.d, #6 // =0x6 -; CHECK-NEXT: add z5.d, z5.d, #4 // =0x4 -; CHECK-NEXT: add z6.d, z6.d, #2 // =0x2 -; CHECK-NEXT: cmhi v17.2d, v7.2d, v0.2d +; CHECK-NEXT: csel x10, x12, x10, mi +; CHECK-NEXT: mov z7.d, z0.d +; CHECK-NEXT: add z2.d, z2.d, #12 // =0xc +; CHECK-NEXT: asr x10, x10, #2 +; CHECK-NEXT: add z3.d, z3.d, #10 // =0xa +; CHECK-NEXT: add z4.d, z4.d, #8 // =0x8 +; CHECK-NEXT: add z5.d, z5.d, #6 // =0x6 +; CHECK-NEXT: add z6.d, z6.d, #4 // =0x4 +; CHECK-NEXT: cmhi v17.2d, v1.2d, v0.2d +; CHECK-NEXT: dup v16.2d, x10 +; CHECK-NEXT: add z7.d, z7.d, #2 // =0x2 +; CHECK-NEXT: cmhi v19.2d, v1.2d, v2.2d +; CHECK-NEXT: cmhi v20.2d, v1.2d, v3.2d +; CHECK-NEXT: cmhi v21.2d, v1.2d, v4.2d +; CHECK-NEXT: cmp x10, #1 +; CHECK-NEXT: cmhi v22.2d, v1.2d, v5.2d +; CHECK-NEXT: cset w10, lt ; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d ; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe -; CHECK-NEXT: cmhi v19.2d, v7.2d, v1.2d -; CHECK-NEXT: cmhi v20.2d, v7.2d, v2.2d -; CHECK-NEXT: cmhi v21.2d, v7.2d, v3.2d -; CHECK-NEXT: cmhi v22.2d, v7.2d, v4.2d -; CHECK-NEXT: cmhi v23.2d, v7.2d, v5.2d -; CHECK-NEXT: cmhi v24.2d, v7.2d, v6.2d -; CHECK-NEXT: cmhi v1.2d, v16.2d, v1.2d ; CHECK-NEXT: cmhi v2.2d, v16.2d, v2.2d ; CHECK-NEXT: cmhi v3.2d, v16.2d, v3.2d ; CHECK-NEXT: cmhi v4.2d, v16.2d, v4.2d -; CHECK-NEXT: cmhi v7.2d, v7.2d, v0.2d ; CHECK-NEXT: cmhi v5.2d, v16.2d, v5.2d -; CHECK-NEXT: cmhi v6.2d, v16.2d, v6.2d -; CHECK-NEXT: cset w9, lt -; CHECK-NEXT: cmhi v0.2d, v16.2d, v0.2d -; CHECK-NEXT: uzp1 v16.4s, v21.4s, v20.4s -; CHECK-NEXT: cmp x10, #1 -; CHECK-NEXT: uzp1 v20.4s, v23.4s, v22.4s -; CHECK-NEXT: uzp1 v17.4s, v17.4s, v24.4s -; CHECK-NEXT: cset w10, lt -; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s -; CHECK-NEXT: uzp1 v3.4s, v19.4s, v7.4s -; CHECK-NEXT: uzp1 v4.4s, v5.4s, v4.4s -; CHECK-NEXT: uzp1 v5.4s, v18.4s, v6.4s -; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp1 v1.8h, v17.8h, v20.8h -; CHECK-NEXT: uzp1 v3.8h, v16.8h, v3.8h +; CHECK-NEXT: cmhi v23.2d, v16.2d, v6.2d +; CHECK-NEXT: cmhi 
v24.2d, v16.2d, v7.2d +; CHECK-NEXT: cmhi v6.2d, v1.2d, v6.2d +; CHECK-NEXT: cmhi v16.2d, v16.2d, v0.2d +; CHECK-NEXT: cmhi v7.2d, v1.2d, v7.2d +; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d +; CHECK-NEXT: uzp1 v1.4s, v21.4s, v20.4s +; CHECK-NEXT: uzp1 v3.4s, v4.4s, v3.4s +; CHECK-NEXT: uzp1 v4.4s, v23.4s, v5.4s +; CHECK-NEXT: uzp1 v5.4s, v18.4s, v24.4s +; CHECK-NEXT: uzp1 v6.4s, v6.4s, v22.4s +; CHECK-NEXT: uzp1 v2.4s, v2.4s, v16.4s +; CHECK-NEXT: uzp1 v7.4s, v17.4s, v7.4s +; CHECK-NEXT: uzp1 v0.4s, v19.4s, v0.4s ; CHECK-NEXT: uzp1 v4.8h, v5.8h, v4.8h -; CHECK-NEXT: uzp1 v0.8h, v2.8h, v0.8h +; CHECK-NEXT: uzp1 v2.8h, v3.8h, v2.8h +; CHECK-NEXT: uzp1 v3.8h, v7.8h, v6.8h +; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NEXT: uzp1 v1.16b, v4.16b, v2.16b +; CHECK-NEXT: uzp1 v0.16b, v3.16b, v0.16b ; CHECK-NEXT: dup v2.16b, w10 -; CHECK-NEXT: uzp1 v1.16b, v1.16b, v3.16b ; CHECK-NEXT: dup v3.16b, w9 ; CHECK-NEXT: adrp x9, .LCPI14_0 -; CHECK-NEXT: uzp1 v0.16b, v4.16b, v0.16b ; CHECK-NEXT: orr v1.16b, v1.16b, v2.16b -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_0] ; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_0] ; CHECK-NEXT: shl v1.16b, v1.16b, #7 ; CHECK-NEXT: shl v0.16b, v0.16b, #7 ; CHECK-NEXT: cmlt v1.16b, v1.16b, #0 @@ -469,8 +469,8 @@ define <32 x i1> @whilewr_32_expand3(ptr %a, ptr %b) { ; CHECK-NEXT: zip1 v0.16b, v0.16b, v3.16b ; CHECK-NEXT: addv h1, v1.8h ; CHECK-NEXT: addv h0, v0.8h -; CHECK-NEXT: str h1, [x8] -; CHECK-NEXT: str h0, [x8, #2] +; CHECK-NEXT: str h1, [x8, #2] +; CHECK-NEXT: str h0, [x8] ; CHECK-NEXT: ret entry: %0 = call <32 x i1> @llvm.loop.dependence.war.mask.v32i1(ptr %a, ptr %b, i64 4) @@ -586,70 +586,70 @@ entry: define <32 x i1> @whilewr_64_expand4(ptr %a, ptr %b) { ; CHECK-LABEL: whilewr_64_expand4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x9, x1, x0 +; CHECK-NEXT: subs x10, x1, x0 ; CHECK-NEXT: index z0.d, #0, #1 -; CHECK-NEXT: add x10, x9, #7 -; CHECK-NEXT: sub x11, x9, #121 -; CHECK-NEXT: csel x10, x10, 
x9, mi -; CHECK-NEXT: subs x9, x9, #128 -; CHECK-NEXT: csel x9, x11, x9, mi -; CHECK-NEXT: asr x10, x10, #3 -; CHECK-NEXT: asr x9, x9, #3 -; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: add x9, x10, #7 +; CHECK-NEXT: sub x12, x10, #121 +; CHECK-NEXT: csel x9, x9, x10, mi +; CHECK-NEXT: asr x11, x9, #3 ; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z3.d, z0.d ; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: cmp x11, #1 +; CHECK-NEXT: dup v1.2d, x11 ; CHECK-NEXT: mov z5.d, z0.d +; CHECK-NEXT: cset w9, lt +; CHECK-NEXT: subs x10, x10, #128 ; CHECK-NEXT: mov z6.d, z0.d -; CHECK-NEXT: dup v7.2d, x10 -; CHECK-NEXT: dup v16.2d, x9 -; CHECK-NEXT: add z1.d, z1.d, #12 // =0xc -; CHECK-NEXT: add z2.d, z2.d, #10 // =0xa -; CHECK-NEXT: cmp x9, #1 -; CHECK-NEXT: add z3.d, z3.d, #8 // =0x8 -; CHECK-NEXT: add z4.d, z4.d, #6 // =0x6 -; CHECK-NEXT: add z5.d, z5.d, #4 // =0x4 -; CHECK-NEXT: add z6.d, z6.d, #2 // =0x2 -; CHECK-NEXT: cmhi v17.2d, v7.2d, v0.2d +; CHECK-NEXT: csel x10, x12, x10, mi +; CHECK-NEXT: mov z7.d, z0.d +; CHECK-NEXT: add z2.d, z2.d, #12 // =0xc +; CHECK-NEXT: asr x10, x10, #3 +; CHECK-NEXT: add z3.d, z3.d, #10 // =0xa +; CHECK-NEXT: add z4.d, z4.d, #8 // =0x8 +; CHECK-NEXT: add z5.d, z5.d, #6 // =0x6 +; CHECK-NEXT: add z6.d, z6.d, #4 // =0x4 +; CHECK-NEXT: cmhi v17.2d, v1.2d, v0.2d +; CHECK-NEXT: dup v16.2d, x10 +; CHECK-NEXT: add z7.d, z7.d, #2 // =0x2 +; CHECK-NEXT: cmhi v19.2d, v1.2d, v2.2d +; CHECK-NEXT: cmhi v20.2d, v1.2d, v3.2d +; CHECK-NEXT: cmhi v21.2d, v1.2d, v4.2d +; CHECK-NEXT: cmp x10, #1 +; CHECK-NEXT: cmhi v22.2d, v1.2d, v5.2d +; CHECK-NEXT: cset w10, lt ; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d ; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe -; CHECK-NEXT: cmhi v19.2d, v7.2d, v1.2d -; CHECK-NEXT: cmhi v20.2d, v7.2d, v2.2d -; CHECK-NEXT: cmhi v21.2d, v7.2d, v3.2d -; CHECK-NEXT: cmhi v22.2d, v7.2d, v4.2d -; CHECK-NEXT: cmhi v23.2d, v7.2d, v5.2d -; CHECK-NEXT: cmhi v24.2d, v7.2d, v6.2d -; CHECK-NEXT: cmhi v1.2d, v16.2d, v1.2d ; CHECK-NEXT: cmhi v2.2d, v16.2d, v2.2d 
; CHECK-NEXT: cmhi v3.2d, v16.2d, v3.2d ; CHECK-NEXT: cmhi v4.2d, v16.2d, v4.2d -; CHECK-NEXT: cmhi v7.2d, v7.2d, v0.2d ; CHECK-NEXT: cmhi v5.2d, v16.2d, v5.2d -; CHECK-NEXT: cmhi v6.2d, v16.2d, v6.2d -; CHECK-NEXT: cset w9, lt -; CHECK-NEXT: cmhi v0.2d, v16.2d, v0.2d -; CHECK-NEXT: uzp1 v16.4s, v21.4s, v20.4s -; CHECK-NEXT: cmp x10, #1 -; CHECK-NEXT: uzp1 v20.4s, v23.4s, v22.4s -; CHECK-NEXT: uzp1 v17.4s, v17.4s, v24.4s -; CHECK-NEXT: cset w10, lt -; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s -; CHECK-NEXT: uzp1 v3.4s, v19.4s, v7.4s -; CHECK-NEXT: uzp1 v4.4s, v5.4s, v4.4s -; CHECK-NEXT: uzp1 v5.4s, v18.4s, v6.4s -; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp1 v1.8h, v17.8h, v20.8h -; CHECK-NEXT: uzp1 v3.8h, v16.8h, v3.8h +; CHECK-NEXT: cmhi v23.2d, v16.2d, v6.2d +; CHECK-NEXT: cmhi v24.2d, v16.2d, v7.2d +; CHECK-NEXT: cmhi v6.2d, v1.2d, v6.2d +; CHECK-NEXT: cmhi v16.2d, v16.2d, v0.2d +; CHECK-NEXT: cmhi v7.2d, v1.2d, v7.2d +; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d +; CHECK-NEXT: uzp1 v1.4s, v21.4s, v20.4s +; CHECK-NEXT: uzp1 v3.4s, v4.4s, v3.4s +; CHECK-NEXT: uzp1 v4.4s, v23.4s, v5.4s +; CHECK-NEXT: uzp1 v5.4s, v18.4s, v24.4s +; CHECK-NEXT: uzp1 v6.4s, v6.4s, v22.4s +; CHECK-NEXT: uzp1 v2.4s, v2.4s, v16.4s +; CHECK-NEXT: uzp1 v7.4s, v17.4s, v7.4s +; CHECK-NEXT: uzp1 v0.4s, v19.4s, v0.4s ; CHECK-NEXT: uzp1 v4.8h, v5.8h, v4.8h -; CHECK-NEXT: uzp1 v0.8h, v2.8h, v0.8h +; CHECK-NEXT: uzp1 v2.8h, v3.8h, v2.8h +; CHECK-NEXT: uzp1 v3.8h, v7.8h, v6.8h +; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-NEXT: uzp1 v1.16b, v4.16b, v2.16b +; CHECK-NEXT: uzp1 v0.16b, v3.16b, v0.16b ; CHECK-NEXT: dup v2.16b, w10 -; CHECK-NEXT: uzp1 v1.16b, v1.16b, v3.16b ; CHECK-NEXT: dup v3.16b, w9 ; CHECK-NEXT: adrp x9, .LCPI18_0 -; CHECK-NEXT: uzp1 v0.16b, v4.16b, v0.16b ; CHECK-NEXT: orr v1.16b, v1.16b, v2.16b -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_0] ; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_0] ; CHECK-NEXT: shl v1.16b, v1.16b, #7 ; CHECK-NEXT: 
shl v0.16b, v0.16b, #7 ; CHECK-NEXT: cmlt v1.16b, v1.16b, #0 @@ -662,8 +662,8 @@ define <32 x i1> @whilewr_64_expand4(ptr %a, ptr %b) { ; CHECK-NEXT: zip1 v0.16b, v0.16b, v3.16b ; CHECK-NEXT: addv h1, v1.8h ; CHECK-NEXT: addv h0, v0.8h -; CHECK-NEXT: str h1, [x8] -; CHECK-NEXT: str h0, [x8, #2] +; CHECK-NEXT: str h1, [x8, #2] +; CHECK-NEXT: str h0, [x8] ; CHECK-NEXT: ret entry: %0 = call <32 x i1> @llvm.loop.dependence.war.mask.v32i1(ptr %a, ptr %b, i64 8) @@ -790,11 +790,10 @@ entry: define <1 x i1> @whilewr_8_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilewr_8_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 1) @@ -804,11 +803,10 @@ entry: define <1 x i1> @whilewr_16_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilewr_16_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #1 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 2) @@ -818,11 +816,10 @@ entry: define <1 x i1> @whilewr_32_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilewr_32_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #3 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 4) @@ -832,11 +829,10 @@ entry: define <1 x i1> 
@whilewr_64_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilewr_64_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #7 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 8) @@ -846,11 +842,10 @@ entry: define <1 x i1> @whilerw_8_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilerw_8_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 1) @@ -860,11 +855,10 @@ entry: define <1 x i1> @whilerw_16_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilerw_16_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #1 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 2) @@ -874,11 +868,10 @@ entry: define <1 x i1> @whilerw_32_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilerw_32_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #3 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 4) @@ -888,11 +881,10 @@ entry: define <1 x i1> @whilerw_64_scalarize(ptr %a, ptr 
%b) { ; CHECK-LABEL: whilerw_64_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #7 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 8) diff --git a/llvm/test/CodeGen/AArch64/arm64-csel.ll b/llvm/test/CodeGen/AArch64/arm64-csel.ll index 69fad57a683ac..584e61b734527 100644 --- a/llvm/test/CodeGen/AArch64/arm64-csel.ll +++ b/llvm/test/CodeGen/AArch64/arm64-csel.ll @@ -65,7 +65,7 @@ define i32@foo5(i32 %a, i32 %b) nounwind ssp { ; CHECK-LABEL: foo5: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, mi +; CHECK-NEXT: cneg w0, w8, lt ; CHECK-NEXT: ret entry: %sub = sub nsw i32 %a, %b @@ -98,7 +98,7 @@ define i32 @foo7(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: foo7: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w9, w8, mi +; CHECK-NEXT: cneg w9, w8, lt ; CHECK-NEXT: cmn w8, #1 ; CHECK-NEXT: csel w8, w9, w0, lt ; CHECK-NEXT: csel w0, w8, w9, gt diff --git a/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll b/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll deleted file mode 100644 index 5036be9c45e69..0000000000000 --- a/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll +++ /dev/null @@ -1,112 +0,0 @@ -; RUN: llc -debug-only=isel -o /dev/null < %s 2>&1 | FileCheck %s - -; REQUIRES: asserts - -; These tests ensure that we don't combine -; CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) -; if the flags set by SUBS(SUB(x,y), 0) have more than one use. -; -; This restriction exists because combining SUBS(SUB(x,y), 0) -> SUBS(x,y) is -; only valid if there are no users of the overflow flags (C/V) generated by the -; SUBS. 
Currently, we only check the flags used by the CSEL, and therefore we -; conservatively reject cases where the SUBS's flags have other uses. - -target triple = "aarch64-unknown-linux-gnu" - -; CHECK-LABEL: Legalized selection DAG: %bb.0 'combine_subs:' -; CHECK-NEXT: SelectionDAG has 13 nodes: -; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 -; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 -; CHECK-NEXT: t5: i32 = sub t2, t4 -; CHECK-NEXT: t14: i32,i32 = AArch64ISD::SUBS t5, Constant:i32<0> -; CHECK-NEXT: t16: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t14:1 -; CHECK-NEXT: t11: ch,glue = CopyToReg t0, Register:i32 $w0, t16 -; CHECK-NEXT: t12: ch = AArch64ISD::RET_GLUE t11, Register:i32 $w0, t11:1 - -; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'combine_subs:' -; CHECK-NEXT: SelectionDAG has 11 nodes: -; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 -; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 -; CHECK-NEXT: t18: i32,i32 = AArch64ISD::SUBS t2, t4 -; CHECK-NEXT: t16: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t18:1 -; CHECK-NEXT: t11: ch,glue = CopyToReg t0, Register:i32 $w0, t16 -; CHECK-NEXT: t12: ch = AArch64ISD::RET_GLUE t11, Register:i32 $w0, t11:1 - -define i32 @combine_subs(i32 %a, i32 %b) { - %sub = sub i32 %a, %b - %cc = icmp ne i32 %sub, 0 - %sel = select i1 %cc, i32 %a, i32 %b - ret i32 %sel -} - -; CHECK-LABEL: Legalized selection DAG: %bb.0 'combine_subs_multiple_sub_uses:' -; CHECK-NEXT: SelectionDAG has 14 nodes: -; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 -; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 -; CHECK-NEXT: t5: i32 = sub t2, t4 -; CHECK-NEXT: t15: i32,i32 = AArch64ISD::SUBS t5, Constant:i32<0> -; CHECK-NEXT: t17: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t15:1 -; CHECK-NEXT: t10: i32 = add t17, t5 -; CHECK-NEXT: t12: 
ch,glue = CopyToReg t0, Register:i32 $w0, t10 -; CHECK-NEXT: t13: ch = AArch64ISD::RET_GLUE t12, Register:i32 $w0, t12:1 - -; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'combine_subs_multiple_sub_uses:' -; CHECK-NEXT: SelectionDAG has 12 nodes: -; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 -; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 -; CHECK-NEXT: t17: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t19:1 -; CHECK-NEXT: t10: i32 = add t17, t19 -; CHECK-NEXT: t12: ch,glue = CopyToReg t0, Register:i32 $w0, t10 -; CHECK-NEXT: t19: i32,i32 = AArch64ISD::SUBS t2, t4 -; CHECK-NEXT: t13: ch = AArch64ISD::RET_GLUE t12, Register:i32 $w0, t12:1 - -define i32 @combine_subs_multiple_sub_uses(i32 %a, i32 %b) { - %sub = sub i32 %a, %b - %cc = icmp ne i32 %sub, 0 - %sel = select i1 %cc, i32 %a, i32 %b - %add = add i32 %sel, %sub - ret i32 %add -} - -; CHECK-LABEL: Legalized selection DAG: %bb.0 'do_not_combine_subs_multiple_flag_uses:' -; CHECK-NEXT: SelectionDAG has 19 nodes: -; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 -; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 -; CHECK-NEXT: t24: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t21:1 -; CHECK-NEXT: t6: i32,ch = CopyFromReg t0, Register:i32 %2 -; CHECK-NEXT: t8: i32,ch = CopyFromReg t0, Register:i32 %3 -; CHECK-NEXT: t23: i32 = AArch64ISD::CSEL t6, t8, Constant:i32<1>, t21:1 -; CHECK-NEXT: t15: i32 = add t24, t23 -; CHECK-NEXT: t17: ch,glue = CopyToReg t0, Register:i32 $w0, t15 -; CHECK-NEXT: t9: i32 = sub t2, t4 -; CHECK-NEXT: t21: i32,i32 = AArch64ISD::SUBS t9, Constant:i32<0> -; CHECK-NEXT: t18: ch = AArch64ISD::RET_GLUE t17, Register:i32 $w0, t17:1 - -; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'do_not_combine_subs_multiple_flag_uses:' -; CHECK-NEXT: SelectionDAG has 19 nodes: -; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, 
Register:i32 %0 -; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 -; CHECK-NEXT: t24: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t21:1 -; CHECK-NEXT: t6: i32,ch = CopyFromReg t0, Register:i32 %2 -; CHECK-NEXT: t8: i32,ch = CopyFromReg t0, Register:i32 %3 -; CHECK-NEXT: t23: i32 = AArch64ISD::CSEL t6, t8, Constant:i32<1>, t21:1 -; CHECK-NEXT: t15: i32 = add t24, t23 -; CHECK-NEXT: t17: ch,glue = CopyToReg t0, Register:i32 $w0, t15 -; CHECK-NEXT: t9: i32 = sub t2, t4 -; CHECK-NEXT: t21: i32,i32 = AArch64ISD::SUBS t9, Constant:i32<0> -; CHECK-NEXT: t18: ch = AArch64ISD::RET_GLUE t17, Register:i32 $w0, t17:1 - -define i32 @do_not_combine_subs_multiple_flag_uses(i32 %a, i32 %b, i32 %c, i32 %d) { - %sub = sub i32 %a, %b - %cc = icmp ne i32 %sub, 0 - %sel = select i1 %cc, i32 %a, i32 %b - %other = select i1 %cc, i32 %c, i32 %d - %add = add i32 %sel, %other - ret i32 %add -} diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll index 79bba5363188b..02bbca72b5a97 100644 --- a/llvm/test/CodeGen/AArch64/midpoint-int.ll +++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll @@ -15,7 +15,7 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: subs w9, w0, w1 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w9, w9, le +; CHECK-NEXT: cneg w9, w9, lt ; CHECK-NEXT: cneg w8, w8, le ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 @@ -36,7 +36,7 @@ define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: subs w9, w0, w1 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cneg w9, w9, ls +; CHECK-NEXT: cneg w9, w9, lo ; CHECK-NEXT: cneg w8, w8, ls ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 @@ -60,7 +60,7 @@ define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind { ; CHECK-NEXT: ldr w9, [x0] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: subs w10, w9, w1 -; CHECK-NEXT: 
cneg w10, w10, le +; CHECK-NEXT: cneg w10, w10, lt ; CHECK-NEXT: cneg w8, w8, le ; CHECK-NEXT: lsr w10, w10, #1 ; CHECK-NEXT: madd w0, w10, w8, w9 @@ -83,7 +83,7 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldr w9, [x1] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: subs w9, w0, w9 -; CHECK-NEXT: cneg w9, w9, le +; CHECK-NEXT: cneg w9, w9, lt ; CHECK-NEXT: cneg w8, w8, le ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 @@ -107,7 +107,7 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldr w10, [x1] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: subs w10, w9, w10 -; CHECK-NEXT: cneg w10, w10, le +; CHECK-NEXT: cneg w10, w10, lt ; CHECK-NEXT: cneg w8, w8, le ; CHECK-NEXT: lsr w10, w10, #1 ; CHECK-NEXT: madd w0, w10, w8, w9 @@ -136,7 +136,7 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: subs x9, x0, x1 ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: cneg x9, x9, le +; CHECK-NEXT: cneg x9, x9, lt ; CHECK-NEXT: cneg x8, x8, le ; CHECK-NEXT: lsr x9, x9, #1 ; CHECK-NEXT: madd x0, x9, x8, x0 @@ -157,7 +157,7 @@ define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: subs x9, x0, x1 ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: cneg x9, x9, ls +; CHECK-NEXT: cneg x9, x9, lo ; CHECK-NEXT: cneg x8, x8, ls ; CHECK-NEXT: lsr x9, x9, #1 ; CHECK-NEXT: madd x0, x9, x8, x0 @@ -181,7 +181,7 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind { ; CHECK-NEXT: ldr x9, [x0] ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; CHECK-NEXT: subs x10, x9, x1 -; CHECK-NEXT: cneg x10, x10, le +; CHECK-NEXT: cneg x10, x10, lt ; CHECK-NEXT: cneg x8, x8, le ; CHECK-NEXT: lsr x10, x10, #1 ; CHECK-NEXT: madd x0, x10, x8, x9 @@ -204,7 +204,7 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind { ; CHECK-NEXT: 
ldr x9, [x1] ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; CHECK-NEXT: subs x9, x0, x9 -; CHECK-NEXT: cneg x9, x9, le +; CHECK-NEXT: cneg x9, x9, lt ; CHECK-NEXT: cneg x8, x8, le ; CHECK-NEXT: lsr x9, x9, #1 ; CHECK-NEXT: madd x0, x9, x8, x0 @@ -228,7 +228,7 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldr x10, [x1] ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; CHECK-NEXT: subs x10, x9, x10 -; CHECK-NEXT: cneg x10, x10, le +; CHECK-NEXT: cneg x10, x10, lt ; CHECK-NEXT: cneg x8, x8, le ; CHECK-NEXT: lsr x10, x10, #1 ; CHECK-NEXT: madd x0, x10, x8, x9 diff --git a/llvm/test/CodeGen/AArch64/pr72777.ll b/llvm/test/CodeGen/AArch64/pr72777.ll index fa9f82f8c93c2..c8966bf9ce68b 100644 --- a/llvm/test/CodeGen/AArch64/pr72777.ll +++ b/llvm/test/CodeGen/AArch64/pr72777.ll @@ -5,9 +5,9 @@ define i64 @f(i64 %0, i64 %1) { ; CHECK-LABEL: f: ; CHECK: // %bb.0: ; CHECK-NEXT: orr x8, x1, #0x1 -; CHECK-NEXT: add x9, x0, x0 ; CHECK-NEXT: mov x10, #-9223372036854775808 // =0x8000000000000000 -; CHECK-NEXT: adds x8, x8, x9 +; CHECK-NEXT: add x8, x8, x0 +; CHECK-NEXT: adds x8, x8, x0 ; CHECK-NEXT: lsl x9, x8, #1 ; CHECK-NEXT: cinv x10, x10, pl ; CHECK-NEXT: cmp x8, x9, asr #1