diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9a247bb5a83d9..352afe7a0b145 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -18371,9 +18371,83 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
   return SDValue();
 }
 
+// Fold a CMOV whose flags come from comparing a CMOV of two distinct constants
+// against one of those constants. CMOV operands are (FalseVal, TrueVal, CC,
+// Flags), so with inner = (CMOV x, y, cc2, flags):
+//   (CMOV l, r, EQ, (CMP inner, x)) -> (CMOV r, l, cc2, flags)
+//   (CMOV l, r, EQ, (CMP inner, y)) -> (CMOV r, l, !cc2, flags)
+//   (CMOV l, r, NE, (CMP inner, x)) -> (CMOV r, l, !cc2, flags)
+//   (CMOV l, r, NE, (CMP inner, y)) -> (CMOV r, l, cc2, flags)
+// Returns an empty SDValue when the pattern does not match.
+static SDValue foldCMOVOfCMOV(SDNode *Op, SelectionDAG &DAG) {
+  SDValue L = Op->getOperand(0);
+  SDValue R = Op->getOperand(1);
+  ARMCC::CondCodes OpCC =
+      static_cast<ARMCC::CondCodes>(Op->getConstantOperandVal(2));
+
+  // The outer flags must be produced by an integer compare.
+  SDValue OpCmp = Op->getOperand(3);
+  if (OpCmp.getOpcode() != ARMISD::CMPZ && OpCmp.getOpcode() != ARMISD::CMP)
+    return SDValue();
+
+  SDValue CmpLHS = OpCmp.getOperand(0);
+  SDValue CmpRHS = OpCmp.getOperand(1);
+
+  // Canonicalize so that the inner CMOV is CmpLHS.
+  if (CmpRHS.getOpcode() == ARMISD::CMOV)
+    std::swap(CmpLHS, CmpRHS);
+  else if (CmpLHS.getOpcode() != ARMISD::CMOV)
+    return SDValue();
+
+  SDValue X = CmpLHS->getOperand(0);
+  SDValue Y = CmpLHS->getOperand(1);
+  if (!isa<ConstantSDNode>(X) || !isa<ConstantSDNode>(Y) || X == Y)
+    return SDValue();
+
+  // Distinct constant nodes may still hold equal values, so compare the values
+  // themselves before relying on x != y.
+  ConstantSDNode *CX = cast<ConstantSDNode>(X);
+  ConstantSDNode *CY = cast<ConstantSDNode>(Y);
+  if (CX->getAPIntValue() == CY->getAPIntValue())
+    return SDValue();
+
+  ARMCC::CondCodes CC =
+      static_cast<ARMCC::CondCodes>(CmpLHS->getConstantOperandVal(2));
+  SDValue CondFlags = CmpLHS->getOperand(3);
+
+  if (CmpRHS == Y) {
+    // Comparing against the second constant instead of the first flips which
+    // inner outcome makes the compare equal, so invert the inner condition.
+    CC = ARMCC::getOppositeCondition(CC);
+  } else if (CmpRHS != X) {
+    return SDValue();
+  }
+
+  if (OpCC == ARMCC::NE) {
+    // Outer NE inverts the sense.
+    CC = ARMCC::getOppositeCondition(CC);
+  } else if (OpCC != ARMCC::EQ) {
+    return SDValue();
+  }
+
+  SDLoc DL(Op);
+  EVT VT = Op->getValueType(0);
+  // CMOV takes (falseVal, trueVal, CC, Flags), hence the (R, L) operand order
+  // for the folded node.
+  SDValue CCValue = DAG.getConstant(CC, DL, FlagsVT);
+  return DAG.getNode(ARMISD::CMOV, DL, VT, R, L, CCValue, CondFlags);
+}
+
 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
 SDValue
 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
+  // CMOV x, x, cc -> x
+  if (N->getOperand(0) == N->getOperand(1))
+    return N->getOperand(0);
+
+  if (SDValue R = foldCMOVOfCMOV(N, DAG))
+    return R;
+
   SDValue Cmp = N->getOperand(3);
   if (Cmp.getOpcode() != ARMISD::CMPZ)
     // Only looking at EQ and NE cases.
@@ -19951,14 +20025,14 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
   }
     break;
   case ARMISD::CMOV: {
-    // Bits are known zero/one if known on the LHS and RHS.
-    Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
-    if (Known.isUnknown())
-      return;
-
-    KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
-    Known = Known.intersectWith(KnownRHS);
-    return;
+    // A bit is known only if both selected values agree on it, so an unknown
+    // first value makes analysing the second one pointless.
+    Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+    if (Known.isUnknown())
+      break;
+    Known = Known.intersectWith(
+        DAG.computeKnownBits(Op->getOperand(1), Depth + 1));
+    break;
   }
   case ISD::INTRINSIC_W_CHAIN: {
     Intrinsic::ID IntID =
diff --git a/llvm/test/CodeGen/ARM/addsubo-legalization.ll b/llvm/test/CodeGen/ARM/addsubo-legalization.ll
index 5ebb115791c66..dbda2c78d8111 100644
--- a/llvm/test/CodeGen/ARM/addsubo-legalization.ll
+++ b/llvm/test/CodeGen/ARM/addsubo-legalization.ll
@@ -20,19 +20,17 @@ define <2 x i1> @uaddo(ptr %ptr, ptr %ptr2) {
 ; CHECK-NEXT: vmov r4, r5, d17
 ; CHECK-NEXT: subs.w r3, lr, r3
 ; CHECK-NEXT: sbcs.w r2, r12, r2
+; CHECK-NEXT: mov.w r3, #-1
 ; CHECK-NEXT: mov.w r2, #0
 ; CHECK-NEXT: it lo
 ; CHECK-NEXT: movlo r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r2, #-1
-; CHECK-NEXT: subs r3, r4, r6
-; CHECK-NEXT: sbcs.w r3, r5, r7
 ; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r2, r3
+; CHECK-NEXT: subs r6, r4, r6
+; CHECK-NEXT: sbcs.w
r7, r5, r7 +; CHECK-NEXT: itt lo ; CHECK-NEXT: movlo r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r1, #-1 +; CHECK-NEXT: movlo r1, r3 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0] ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc} @@ -59,19 +57,17 @@ define <2 x i1> @usubo(ptr %ptr, ptr %ptr2) { ; CHECK-NEXT: vmov r6, r7, d17 ; CHECK-NEXT: subs.w r3, lr, r3 ; CHECK-NEXT: sbcs.w r2, r12, r2 +; CHECK-NEXT: mov.w r3, #-1 ; CHECK-NEXT: mov.w r2, #0 ; CHECK-NEXT: it lo ; CHECK-NEXT: movlo r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r2, #-1 -; CHECK-NEXT: subs r3, r4, r6 -; CHECK-NEXT: sbcs.w r3, r5, r7 ; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r2, r3 +; CHECK-NEXT: subs r6, r4, r6 +; CHECK-NEXT: sbcs.w r7, r5, r7 +; CHECK-NEXT: itt lo ; CHECK-NEXT: movlo r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r1, #-1 +; CHECK-NEXT: movlo r1, r3 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0] ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc} diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll index f633315822cc3..72ea6994be33b 100644 --- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll +++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll @@ -8339,12 +8339,12 @@ define i64 @test_max_i64() { ; CHECK-ARM8-NEXT: mov r9, r1 ; CHECK-ARM8-NEXT: rsbs r0, r2, #1 ; CHECK-ARM8-NEXT: rscs r0, r1, #0 -; CHECK-ARM8-NEXT: mov r0, #0 -; CHECK-ARM8-NEXT: movwlt r0, #1 +; CHECK-ARM8-NEXT: mov r3, #0 +; CHECK-ARM8-NEXT: movwlt r3, #1 +; CHECK-ARM8-NEXT: mov r0, r1 +; CHECK-ARM8-NEXT: movge r0, r3 ; CHECK-ARM8-NEXT: mov r10, #1 ; CHECK-ARM8-NEXT: movlt r10, r2 -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: movne r0, r1 ; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 ; CHECK-ARM8-NEXT: mov r11, r0 ; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 @@ -8406,12 +8406,12 @@ define i64 
@test_max_i64() { ; CHECK-ARM6-NEXT: mov r9, r1 ; CHECK-ARM6-NEXT: rsbs r0, r2, #1 ; CHECK-ARM6-NEXT: rscs r0, r1, #0 -; CHECK-ARM6-NEXT: mov r0, #0 -; CHECK-ARM6-NEXT: movlt r0, #1 +; CHECK-ARM6-NEXT: mov r3, #0 +; CHECK-ARM6-NEXT: movlt r3, #1 +; CHECK-ARM6-NEXT: mov r0, r1 +; CHECK-ARM6-NEXT: movge r0, r3 ; CHECK-ARM6-NEXT: mov r10, #1 ; CHECK-ARM6-NEXT: movlt r10, r2 -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: movne r0, r1 ; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 ; CHECK-ARM6-NEXT: mov r11, r0 ; CHECK-ARM6-NEXT: ldr r6, .LCPI40_0 @@ -8474,18 +8474,18 @@ define i64 @test_max_i64() { ; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload ; CHECK-THUMB7-NEXT: rsbs.w r0, r2, #1 -; CHECK-THUMB7-NEXT: mov.w r0, #0 -; CHECK-THUMB7-NEXT: sbcs.w r3, r0, r1 +; CHECK-THUMB7-NEXT: mov.w r3, #0 +; CHECK-THUMB7-NEXT: sbcs.w r0, r3, r1 ; CHECK-THUMB7-NEXT: it lt -; CHECK-THUMB7-NEXT: movlt r0, #1 +; CHECK-THUMB7-NEXT: movlt r3, #1 ; CHECK-THUMB7-NEXT: mov r8, r2 ; CHECK-THUMB7-NEXT: mov r9, r1 +; CHECK-THUMB7-NEXT: mov r0, r1 +; CHECK-THUMB7-NEXT: it ge +; CHECK-THUMB7-NEXT: movge r0, r3 ; CHECK-THUMB7-NEXT: mov.w r10, #1 ; CHECK-THUMB7-NEXT: it lt ; CHECK-THUMB7-NEXT: movlt r10, r2 -; CHECK-THUMB7-NEXT: cmp r0, #0 -; CHECK-THUMB7-NEXT: it ne -; CHECK-THUMB7-NEXT: movne r0, r1 ; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 ; CHECK-THUMB7-NEXT: mov r11, r0 ; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 @@ -8545,85 +8545,82 @@ define i64 @test_max_i64() { ; CHECK-THUMB8BASE: @ %bb.0: @ %entry ; CHECK-THUMB8BASE-NEXT: .save {r4, lr} ; CHECK-THUMB8BASE-NEXT: push {r4, lr} -; CHECK-THUMB8BASE-NEXT: .pad #72 -; CHECK-THUMB8BASE-NEXT: sub sp, #72 +; CHECK-THUMB8BASE-NEXT: .pad #64 +; CHECK-THUMB8BASE-NEXT: sub sp, #64 ; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64 ; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64 ; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4] ; 
CHECK-THUMB8BASE-NEXT: ldr r1, [r1] -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: b .LBB40_1 ; CHECK-THUMB8BASE-NEXT: .LBB40_1: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #56] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #60] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r2, [sp, #36] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r3, [sp, #40] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #48] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #52] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r3, [sp, #32] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: movs r1, #0 -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #44] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #36] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: movs r0, #1 -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #48] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #40] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: subs r3, r0, r3 ; CHECK-THUMB8BASE-NEXT: sbcs r1, r2 -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: blt .LBB40_3 ; CHECK-THUMB8BASE-NEXT: @ %bb.2: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: .LBB40_3: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, 
[sp, #52] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: blt .LBB40_5 +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: bge .LBB40_5 ; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: .LBB40_5: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #32] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r2, [sp, #20] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB40_7 +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #32] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: blt .LBB40_7 ; CHECK-THUMB8BASE-NEXT: @ %bb.6: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: .LBB40_7: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1 +; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte 
Reload +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #32] @ 4-byte Reload ; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64] -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68] +; CHECK-THUMB8BASE-NEXT: str r4, [sp, #56] +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #60] ; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4] ; CHECK-THUMB8BASE-NEXT: str r0, [sp] ; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64 ; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64 -; CHECK-THUMB8BASE-NEXT: add r1, sp, #64 +; CHECK-THUMB8BASE-NEXT: add r1, sp, #56 ; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8 ; CHECK-THUMB8BASE-NEXT: mov r2, r0 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68] -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64] -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #56] +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #12] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: cmp r2, #0 -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: beq .LBB40_1 ; CHECK-THUMB8BASE-NEXT: b .LBB40_8 ; CHECK-THUMB8BASE-NEXT: .LBB40_8: @ %atomicrmw.end -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: add sp, #72 +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; 
CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: add sp, #64 ; CHECK-THUMB8BASE-NEXT: pop {r4, pc} entry: %0 = atomicrmw max ptr @atomic_i64, i64 1 monotonic @@ -8652,12 +8649,12 @@ define i64 @test_min_i64() { ; CHECK-ARM8-NEXT: mov r9, r1 ; CHECK-ARM8-NEXT: subs r0, r2, #2 ; CHECK-ARM8-NEXT: sbcs r0, r1, #0 -; CHECK-ARM8-NEXT: mov r0, #0 -; CHECK-ARM8-NEXT: movwlt r0, #1 +; CHECK-ARM8-NEXT: mov r3, #0 +; CHECK-ARM8-NEXT: movwlt r3, #1 +; CHECK-ARM8-NEXT: mov r0, r1 +; CHECK-ARM8-NEXT: movge r0, r3 ; CHECK-ARM8-NEXT: mov r10, #1 ; CHECK-ARM8-NEXT: movlt r10, r2 -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: movne r0, r1 ; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 ; CHECK-ARM8-NEXT: mov r11, r0 ; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 @@ -8719,12 +8716,12 @@ define i64 @test_min_i64() { ; CHECK-ARM6-NEXT: mov r9, r1 ; CHECK-ARM6-NEXT: subs r0, r2, #2 ; CHECK-ARM6-NEXT: sbcs r0, r1, #0 -; CHECK-ARM6-NEXT: mov r0, #0 -; CHECK-ARM6-NEXT: movlt r0, #1 +; CHECK-ARM6-NEXT: mov r3, #0 +; CHECK-ARM6-NEXT: movlt r3, #1 +; CHECK-ARM6-NEXT: mov r0, r1 +; CHECK-ARM6-NEXT: movge r0, r3 ; CHECK-ARM6-NEXT: mov r10, #1 ; CHECK-ARM6-NEXT: movlt r10, r2 -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: movne r0, r1 ; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 ; CHECK-ARM6-NEXT: mov r11, r0 ; CHECK-ARM6-NEXT: ldr r6, .LCPI41_0 @@ -8790,15 +8787,14 @@ define i64 @test_min_i64() { ; CHECK-THUMB7-NEXT: mov r9, r1 ; CHECK-THUMB7-NEXT: subs r0, r2, #2 ; CHECK-THUMB7-NEXT: sbcs r0, r1, #0 -; CHECK-THUMB7-NEXT: mov.w r0, #0 -; CHECK-THUMB7-NEXT: it lt -; CHECK-THUMB7-NEXT: movlt r0, #1 +; CHECK-THUMB7-NEXT: mov.w r3, #0 +; CHECK-THUMB7-NEXT: mov r0, r1 +; CHECK-THUMB7-NEXT: ite lt +; CHECK-THUMB7-NEXT: movlt r3, #1 +; CHECK-THUMB7-NEXT: movge r0, r3 ; CHECK-THUMB7-NEXT: mov.w r10, #1 ; CHECK-THUMB7-NEXT: it lt ; CHECK-THUMB7-NEXT: movlt r10, r2 -; CHECK-THUMB7-NEXT: cmp r0, #0 -; CHECK-THUMB7-NEXT: it ne -; 
CHECK-THUMB7-NEXT: movne r0, r1 ; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 ; CHECK-THUMB7-NEXT: mov r11, r0 ; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 @@ -8858,85 +8854,82 @@ define i64 @test_min_i64() { ; CHECK-THUMB8BASE: @ %bb.0: @ %entry ; CHECK-THUMB8BASE-NEXT: .save {r4, lr} ; CHECK-THUMB8BASE-NEXT: push {r4, lr} -; CHECK-THUMB8BASE-NEXT: .pad #72 -; CHECK-THUMB8BASE-NEXT: sub sp, #72 +; CHECK-THUMB8BASE-NEXT: .pad #64 +; CHECK-THUMB8BASE-NEXT: sub sp, #64 ; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64 ; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64 ; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4] ; CHECK-THUMB8BASE-NEXT: ldr r1, [r1] -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: b .LBB41_1 ; CHECK-THUMB8BASE-NEXT: .LBB41_1: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #56] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #60] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #36] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r3, [sp, #40] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #48] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r3, [sp, #32] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: movs r0, #1 -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #36] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: movs r2, #0 -; CHECK-THUMB8BASE-NEXT: str r2, [sp, #48] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r2, [sp, #40] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: subs r3, r3, #2 ; CHECK-THUMB8BASE-NEXT: sbcs r1, r2 -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill +; 
CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: blt .LBB41_3 ; CHECK-THUMB8BASE-NEXT: @ %bb.2: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: .LBB41_3: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: blt .LBB41_5 +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: bge .LBB41_5 ; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: .LBB41_5: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #32] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r2, [sp, #20] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB41_7 +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #32] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: 
str r0, [sp, #20] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: blt .LBB41_7 ; CHECK-THUMB8BASE-NEXT: @ %bb.6: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: .LBB41_7: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1 +; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #32] @ 4-byte Reload ; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64] -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68] +; CHECK-THUMB8BASE-NEXT: str r4, [sp, #56] +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #60] ; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4] ; CHECK-THUMB8BASE-NEXT: str r0, [sp] ; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64 ; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64 -; CHECK-THUMB8BASE-NEXT: add r1, sp, #64 +; CHECK-THUMB8BASE-NEXT: add r1, sp, #56 ; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8 ; CHECK-THUMB8BASE-NEXT: mov r2, r0 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68] -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64] -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #56] +; CHECK-THUMB8BASE-NEXT: str r1, 
[sp, #12] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: cmp r2, #0 -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: beq .LBB41_1 ; CHECK-THUMB8BASE-NEXT: b .LBB41_8 ; CHECK-THUMB8BASE-NEXT: .LBB41_8: @ %atomicrmw.end -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: add sp, #72 +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: add sp, #64 ; CHECK-THUMB8BASE-NEXT: pop {r4, pc} entry: %0 = atomicrmw min ptr @atomic_i64, i64 1 monotonic @@ -8965,12 +8958,12 @@ define i64 @test_umax_i64() { ; CHECK-ARM8-NEXT: mov r9, r1 ; CHECK-ARM8-NEXT: rsbs r0, r2, #1 ; CHECK-ARM8-NEXT: rscs r0, r1, #0 -; CHECK-ARM8-NEXT: mov r0, #0 -; CHECK-ARM8-NEXT: movwlo r0, #1 +; CHECK-ARM8-NEXT: mov r3, #0 +; CHECK-ARM8-NEXT: movwlo r3, #1 +; CHECK-ARM8-NEXT: mov r0, r1 +; CHECK-ARM8-NEXT: movhs r0, r3 ; CHECK-ARM8-NEXT: mov r10, #1 ; CHECK-ARM8-NEXT: movlo r10, r2 -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: movne r0, r1 ; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 ; CHECK-ARM8-NEXT: mov r11, r0 ; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 @@ -9032,12 +9025,12 @@ define i64 @test_umax_i64() { ; CHECK-ARM6-NEXT: mov r9, r1 ; CHECK-ARM6-NEXT: rsbs r0, r2, #1 ; CHECK-ARM6-NEXT: rscs r0, r1, #0 -; CHECK-ARM6-NEXT: mov r0, #0 -; CHECK-ARM6-NEXT: movlo r0, #1 +; CHECK-ARM6-NEXT: mov r3, #0 +; CHECK-ARM6-NEXT: movlo r3, #1 +; CHECK-ARM6-NEXT: mov r0, r1 +; CHECK-ARM6-NEXT: movhs r0, r3 ; CHECK-ARM6-NEXT: mov r10, #1 ; CHECK-ARM6-NEXT: movlo r10, r2 -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: movne r0, r1 ; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 ; 
CHECK-ARM6-NEXT: mov r11, r0 ; CHECK-ARM6-NEXT: ldr r6, .LCPI42_0 @@ -9100,18 +9093,18 @@ define i64 @test_umax_i64() { ; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload ; CHECK-THUMB7-NEXT: rsbs.w r0, r2, #1 -; CHECK-THUMB7-NEXT: mov.w r0, #0 -; CHECK-THUMB7-NEXT: sbcs.w r3, r0, r1 +; CHECK-THUMB7-NEXT: mov.w r3, #0 +; CHECK-THUMB7-NEXT: sbcs.w r0, r3, r1 ; CHECK-THUMB7-NEXT: it lo -; CHECK-THUMB7-NEXT: movlo r0, #1 +; CHECK-THUMB7-NEXT: movlo r3, #1 ; CHECK-THUMB7-NEXT: mov r8, r2 ; CHECK-THUMB7-NEXT: mov r9, r1 +; CHECK-THUMB7-NEXT: mov r0, r1 +; CHECK-THUMB7-NEXT: it hs +; CHECK-THUMB7-NEXT: movhs r0, r3 ; CHECK-THUMB7-NEXT: mov.w r10, #1 ; CHECK-THUMB7-NEXT: it lo ; CHECK-THUMB7-NEXT: movlo r10, r2 -; CHECK-THUMB7-NEXT: cmp r0, #0 -; CHECK-THUMB7-NEXT: it ne -; CHECK-THUMB7-NEXT: movne r0, r1 ; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 ; CHECK-THUMB7-NEXT: mov r11, r0 ; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 @@ -9171,85 +9164,82 @@ define i64 @test_umax_i64() { ; CHECK-THUMB8BASE: @ %bb.0: @ %entry ; CHECK-THUMB8BASE-NEXT: .save {r4, lr} ; CHECK-THUMB8BASE-NEXT: push {r4, lr} -; CHECK-THUMB8BASE-NEXT: .pad #72 -; CHECK-THUMB8BASE-NEXT: sub sp, #72 +; CHECK-THUMB8BASE-NEXT: .pad #64 +; CHECK-THUMB8BASE-NEXT: sub sp, #64 ; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64 ; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64 ; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4] ; CHECK-THUMB8BASE-NEXT: ldr r1, [r1] -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: b .LBB42_1 ; CHECK-THUMB8BASE-NEXT: .LBB42_1: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #56] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r2, 
[sp, #60] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r2, [sp, #36] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r3, [sp, #40] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #48] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #52] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r3, [sp, #32] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: movs r1, #0 -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #44] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #36] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: movs r0, #1 -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #48] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #40] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: subs r3, r0, r3 ; CHECK-THUMB8BASE-NEXT: sbcs r1, r2 -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: blo .LBB42_3 ; CHECK-THUMB8BASE-NEXT: @ %bb.2: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: .LBB42_3: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: blo .LBB42_5 +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: bhs .LBB42_5 ; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload -; 
CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: .LBB42_5: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #32] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r2, [sp, #20] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB42_7 +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #32] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: blo .LBB42_7 ; CHECK-THUMB8BASE-NEXT: @ %bb.6: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: .LBB42_7: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1 +; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #32] @ 4-byte Reload ; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64] -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68] +; 
CHECK-THUMB8BASE-NEXT: str r4, [sp, #56] +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #60] ; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4] ; CHECK-THUMB8BASE-NEXT: str r0, [sp] ; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64 ; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64 -; CHECK-THUMB8BASE-NEXT: add r1, sp, #64 +; CHECK-THUMB8BASE-NEXT: add r1, sp, #56 ; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8 ; CHECK-THUMB8BASE-NEXT: mov r2, r0 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68] -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64] -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #56] +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #12] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: cmp r2, #0 -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: beq .LBB42_1 ; CHECK-THUMB8BASE-NEXT: b .LBB42_8 ; CHECK-THUMB8BASE-NEXT: .LBB42_8: @ %atomicrmw.end -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: add sp, #72 +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: add sp, #64 ; CHECK-THUMB8BASE-NEXT: pop {r4, pc} entry: %0 = atomicrmw umax ptr @atomic_i64, i64 1 monotonic @@ -9278,12 +9268,12 @@ define i64 @test_umin_i64() { ; CHECK-ARM8-NEXT: mov r9, r1 ; CHECK-ARM8-NEXT: subs r0, r2, #2 ; CHECK-ARM8-NEXT: sbcs r0, r1, #0 -; CHECK-ARM8-NEXT: mov r0, #0 -; CHECK-ARM8-NEXT: movwlo r0, #1 +; CHECK-ARM8-NEXT: mov r3, #0 +; CHECK-ARM8-NEXT: movwlo r3, #1 +; CHECK-ARM8-NEXT: mov r0, r1 +; 
CHECK-ARM8-NEXT: movhs r0, r3 ; CHECK-ARM8-NEXT: mov r10, #1 ; CHECK-ARM8-NEXT: movlo r10, r2 -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: movne r0, r1 ; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 ; CHECK-ARM8-NEXT: mov r11, r0 ; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64 @@ -9345,12 +9335,12 @@ define i64 @test_umin_i64() { ; CHECK-ARM6-NEXT: mov r9, r1 ; CHECK-ARM6-NEXT: subs r0, r2, #2 ; CHECK-ARM6-NEXT: sbcs r0, r1, #0 -; CHECK-ARM6-NEXT: mov r0, #0 -; CHECK-ARM6-NEXT: movlo r0, #1 +; CHECK-ARM6-NEXT: mov r3, #0 +; CHECK-ARM6-NEXT: movlo r3, #1 +; CHECK-ARM6-NEXT: mov r0, r1 +; CHECK-ARM6-NEXT: movhs r0, r3 ; CHECK-ARM6-NEXT: mov r10, #1 ; CHECK-ARM6-NEXT: movlo r10, r2 -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: movne r0, r1 ; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 ; CHECK-ARM6-NEXT: mov r11, r0 ; CHECK-ARM6-NEXT: ldr r6, .LCPI43_0 @@ -9416,15 +9406,14 @@ define i64 @test_umin_i64() { ; CHECK-THUMB7-NEXT: mov r9, r1 ; CHECK-THUMB7-NEXT: subs r0, r2, #2 ; CHECK-THUMB7-NEXT: sbcs r0, r1, #0 -; CHECK-THUMB7-NEXT: mov.w r0, #0 -; CHECK-THUMB7-NEXT: it lo -; CHECK-THUMB7-NEXT: movlo r0, #1 +; CHECK-THUMB7-NEXT: mov.w r3, #0 +; CHECK-THUMB7-NEXT: mov r0, r1 +; CHECK-THUMB7-NEXT: ite lo +; CHECK-THUMB7-NEXT: movlo r3, #1 +; CHECK-THUMB7-NEXT: movhs r0, r3 ; CHECK-THUMB7-NEXT: mov.w r10, #1 ; CHECK-THUMB7-NEXT: it lo ; CHECK-THUMB7-NEXT: movlo r10, r2 -; CHECK-THUMB7-NEXT: cmp r0, #0 -; CHECK-THUMB7-NEXT: it ne -; CHECK-THUMB7-NEXT: movne r0, r1 ; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 ; CHECK-THUMB7-NEXT: mov r11, r0 ; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64 @@ -9484,85 +9473,82 @@ define i64 @test_umin_i64() { ; CHECK-THUMB8BASE: @ %bb.0: @ %entry ; CHECK-THUMB8BASE-NEXT: .save {r4, lr} ; CHECK-THUMB8BASE-NEXT: push {r4, lr} -; CHECK-THUMB8BASE-NEXT: .pad #72 -; CHECK-THUMB8BASE-NEXT: sub sp, #72 +; CHECK-THUMB8BASE-NEXT: .pad #64 +; CHECK-THUMB8BASE-NEXT: sub sp, #64 ; 
CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64 ; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64 ; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4] ; CHECK-THUMB8BASE-NEXT: ldr r1, [r1] -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: b .LBB43_1 ; CHECK-THUMB8BASE-NEXT: .LBB43_1: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #56] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #60] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #36] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r3, [sp, #40] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #48] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r3, [sp, #32] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: movs r0, #1 -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #36] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: movs r2, #0 -; CHECK-THUMB8BASE-NEXT: str r2, [sp, #48] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r2, [sp, #40] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: subs r3, r3, #2 ; CHECK-THUMB8BASE-NEXT: sbcs r1, r2 -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: blo .LBB43_3 ; CHECK-THUMB8BASE-NEXT: @ %bb.2: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: .LBB43_3: @ %atomicrmw.start ; 
CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: blo .LBB43_5 +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: bhs .LBB43_5 ; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: .LBB43_5: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #32] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r2, [sp, #20] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB43_7 +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #32] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: blo .LBB43_7 ; CHECK-THUMB8BASE-NEXT: @ %bb.6: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: .LBB43_7: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: 
@ in Loop: Header=BB43_1 Depth=1 +; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #32] @ 4-byte Reload ; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64] -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68] +; CHECK-THUMB8BASE-NEXT: str r4, [sp, #56] +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #60] ; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4] ; CHECK-THUMB8BASE-NEXT: str r0, [sp] ; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64 ; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64 -; CHECK-THUMB8BASE-NEXT: add r1, sp, #64 +; CHECK-THUMB8BASE-NEXT: add r1, sp, #56 ; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8 ; CHECK-THUMB8BASE-NEXT: mov r2, r0 -; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68] -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64] -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #60] +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #56] +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #12] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: cmp r2, #0 -; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill +; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: beq .LBB43_1 ; CHECK-THUMB8BASE-NEXT: b .LBB43_8 ; CHECK-THUMB8BASE-NEXT: .LBB43_8: @ %atomicrmw.end -; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; 
CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-THUMB8BASE-NEXT: add sp, #72 +; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-THUMB8BASE-NEXT: add sp, #64 ; CHECK-THUMB8BASE-NEXT: pop {r4, pc} entry: %0 = atomicrmw umin ptr @atomic_i64, i64 1 monotonic diff --git a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll index 16b7403bdb932..3be25dc2c3e77 100644 --- a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll +++ b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll @@ -473,21 +473,21 @@ define i32 @icmp64_ule_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV6M-NEXT: ldr r0, [sp, #28] ; CHECKV6M-NEXT: lsls r0, r0, #31 ; CHECKV6M-NEXT: ldr r6, .LCPI5_0 -; CHECKV6M-NEXT: ldr r5, [sp, #24] -; CHECKV6M-NEXT: ldr r0, [sp, #20] +; CHECKV6M-NEXT: ldr r0, [sp, #24] +; CHECKV6M-NEXT: ldr r5, [sp, #20] ; CHECKV6M-NEXT: beq .LBB5_6 ; CHECKV6M-NEXT: @ %bb.1: @ %then ; CHECKV6M-NEXT: movs r7, #0 ; CHECKV6M-NEXT: subs r2, r2, r6 ; CHECKV6M-NEXT: sbcs r3, r7 ; CHECKV6M-NEXT: mov r2, r0 -; CHECKV6M-NEXT: blo .LBB5_3 +; CHECKV6M-NEXT: bhs .LBB5_3 ; CHECKV6M-NEXT: @ %bb.2: @ %then ; CHECKV6M-NEXT: mov r2, r5 ; CHECKV6M-NEXT: .LBB5_3: @ %then ; CHECKV6M-NEXT: subs r3, r4, r6 ; CHECKV6M-NEXT: sbcs r1, r7 -; CHECKV6M-NEXT: blo .LBB5_5 +; CHECKV6M-NEXT: bhs .LBB5_5 ; CHECKV6M-NEXT: @ %bb.4: @ %then ; CHECKV6M-NEXT: mov r0, r5 ; CHECKV6M-NEXT: .LBB5_5: @ %then @@ -497,7 +497,7 @@ define i32 @icmp64_ule_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV6M-NEXT: movs r1, #0 ; CHECKV6M-NEXT: subs r2, r2, r6 ; CHECKV6M-NEXT: sbcs r3, r1 -; CHECKV6M-NEXT: blo .LBB5_8 +; CHECKV6M-NEXT: bhs .LBB5_8 ; CHECKV6M-NEXT: @ %bb.7: @ %else ; CHECKV6M-NEXT: mov r0, r5 ; CHECKV6M-NEXT: .LBB5_8: @ %else @@ -516,25 +516,25 @@ define i32 @icmp64_ule_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV7M-NEXT: movs r4, #1 ; CHECKV7M-NEXT: movt r4, #2 ; CHECKV7M-NEXT: lsls r0, r0, 
#31 -; CHECKV7M-NEXT: ldrd lr, r0, [sp, #8] +; CHECKV7M-NEXT: ldrd r0, lr, [sp, #8] ; CHECKV7M-NEXT: beq .LBB5_2 ; CHECKV7M-NEXT: @ %bb.1: @ %then ; CHECKV7M-NEXT: subs r2, r2, r4 ; CHECKV7M-NEXT: sbcs r2, r3, #0 ; CHECKV7M-NEXT: mov r2, r0 -; CHECKV7M-NEXT: it lo -; CHECKV7M-NEXT: movlo r2, lr +; CHECKV7M-NEXT: it hs +; CHECKV7M-NEXT: movhs r2, lr ; CHECKV7M-NEXT: subs.w r3, r12, r4 ; CHECKV7M-NEXT: sbcs r1, r1, #0 -; CHECKV7M-NEXT: it lo -; CHECKV7M-NEXT: movlo r0, lr +; CHECKV7M-NEXT: it hs +; CHECKV7M-NEXT: movhs r0, lr ; CHECKV7M-NEXT: add r0, r2 ; CHECKV7M-NEXT: pop {r4, pc} ; CHECKV7M-NEXT: .LBB5_2: @ %else ; CHECKV7M-NEXT: subs r1, r2, r4 ; CHECKV7M-NEXT: sbcs r1, r3, #0 -; CHECKV7M-NEXT: it lo -; CHECKV7M-NEXT: movlo r0, lr +; CHECKV7M-NEXT: it hs +; CHECKV7M-NEXT: movhs r0, lr ; CHECKV7M-NEXT: pop {r4, pc} ; ; CHECKV7A-LABEL: icmp64_ule_m1: @@ -543,7 +543,7 @@ define i32 @icmp64_ule_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV7A-NEXT: push {r4, lr} ; CHECKV7A-NEXT: ldr r4, [sp, #16] ; CHECKV7A-NEXT: mov r12, r0 -; CHECKV7A-NEXT: ldrd lr, r0, [sp, #8] +; CHECKV7A-NEXT: ldrd r0, lr, [sp, #8] ; CHECKV7A-NEXT: lsls r4, r4, #31 ; CHECKV7A-NEXT: movw r4, #1 ; CHECKV7A-NEXT: movt r4, #2 @@ -552,19 +552,19 @@ define i32 @icmp64_ule_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV7A-NEXT: subs r2, r2, r4 ; CHECKV7A-NEXT: sbcs r2, r3, #0 ; CHECKV7A-NEXT: mov r2, r0 -; CHECKV7A-NEXT: it lo -; CHECKV7A-NEXT: movlo r2, lr +; CHECKV7A-NEXT: it hs +; CHECKV7A-NEXT: movhs r2, lr ; CHECKV7A-NEXT: subs.w r3, r12, r4 ; CHECKV7A-NEXT: sbcs r1, r1, #0 -; CHECKV7A-NEXT: it lo -; CHECKV7A-NEXT: movlo r0, lr +; CHECKV7A-NEXT: it hs +; CHECKV7A-NEXT: movhs r0, lr ; CHECKV7A-NEXT: add r0, r2 ; CHECKV7A-NEXT: pop {r4, pc} ; CHECKV7A-NEXT: .LBB5_2: @ %else ; CHECKV7A-NEXT: subs r1, r2, r4 ; CHECKV7A-NEXT: sbcs r1, r3, #0 -; CHECKV7A-NEXT: it lo -; CHECKV7A-NEXT: movlo r0, lr +; CHECKV7A-NEXT: it hs +; CHECKV7A-NEXT: movhs r0, lr ; CHECKV7A-NEXT: pop {r4, pc} br i1 %c, 
label %then, label %else then: @@ -590,21 +590,21 @@ define i32 @icmp64_uge_m2(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV6M-NEXT: lsls r6, r0, #17 ; CHECKV6M-NEXT: ldr r0, [sp, #28] ; CHECKV6M-NEXT: lsls r0, r0, #31 -; CHECKV6M-NEXT: ldr r5, [sp, #24] -; CHECKV6M-NEXT: ldr r0, [sp, #20] +; CHECKV6M-NEXT: ldr r0, [sp, #24] +; CHECKV6M-NEXT: ldr r5, [sp, #20] ; CHECKV6M-NEXT: beq .LBB6_6 ; CHECKV6M-NEXT: @ %bb.1: @ %then ; CHECKV6M-NEXT: movs r7, #0 ; CHECKV6M-NEXT: subs r2, r2, r6 ; CHECKV6M-NEXT: sbcs r3, r7 ; CHECKV6M-NEXT: mov r2, r0 -; CHECKV6M-NEXT: bhs .LBB6_3 +; CHECKV6M-NEXT: blo .LBB6_3 ; CHECKV6M-NEXT: @ %bb.2: @ %then ; CHECKV6M-NEXT: mov r2, r5 ; CHECKV6M-NEXT: .LBB6_3: @ %then ; CHECKV6M-NEXT: subs r3, r4, r6 ; CHECKV6M-NEXT: sbcs r1, r7 -; CHECKV6M-NEXT: bhs .LBB6_5 +; CHECKV6M-NEXT: blo .LBB6_5 ; CHECKV6M-NEXT: @ %bb.4: @ %then ; CHECKV6M-NEXT: mov r0, r5 ; CHECKV6M-NEXT: .LBB6_5: @ %then @@ -614,7 +614,7 @@ define i32 @icmp64_uge_m2(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV6M-NEXT: movs r1, #0 ; CHECKV6M-NEXT: subs r2, r2, r6 ; CHECKV6M-NEXT: sbcs r3, r1 -; CHECKV6M-NEXT: bhs .LBB6_8 +; CHECKV6M-NEXT: blo .LBB6_8 ; CHECKV6M-NEXT: @ %bb.7: @ %else ; CHECKV6M-NEXT: mov r0, r5 ; CHECKV6M-NEXT: .LBB6_8: @ %else @@ -692,8 +692,8 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV6M-NEXT: ldr r0, [sp, #28] ; CHECKV6M-NEXT: lsls r0, r0, #31 ; CHECKV6M-NEXT: ldr r6, .LCPI7_0 -; CHECKV6M-NEXT: ldr r5, [sp, #24] -; CHECKV6M-NEXT: ldr r0, [sp, #20] +; CHECKV6M-NEXT: ldr r0, [sp, #24] +; CHECKV6M-NEXT: ldr r5, [sp, #20] ; CHECKV6M-NEXT: beq .LBB7_6 ; CHECKV6M-NEXT: @ %bb.1: @ %then ; CHECKV6M-NEXT: movs r7, #0 @@ -701,13 +701,13 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV6M-NEXT: mov r2, r7 ; CHECKV6M-NEXT: sbcs r2, r3 ; CHECKV6M-NEXT: mov r2, r0 -; CHECKV6M-NEXT: blo .LBB7_3 +; CHECKV6M-NEXT: bhs .LBB7_3 ; CHECKV6M-NEXT: @ %bb.2: @ %then ; CHECKV6M-NEXT: mov r2, r5 ; CHECKV6M-NEXT: 
.LBB7_3: @ %then ; CHECKV6M-NEXT: subs r3, r6, r4 ; CHECKV6M-NEXT: sbcs r7, r1 -; CHECKV6M-NEXT: blo .LBB7_5 +; CHECKV6M-NEXT: bhs .LBB7_5 ; CHECKV6M-NEXT: @ %bb.4: @ %then ; CHECKV6M-NEXT: mov r0, r5 ; CHECKV6M-NEXT: .LBB7_5: @ %then @@ -717,7 +717,7 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV6M-NEXT: movs r1, #0 ; CHECKV6M-NEXT: subs r2, r6, r2 ; CHECKV6M-NEXT: sbcs r1, r3 -; CHECKV6M-NEXT: blo .LBB7_8 +; CHECKV6M-NEXT: bhs .LBB7_8 ; CHECKV6M-NEXT: @ %bb.7: @ %else ; CHECKV6M-NEXT: mov r0, r5 ; CHECKV6M-NEXT: .LBB7_8: @ %else @@ -736,27 +736,27 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV7M-NEXT: movs r4, #1 ; CHECKV7M-NEXT: movt r4, #2 ; CHECKV7M-NEXT: lsls r0, r0, #31 -; CHECKV7M-NEXT: ldrd lr, r0, [sp, #16] +; CHECKV7M-NEXT: ldrd r0, lr, [sp, #16] ; CHECKV7M-NEXT: beq .LBB7_2 ; CHECKV7M-NEXT: @ %bb.1: @ %then ; CHECKV7M-NEXT: subs r2, r4, r2 ; CHECKV7M-NEXT: mov.w r5, #0 ; CHECKV7M-NEXT: sbcs.w r2, r5, r3 ; CHECKV7M-NEXT: mov r2, r0 -; CHECKV7M-NEXT: it lo -; CHECKV7M-NEXT: movlo r2, lr +; CHECKV7M-NEXT: it hs +; CHECKV7M-NEXT: movhs r2, lr ; CHECKV7M-NEXT: subs.w r3, r4, r12 ; CHECKV7M-NEXT: sbcs.w r1, r5, r1 -; CHECKV7M-NEXT: it lo -; CHECKV7M-NEXT: movlo r0, lr +; CHECKV7M-NEXT: it hs +; CHECKV7M-NEXT: movhs r0, lr ; CHECKV7M-NEXT: add r0, r2 ; CHECKV7M-NEXT: pop {r4, r5, r7, pc} ; CHECKV7M-NEXT: .LBB7_2: @ %else ; CHECKV7M-NEXT: movs r1, #0 ; CHECKV7M-NEXT: subs r2, r4, r2 ; CHECKV7M-NEXT: sbcs r1, r3 -; CHECKV7M-NEXT: it lo -; CHECKV7M-NEXT: movlo r0, lr +; CHECKV7M-NEXT: it hs +; CHECKV7M-NEXT: movhs r0, lr ; CHECKV7M-NEXT: pop {r4, r5, r7, pc} ; ; CHECKV7A-LABEL: icmp64_ugt_m1: @@ -765,7 +765,7 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV7A-NEXT: push {r4, r5, r7, lr} ; CHECKV7A-NEXT: ldr r4, [sp, #24] ; CHECKV7A-NEXT: mov r12, r0 -; CHECKV7A-NEXT: ldrd lr, r0, [sp, #16] +; CHECKV7A-NEXT: ldrd r0, lr, [sp, #16] ; CHECKV7A-NEXT: lsls r4, r4, #31 ; 
CHECKV7A-NEXT: movw r4, #1 ; CHECKV7A-NEXT: movt r4, #2 @@ -775,20 +775,20 @@ define i32 @icmp64_ugt_m1(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) { ; CHECKV7A-NEXT: mov.w r5, #0 ; CHECKV7A-NEXT: sbcs.w r2, r5, r3 ; CHECKV7A-NEXT: mov r2, r0 -; CHECKV7A-NEXT: it lo -; CHECKV7A-NEXT: movlo r2, lr +; CHECKV7A-NEXT: it hs +; CHECKV7A-NEXT: movhs r2, lr ; CHECKV7A-NEXT: subs.w r3, r4, r12 ; CHECKV7A-NEXT: sbcs.w r1, r5, r1 -; CHECKV7A-NEXT: it lo -; CHECKV7A-NEXT: movlo r0, lr +; CHECKV7A-NEXT: it hs +; CHECKV7A-NEXT: movhs r0, lr ; CHECKV7A-NEXT: add r0, r2 ; CHECKV7A-NEXT: pop {r4, r5, r7, pc} ; CHECKV7A-NEXT: .LBB7_2: @ %else ; CHECKV7A-NEXT: movs r1, #0 ; CHECKV7A-NEXT: subs r2, r4, r2 ; CHECKV7A-NEXT: sbcs r1, r3 -; CHECKV7A-NEXT: it lo -; CHECKV7A-NEXT: movlo r0, lr +; CHECKV7A-NEXT: it hs +; CHECKV7A-NEXT: movhs r0, lr ; CHECKV7A-NEXT: pop {r4, r5, r7, pc} br i1 %c, label %then, label %else then: diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index 8ab56b228d2a7..7c61ab8a7a727 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -29,10 +29,11 @@ define i32 @stest_f64i32(double %x) { ; SOFT-NEXT: lsls r2, r2, #31 ; SOFT-NEXT: subs r4, r2, r0 ; SOFT-NEXT: sbcs r3, r1 -; SOFT-NEXT: blt .LBB0_6 +; SOFT-NEXT: bge .LBB0_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r2, r0 ; SOFT-NEXT: .LBB0_6: @ %entry +; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.7: @@ -45,16 +46,14 @@ define i32 @stest_f64i32(double %x) { ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, r1, d0 ; VFP2-NEXT: bl __aeabi_d2lz -; VFP2-NEXT: mvn r12, #-2147483648 -; VFP2-NEXT: subs.w r3, r0, r12 -; VFP2-NEXT: mov.w r2, #0 +; VFP2-NEXT: mvn r3, #-2147483648 +; VFP2-NEXT: movs r2, #0 +; VFP2-NEXT: subs r3, r0, r3 ; VFP2-NEXT: sbcs r3, r1, #0 -; VFP2-NEXT: it lt +; VFP2-NEXT: itte lt ; VFP2-NEXT: movlt r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: 
ite ne -; VFP2-NEXT: movne r2, r1 -; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: movlt r2, r1 +; VFP2-NEXT: mvnge r0, #-2147483648 ; VFP2-NEXT: mov.w r1, #-1 ; VFP2-NEXT: rsbs.w r3, r0, #-2147483648 ; VFP2-NEXT: sbcs r1, r2 @@ -87,7 +86,7 @@ define i32 @utest_f64i32(double %x) { ; SOFT-NEXT: adds r3, r0, #1 ; SOFT-NEXT: sbcs r1, r2 ; SOFT-NEXT: blo .LBB1_2 -; SOFT-NEXT: @ %bb.1: @ %entry +; SOFT-NEXT: @ %bb.1: ; SOFT-NEXT: mvns r0, r2 ; SOFT-NEXT: .LBB1_2: @ %entry ; SOFT-NEXT: pop {r7, pc} @@ -123,34 +122,27 @@ define i32 @ustest_f64i32(double %x) { ; SOFT-NEXT: .save {r4, lr} ; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: bl __aeabi_d2lz -; SOFT-NEXT: movs r2, #0 -; SOFT-NEXT: mvns r3, r2 -; SOFT-NEXT: adds r4, r0, #1 +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: movs r0, #0 +; SOFT-NEXT: mvns r3, r0 +; SOFT-NEXT: adds r4, r2, #1 ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: sbcs r4, r0 ; SOFT-NEXT: blt .LBB2_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: mov r1, r0 ; SOFT-NEXT: .LBB2_2: @ %entry ; SOFT-NEXT: blt .LBB2_4 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: mov r2, r3 ; SOFT-NEXT: .LBB2_4: @ %entry -; SOFT-NEXT: rsbs r3, r0, #0 -; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: rsbs r3, r2, #0 +; SOFT-NEXT: mov r3, r0 ; SOFT-NEXT: sbcs r3, r1 -; SOFT-NEXT: blt .LBB2_7 +; SOFT-NEXT: bge .LBB2_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB2_8 -; SOFT-NEXT: .LBB2_6: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB2_7: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB2_6 -; SOFT-NEXT: .LBB2_8: @ %entry ; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: .LBB2_6: @ %entry ; SOFT-NEXT: pop {r4, pc} ; ; VFP2-LABEL: ustest_f64i32: @@ -163,19 +155,14 @@ define i32 @ustest_f64i32(double %x) { ; VFP2-NEXT: mov.w r2, #0 ; VFP2-NEXT: sbcs r3, r1, #0 ; VFP2-NEXT: mov.w r3, #0 -; VFP2-NEXT: it lt +; VFP2-NEXT: itte lt ; VFP2-NEXT: movlt r3, #1 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: 
ite ne -; VFP2-NEXT: movne r3, r1 -; VFP2-NEXT: moveq.w r0, #-1 +; VFP2-NEXT: movlt r3, r1 +; VFP2-NEXT: movge.w r0, #-1 ; VFP2-NEXT: rsbs r1, r0, #0 ; VFP2-NEXT: sbcs.w r1, r2, r3 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: it ge +; VFP2-NEXT: movge r0, #0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f64i32: @@ -217,10 +204,11 @@ define i32 @stest_f32i32(float %x) { ; SOFT-NEXT: lsls r2, r2, #31 ; SOFT-NEXT: subs r4, r2, r0 ; SOFT-NEXT: sbcs r3, r1 -; SOFT-NEXT: blt .LBB3_6 +; SOFT-NEXT: bge .LBB3_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r2, r0 ; SOFT-NEXT: .LBB3_6: @ %entry +; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.7: @@ -252,7 +240,7 @@ define i32 @utest_f32i32(float %x) { ; SOFT-NEXT: adds r3, r0, #1 ; SOFT-NEXT: sbcs r1, r2 ; SOFT-NEXT: blo .LBB4_2 -; SOFT-NEXT: @ %bb.1: @ %entry +; SOFT-NEXT: @ %bb.1: ; SOFT-NEXT: mvns r0, r2 ; SOFT-NEXT: .LBB4_2: @ %entry ; SOFT-NEXT: pop {r7, pc} @@ -276,34 +264,27 @@ define i32 @ustest_f32i32(float %x) { ; SOFT-NEXT: .save {r4, lr} ; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: movs r2, #0 -; SOFT-NEXT: mvns r3, r2 -; SOFT-NEXT: adds r4, r0, #1 +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: movs r0, #0 +; SOFT-NEXT: mvns r3, r0 +; SOFT-NEXT: adds r4, r2, #1 ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: sbcs r4, r0 ; SOFT-NEXT: blt .LBB5_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: mov r1, r0 ; SOFT-NEXT: .LBB5_2: @ %entry ; SOFT-NEXT: blt .LBB5_4 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: mov r2, r3 ; SOFT-NEXT: .LBB5_4: @ %entry -; SOFT-NEXT: rsbs r3, r0, #0 -; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: rsbs r3, r2, #0 +; SOFT-NEXT: mov r3, r0 ; SOFT-NEXT: sbcs r3, r1 -; SOFT-NEXT: blt .LBB5_7 +; SOFT-NEXT: bge .LBB5_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: cmp 
r2, #0 -; SOFT-NEXT: beq .LBB5_8 -; SOFT-NEXT: .LBB5_6: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB5_7: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB5_6 -; SOFT-NEXT: .LBB5_8: @ %entry ; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: .LBB5_6: @ %entry ; SOFT-NEXT: pop {r4, pc} ; ; VFP-LABEL: ustest_f32i32: @@ -347,10 +328,11 @@ define i32 @stest_f16i32(half %x) { ; SOFT-NEXT: lsls r2, r2, #31 ; SOFT-NEXT: subs r4, r2, r0 ; SOFT-NEXT: sbcs r3, r1 -; SOFT-NEXT: blt .LBB6_6 +; SOFT-NEXT: bge .LBB6_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r2, r0 ; SOFT-NEXT: .LBB6_6: @ %entry +; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.7: @@ -395,7 +377,7 @@ define i32 @utesth_f16i32(half %x) { ; SOFT-NEXT: adds r3, r0, #1 ; SOFT-NEXT: sbcs r1, r2 ; SOFT-NEXT: blo .LBB7_2 -; SOFT-NEXT: @ %bb.1: @ %entry +; SOFT-NEXT: @ %bb.1: ; SOFT-NEXT: mvns r0, r2 ; SOFT-NEXT: .LBB7_2: @ %entry ; SOFT-NEXT: pop {r7, pc} @@ -432,34 +414,27 @@ define i32 @ustest_f16i32(half %x) { ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: movs r2, #0 -; SOFT-NEXT: mvns r3, r2 -; SOFT-NEXT: adds r4, r0, #1 +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: movs r0, #0 +; SOFT-NEXT: mvns r3, r0 +; SOFT-NEXT: adds r4, r2, #1 ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: sbcs r4, r0 ; SOFT-NEXT: blt .LBB8_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: mov r1, r0 ; SOFT-NEXT: .LBB8_2: @ %entry ; SOFT-NEXT: blt .LBB8_4 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: mov r2, r3 ; SOFT-NEXT: .LBB8_4: @ %entry -; SOFT-NEXT: rsbs r3, r0, #0 -; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: rsbs r3, r2, #0 +; SOFT-NEXT: mov r3, r0 ; SOFT-NEXT: sbcs r3, r1 -; SOFT-NEXT: blt .LBB8_7 +; SOFT-NEXT: bge .LBB8_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB8_8 -; SOFT-NEXT: .LBB8_6: @ %entry -; 
SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB8_7: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB8_6 -; SOFT-NEXT: .LBB8_8: @ %entry ; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: .LBB8_6: @ %entry ; SOFT-NEXT: pop {r4, pc} ; ; VFP2-LABEL: ustest_f16i32: @@ -1017,21 +992,21 @@ define i64 @stest_f64i64(double %x) { ; VFP2-NEXT: sbcs r4, r2, #0 ; VFP2-NEXT: sbcs r4, r3, #0 ; VFP2-NEXT: mov.w r4, #0 -; VFP2-NEXT: it lt +; VFP2-NEXT: itee lt ; VFP2-NEXT: movlt r4, #1 +; VFP2-NEXT: movge r3, r4 +; VFP2-NEXT: movge r2, r4 ; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: itet eq -; VFP2-NEXT: moveq r3, r4 -; VFP2-NEXT: movne r4, r2 +; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: mov.w r2, #-1 +; VFP2-NEXT: mov.w r4, #-1 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: moveq r0, r4 ; VFP2-NEXT: rsbs r5, r0, #0 ; VFP2-NEXT: mov.w lr, #-2147483648 ; VFP2-NEXT: sbcs.w r5, lr, r1 -; VFP2-NEXT: sbcs.w r4, r2, r4 -; VFP2-NEXT: sbcs r2, r3 +; VFP2-NEXT: sbcs.w r2, r4, r2 +; VFP2-NEXT: sbcs.w r2, r4, r3 ; VFP2-NEXT: itt ge ; VFP2-NEXT: movge r0, r12 ; VFP2-NEXT: movge r1, lr @@ -1048,12 +1023,12 @@ define i64 @stest_f64i64(double %x) { ; FULL-NEXT: sbcs lr, r2, #0 ; FULL-NEXT: sbcs lr, r3, #0 ; FULL-NEXT: cset lr, lt +; FULL-NEXT: csel r5, lr, r3, ge +; FULL-NEXT: csel r2, lr, r2, ge ; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne ; FULL-NEXT: mov.w r3, #-1 ; FULL-NEXT: csel r0, r0, r3, ne ; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: csel r2, r2, lr, ne ; FULL-NEXT: rsbs r4, r0, #0 ; FULL-NEXT: mov.w r12, #-2147483648 ; FULL-NEXT: sbcs.w r4, r12, r1 @@ -1273,21 +1248,21 @@ define i64 @stest_f32i64(float %x) { ; VFP2-NEXT: sbcs r4, r2, #0 ; VFP2-NEXT: sbcs r4, r3, #0 ; VFP2-NEXT: mov.w r4, #0 -; VFP2-NEXT: it lt +; VFP2-NEXT: itee lt ; VFP2-NEXT: movlt r4, #1 +; VFP2-NEXT: movge r3, r4 +; VFP2-NEXT: movge r2, r4 ; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: itet eq -; VFP2-NEXT: moveq r3, r4 -; VFP2-NEXT: movne r4, r2 +; VFP2-NEXT: it eq ; 
VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: mov.w r2, #-1 +; VFP2-NEXT: mov.w r4, #-1 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: moveq r0, r4 ; VFP2-NEXT: rsbs r5, r0, #0 ; VFP2-NEXT: mov.w lr, #-2147483648 ; VFP2-NEXT: sbcs.w r5, lr, r1 -; VFP2-NEXT: sbcs.w r4, r2, r4 -; VFP2-NEXT: sbcs r2, r3 +; VFP2-NEXT: sbcs.w r2, r4, r2 +; VFP2-NEXT: sbcs.w r2, r4, r3 ; VFP2-NEXT: itt ge ; VFP2-NEXT: movge r0, r12 ; VFP2-NEXT: movge r1, lr @@ -1304,12 +1279,12 @@ define i64 @stest_f32i64(float %x) { ; FULL-NEXT: sbcs lr, r2, #0 ; FULL-NEXT: sbcs lr, r3, #0 ; FULL-NEXT: cset lr, lt +; FULL-NEXT: csel r5, lr, r3, ge +; FULL-NEXT: csel r2, lr, r2, ge ; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne ; FULL-NEXT: mov.w r3, #-1 ; FULL-NEXT: csel r0, r0, r3, ne ; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: csel r2, r2, lr, ne ; FULL-NEXT: rsbs r4, r0, #0 ; FULL-NEXT: mov.w r12, #-2147483648 ; FULL-NEXT: sbcs.w r4, r12, r1 @@ -1534,21 +1509,21 @@ define i64 @stest_f16i64(half %x) { ; VFP2-NEXT: sbcs r4, r2, #0 ; VFP2-NEXT: sbcs r4, r3, #0 ; VFP2-NEXT: mov.w r4, #0 -; VFP2-NEXT: it lt +; VFP2-NEXT: itee lt ; VFP2-NEXT: movlt r4, #1 +; VFP2-NEXT: movge r3, r4 +; VFP2-NEXT: movge r2, r4 ; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: itet eq -; VFP2-NEXT: moveq r3, r4 -; VFP2-NEXT: movne r4, r2 +; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: mov.w r2, #-1 +; VFP2-NEXT: mov.w r4, #-1 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: moveq r0, r4 ; VFP2-NEXT: rsbs r5, r0, #0 ; VFP2-NEXT: mov.w lr, #-2147483648 ; VFP2-NEXT: sbcs.w r5, lr, r1 -; VFP2-NEXT: sbcs.w r4, r2, r4 -; VFP2-NEXT: sbcs r2, r3 +; VFP2-NEXT: sbcs.w r2, r4, r2 +; VFP2-NEXT: sbcs.w r2, r4, r3 ; VFP2-NEXT: itt ge ; VFP2-NEXT: movge r0, r12 ; VFP2-NEXT: movge r1, lr @@ -1567,12 +1542,12 @@ define i64 @stest_f16i64(half %x) { ; FULL-NEXT: sbcs lr, r2, #0 ; FULL-NEXT: sbcs lr, r3, #0 ; FULL-NEXT: cset lr, lt +; FULL-NEXT: csel r5, lr, r3, ge +; FULL-NEXT: csel r2, lr, r2, ge ; FULL-NEXT: 
cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne ; FULL-NEXT: mov.w r3, #-1 ; FULL-NEXT: csel r0, r0, r3, ne ; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: csel r2, r2, lr, ne ; FULL-NEXT: rsbs r4, r0, #0 ; FULL-NEXT: mov.w r12, #-2147483648 ; FULL-NEXT: sbcs.w r4, r12, r1 @@ -1743,37 +1718,34 @@ define i32 @stest_f64i32_mm(double %x) { ; SOFT-NEXT: bl __aeabi_d2lz ; SOFT-NEXT: movs r2, #1 ; SOFT-NEXT: movs r3, #0 -; SOFT-NEXT: ldr r4, .LCPI27_0 -; SOFT-NEXT: subs r5, r0, r4 -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: sbcs r5, r3 +; SOFT-NEXT: ldr r5, .LCPI27_0 +; SOFT-NEXT: subs r4, r0, r5 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: sbcs r4, r3 +; SOFT-NEXT: mov r4, r2 ; SOFT-NEXT: bge .LBB27_7 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: bge .LBB27_8 +; SOFT-NEXT: blt .LBB27_8 ; SOFT-NEXT: .LBB27_2: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB27_4 +; SOFT-NEXT: blt .LBB27_4 ; SOFT-NEXT: .LBB27_3: @ %entry -; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: .LBB27_4: @ %entry ; SOFT-NEXT: mvns r3, r3 -; SOFT-NEXT: lsls r2, r2, #31 -; SOFT-NEXT: subs r4, r2, r0 -; SOFT-NEXT: sbcs r3, r1 +; SOFT-NEXT: lsls r1, r2, #31 +; SOFT-NEXT: subs r2, r1, r0 +; SOFT-NEXT: sbcs r3, r4 ; SOFT-NEXT: blt .LBB27_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: .LBB27_6: @ %entry ; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB27_7: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: blt .LBB27_2 -; SOFT-NEXT: .LBB27_8: @ %entry ; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB27_3 +; SOFT-NEXT: bge .LBB27_2 +; SOFT-NEXT: .LBB27_8: @ %entry +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: bge .LBB27_3 ; SOFT-NEXT: b .LBB27_4 ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.9: @@ -1786,17 +1758,14 @@ define i32 @stest_f64i32_mm(double %x) { ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, r1, d0 ; VFP2-NEXT: bl __aeabi_d2lz -; VFP2-NEXT: mvn r2, #-2147483648 -; VFP2-NEXT: 
subs r3, r0, r2 -; VFP2-NEXT: sbcs r3, r1, #0 -; VFP2-NEXT: it ge -; VFP2-NEXT: movge r0, r2 +; VFP2-NEXT: mvn r12, #-2147483648 +; VFP2-NEXT: subs.w r3, r0, r12 ; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: it lt +; VFP2-NEXT: sbcs r3, r1, #0 +; VFP2-NEXT: itte lt ; VFP2-NEXT: movlt r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r2, r1 +; VFP2-NEXT: movlt r2, r1 +; VFP2-NEXT: movge r0, r12 ; VFP2-NEXT: mov.w r1, #-1 ; VFP2-NEXT: rsbs.w r3, r0, #-2147483648 ; VFP2-NEXT: sbcs r1, r2 @@ -1910,37 +1879,34 @@ define i32 @stest_f32i32_mm(float %x) { ; SOFT-NEXT: bl __aeabi_f2lz ; SOFT-NEXT: movs r2, #1 ; SOFT-NEXT: movs r3, #0 -; SOFT-NEXT: ldr r4, .LCPI30_0 -; SOFT-NEXT: subs r5, r0, r4 -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: sbcs r5, r3 +; SOFT-NEXT: ldr r5, .LCPI30_0 +; SOFT-NEXT: subs r4, r0, r5 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: sbcs r4, r3 +; SOFT-NEXT: mov r4, r2 ; SOFT-NEXT: bge .LBB30_7 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: bge .LBB30_8 +; SOFT-NEXT: blt .LBB30_8 ; SOFT-NEXT: .LBB30_2: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB30_4 +; SOFT-NEXT: blt .LBB30_4 ; SOFT-NEXT: .LBB30_3: @ %entry -; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: .LBB30_4: @ %entry ; SOFT-NEXT: mvns r3, r3 -; SOFT-NEXT: lsls r2, r2, #31 -; SOFT-NEXT: subs r4, r2, r0 -; SOFT-NEXT: sbcs r3, r1 +; SOFT-NEXT: lsls r1, r2, #31 +; SOFT-NEXT: subs r2, r1, r0 +; SOFT-NEXT: sbcs r3, r4 ; SOFT-NEXT: blt .LBB30_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: .LBB30_6: @ %entry ; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB30_7: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: blt .LBB30_2 -; SOFT-NEXT: .LBB30_8: @ %entry ; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB30_3 +; SOFT-NEXT: bge .LBB30_2 +; SOFT-NEXT: .LBB30_8: @ %entry +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: bge .LBB30_3 ; SOFT-NEXT: b .LBB30_4 ; SOFT-NEXT: 
.p2align 2 ; SOFT-NEXT: @ %bb.9: @@ -2030,37 +1996,34 @@ define i32 @stest_f16i32_mm(half %x) { ; SOFT-NEXT: bl __aeabi_f2lz ; SOFT-NEXT: movs r2, #1 ; SOFT-NEXT: movs r3, #0 -; SOFT-NEXT: ldr r4, .LCPI33_0 -; SOFT-NEXT: subs r5, r0, r4 -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: sbcs r5, r3 +; SOFT-NEXT: ldr r5, .LCPI33_0 +; SOFT-NEXT: subs r4, r0, r5 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: sbcs r4, r3 +; SOFT-NEXT: mov r4, r2 ; SOFT-NEXT: bge .LBB33_7 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: bge .LBB33_8 +; SOFT-NEXT: blt .LBB33_8 ; SOFT-NEXT: .LBB33_2: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB33_4 +; SOFT-NEXT: blt .LBB33_4 ; SOFT-NEXT: .LBB33_3: @ %entry -; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: .LBB33_4: @ %entry ; SOFT-NEXT: mvns r3, r3 -; SOFT-NEXT: lsls r2, r2, #31 -; SOFT-NEXT: subs r4, r2, r0 -; SOFT-NEXT: sbcs r3, r1 +; SOFT-NEXT: lsls r1, r2, #31 +; SOFT-NEXT: subs r2, r1, r0 +; SOFT-NEXT: sbcs r3, r4 ; SOFT-NEXT: blt .LBB33_6 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: .LBB33_6: @ %entry ; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB33_7: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: blt .LBB33_2 -; SOFT-NEXT: .LBB33_8: @ %entry ; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB33_3 +; SOFT-NEXT: bge .LBB33_2 +; SOFT-NEXT: .LBB33_8: @ %entry +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: bge .LBB33_3 ; SOFT-NEXT: b .LBB33_4 ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.9: @@ -2595,77 +2558,79 @@ define i64 @stest_f64i64_mm(double %x) { ; SOFT-NEXT: .pad #12 ; SOFT-NEXT: sub sp, #12 ; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: movs r0, #1 ; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: ldr r6, .LCPI45_0 -; SOFT-NEXT: adds r4, r7, #1 -; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: sbcs r4, r6 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: sbcs r4, r5 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r5 -; 
SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mvns r6, r5 +; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: ldr r7, .LCPI45_0 +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: sbcs r0, r7 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: sbcs r0, r5 +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: sbcs r0, r5 +; SOFT-NEXT: mov r7, r4 ; SOFT-NEXT: blt .LBB45_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r7, r5 ; SOFT-NEXT: .LBB45_2: @ %entry -; SOFT-NEXT: mvns r6, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB45_12 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: bge .LBB45_4 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: beq .LBB45_13 +; SOFT-NEXT: mov r0, r3 ; SOFT-NEXT: .LBB45_4: @ %entry -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: beq .LBB45_14 -; SOFT-NEXT: .LBB45_5: @ %entry -; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: bne .LBB45_7 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: blt .LBB45_12 +; SOFT-NEXT: @ %bb.5: @ %entry +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: blt .LBB45_13 ; SOFT-NEXT: .LBB45_6: @ %entry -; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bne .LBB45_8 ; SOFT-NEXT: .LBB45_7: @ %entry -; SOFT-NEXT: lsls r3, r0, #31 -; SOFT-NEXT: rsbs r4, r7, #0 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r1 -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: sbcs r4, r2 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: sbcs r6, r2 -; SOFT-NEXT: bge .LBB45_15 -; SOFT-NEXT: @ %bb.8: @ %entry -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB45_16 -; SOFT-NEXT: .LBB45_9: @ %entry -; SOFT-NEXT: bne .LBB45_11 +; SOFT-NEXT: ldr r1, .LCPI45_0 +; SOFT-NEXT: .LBB45_8: @ %entry +; SOFT-NEXT: lsls r3, r4, #31 +; SOFT-NEXT: rsbs r7, r2, #0 +; SOFT-NEXT: mov r7, r3 +; SOFT-NEXT: sbcs r7, r1 +; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: sbcs r7, r0 +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: 
sbcs r6, r0 +; SOFT-NEXT: bge .LBB45_14 +; SOFT-NEXT: @ %bb.9: @ %entry +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: blt .LBB45_15 ; SOFT-NEXT: .LBB45_10: @ %entry -; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB45_16 ; SOFT-NEXT: .LBB45_11: @ %entry -; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: add sp, #12 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB45_12: @ %entry -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: bne .LBB45_4 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: bge .LBB45_6 ; SOFT-NEXT: .LBB45_13: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB45_5 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB45_7 +; SOFT-NEXT: b .LBB45_8 ; SOFT-NEXT: .LBB45_14: @ %entry -; SOFT-NEXT: ldr r1, .LCPI45_0 -; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: beq .LBB45_6 -; SOFT-NEXT: b .LBB45_7 +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bge .LBB45_10 ; SOFT-NEXT: .LBB45_15: @ %entry -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: bne .LBB45_9 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: bne .LBB45_11 ; SOFT-NEXT: .LBB45_16: @ %entry -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: beq .LBB45_10 -; SOFT-NEXT: b .LBB45_11 +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.17: ; SOFT-NEXT: .LCPI45_0: @@ -2683,56 +2648,56 @@ define i64 @stest_f64i64_mm(double %x) { ; VFP2-NEXT: sbcs r4, r2, #0 ; VFP2-NEXT: sbcs r4, r3, #0 ; VFP2-NEXT: mov.w r4, #0 -; VFP2-NEXT: it lt +; VFP2-NEXT: iteee lt ; VFP2-NEXT: movlt r4, #1 +; VFP2-NEXT: movge r3, r4 +; VFP2-NEXT: movge r2, r4 +; VFP2-NEXT: movge.w r0, #-1 ; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: itet eq -; VFP2-NEXT: moveq r3, r4 -; VFP2-NEXT: movne r4, r2 -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: mov.w r2, #-1 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: moveq 
r1, lr ; VFP2-NEXT: rsbs r5, r0, #0 ; VFP2-NEXT: mov.w lr, #-2147483648 ; VFP2-NEXT: sbcs.w r5, lr, r1 -; VFP2-NEXT: sbcs.w r4, r2, r4 -; VFP2-NEXT: sbcs r2, r3 -; VFP2-NEXT: it lt +; VFP2-NEXT: mov.w r4, #-1 +; VFP2-NEXT: sbcs.w r2, r4, r2 +; VFP2-NEXT: sbcs.w r2, r4, r3 +; VFP2-NEXT: ite lt ; VFP2-NEXT: movlt.w r12, #1 +; VFP2-NEXT: movge r0, r12 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r1, lr ; VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FULL-LABEL: stest_f64i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; FULL-NEXT: push {r4, r5, r7, lr} +; FULL-NEXT: .save {r4, lr} +; FULL-NEXT: push {r4, lr} ; FULL-NEXT: bl __fixdfti ; FULL-NEXT: subs.w lr, r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 ; FULL-NEXT: sbcs.w lr, r1, r12 ; FULL-NEXT: sbcs lr, r2, #0 ; FULL-NEXT: sbcs lr, r3, #0 +; FULL-NEXT: it ge +; FULL-NEXT: movge.w r0, #-1 ; FULL-NEXT: cset lr, lt +; FULL-NEXT: csel r3, lr, r3, ge +; FULL-NEXT: csel r2, lr, r2, ge ; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne +; FULL-NEXT: mov.w lr, #-2147483648 ; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: csel r2, r2, lr, ne ; FULL-NEXT: rsbs r4, r0, #0 -; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 +; FULL-NEXT: sbcs.w r4, lr, r1 +; FULL-NEXT: mov.w r12, #-1 +; FULL-NEXT: sbcs.w r2, r12, r2 +; FULL-NEXT: sbcs.w r2, r12, r3 ; FULL-NEXT: cset r2, lt +; FULL-NEXT: csel r0, r2, r0, ge ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL-NEXT: csel r1, r1, lr, ne +; FULL-NEXT: pop {r4, pc} entry: %conv = fptosi double %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -2744,30 +2709,33 @@ entry: define i64 @utest_f64i64_mm(double %x) { ; SOFT-LABEL: 
utest_f64i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} +; SOFT-NEXT: .save {r4, r5, r7, lr} +; SOFT-NEXT: push {r4, r5, r7, lr} ; SOFT-NEXT: bl __fixunsdfti -; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: movs r5, #0 ; SOFT-NEXT: subs r2, r2, #1 -; SOFT-NEXT: sbcs r3, r4 -; SOFT-NEXT: blo .LBB46_4 +; SOFT-NEXT: sbcs r3, r5 +; SOFT-NEXT: bhs .LBB46_5 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB46_5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: blo .LBB46_6 ; SOFT-NEXT: .LBB46_2: @ %entry -; SOFT-NEXT: beq .LBB46_6 +; SOFT-NEXT: bhs .LBB46_4 ; SOFT-NEXT: .LBB46_3: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB46_4: -; SOFT-NEXT: movs r4, #1 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB46_2 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: .LBB46_4: @ %entry +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB46_5: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bne .LBB46_3 +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bhs .LBB46_2 ; SOFT-NEXT: .LBB46_6: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: blo .LBB46_3 +; SOFT-NEXT: b .LBB46_4 ; ; VFP2-LABEL: utest_f64i64_mm: ; VFP2: @ %bb.0: @ %entry @@ -2777,12 +2745,10 @@ define i64 @utest_f64i64_mm(double %x) { ; VFP2-NEXT: subs r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: it lo +; VFP2-NEXT: itee lo ; VFP2-NEXT: movlo.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, r12 +; VFP2-NEXT: movhs r0, r12 +; VFP2-NEXT: movhs r1, r12 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: utest_f64i64_mm: @@ -2793,9 +2759,8 @@ define i64 @utest_f64i64_mm(double %x) { ; FULL-NEXT: subs r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lo -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r2, ne +; 
FULL-NEXT: csel r0, r2, r0, hs +; FULL-NEXT: csel r1, r2, r1, hs ; FULL-NEXT: pop {r7, pc} entry: %conv = fptoui double %x to i128 @@ -2807,52 +2772,52 @@ entry: define i64 @ustest_f64i64_mm(double %x) { ; SOFT-LABEL: ustest_f64i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} +; SOFT-NEXT: .save {r4, r5, r7, lr} +; SOFT-NEXT: push {r4, r5, r7, lr} ; SOFT-NEXT: bl __fixdfti ; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: movs r5, #1 ; SOFT-NEXT: movs r1, #0 ; SOFT-NEXT: subs r2, r2, #1 ; SOFT-NEXT: mov r2, r3 ; SOFT-NEXT: sbcs r2, r1 -; SOFT-NEXT: blt .LBB47_2 +; SOFT-NEXT: bge .LBB47_8 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r2, r1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB47_3 -; SOFT-NEXT: b .LBB47_4 -; SOFT-NEXT: .LBB47_2: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB47_4 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: blt .LBB47_9 +; SOFT-NEXT: .LBB47_2: @ %entry +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: blt .LBB47_10 ; SOFT-NEXT: .LBB47_3: @ %entry -; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: bge .LBB47_5 ; SOFT-NEXT: .LBB47_4: @ %entry -; SOFT-NEXT: beq .LBB47_10 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: bne .LBB47_7 -; SOFT-NEXT: .LBB47_6: @ %entry -; SOFT-NEXT: mov r3, r2 -; SOFT-NEXT: .LBB47_7: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: .LBB47_5: @ %entry +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: bpl .LBB47_11 -; SOFT-NEXT: @ %bb.8: @ %entry +; SOFT-NEXT: @ %bb.6: @ %entry ; SOFT-NEXT: bpl .LBB47_12 +; SOFT-NEXT: .LBB47_7: @ %entry +; SOFT-NEXT: pop {r4, r5, r7, pc} +; SOFT-NEXT: .LBB47_8: @ %entry +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: bge .LBB47_2 ; SOFT-NEXT: .LBB47_9: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: bge .LBB47_3 ; SOFT-NEXT: .LBB47_10: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: beq .LBB47_6 -; 
SOFT-NEXT: b .LBB47_7 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: blt .LBB47_4 +; SOFT-NEXT: b .LBB47_5 ; SOFT-NEXT: .LBB47_11: @ %entry -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: bmi .LBB47_9 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bmi .LBB47_7 ; SOFT-NEXT: .LBB47_12: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: pop {r4, r5, r7, pc} ; ; VFP2-LABEL: ustest_f64i64_mm: ; VFP2: @ %bb.0: @ %entry @@ -2862,13 +2827,11 @@ define i64 @ustest_f64i64_mm(double %x) { ; VFP2-NEXT: subs r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: it lt +; VFP2-NEXT: iteet lt ; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itte eq -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: movne r12, r3 +; VFP2-NEXT: movge r1, r12 +; VFP2-NEXT: movge r0, r12 +; VFP2-NEXT: movlt r12, r3 ; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: itt mi ; VFP2-NEXT: movmi r0, #0 @@ -2883,10 +2846,9 @@ define i64 @ustest_f64i64_mm(double %x) { ; FULL-NEXT: subs r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r2, r3, r2, ne +; FULL-NEXT: csel r1, r2, r1, ge +; FULL-NEXT: csel r0, r2, r0, ge +; FULL-NEXT: csel r2, r2, r3, ge ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: itt mi ; FULL-NEXT: movmi r0, #0 @@ -2908,77 +2870,79 @@ define i64 @stest_f32i64_mm(float %x) { ; SOFT-NEXT: .pad #12 ; SOFT-NEXT: sub sp, #12 ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: movs r0, #1 ; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: ldr r6, .LCPI48_0 -; SOFT-NEXT: adds r4, r7, #1 -; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: sbcs r4, r6 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: sbcs r4, r5 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r5 -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mvns r6, r5 +; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: ldr r7, .LCPI48_0 +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte 
Spill +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: sbcs r0, r7 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: sbcs r0, r5 +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: sbcs r0, r5 +; SOFT-NEXT: mov r7, r4 ; SOFT-NEXT: blt .LBB48_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r7, r5 ; SOFT-NEXT: .LBB48_2: @ %entry -; SOFT-NEXT: mvns r6, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB48_12 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: bge .LBB48_4 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: beq .LBB48_13 +; SOFT-NEXT: mov r0, r3 ; SOFT-NEXT: .LBB48_4: @ %entry -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: beq .LBB48_14 -; SOFT-NEXT: .LBB48_5: @ %entry -; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: bne .LBB48_7 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: blt .LBB48_12 +; SOFT-NEXT: @ %bb.5: @ %entry +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: blt .LBB48_13 ; SOFT-NEXT: .LBB48_6: @ %entry -; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bne .LBB48_8 ; SOFT-NEXT: .LBB48_7: @ %entry -; SOFT-NEXT: lsls r3, r0, #31 -; SOFT-NEXT: rsbs r4, r7, #0 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r1 -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: sbcs r4, r2 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: sbcs r6, r2 -; SOFT-NEXT: bge .LBB48_15 -; SOFT-NEXT: @ %bb.8: @ %entry -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB48_16 -; SOFT-NEXT: .LBB48_9: @ %entry -; SOFT-NEXT: bne .LBB48_11 +; SOFT-NEXT: ldr r1, .LCPI48_0 +; SOFT-NEXT: .LBB48_8: @ %entry +; SOFT-NEXT: lsls r3, r4, #31 +; SOFT-NEXT: rsbs r7, r2, #0 +; SOFT-NEXT: mov r7, r3 +; SOFT-NEXT: sbcs r7, r1 +; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: sbcs r7, r0 +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: sbcs r6, r0 +; SOFT-NEXT: bge .LBB48_14 +; SOFT-NEXT: @ %bb.9: @ %entry +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: blt .LBB48_15 ; SOFT-NEXT: .LBB48_10: 
@ %entry -; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB48_16 ; SOFT-NEXT: .LBB48_11: @ %entry -; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: add sp, #12 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB48_12: @ %entry -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: bne .LBB48_4 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: bge .LBB48_6 ; SOFT-NEXT: .LBB48_13: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB48_5 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB48_7 +; SOFT-NEXT: b .LBB48_8 ; SOFT-NEXT: .LBB48_14: @ %entry -; SOFT-NEXT: ldr r1, .LCPI48_0 -; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: beq .LBB48_6 -; SOFT-NEXT: b .LBB48_7 +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bge .LBB48_10 ; SOFT-NEXT: .LBB48_15: @ %entry -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: bne .LBB48_9 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: bne .LBB48_11 ; SOFT-NEXT: .LBB48_16: @ %entry -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: beq .LBB48_10 -; SOFT-NEXT: b .LBB48_11 +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.17: ; SOFT-NEXT: .LCPI48_0: @@ -2996,56 +2960,56 @@ define i64 @stest_f32i64_mm(float %x) { ; VFP2-NEXT: sbcs r4, r2, #0 ; VFP2-NEXT: sbcs r4, r3, #0 ; VFP2-NEXT: mov.w r4, #0 -; VFP2-NEXT: it lt +; VFP2-NEXT: iteee lt ; VFP2-NEXT: movlt r4, #1 +; VFP2-NEXT: movge r3, r4 +; VFP2-NEXT: movge r2, r4 +; VFP2-NEXT: movge.w r0, #-1 ; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: itet eq -; VFP2-NEXT: moveq r3, r4 -; VFP2-NEXT: movne r4, r2 -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: mov.w r2, #-1 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: moveq r1, lr ; VFP2-NEXT: rsbs r5, r0, #0 ; VFP2-NEXT: mov.w lr, #-2147483648 ; VFP2-NEXT: sbcs.w r5, lr, r1 -; VFP2-NEXT: sbcs.w r4, r2, r4 -; VFP2-NEXT: 
sbcs r2, r3 -; VFP2-NEXT: it lt +; VFP2-NEXT: mov.w r4, #-1 +; VFP2-NEXT: sbcs.w r2, r4, r2 +; VFP2-NEXT: sbcs.w r2, r4, r3 +; VFP2-NEXT: ite lt ; VFP2-NEXT: movlt.w r12, #1 +; VFP2-NEXT: movge r0, r12 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r1, lr ; VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FULL-LABEL: stest_f32i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; FULL-NEXT: push {r4, r5, r7, lr} +; FULL-NEXT: .save {r4, lr} +; FULL-NEXT: push {r4, lr} ; FULL-NEXT: bl __fixsfti ; FULL-NEXT: subs.w lr, r0, #-1 ; FULL-NEXT: mvn r12, #-2147483648 ; FULL-NEXT: sbcs.w lr, r1, r12 ; FULL-NEXT: sbcs lr, r2, #0 ; FULL-NEXT: sbcs lr, r3, #0 +; FULL-NEXT: it ge +; FULL-NEXT: movge.w r0, #-1 ; FULL-NEXT: cset lr, lt +; FULL-NEXT: csel r3, lr, r3, ge +; FULL-NEXT: csel r2, lr, r2, ge ; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne +; FULL-NEXT: mov.w lr, #-2147483648 ; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: csel r2, r2, lr, ne ; FULL-NEXT: rsbs r4, r0, #0 -; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 +; FULL-NEXT: sbcs.w r4, lr, r1 +; FULL-NEXT: mov.w r12, #-1 +; FULL-NEXT: sbcs.w r2, r12, r2 +; FULL-NEXT: sbcs.w r2, r12, r3 ; FULL-NEXT: cset r2, lt +; FULL-NEXT: csel r0, r2, r0, ge ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL-NEXT: csel r1, r1, lr, ne +; FULL-NEXT: pop {r4, pc} entry: %conv = fptosi float %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -3057,30 +3021,33 @@ entry: define i64 @utest_f32i64_mm(float %x) { ; SOFT-LABEL: utest_f32i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} +; SOFT-NEXT: .save {r4, r5, r7, lr} +; SOFT-NEXT: 
push {r4, r5, r7, lr} ; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: movs r5, #0 ; SOFT-NEXT: subs r2, r2, #1 -; SOFT-NEXT: sbcs r3, r4 -; SOFT-NEXT: blo .LBB49_4 +; SOFT-NEXT: sbcs r3, r5 +; SOFT-NEXT: bhs .LBB49_5 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB49_5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: blo .LBB49_6 ; SOFT-NEXT: .LBB49_2: @ %entry -; SOFT-NEXT: beq .LBB49_6 +; SOFT-NEXT: bhs .LBB49_4 ; SOFT-NEXT: .LBB49_3: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB49_4: -; SOFT-NEXT: movs r4, #1 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB49_2 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: .LBB49_4: @ %entry +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB49_5: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bne .LBB49_3 +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bhs .LBB49_2 ; SOFT-NEXT: .LBB49_6: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: blo .LBB49_3 +; SOFT-NEXT: b .LBB49_4 ; ; VFP2-LABEL: utest_f32i64_mm: ; VFP2: @ %bb.0: @ %entry @@ -3090,12 +3057,10 @@ define i64 @utest_f32i64_mm(float %x) { ; VFP2-NEXT: subs r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: it lo +; VFP2-NEXT: itee lo ; VFP2-NEXT: movlo.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, r12 +; VFP2-NEXT: movhs r0, r12 +; VFP2-NEXT: movhs r1, r12 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: utest_f32i64_mm: @@ -3106,9 +3071,8 @@ define i64 @utest_f32i64_mm(float %x) { ; FULL-NEXT: subs r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lo -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r2, ne +; FULL-NEXT: csel r0, r2, r0, hs +; FULL-NEXT: csel r1, r2, r1, hs ; FULL-NEXT: pop {r7, pc} entry: %conv = fptoui float %x to i128 @@ -3120,52 +3084,52 @@ 
entry: define i64 @ustest_f32i64_mm(float %x) { ; SOFT-LABEL: ustest_f32i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} +; SOFT-NEXT: .save {r4, r5, r7, lr} +; SOFT-NEXT: push {r4, r5, r7, lr} ; SOFT-NEXT: bl __fixsfti ; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: movs r5, #1 ; SOFT-NEXT: movs r1, #0 ; SOFT-NEXT: subs r2, r2, #1 ; SOFT-NEXT: mov r2, r3 ; SOFT-NEXT: sbcs r2, r1 -; SOFT-NEXT: blt .LBB50_2 +; SOFT-NEXT: bge .LBB50_8 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r2, r1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB50_3 -; SOFT-NEXT: b .LBB50_4 -; SOFT-NEXT: .LBB50_2: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB50_4 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: blt .LBB50_9 +; SOFT-NEXT: .LBB50_2: @ %entry +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: blt .LBB50_10 ; SOFT-NEXT: .LBB50_3: @ %entry -; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: bge .LBB50_5 ; SOFT-NEXT: .LBB50_4: @ %entry -; SOFT-NEXT: beq .LBB50_10 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: bne .LBB50_7 -; SOFT-NEXT: .LBB50_6: @ %entry -; SOFT-NEXT: mov r3, r2 -; SOFT-NEXT: .LBB50_7: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: .LBB50_5: @ %entry +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: bpl .LBB50_11 -; SOFT-NEXT: @ %bb.8: @ %entry +; SOFT-NEXT: @ %bb.6: @ %entry ; SOFT-NEXT: bpl .LBB50_12 +; SOFT-NEXT: .LBB50_7: @ %entry +; SOFT-NEXT: pop {r4, r5, r7, pc} +; SOFT-NEXT: .LBB50_8: @ %entry +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: bge .LBB50_2 ; SOFT-NEXT: .LBB50_9: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: bge .LBB50_3 ; SOFT-NEXT: .LBB50_10: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: beq .LBB50_6 -; SOFT-NEXT: b .LBB50_7 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: blt .LBB50_4 +; SOFT-NEXT: b .LBB50_5 ; SOFT-NEXT: .LBB50_11: @ %entry -; SOFT-NEXT: mov r2, r0 -; 
SOFT-NEXT: bmi .LBB50_9 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bmi .LBB50_7 ; SOFT-NEXT: .LBB50_12: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: pop {r4, r5, r7, pc} ; ; VFP2-LABEL: ustest_f32i64_mm: ; VFP2: @ %bb.0: @ %entry @@ -3175,13 +3139,11 @@ define i64 @ustest_f32i64_mm(float %x) { ; VFP2-NEXT: subs r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: it lt +; VFP2-NEXT: iteet lt ; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itte eq -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: movne r12, r3 +; VFP2-NEXT: movge r1, r12 +; VFP2-NEXT: movge r0, r12 +; VFP2-NEXT: movlt r12, r3 ; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: itt mi ; VFP2-NEXT: movmi r0, #0 @@ -3196,10 +3158,9 @@ define i64 @ustest_f32i64_mm(float %x) { ; FULL-NEXT: subs r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r2, r3, r2, ne +; FULL-NEXT: csel r1, r2, r1, ge +; FULL-NEXT: csel r0, r2, r0, ge +; FULL-NEXT: csel r2, r2, r3, ge ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: itt mi ; FULL-NEXT: movmi r0, #0 @@ -3223,77 +3184,79 @@ define i64 @stest_f16i64_mm(half %x) { ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: movs r0, #1 ; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: ldr r6, .LCPI51_0 -; SOFT-NEXT: adds r4, r7, #1 -; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: sbcs r4, r6 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: sbcs r4, r5 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r5 -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mvns r6, r5 +; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: ldr r7, .LCPI51_0 +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: adds r0, r0, #1 +; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: sbcs r0, r7 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: sbcs r0, r5 +; SOFT-NEXT: mov r0, 
r3 +; SOFT-NEXT: sbcs r0, r5 +; SOFT-NEXT: mov r7, r4 ; SOFT-NEXT: blt .LBB51_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r7, r5 ; SOFT-NEXT: .LBB51_2: @ %entry -; SOFT-NEXT: mvns r6, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB51_12 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: bge .LBB51_4 ; SOFT-NEXT: @ %bb.3: @ %entry -; SOFT-NEXT: beq .LBB51_13 +; SOFT-NEXT: mov r0, r3 ; SOFT-NEXT: .LBB51_4: @ %entry -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: beq .LBB51_14 -; SOFT-NEXT: .LBB51_5: @ %entry -; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: bne .LBB51_7 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: blt .LBB51_12 +; SOFT-NEXT: @ %bb.5: @ %entry +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: blt .LBB51_13 ; SOFT-NEXT: .LBB51_6: @ %entry -; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bne .LBB51_8 ; SOFT-NEXT: .LBB51_7: @ %entry -; SOFT-NEXT: lsls r3, r0, #31 -; SOFT-NEXT: rsbs r4, r7, #0 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r1 -; SOFT-NEXT: mov r4, r6 -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: sbcs r4, r2 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: sbcs r6, r2 -; SOFT-NEXT: bge .LBB51_15 -; SOFT-NEXT: @ %bb.8: @ %entry -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: beq .LBB51_16 -; SOFT-NEXT: .LBB51_9: @ %entry -; SOFT-NEXT: bne .LBB51_11 +; SOFT-NEXT: ldr r1, .LCPI51_0 +; SOFT-NEXT: .LBB51_8: @ %entry +; SOFT-NEXT: lsls r3, r4, #31 +; SOFT-NEXT: rsbs r7, r2, #0 +; SOFT-NEXT: mov r7, r3 +; SOFT-NEXT: sbcs r7, r1 +; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: sbcs r7, r0 +; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: sbcs r6, r0 +; SOFT-NEXT: bge .LBB51_14 +; SOFT-NEXT: @ %bb.9: @ %entry +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: blt .LBB51_15 ; SOFT-NEXT: .LBB51_10: @ %entry -; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB51_16 ; SOFT-NEXT: .LBB51_11: @ %entry -; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: 
add sp, #12 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB51_12: @ %entry -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: bne .LBB51_4 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: bge .LBB51_6 ; SOFT-NEXT: .LBB51_13: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: bne .LBB51_5 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB51_7 +; SOFT-NEXT: b .LBB51_8 ; SOFT-NEXT: .LBB51_14: @ %entry -; SOFT-NEXT: ldr r1, .LCPI51_0 -; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: beq .LBB51_6 -; SOFT-NEXT: b .LBB51_7 +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bge .LBB51_10 ; SOFT-NEXT: .LBB51_15: @ %entry -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: bne .LBB51_9 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: bne .LBB51_11 ; SOFT-NEXT: .LBB51_16: @ %entry -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: beq .LBB51_10 -; SOFT-NEXT: b .LBB51_11 +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.17: ; SOFT-NEXT: .LCPI51_0: @@ -3314,33 +3277,32 @@ define i64 @stest_f16i64_mm(half %x) { ; VFP2-NEXT: sbcs r4, r2, #0 ; VFP2-NEXT: sbcs r4, r3, #0 ; VFP2-NEXT: mov.w r4, #0 -; VFP2-NEXT: it lt +; VFP2-NEXT: iteee lt ; VFP2-NEXT: movlt r4, #1 +; VFP2-NEXT: movge r3, r4 +; VFP2-NEXT: movge r2, r4 +; VFP2-NEXT: movge.w r0, #-1 ; VFP2-NEXT: cmp r4, #0 -; VFP2-NEXT: itet eq -; VFP2-NEXT: moveq r3, r4 -; VFP2-NEXT: movne r4, r2 -; VFP2-NEXT: moveq r1, lr -; VFP2-NEXT: mov.w r2, #-1 ; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r0, r2 +; VFP2-NEXT: moveq r1, lr ; VFP2-NEXT: rsbs r5, r0, #0 ; VFP2-NEXT: mov.w lr, #-2147483648 ; VFP2-NEXT: sbcs.w r5, lr, r1 -; VFP2-NEXT: sbcs.w r4, r2, r4 -; VFP2-NEXT: sbcs r2, r3 -; VFP2-NEXT: it lt +; VFP2-NEXT: mov.w r4, #-1 +; VFP2-NEXT: sbcs.w r2, r4, r2 +; VFP2-NEXT: sbcs.w r2, r4, r3 +; VFP2-NEXT: ite lt ; VFP2-NEXT: 
movlt.w r12, #1 +; VFP2-NEXT: movge r0, r12 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: it eq ; VFP2-NEXT: moveq r1, lr ; VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FULL-LABEL: stest_f16i64_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r4, r5, r7, lr} -; FULL-NEXT: push {r4, r5, r7, lr} +; FULL-NEXT: .save {r4, lr} +; FULL-NEXT: push {r4, lr} ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti @@ -3349,23 +3311,24 @@ define i64 @stest_f16i64_mm(half %x) { ; FULL-NEXT: sbcs.w lr, r1, r12 ; FULL-NEXT: sbcs lr, r2, #0 ; FULL-NEXT: sbcs lr, r3, #0 +; FULL-NEXT: it ge +; FULL-NEXT: movge.w r0, #-1 ; FULL-NEXT: cset lr, lt +; FULL-NEXT: csel r3, lr, r3, ge +; FULL-NEXT: csel r2, lr, r2, ge ; FULL-NEXT: cmp.w lr, #0 -; FULL-NEXT: csel r5, r3, lr, ne -; FULL-NEXT: mov.w r3, #-1 -; FULL-NEXT: csel r0, r0, r3, ne +; FULL-NEXT: mov.w lr, #-2147483648 ; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: csel r2, r2, lr, ne ; FULL-NEXT: rsbs r4, r0, #0 -; FULL-NEXT: mov.w r12, #-2147483648 -; FULL-NEXT: sbcs.w r4, r12, r1 -; FULL-NEXT: sbcs.w r2, r3, r2 -; FULL-NEXT: sbcs.w r2, r3, r5 +; FULL-NEXT: sbcs.w r4, lr, r1 +; FULL-NEXT: mov.w r12, #-1 +; FULL-NEXT: sbcs.w r2, r12, r2 +; FULL-NEXT: sbcs.w r2, r12, r3 ; FULL-NEXT: cset r2, lt +; FULL-NEXT: csel r0, r2, r0, ge ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r12, ne -; FULL-NEXT: pop {r4, r5, r7, pc} +; FULL-NEXT: csel r1, r1, lr, ne +; FULL-NEXT: pop {r4, pc} entry: %conv = fptosi half %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -3377,32 +3340,35 @@ entry: define i64 @utesth_f16i64_mm(half %x) { ; SOFT-LABEL: utesth_f16i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} +; SOFT-NEXT: .save {r4, r5, r7, lr} +; SOFT-NEXT: push {r4, r5, r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __fixunssfti -; 
SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: movs r5, #0 ; SOFT-NEXT: subs r2, r2, #1 -; SOFT-NEXT: sbcs r3, r4 -; SOFT-NEXT: blo .LBB52_4 +; SOFT-NEXT: sbcs r3, r5 +; SOFT-NEXT: bhs .LBB52_5 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB52_5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: blo .LBB52_6 ; SOFT-NEXT: .LBB52_2: @ %entry -; SOFT-NEXT: beq .LBB52_6 +; SOFT-NEXT: bhs .LBB52_4 ; SOFT-NEXT: .LBB52_3: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB52_4: -; SOFT-NEXT: movs r4, #1 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB52_2 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: .LBB52_4: @ %entry +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB52_5: @ %entry -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bne .LBB52_3 +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bhs .LBB52_2 ; SOFT-NEXT: .LBB52_6: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: blo .LBB52_3 +; SOFT-NEXT: b .LBB52_4 ; ; VFP2-LABEL: utesth_f16i64_mm: ; VFP2: @ %bb.0: @ %entry @@ -3415,12 +3381,10 @@ define i64 @utesth_f16i64_mm(half %x) { ; VFP2-NEXT: subs r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: it lo +; VFP2-NEXT: itee lo ; VFP2-NEXT: movlo.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, r12 +; VFP2-NEXT: movhs r0, r12 +; VFP2-NEXT: movhs r1, r12 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: utesth_f16i64_mm: @@ -3433,9 +3397,8 @@ define i64 @utesth_f16i64_mm(half %x) { ; FULL-NEXT: subs r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lo -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r1, r1, r2, ne +; FULL-NEXT: csel r0, r2, r0, hs +; FULL-NEXT: csel r1, r2, r1, hs ; FULL-NEXT: pop {r7, pc} entry: %conv = fptoui half %x to i128 @@ -3447,54 +3410,54 @@ entry: define i64 @ustest_f16i64_mm(half %x) { ; 
SOFT-LABEL: ustest_f16i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, lr} -; SOFT-NEXT: push {r4, lr} +; SOFT-NEXT: .save {r4, r5, r7, lr} +; SOFT-NEXT: push {r4, r5, r7, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __fixsfti ; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: movs r5, #1 ; SOFT-NEXT: movs r1, #0 ; SOFT-NEXT: subs r2, r2, #1 ; SOFT-NEXT: mov r2, r3 ; SOFT-NEXT: sbcs r2, r1 -; SOFT-NEXT: blt .LBB53_2 +; SOFT-NEXT: bge .LBB53_8 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r2, r1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB53_3 -; SOFT-NEXT: b .LBB53_4 -; SOFT-NEXT: .LBB53_2: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB53_4 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: blt .LBB53_9 +; SOFT-NEXT: .LBB53_2: @ %entry +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: blt .LBB53_10 ; SOFT-NEXT: .LBB53_3: @ %entry -; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: bge .LBB53_5 ; SOFT-NEXT: .LBB53_4: @ %entry -; SOFT-NEXT: beq .LBB53_10 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: bne .LBB53_7 -; SOFT-NEXT: .LBB53_6: @ %entry -; SOFT-NEXT: mov r3, r2 -; SOFT-NEXT: .LBB53_7: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: mov r5, r3 +; SOFT-NEXT: .LBB53_5: @ %entry +; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: bpl .LBB53_11 -; SOFT-NEXT: @ %bb.8: @ %entry +; SOFT-NEXT: @ %bb.6: @ %entry ; SOFT-NEXT: bpl .LBB53_12 +; SOFT-NEXT: .LBB53_7: @ %entry +; SOFT-NEXT: pop {r4, r5, r7, pc} +; SOFT-NEXT: .LBB53_8: @ %entry +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: bge .LBB53_2 ; SOFT-NEXT: .LBB53_9: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: bge .LBB53_3 ; SOFT-NEXT: .LBB53_10: @ %entry -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: beq .LBB53_6 -; SOFT-NEXT: b .LBB53_7 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: blt .LBB53_4 +; SOFT-NEXT: b .LBB53_5 ; SOFT-NEXT: .LBB53_11: @ %entry -; SOFT-NEXT: mov r2, r0 
-; SOFT-NEXT: bmi .LBB53_9 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bmi .LBB53_7 ; SOFT-NEXT: .LBB53_12: @ %entry -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, pc} +; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: pop {r4, r5, r7, pc} ; ; VFP2-LABEL: ustest_f16i64_mm: ; VFP2: @ %bb.0: @ %entry @@ -3507,13 +3470,11 @@ define i64 @ustest_f16i64_mm(half %x) { ; VFP2-NEXT: subs r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: it lt +; VFP2-NEXT: iteet lt ; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itte eq -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: movne r12, r3 +; VFP2-NEXT: movge r1, r12 +; VFP2-NEXT: movge r0, r12 +; VFP2-NEXT: movlt r12, r3 ; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: itt mi ; VFP2-NEXT: movmi r0, #0 @@ -3530,10 +3491,9 @@ define i64 @ustest_f16i64_mm(half %x) { ; FULL-NEXT: subs r2, #1 ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: csel r2, r3, r2, ne +; FULL-NEXT: csel r1, r2, r1, ge +; FULL-NEXT: csel r0, r2, r0, ge +; FULL-NEXT: csel r2, r2, r3, ge ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: itt mi ; FULL-NEXT: movmi r0, #0 @@ -3560,11 +3520,11 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2) ; SOFT-NEXT: movs r0, #0 ; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill ; SOFT-NEXT: mvns r0, r0 -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill ; SOFT-NEXT: movs r0, #1 ; SOFT-NEXT: lsls r1, r0, #31 -; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: lsls r7, r0, #10 ; SOFT-NEXT: b .LBB54_2 ; SOFT-NEXT: .LBB54_1: @ in Loop: Header=BB54_2 Depth=1 @@ -3585,25 +3545,25 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, 
i32 %2) ; SOFT-NEXT: mov r2, r1 ; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload ; SOFT-NEXT: sbcs r2, r3 +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload ; SOFT-NEXT: bge .LBB54_14 ; SOFT-NEXT: @ %bb.3: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: bge .LBB54_15 +; SOFT-NEXT: blt .LBB54_15 ; SOFT-NEXT: .LBB54_4: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB54_6 +; SOFT-NEXT: blt .LBB54_6 ; SOFT-NEXT: .LBB54_5: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: ldr r0, .LCPI54_0 ; SOFT-NEXT: .LBB54_6: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: subs r2, r2, r0 -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: sbcs r2, r1 -; SOFT-NEXT: blt .LBB54_8 +; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: subs r1, r3, r0 +; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: sbcs r1, r2 +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: bge .LBB54_8 ; SOFT-NEXT: @ %bb.7: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r1, r0 ; SOFT-NEXT: .LBB54_8: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: str r0, [r5] +; SOFT-NEXT: str r1, [r5] ; SOFT-NEXT: ldr r0, [r4, #4] ; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: bl __aeabi_fmul @@ -3613,40 +3573,39 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2) ; SOFT-NEXT: mov r2, r1 ; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload ; SOFT-NEXT: sbcs r2, r3 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload ; SOFT-NEXT: bge .LBB54_16 ; SOFT-NEXT: @ %bb.9: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB54_17 +; SOFT-NEXT: blt .LBB54_17 ; SOFT-NEXT: .LBB54_10: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: bne .LBB54_12 +; SOFT-NEXT: ldr r1, .LCPI54_0 +; SOFT-NEXT: bge .LBB54_12 ; SOFT-NEXT: .LBB54_11: @ in Loop: Header=BB54_2 
Depth=1 -; SOFT-NEXT: ldr r0, .LCPI54_0 +; SOFT-NEXT: mov r1, r0 ; SOFT-NEXT: .LBB54_12: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: subs r2, r2, r0 -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: sbcs r2, r1 -; SOFT-NEXT: blt .LBB54_1 -; SOFT-NEXT: @ %bb.13: @ in Loop: Header=BB54_2 Depth=1 +; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: subs r0, r3, r1 ; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: sbcs r0, r2 +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: bge .LBB54_1 +; SOFT-NEXT: @ %bb.13: @ in Loop: Header=BB54_2 Depth=1 +; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: b .LBB54_1 ; SOFT-NEXT: .LBB54_14: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: ldr r0, .LCPI54_0 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: blt .LBB54_4 -; SOFT-NEXT: .LBB54_15: @ in Loop: Header=BB54_2 Depth=1 ; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB54_5 +; SOFT-NEXT: bge .LBB54_4 +; SOFT-NEXT: .LBB54_15: @ in Loop: Header=BB54_2 Depth=1 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: bge .LBB54_5 ; SOFT-NEXT: b .LBB54_6 ; SOFT-NEXT: .LBB54_16: @ in Loop: Header=BB54_2 Depth=1 ; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB54_10 +; SOFT-NEXT: bge .LBB54_10 ; SOFT-NEXT: .LBB54_17: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: beq .LBB54_11 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: ldr r1, .LCPI54_0 +; SOFT-NEXT: blt .LBB54_11 ; SOFT-NEXT: b .LBB54_12 ; SOFT-NEXT: .LBB54_18: ; SOFT-NEXT: add sp, #20 @@ -3762,7 +3721,7 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2) ; SOFT-NEXT: lsls r7, r0, #10 ; SOFT-NEXT: b .LBB55_2 ; SOFT-NEXT: .LBB55_1: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: str r0, [r5, #4] +; SOFT-NEXT: str r1, [r5, #4] ; SOFT-NEXT: adds r4, #8 ; SOFT-NEXT: adds r5, #8 ; SOFT-NEXT: subs r7, r7, #2 @@ -3786,15 +3745,16 @@ define void 
@unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2) ; SOFT-NEXT: @ %bb.5: @ in Loop: Header=BB55_2 Depth=1 ; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: .LBB55_6: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: ldr r2, .LCPI55_0 -; SOFT-NEXT: subs r2, r0, r2 +; SOFT-NEXT: ldr r3, .LCPI55_0 +; SOFT-NEXT: subs r2, r0, r3 ; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload ; SOFT-NEXT: sbcs r1, r2 -; SOFT-NEXT: blt .LBB55_8 +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: bge .LBB55_8 ; SOFT-NEXT: @ %bb.7: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: ldr r0, .LCPI55_0 +; SOFT-NEXT: mov r1, r0 ; SOFT-NEXT: .LBB55_8: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: str r0, [r5] +; SOFT-NEXT: str r1, [r5] ; SOFT-NEXT: ldr r0, [r4, #4] ; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: bl __aeabi_fmul @@ -3811,13 +3771,14 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2) ; SOFT-NEXT: @ %bb.11: @ in Loop: Header=BB55_2 Depth=1 ; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: .LBB55_12: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: ldr r2, .LCPI55_0 -; SOFT-NEXT: subs r2, r0, r2 +; SOFT-NEXT: ldr r3, .LCPI55_0 +; SOFT-NEXT: subs r2, r0, r3 ; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload ; SOFT-NEXT: sbcs r1, r2 -; SOFT-NEXT: blt .LBB55_1 +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: bge .LBB55_1 ; SOFT-NEXT: @ %bb.13: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: ldr r0, .LCPI55_0 +; SOFT-NEXT: mov r1, r0 ; SOFT-NEXT: b .LBB55_1 ; SOFT-NEXT: .LBB55_14: ; SOFT-NEXT: add sp, #12 @@ -3920,11 +3881,12 @@ define i32 @stest_f32i32i64(float %x) { ; SOFT-NEXT: .save {r4, lr} ; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: bl __aeabi_f2lz +; SOFT-NEXT: mov r2, r0 ; SOFT-NEXT: movs r3, #0 -; SOFT-NEXT: ldr r2, .LCPI56_0 -; SOFT-NEXT: subs r4, r0, r2 +; SOFT-NEXT: ldr r0, .LCPI56_0 +; SOFT-NEXT: subs r4, r2, r0 ; SOFT-NEXT: sbcs r1, r3 -; SOFT-NEXT: blt .LBB56_2 +; SOFT-NEXT: bge .LBB56_2 ; SOFT-NEXT: @ %bb.1: @ %entry ; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: .LBB56_2: @ 
%entry @@ -3949,10 +3911,10 @@ define i32 @stest_f32i32i64(float %x) { ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_f2lz ; VFP2-NEXT: movw r2, #32767 -; VFP2-NEXT: subs r3, r0, r2 +; VFP2-NEXT: subs r2, r0, r2 ; VFP2-NEXT: sbcs r1, r1, #0 ; VFP2-NEXT: it ge -; VFP2-NEXT: movge r0, r2 +; VFP2-NEXT: movwge r0, #32767 ; VFP2-NEXT: movw r1, #32768 ; VFP2-NEXT: cmn.w r0, #32768 ; VFP2-NEXT: movt r1, #65535 @@ -3967,12 +3929,13 @@ define i32 @stest_f32i32i64(float %x) { ; FULL-NEXT: vmov r0, s0 ; FULL-NEXT: bl __aeabi_f2lz ; FULL-NEXT: movw r2, #32767 -; FULL-NEXT: subs r3, r0, r2 +; FULL-NEXT: subs r2, r0, r2 ; FULL-NEXT: sbcs r1, r1, #0 -; FULL-NEXT: csel r0, r0, r2, lt +; FULL-NEXT: it ge +; FULL-NEXT: movwge r0, #32767 ; FULL-NEXT: movw r1, #32768 -; FULL-NEXT: movt r1, #65535 ; FULL-NEXT: cmn.w r0, #32768 +; FULL-NEXT: movt r1, #65535 ; FULL-NEXT: csel r0, r0, r1, gt ; FULL-NEXT: pop {r7, pc} entry: diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index 96f009a4da02d..1d75bef771f1c 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -20,43 +20,39 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) { ; CHECK-NEXT: vmov.32 d9[0], r4 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vmov.32 d8[0], r0 -; CHECK-NEXT: mvn r3, #-2147483648 -; CHECK-NEXT: subs r4, r4, r3 +; CHECK-NEXT: mvn r12, #-2147483648 +; CHECK-NEXT: subs r3, r4, r12 ; CHECK-NEXT: adr r2, .LCPI0_0 ; CHECK-NEXT: vmov.32 d9[1], r5 -; CHECK-NEXT: sbcs r5, r5, #0 +; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: mvn r4, #0 +; CHECK-NEXT: mvn r3, #0 ; CHECK-NEXT: movwlt r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: mvnne r5, #0 -; CHECK-NEXT: subs r0, r0, r3 -; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: vmov.32 d8[1], r1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: vmov.i32 q10, #0x80000000 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movlt r5, r3 +; CHECK-NEXT: 
subs r0, r0, r12 +; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: vdup.32 d19, r5 -; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: vld1.64 {d16, d17}, [r2:128] +; CHECK-NEXT: movwlt r0, #1 ; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r0, r3 +; CHECK-NEXT: vmov.i32 q10, #0x80000000 ; CHECK-NEXT: vdup.32 d18, r0 ; CHECK-NEXT: vbit q8, q4, q9 ; CHECK-NEXT: vmov r0, r1, d17 -; CHECK-NEXT: vmov r3, r5, d16 +; CHECK-NEXT: vmov r5, r4, d16 ; CHECK-NEXT: rsbs r0, r0, #-2147483648 -; CHECK-NEXT: sbcs r0, r4, r1 +; CHECK-NEXT: sbcs r0, r3, r1 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: rsbs r1, r3, #-2147483648 -; CHECK-NEXT: sbcs r1, r4, r5 +; CHECK-NEXT: movlt r0, r3 +; CHECK-NEXT: rsbs r1, r5, #-2147483648 +; CHECK-NEXT: sbcs r1, r3, r4 ; CHECK-NEXT: vdup.32 d19, r0 ; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: movlt r2, r3 ; CHECK-NEXT: vdup.32 d18, r2 ; CHECK-NEXT: vbif q8, q10, q9 ; CHECK-NEXT: vmovn.i64 d0, q8 @@ -101,14 +97,12 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) { ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movwlo r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: mvnne r5, #0 +; CHECK-NEXT: movlo r5, r3 ; CHECK-NEXT: subs r0, r0, r3 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: vdup.32 d17, r5 ; CHECK-NEXT: movwlo r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: movlo r2, r3 ; CHECK-NEXT: vdup.32 d16, r2 ; CHECK-NEXT: vand q9, q4, q8 ; CHECK-NEXT: vorn q8, q9, q8 @@ -147,32 +141,28 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movwlt r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: mvnne r5, #0 +; CHECK-NEXT: vmov.32 d8[1], r1 +; CHECK-NEXT: movlt r5, r3 ; CHECK-NEXT: subs r0, r0, r3 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: vmov.32 d8[1], r1 +; CHECK-NEXT: vdup.32 d17, r5 ; CHECK-NEXT: mov r0, 
#0 ; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vdup.32 d17, r5 -; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: movlt r0, r3 ; CHECK-NEXT: vdup.32 d16, r0 ; CHECK-NEXT: vbsl q8, q4, q9 ; CHECK-NEXT: vmov r0, r1, d17 -; CHECK-NEXT: vmov r3, r5, d16 +; CHECK-NEXT: vmov r5, r4, d16 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: rscs r0, r1, #0 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: rsbs r1, r3, #0 -; CHECK-NEXT: rscs r1, r5, #0 +; CHECK-NEXT: movlt r0, r3 +; CHECK-NEXT: rsbs r1, r5, #0 +; CHECK-NEXT: rscs r1, r4, #0 ; CHECK-NEXT: vmov.32 d19[0], r0 ; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: movlt r2, r3 ; CHECK-NEXT: vmov.32 d18[0], r2 ; CHECK-NEXT: vand q8, q9, q8 ; CHECK-NEXT: vmovn.i64 d0, q8 @@ -195,103 +185,95 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vorr q4, q0, q0 +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: adr r1, .LCPI3_0 -; CHECK-NEXT: vld1.64 {d10, d11}, [r1:128] -; CHECK-NEXT: vmov r5, s17 -; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: adr r3, .LCPI3_0 ; CHECK-NEXT: mvn r9, #-2147483648 -; CHECK-NEXT: vmov.32 d13[0], r6 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r6, r9 -; CHECK-NEXT: vmov.32 d12[0], r0 -; CHECK-NEXT: sbcs r2, r7, #0 +; CHECK-NEXT: vld1.64 {d10, d11}, [r3:128] +; 
CHECK-NEXT: subs r3, r5, r9 +; CHECK-NEXT: sbcs r3, r6, #0 +; CHECK-NEXT: mvn r4, #0 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: vmov.32 d13[0], r2 +; CHECK-NEXT: movwlt r3, #1 ; CHECK-NEXT: vmov r8, s16 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: vmov.32 d13[1], r7 -; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: subs r0, r0, r9 -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: vdup.32 d17, r2 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: vmov.32 d12[1], r1 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vdup.32 d16, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: vorr q4, q8, q8 -; CHECK-NEXT: vbsl q4, q6, q5 +; CHECK-NEXT: vmov.32 d12[0], r5 +; CHECK-NEXT: movlt r3, r4 +; CHECK-NEXT: subs r2, r2, r9 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: vmov.32 d13[1], r1 +; CHECK-NEXT: sbcs r1, r1, #0 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: vmov.32 d12[1], r6 +; CHECK-NEXT: movlt r1, r4 +; CHECK-NEXT: vdup.32 d9, r1 +; CHECK-NEXT: vdup.32 d8, r3 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vmov.32 d13[0], r0 +; CHECK-NEXT: vmov.32 d15[0], r0 ; CHECK-NEXT: subs r0, r0, r9 +; CHECK-NEXT: vbsl q4, q6, q5 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: movlt r6, r4 ; CHECK-NEXT: vmov r11, r10, d8 -; CHECK-NEXT: vmov.32 d13[1], r1 -; CHECK-NEXT: mvnne r6, #0 -; CHECK-NEXT: vmov r5, r7, d9 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vmov.32 d12[0], r0 -; CHECK-NEXT: subs r0, r0, r9 +; CHECK-NEXT: subs r2, r0, r9 +; CHECK-NEXT: vmov.32 d15[1], r5 +; CHECK-NEXT: vmov r2, r3, d9 +; CHECK-NEXT: vmov.32 d14[0], r0 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: vdup.32 d17, r6 ; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vmov.32 d12[1], r1 -; 
CHECK-NEXT: rsbs r3, r11, #-2147483648 +; CHECK-NEXT: vdup.32 d17, r6 +; CHECK-NEXT: movlt r0, r4 +; CHECK-NEXT: vmov.32 d14[1], r1 +; CHECK-NEXT: rsbs r6, r11, #-2147483648 ; CHECK-NEXT: vdup.32 d16, r0 -; CHECK-NEXT: mvn r0, #0 -; CHECK-NEXT: vbsl q8, q6, q5 -; CHECK-NEXT: adr r1, .LCPI3_1 -; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128] -; CHECK-NEXT: sbcs r3, r0, r10 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: vmov r1, r2, d17 -; CHECK-NEXT: movwlt r3, #1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mvnne r3, #0 -; CHECK-NEXT: rsbs r6, r5, #-2147483648 -; CHECK-NEXT: vmov r6, r5, d16 -; CHECK-NEXT: sbcs r7, r0, r7 -; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: mvnne r7, #0 -; CHECK-NEXT: vdup.32 d23, r7 -; CHECK-NEXT: vdup.32 d22, r3 +; CHECK-NEXT: sbcs r6, r4, r10 +; CHECK-NEXT: vbsl q8, q7, q5 +; CHECK-NEXT: adr r0, .LCPI3_1 +; CHECK-NEXT: vld1.64 {d18, d19}, [r0:128] +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: vmov r0, r1, d17 +; CHECK-NEXT: movlt r6, r4 +; CHECK-NEXT: rsbs r2, r2, #-2147483648 +; CHECK-NEXT: sbcs r2, r4, r3 +; CHECK-NEXT: vmov r3, r5, d16 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: movlt r2, r4 +; CHECK-NEXT: vdup.32 d23, r2 +; CHECK-NEXT: vdup.32 d22, r6 ; CHECK-NEXT: vbsl q11, q4, q9 +; CHECK-NEXT: rsbs r0, r0, #-2147483648 ; CHECK-NEXT: vmovn.i64 d1, q11 -; CHECK-NEXT: rsbs r1, r1, #-2147483648 -; CHECK-NEXT: sbcs r1, r0, r2 -; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: movwlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mvnne r1, #0 -; CHECK-NEXT: rsbs r2, r6, #-2147483648 -; CHECK-NEXT: sbcs r0, r0, r5 -; CHECK-NEXT: vdup.32 d21, r1 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mvnne r4, #0 -; CHECK-NEXT: vdup.32 d20, r4 +; CHECK-NEXT: sbcs r0, r4, r1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: movlt r0, r4 +; CHECK-NEXT: vdup.32 d21, r0 +; CHECK-NEXT: rsbs r1, r3, #-2147483648 +; CHECK-NEXT: sbcs r1, 
r4, r5 +; CHECK-NEXT: movwlt r7, #1 +; CHECK-NEXT: movlt r7, r4 +; CHECK-NEXT: vdup.32 d20, r7 ; CHECK-NEXT: vbif q8, q9, q10 ; CHECK-NEXT: vmovn.i64 d0, q8 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, sp, #4 ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 4 @@ -350,34 +332,30 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movwlo r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: movlo r0, r3 ; CHECK-NEXT: subs r1, r5, r3 ; CHECK-NEXT: sbcs r1, r4, #0 ; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: movwlo r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mvnne r1, #0 +; CHECK-NEXT: movlo r1, r3 ; CHECK-NEXT: subs r7, r10, r3 ; CHECK-NEXT: sbcs r7, r8, #0 ; CHECK-NEXT: vdup.32 d19, r1 ; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: vdup.32 d18, r0 ; CHECK-NEXT: movwlo r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: mvnne r7, #0 -; CHECK-NEXT: subs r3, r6, r3 -; CHECK-NEXT: sbcs r3, r9, #0 +; CHECK-NEXT: vand q10, q5, q9 +; CHECK-NEXT: movlo r7, r3 +; CHECK-NEXT: subs r6, r6, r3 +; CHECK-NEXT: sbcs r6, r9, #0 ; CHECK-NEXT: vdup.32 d17, r7 ; CHECK-NEXT: movwlo r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: vand q10, q5, q9 +; CHECK-NEXT: vorn q9, q10, q9 +; CHECK-NEXT: movlo r2, r3 ; CHECK-NEXT: vdup.32 d16, r2 ; CHECK-NEXT: vand q11, q4, q8 -; CHECK-NEXT: vorn q9, q10, q9 -; CHECK-NEXT: vorn q8, q11, q8 ; CHECK-NEXT: vmovn.i64 d1, q9 +; CHECK-NEXT: vorn q8, q11, q8 ; CHECK-NEXT: vmovn.i64 d0, q8 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} @@ -394,100 +372,94 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; 
CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: vmov r5, s17 -; CHECK-NEXT: vmov r8, s16 -; CHECK-NEXT: vmov.32 d9[0], r6 +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: vmov r6, s17 +; CHECK-NEXT: vmov r4, s18 +; CHECK-NEXT: vmov.32 d11[0], r8 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: subs r2, r6, r9 -; CHECK-NEXT: sbcs r2, r7, #0 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vmov.32 d8[0], r0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: vmov.i64 q5, #0xffffffff -; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: subs r0, r0, r9 -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: vmov.32 d9[1], r7 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov.32 d8[1], r1 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vdup.32 d17, r2 -; CHECK-NEXT: vdup.32 d16, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: vbif q4, q5, q8 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov.32 d13[0], r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vmov r7, r10, d8 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: vmov.32 d9[0], r0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r10, r1 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r5, r9 -; CHECK-NEXT: vmov.32 d12[0], r0 -; CHECK-NEXT: sbcs r2, r6, #0 +; CHECK-NEXT: vmov.32 d9[1], r10 +; CHECK-NEXT: mvn r3, #0 +; CHECK-NEXT: subs r5, r5, r3 +; CHECK-NEXT: vmov.i64 q8, #0xffffffff +; CHECK-NEXT: vmov.32 d8[1], r7 +; CHECK-NEXT: sbcs r7, r7, #0 +; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: vmov.32 
d13[1], r6 -; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: subs r0, r0, r9 +; CHECK-NEXT: movwlt r7, #1 +; CHECK-NEXT: vmov.32 d10[0], r0 +; CHECK-NEXT: movlt r7, r3 +; CHECK-NEXT: subs r0, r0, r3 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: vdup.32 d17, r2 +; CHECK-NEXT: vmov.32 d11[1], r9 ; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: vmov.32 d12[1], r1 ; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vmov r2, r3, d9 -; CHECK-NEXT: vdup.32 d16, r0 -; CHECK-NEXT: rsbs r7, r7, #0 -; CHECK-NEXT: vbsl q8, q6, q5 -; CHECK-NEXT: rscs r7, r10, #0 -; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: vmov r0, r1, d17 -; CHECK-NEXT: mvnne r7, #0 -; CHECK-NEXT: vmov r6, r5, d16 +; CHECK-NEXT: vmov.32 d10[1], r1 +; CHECK-NEXT: movlt r0, r3 +; CHECK-NEXT: subs r1, r8, r3 +; CHECK-NEXT: sbcs r1, r9, #0 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: movlt r1, r3 +; CHECK-NEXT: subs r6, r6, r3 +; CHECK-NEXT: vdup.32 d19, r1 +; CHECK-NEXT: sbcs r6, r10, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: vdup.32 d18, r0 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: vbsl q9, q5, q8 +; CHECK-NEXT: movlt r6, r3 +; CHECK-NEXT: vdup.32 d21, r6 +; CHECK-NEXT: vmov r0, r1, d18 +; CHECK-NEXT: vdup.32 d20, r7 +; CHECK-NEXT: vbit q8, q4, q10 +; CHECK-NEXT: vmov r5, r4, d19 +; CHECK-NEXT: vmov r7, r6, d17 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: rscs r0, r1, #0 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: rsbs r1, r2, #0 -; CHECK-NEXT: rscs r1, r3, #0 -; CHECK-NEXT: vmov.32 d19[0], r0 +; CHECK-NEXT: movlt r0, r3 +; CHECK-NEXT: rsbs r1, r7, #0 +; CHECK-NEXT: rscs r1, r6, #0 +; CHECK-NEXT: vmov r7, r6, d16 ; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: movwlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mvnne r1, #0 -; CHECK-NEXT: rsbs r0, r6, #0 -; CHECK-NEXT: rscs r0, r5, 
#0 +; CHECK-NEXT: movlt r1, r3 +; CHECK-NEXT: rsbs r5, r5, #0 +; CHECK-NEXT: rscs r5, r4, #0 ; CHECK-NEXT: vmov.32 d21[0], r1 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: vmov.32 d20[0], r7 -; CHECK-NEXT: mvnne r4, #0 -; CHECK-NEXT: vmov.32 d18[0], r4 -; CHECK-NEXT: vand q10, q10, q4 -; CHECK-NEXT: vand q8, q9, q8 -; CHECK-NEXT: vmovn.i64 d1, q10 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: movwlt r5, #1 +; CHECK-NEXT: movlt r5, r3 +; CHECK-NEXT: vmov.32 d23[0], r5 +; CHECK-NEXT: vmov.32 d22[0], r0 +; CHECK-NEXT: vand q9, q11, q9 +; CHECK-NEXT: vmovn.i64 d1, q9 +; CHECK-NEXT: rsbs r1, r7, #0 +; CHECK-NEXT: rscs r1, r6, #0 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: movlt r2, r3 +; CHECK-NEXT: vmov.32 d20[0], r2 +; CHECK-NEXT: vand q8, q10, q8 ; CHECK-NEXT: vmovn.i64 d0, q8 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -506,108 +478,101 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEON-NEXT: .pad #4 ; CHECK-NEON-NEXT: sub sp, sp, #4 -; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEON-NEXT: vmov r0, s2 +; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEON-NEXT: vmov r0, s1 ; CHECK-NEON-NEXT: vmov.f32 s16, s3 -; CHECK-NEON-NEXT: vmov.f32 s18, s1 +; CHECK-NEON-NEXT: vmov.f32 s18, s2 ; CHECK-NEON-NEXT: vmov.f32 s20, s0 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r6, r0 +; CHECK-NEON-NEXT: mov r10, r0 ; CHECK-NEON-NEXT: vmov r0, s16 -; CHECK-NEON-NEXT: mov r5, r1 +; CHECK-NEON-NEXT: mov r9, r1 ; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vmov.32 d13[0], r10 ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: 
vmov r2, s18 -; CHECK-NEON-NEXT: adr r3, .LCPI6_0 -; CHECK-NEON-NEXT: vld1.64 {d8, d9}, [r3:128] -; CHECK-NEON-NEXT: mvn r9, #-2147483648 -; CHECK-NEON-NEXT: subs r3, r6, r9 -; CHECK-NEON-NEXT: mov r4, #0 -; CHECK-NEON-NEXT: sbcs r3, r5, #0 -; CHECK-NEON-NEXT: vmov.32 d15[0], r0 -; CHECK-NEON-NEXT: movwlt r4, #1 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: mvnne r4, #0 -; CHECK-NEON-NEXT: subs r0, r0, r9 -; CHECK-NEON-NEXT: sbcs r0, r1, #0 -; CHECK-NEON-NEXT: vmov.32 d14[0], r6 -; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: mov r7, r0 +; CHECK-NEON-NEXT: vmov r0, s18 +; CHECK-NEON-NEXT: mov r6, r1 +; CHECK-NEON-NEXT: adr r1, .LCPI6_0 +; CHECK-NEON-NEXT: vld1.64 {d8, d9}, [r1:128] ; CHECK-NEON-NEXT: vmov r8, s20 -; CHECK-NEON-NEXT: movwlt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: mvnne r0, #0 -; CHECK-NEON-NEXT: vmov.32 d15[1], r1 -; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: vdup.32 d11, r0 -; CHECK-NEON-NEXT: vmov.32 d14[1], r5 -; CHECK-NEON-NEXT: mov r0, r2 +; CHECK-NEON-NEXT: vmov.32 d11[0], r7 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: vdup.32 d10, r4 ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: vmov.32 d13[0], r0 -; CHECK-NEON-NEXT: subs r0, r0, r9 -; CHECK-NEON-NEXT: vbsl q5, q7, q4 +; CHECK-NEON-NEXT: vmov.32 d10[0], r0 +; CHECK-NEON-NEXT: mvn r11, #-2147483648 +; CHECK-NEON-NEXT: subs r0, r0, r11 +; CHECK-NEON-NEXT: mvn r5, #0 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 +; CHECK-NEON-NEXT: vmov.32 d11[1], r6 +; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: mov r4, #0 +; CHECK-NEON-NEXT: movwlt r0, #1 +; CHECK-NEON-NEXT: vmov.32 d10[1], r1 +; CHECK-NEON-NEXT: movlt r0, r5 +; CHECK-NEON-NEXT: subs r1, r7, r11 +; CHECK-NEON-NEXT: sbcs r1, r6, #0 ; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: mov r0, r8 +; CHECK-NEON-NEXT: mov r1, #0 +; CHECK-NEON-NEXT: movwlt r1, #1 +; CHECK-NEON-NEXT: movlt r1, r5 +; CHECK-NEON-NEXT: vdup.32 d17, r1 +; CHECK-NEON-NEXT: vdup.32 d16, r0 +; CHECK-NEON-NEXT: subs r0, r10, r11 +; 
CHECK-NEON-NEXT: vbif q5, q4, q8 +; CHECK-NEON-NEXT: sbcs r0, r9, #0 ; CHECK-NEON-NEXT: movwlt r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: vmov r11, r10, d10 -; CHECK-NEON-NEXT: vmov.32 d13[1], r1 -; CHECK-NEON-NEXT: mvnne r6, #0 -; CHECK-NEON-NEXT: vmov r5, r4, d11 +; CHECK-NEON-NEXT: mov r0, r8 +; CHECK-NEON-NEXT: movlt r6, r5 +; CHECK-NEON-NEXT: vmov r7, r10, d10 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz +; CHECK-NEON-NEXT: subs r2, r0, r11 +; CHECK-NEON-NEXT: vmov.32 d13[1], r9 +; CHECK-NEON-NEXT: vmov r2, r3, d11 ; CHECK-NEON-NEXT: vmov.32 d12[0], r0 -; CHECK-NEON-NEXT: subs r0, r0, r9 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 ; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: vdup.32 d17, r6 ; CHECK-NEON-NEXT: movwlt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: mvnne r0, #0 +; CHECK-NEON-NEXT: vdup.32 d17, r6 +; CHECK-NEON-NEXT: movlt r0, r5 ; CHECK-NEON-NEXT: vmov.32 d12[1], r1 -; CHECK-NEON-NEXT: rsbs r3, r11, #-2147483648 +; CHECK-NEON-NEXT: rsbs r7, r7, #-2147483648 ; CHECK-NEON-NEXT: vdup.32 d16, r0 -; CHECK-NEON-NEXT: mvn r0, #0 +; CHECK-NEON-NEXT: sbcs r7, r5, r10 ; CHECK-NEON-NEXT: vbsl q8, q6, q4 -; CHECK-NEON-NEXT: adr r1, .LCPI6_1 -; CHECK-NEON-NEXT: vld1.64 {d18, d19}, [r1:128] -; CHECK-NEON-NEXT: sbcs r3, r0, r10 -; CHECK-NEON-NEXT: mov r3, #0 -; CHECK-NEON-NEXT: vmov r1, r2, d17 -; CHECK-NEON-NEXT: movwlt r3, #1 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: mvnne r3, #0 -; CHECK-NEON-NEXT: rsbs r6, r5, #-2147483648 -; CHECK-NEON-NEXT: sbcs r6, r0, r4 -; CHECK-NEON-NEXT: vmov r5, r4, d16 -; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: movwlt r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: mvnne r6, #0 -; CHECK-NEON-NEXT: vdup.32 d23, r6 -; CHECK-NEON-NEXT: vdup.32 d22, r3 +; CHECK-NEON-NEXT: adr r0, .LCPI6_1 +; CHECK-NEON-NEXT: vld1.64 {d18, d19}, [r0:128] +; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: movwlt r7, #1 +; CHECK-NEON-NEXT: vmov r0, r1, d17 +; CHECK-NEON-NEXT: movlt 
r7, r5 +; CHECK-NEON-NEXT: rsbs r2, r2, #-2147483648 +; CHECK-NEON-NEXT: sbcs r2, r5, r3 +; CHECK-NEON-NEXT: vmov r3, r6, d16 +; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: movwlt r2, #1 +; CHECK-NEON-NEXT: movlt r2, r5 +; CHECK-NEON-NEXT: vdup.32 d23, r2 +; CHECK-NEON-NEXT: vdup.32 d22, r7 ; CHECK-NEON-NEXT: vbsl q11, q5, q9 +; CHECK-NEON-NEXT: rsbs r0, r0, #-2147483648 ; CHECK-NEON-NEXT: vmovn.i64 d1, q11 -; CHECK-NEON-NEXT: rsbs r1, r1, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r0, r2 -; CHECK-NEON-NEXT: mov r1, #0 -; CHECK-NEON-NEXT: movwlt r1, #1 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mvnne r1, #0 -; CHECK-NEON-NEXT: rsbs r2, r5, #-2147483648 -; CHECK-NEON-NEXT: sbcs r0, r0, r4 -; CHECK-NEON-NEXT: vdup.32 d21, r1 -; CHECK-NEON-NEXT: movwlt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: mvnne r7, #0 -; CHECK-NEON-NEXT: vdup.32 d20, r7 +; CHECK-NEON-NEXT: sbcs r0, r5, r1 +; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: movwlt r0, #1 +; CHECK-NEON-NEXT: movlt r0, r5 +; CHECK-NEON-NEXT: vdup.32 d21, r0 +; CHECK-NEON-NEXT: rsbs r1, r3, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r5, r6 +; CHECK-NEON-NEXT: movwlt r4, #1 +; CHECK-NEON-NEXT: movlt r4, r5 +; CHECK-NEON-NEXT: vdup.32 d20, r4 ; CHECK-NEON-NEXT: vbif q8, q9, q10 ; CHECK-NEON-NEXT: vmovn.i64 d0, q8 -; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEON-NEXT: add sp, sp, #4 ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEON-NEXT: .p2align 4 @@ -627,104 +592,102 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11, d12, 
d13, d14, d15} +; CHECK-FP16-NEXT: .pad #8 +; CHECK-FP16-NEXT: sub sp, sp, #8 ; CHECK-FP16-NEXT: vmov.u16 r0, d0[3] ; CHECK-FP16-NEXT: vorr d8, d0, d0 -; CHECK-FP16-NEXT: vmov.u16 r8, d0[0] ; CHECK-FP16-NEXT: vmov.u16 r9, d0[1] +; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: mov r4, r0 ; CHECK-FP16-NEXT: vmov.u16 r0, d8[2] ; CHECK-FP16-NEXT: mov r5, r1 -; CHECK-FP16-NEXT: vmov.32 d9[0], r4 +; CHECK-FP16-NEXT: vmov.32 d13[0], r4 ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: adr r2, .LCPI6_0 -; CHECK-FP16-NEXT: mvn r10, #-2147483648 +; CHECK-FP16-NEXT: mvn r8, #-2147483648 ; CHECK-FP16-NEXT: vld1.64 {d10, d11}, [r2:128] -; CHECK-FP16-NEXT: subs r2, r4, r10 +; CHECK-FP16-NEXT: subs r2, r4, r8 ; CHECK-FP16-NEXT: sbcs r2, r5, #0 -; CHECK-FP16-NEXT: vmov s0, r9 +; CHECK-FP16-NEXT: vmov s0, r7 +; CHECK-FP16-NEXT: vmov.32 d12[0], r0 ; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: vmov.32 d8[0], r0 ; CHECK-FP16-NEXT: movwlt r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: mvnne r2, #0 -; CHECK-FP16-NEXT: subs r0, r0, r10 +; CHECK-FP16-NEXT: mov r6, #0 +; CHECK-FP16-NEXT: vmov.32 d13[1], r5 +; CHECK-FP16-NEXT: mvn r5, #0 +; CHECK-FP16-NEXT: movlt r2, r5 +; CHECK-FP16-NEXT: subs r0, r0, r8 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 -; CHECK-FP16-NEXT: vmov.32 d9[1], r5 +; CHECK-FP16-NEXT: vstr s0, [sp, #4] @ 4-byte Spill ; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: mov r6, #0 +; CHECK-FP16-NEXT: vmov s0, r9 ; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: vmov.32 d8[1], r1 -; CHECK-FP16-NEXT: mvnne r0, #0 -; CHECK-FP16-NEXT: vdup.32 d17, r2 -; CHECK-FP16-NEXT: vdup.32 d16, r0 -; CHECK-FP16-NEXT: vbif q4, q5, q8 +; CHECK-FP16-NEXT: vdup.32 d9, r2 +; CHECK-FP16-NEXT: movlt r0, r5 +; CHECK-FP16-NEXT: vmov.32 d12[1], r1 +; CHECK-FP16-NEXT: vdup.32 d8, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: vmov.32 d13[0], r0 -; 
CHECK-FP16-NEXT: subs r0, r0, r10 -; CHECK-FP16-NEXT: vmov s0, r8 -; CHECK-FP16-NEXT: sbcs r0, r1, #0 +; CHECK-FP16-NEXT: vbif q6, q5, q4 +; CHECK-FP16-NEXT: vmov.32 d15[0], r0 +; CHECK-FP16-NEXT: subs r0, r0, r8 ; CHECK-FP16-NEXT: mov r7, #0 -; CHECK-FP16-NEXT: vmov r9, r8, d8 +; CHECK-FP16-NEXT: sbcs r0, r1, #0 +; CHECK-FP16-NEXT: vldr s0, [sp, #4] @ 4-byte Reload ; CHECK-FP16-NEXT: movwlt r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: vmov.32 d13[1], r1 -; CHECK-FP16-NEXT: vmov r5, r4, d9 -; CHECK-FP16-NEXT: mvnne r7, #0 +; CHECK-FP16-NEXT: mov r4, r1 +; CHECK-FP16-NEXT: movlt r7, r5 +; CHECK-FP16-NEXT: vmov r10, r9, d12 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: vmov.32 d12[0], r0 -; CHECK-FP16-NEXT: subs r0, r0, r10 +; CHECK-FP16-NEXT: subs r2, r0, r8 +; CHECK-FP16-NEXT: vmov.32 d15[1], r4 +; CHECK-FP16-NEXT: vmov r2, r3, d13 +; CHECK-FP16-NEXT: vmov.32 d14[0], r0 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 ; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: vdup.32 d17, r7 ; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: mvnne r0, #0 -; CHECK-FP16-NEXT: vmov.32 d12[1], r1 -; CHECK-FP16-NEXT: rsbs r3, r9, #-2147483648 +; CHECK-FP16-NEXT: vdup.32 d17, r7 +; CHECK-FP16-NEXT: movlt r0, r5 +; CHECK-FP16-NEXT: vmov.32 d14[1], r1 +; CHECK-FP16-NEXT: rsbs r7, r10, #-2147483648 ; CHECK-FP16-NEXT: vdup.32 d16, r0 -; CHECK-FP16-NEXT: mvn r0, #0 -; CHECK-FP16-NEXT: vbsl q8, q6, q5 -; CHECK-FP16-NEXT: adr r1, .LCPI6_1 -; CHECK-FP16-NEXT: vld1.64 {d18, d19}, [r1:128] -; CHECK-FP16-NEXT: sbcs r3, r0, r8 -; CHECK-FP16-NEXT: mov r3, #0 -; CHECK-FP16-NEXT: vmov r1, r2, d17 -; CHECK-FP16-NEXT: movwlt r3, #1 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: mvnne r3, #0 -; CHECK-FP16-NEXT: rsbs r7, r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r7, r0, r4 -; CHECK-FP16-NEXT: vmov r5, r4, d16 +; CHECK-FP16-NEXT: sbcs r7, r5, r9 +; CHECK-FP16-NEXT: vbsl q8, q7, q5 +; CHECK-FP16-NEXT: adr r0, .LCPI6_1 +; CHECK-FP16-NEXT: vld1.64 {d18, d19}, 
[r0:128] ; CHECK-FP16-NEXT: mov r7, #0 ; CHECK-FP16-NEXT: movwlt r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: mvnne r7, #0 -; CHECK-FP16-NEXT: vdup.32 d23, r7 -; CHECK-FP16-NEXT: vdup.32 d22, r3 -; CHECK-FP16-NEXT: vbsl q11, q4, q9 +; CHECK-FP16-NEXT: vmov r0, r1, d17 +; CHECK-FP16-NEXT: movlt r7, r5 +; CHECK-FP16-NEXT: rsbs r2, r2, #-2147483648 +; CHECK-FP16-NEXT: sbcs r2, r5, r3 +; CHECK-FP16-NEXT: vmov r3, r4, d16 +; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: movwlt r2, #1 +; CHECK-FP16-NEXT: movlt r2, r5 +; CHECK-FP16-NEXT: vdup.32 d23, r2 +; CHECK-FP16-NEXT: vdup.32 d22, r7 +; CHECK-FP16-NEXT: vbsl q11, q6, q9 +; CHECK-FP16-NEXT: rsbs r0, r0, #-2147483648 ; CHECK-FP16-NEXT: vmovn.i64 d1, q11 -; CHECK-FP16-NEXT: rsbs r1, r1, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r0, r2 -; CHECK-FP16-NEXT: mov r1, #0 -; CHECK-FP16-NEXT: movwlt r1, #1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mvnne r1, #0 -; CHECK-FP16-NEXT: rsbs r2, r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r0, r0, r4 -; CHECK-FP16-NEXT: vdup.32 d21, r1 +; CHECK-FP16-NEXT: sbcs r0, r5, r1 +; CHECK-FP16-NEXT: mov r0, #0 +; CHECK-FP16-NEXT: movwlt r0, #1 +; CHECK-FP16-NEXT: movlt r0, r5 +; CHECK-FP16-NEXT: vdup.32 d21, r0 +; CHECK-FP16-NEXT: rsbs r1, r3, #-2147483648 +; CHECK-FP16-NEXT: sbcs r1, r5, r4 ; CHECK-FP16-NEXT: movwlt r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: mvnne r6, #0 +; CHECK-FP16-NEXT: movlt r6, r5 ; CHECK-FP16-NEXT: vdup.32 d20, r6 ; CHECK-FP16-NEXT: vbif q8, q9, q10 ; CHECK-FP16-NEXT: vmovn.i64 d0, q8 -; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-FP16-NEXT: add sp, sp, #8 +; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; CHECK-FP16-NEXT: .p2align 4 ; CHECK-FP16-NEXT: @ %bb.1: @@ -788,34 +751,30 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: sbcs r0, r1, #0 ; CHECK-NEON-NEXT: mov r0, #0 ; CHECK-NEON-NEXT: movwlo r0, #1 -; CHECK-NEON-NEXT: 
cmp r0, #0 -; CHECK-NEON-NEXT: mvnne r0, #0 +; CHECK-NEON-NEXT: movlo r0, r3 ; CHECK-NEON-NEXT: subs r1, r4, r3 ; CHECK-NEON-NEXT: sbcs r1, r8, #0 ; CHECK-NEON-NEXT: mov r1, #0 ; CHECK-NEON-NEXT: movwlo r1, #1 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mvnne r1, #0 +; CHECK-NEON-NEXT: movlo r1, r3 ; CHECK-NEON-NEXT: subs r6, r6, r3 ; CHECK-NEON-NEXT: sbcs r6, r9, #0 ; CHECK-NEON-NEXT: vdup.32 d19, r1 ; CHECK-NEON-NEXT: mov r6, #0 ; CHECK-NEON-NEXT: vdup.32 d18, r0 ; CHECK-NEON-NEXT: movwlo r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: mvnne r6, #0 -; CHECK-NEON-NEXT: subs r3, r5, r3 -; CHECK-NEON-NEXT: sbcs r3, r7, #0 +; CHECK-NEON-NEXT: vand q10, q4, q9 +; CHECK-NEON-NEXT: movlo r6, r3 +; CHECK-NEON-NEXT: subs r5, r5, r3 +; CHECK-NEON-NEXT: sbcs r7, r7, #0 ; CHECK-NEON-NEXT: vdup.32 d17, r6 ; CHECK-NEON-NEXT: movwlo r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: mvnne r2, #0 -; CHECK-NEON-NEXT: vand q10, q4, q9 +; CHECK-NEON-NEXT: vorn q9, q10, q9 +; CHECK-NEON-NEXT: movlo r2, r3 ; CHECK-NEON-NEXT: vdup.32 d16, r2 ; CHECK-NEON-NEXT: vand q11, q6, q8 -; CHECK-NEON-NEXT: vorn q9, q10, q9 -; CHECK-NEON-NEXT: vorn q8, q11, q8 ; CHECK-NEON-NEXT: vmovn.i64 d1, q9 +; CHECK-NEON-NEXT: vorn q8, q11, q8 ; CHECK-NEON-NEXT: vmovn.i64 d0, q8 ; CHECK-NEON-NEXT: vpop {d8, d9, d10} ; CHECK-NEON-NEXT: vpop {d12, d13} @@ -856,34 +815,30 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-FP16-NEXT: sbcs r0, r1, #0 ; CHECK-FP16-NEXT: mov r0, #0 ; CHECK-FP16-NEXT: movwlo r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: mvnne r0, #0 +; CHECK-FP16-NEXT: movlo r0, r3 ; CHECK-FP16-NEXT: subs r1, r5, r3 ; CHECK-FP16-NEXT: sbcs r1, r7, #0 ; CHECK-FP16-NEXT: mov r1, #0 ; CHECK-FP16-NEXT: movwlo r1, #1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mvnne r1, #0 +; CHECK-FP16-NEXT: movlo r1, r3 ; CHECK-FP16-NEXT: subs r7, r4, r3 ; CHECK-FP16-NEXT: sbcs r7, r8, #0 ; CHECK-FP16-NEXT: vdup.32 d19, r1 ; CHECK-FP16-NEXT: mov r7, #0 ; 
CHECK-FP16-NEXT: vdup.32 d18, r0 ; CHECK-FP16-NEXT: movwlo r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: mvnne r7, #0 -; CHECK-FP16-NEXT: subs r3, r6, r3 -; CHECK-FP16-NEXT: sbcs r3, r9, #0 +; CHECK-FP16-NEXT: vand q10, q4, q9 +; CHECK-FP16-NEXT: movlo r7, r3 +; CHECK-FP16-NEXT: subs r6, r6, r3 +; CHECK-FP16-NEXT: sbcs r6, r9, #0 ; CHECK-FP16-NEXT: vdup.32 d17, r7 ; CHECK-FP16-NEXT: movwlo r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: mvnne r2, #0 -; CHECK-FP16-NEXT: vand q10, q4, q9 +; CHECK-FP16-NEXT: vorn q9, q10, q9 +; CHECK-FP16-NEXT: movlo r2, r3 ; CHECK-FP16-NEXT: vdup.32 d16, r2 ; CHECK-FP16-NEXT: vand q11, q5, q8 -; CHECK-FP16-NEXT: vorn q9, q10, q9 -; CHECK-FP16-NEXT: vorn q8, q11, q8 ; CHECK-FP16-NEXT: vmovn.i64 d1, q9 +; CHECK-FP16-NEXT: vorn q8, q11, q8 ; CHECK-FP16-NEXT: vmovn.i64 d0, q8 ; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11} ; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} @@ -900,209 +855,200 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEON: @ %bb.0: @ %entry ; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEON-NEXT: vmov r0, s3 ; CHECK-NEON-NEXT: vmov.f32 s16, s2 ; CHECK-NEON-NEXT: vmov.f32 s18, s1 ; CHECK-NEON-NEXT: vmov.f32 s20, s0 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r5, r0 -; CHECK-NEON-NEXT: vmov r0, s16 -; CHECK-NEON-NEXT: mov r6, r1 +; CHECK-NEON-NEXT: mov r8, r0 +; CHECK-NEON-NEXT: vmov r0, s20 +; CHECK-NEON-NEXT: mov r9, r1 +; CHECK-NEON-NEXT: vmov r6, s18 +; CHECK-NEON-NEXT: vmov r10, s16 +; CHECK-NEON-NEXT: vmov.32 d9[0], r8 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: vmov r8, s20 -; CHECK-NEON-NEXT: vmov.32 d13[0], r5 ; CHECK-NEON-NEXT: bl 
__aeabi_f2lz -; CHECK-NEON-NEXT: vmov r2, s18 -; CHECK-NEON-NEXT: vmov.32 d12[0], r0 -; CHECK-NEON-NEXT: mvn r9, #0 -; CHECK-NEON-NEXT: subs r0, r0, r9 -; CHECK-NEON-NEXT: sbcs r0, r1, #0 -; CHECK-NEON-NEXT: vmov.32 d13[1], r6 -; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: movwlt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: vmov.32 d12[1], r1 -; CHECK-NEON-NEXT: mvnne r0, #0 -; CHECK-NEON-NEXT: subs r1, r5, r9 -; CHECK-NEON-NEXT: sbcs r1, r6, #0 -; CHECK-NEON-NEXT: mov r1, #0 -; CHECK-NEON-NEXT: movwlt r1, #1 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mvnne r1, #0 -; CHECK-NEON-NEXT: vdup.32 d9, r1 -; CHECK-NEON-NEXT: vdup.32 d8, r0 -; CHECK-NEON-NEXT: mov r0, r2 +; CHECK-NEON-NEXT: mov r4, r0 +; CHECK-NEON-NEXT: vmov.32 d10[0], r0 +; CHECK-NEON-NEXT: mov r0, r6 +; CHECK-NEON-NEXT: mov r5, r1 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: vmov.i64 q5, #0xffffffff -; CHECK-NEON-NEXT: vbsl q4, q6, q5 ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r5, r0 -; CHECK-NEON-NEXT: vmov.32 d13[0], r0 -; CHECK-NEON-NEXT: mov r0, r8 -; CHECK-NEON-NEXT: mov r6, r1 -; CHECK-NEON-NEXT: vmov r4, r10, d8 +; CHECK-NEON-NEXT: mov r6, r0 +; CHECK-NEON-NEXT: vmov.32 d11[0], r0 +; CHECK-NEON-NEXT: mov r0, r10 +; CHECK-NEON-NEXT: mov r7, r1 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r5, r9 -; CHECK-NEON-NEXT: vmov.32 d12[0], r0 -; CHECK-NEON-NEXT: sbcs r2, r6, #0 +; CHECK-NEON-NEXT: vmov.32 d11[1], r7 +; CHECK-NEON-NEXT: mvn r3, #0 +; CHECK-NEON-NEXT: subs r4, r4, r3 +; CHECK-NEON-NEXT: vmov.i64 q8, #0xffffffff +; CHECK-NEON-NEXT: vmov.32 d10[1], r5 +; CHECK-NEON-NEXT: sbcs r5, r5, #0 +; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: vmov.32 d13[1], r6 -; CHECK-NEON-NEXT: movwlt r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: mvnne r2, #0 -; CHECK-NEON-NEXT: subs r0, r0, r9 +; CHECK-NEON-NEXT: movwlt r5, #1 +; 
CHECK-NEON-NEXT: vmov.32 d8[0], r0 +; CHECK-NEON-NEXT: movlt r5, r3 +; CHECK-NEON-NEXT: subs r0, r0, r3 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 -; CHECK-NEON-NEXT: vdup.32 d17, r2 +; CHECK-NEON-NEXT: vmov.32 d9[1], r9 ; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: vmov.32 d12[1], r1 ; CHECK-NEON-NEXT: movwlt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: mvnne r0, #0 -; CHECK-NEON-NEXT: vmov r2, r3, d9 -; CHECK-NEON-NEXT: vdup.32 d16, r0 -; CHECK-NEON-NEXT: rsbs r6, r4, #0 -; CHECK-NEON-NEXT: vbsl q8, q6, q5 -; CHECK-NEON-NEXT: rscs r6, r10, #0 -; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: movwlt r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: vmov r0, r1, d17 -; CHECK-NEON-NEXT: mvnne r6, #0 -; CHECK-NEON-NEXT: vmov r5, r4, d16 +; CHECK-NEON-NEXT: vmov.32 d8[1], r1 +; CHECK-NEON-NEXT: movlt r0, r3 +; CHECK-NEON-NEXT: subs r1, r8, r3 +; CHECK-NEON-NEXT: sbcs r1, r9, #0 +; CHECK-NEON-NEXT: mov r1, #0 +; CHECK-NEON-NEXT: movwlt r1, #1 +; CHECK-NEON-NEXT: movlt r1, r3 +; CHECK-NEON-NEXT: subs r6, r6, r3 +; CHECK-NEON-NEXT: sbcs r7, r7, #0 +; CHECK-NEON-NEXT: vdup.32 d19, r1 +; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: vdup.32 d18, r0 +; CHECK-NEON-NEXT: movwlt r7, #1 +; CHECK-NEON-NEXT: vbsl q9, q4, q8 +; CHECK-NEON-NEXT: movlt r7, r3 +; CHECK-NEON-NEXT: vdup.32 d21, r7 +; CHECK-NEON-NEXT: vdup.32 d20, r5 +; CHECK-NEON-NEXT: vmov r0, r1, d18 +; CHECK-NEON-NEXT: vbit q8, q5, q10 +; CHECK-NEON-NEXT: vmov r5, r4, d19 +; CHECK-NEON-NEXT: vmov r7, r6, d17 ; CHECK-NEON-NEXT: rsbs r0, r0, #0 ; CHECK-NEON-NEXT: rscs r0, r1, #0 ; CHECK-NEON-NEXT: mov r0, #0 ; CHECK-NEON-NEXT: movwlt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: mvnne r0, #0 -; CHECK-NEON-NEXT: rsbs r1, r2, #0 -; CHECK-NEON-NEXT: rscs r1, r3, #0 -; CHECK-NEON-NEXT: vmov.32 d19[0], r0 +; CHECK-NEON-NEXT: movlt r0, r3 +; CHECK-NEON-NEXT: rsbs r1, r7, #0 +; CHECK-NEON-NEXT: rscs r1, r6, #0 +; CHECK-NEON-NEXT: vmov r7, r6, d16 ; CHECK-NEON-NEXT: mov r1, #0 ; 
CHECK-NEON-NEXT: movwlt r1, #1 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mvnne r1, #0 -; CHECK-NEON-NEXT: rsbs r0, r5, #0 -; CHECK-NEON-NEXT: rscs r0, r4, #0 +; CHECK-NEON-NEXT: movlt r1, r3 +; CHECK-NEON-NEXT: rsbs r5, r5, #0 +; CHECK-NEON-NEXT: rscs r5, r4, #0 ; CHECK-NEON-NEXT: vmov.32 d21[0], r1 -; CHECK-NEON-NEXT: movwlt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: vmov.32 d20[0], r6 -; CHECK-NEON-NEXT: mvnne r7, #0 -; CHECK-NEON-NEXT: vmov.32 d18[0], r7 -; CHECK-NEON-NEXT: vand q10, q10, q4 -; CHECK-NEON-NEXT: vand q8, q9, q8 -; CHECK-NEON-NEXT: vmovn.i64 d1, q10 +; CHECK-NEON-NEXT: mov r5, #0 +; CHECK-NEON-NEXT: movwlt r5, #1 +; CHECK-NEON-NEXT: movlt r5, r3 +; CHECK-NEON-NEXT: vmov.32 d23[0], r5 +; CHECK-NEON-NEXT: vmov.32 d22[0], r0 +; CHECK-NEON-NEXT: vand q9, q11, q9 +; CHECK-NEON-NEXT: vmovn.i64 d1, q9 +; CHECK-NEON-NEXT: rsbs r1, r7, #0 +; CHECK-NEON-NEXT: rscs r1, r6, #0 +; CHECK-NEON-NEXT: movwlt r2, #1 +; CHECK-NEON-NEXT: movlt r2, r3 +; CHECK-NEON-NEXT: vmov.32 d20[0], r2 +; CHECK-NEON-NEXT: vand q8, q10, q8 ; CHECK-NEON-NEXT: vmovn.i64 d0, q8 -; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; ; CHECK-FP16-LABEL: ustest_f16i32: -; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-FP16: @ %bb.0: @ %entry +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-FP16-NEXT: .vsave {d10, d11, d12, d13} +; CHECK-FP16-NEXT: vpush {d10, d11, d12, d13} +; CHECK-FP16-NEXT: .vsave {d8} +; CHECK-FP16-NEXT: vpush {d8} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[3] ; CHECK-FP16-NEXT: vorr d8, d0, d0 -; CHECK-FP16-NEXT: vmov.u16 r8, d0[0] -; CHECK-FP16-NEXT: 
vmov.u16 r9, d0[1] +; CHECK-FP16-NEXT: vmov.u16 r6, d0[1] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: mov r4, r0 -; CHECK-FP16-NEXT: vmov.u16 r0, d8[2] -; CHECK-FP16-NEXT: mov r5, r1 -; CHECK-FP16-NEXT: vmov.32 d9[0], r4 +; CHECK-FP16-NEXT: mov r8, r0 +; CHECK-FP16-NEXT: vmov.u16 r0, d8[0] +; CHECK-FP16-NEXT: mov r9, r1 +; CHECK-FP16-NEXT: vmov.32 d13[0], r8 ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: mvn r10, #0 -; CHECK-FP16-NEXT: subs r2, r4, r10 -; CHECK-FP16-NEXT: sbcs r2, r5, #0 -; CHECK-FP16-NEXT: vmov.32 d8[0], r0 -; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: vmov s0, r9 -; CHECK-FP16-NEXT: movwlt r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: mvnne r2, #0 -; CHECK-FP16-NEXT: subs r0, r0, r10 -; CHECK-FP16-NEXT: sbcs r0, r1, #0 -; CHECK-FP16-NEXT: vmov.32 d9[1], r5 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: vmov.i64 q5, #0xffffffff -; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: vmov.32 d8[1], r1 -; CHECK-FP16-NEXT: mvnne r0, #0 -; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: vdup.32 d17, r2 -; CHECK-FP16-NEXT: vdup.32 d16, r0 -; CHECK-FP16-NEXT: vbif q4, q5, q8 -; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: vmov s0, r8 +; CHECK-FP16-NEXT: vmov s0, r6 ; CHECK-FP16-NEXT: mov r4, r0 ; CHECK-FP16-NEXT: mov r5, r1 -; CHECK-FP16-NEXT: vmov.32 d13[0], r0 -; CHECK-FP16-NEXT: vmov r7, r8, d8 +; CHECK-FP16-NEXT: vmov.32 d10[0], r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: subs r2, r4, r10 -; CHECK-FP16-NEXT: vmov.32 d12[0], r0 -; CHECK-FP16-NEXT: sbcs r2, r5, #0 +; CHECK-FP16-NEXT: mov r6, r0 +; CHECK-FP16-NEXT: vmov.u16 r0, d8[2] +; CHECK-FP16-NEXT: mov r7, r1 +; CHECK-FP16-NEXT: vmov.32 d11[0], r6 +; CHECK-FP16-NEXT: vmov s0, r0 +; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: vmov.32 d11[1], r7 +; CHECK-FP16-NEXT: mvn r3, #0 +; CHECK-FP16-NEXT: subs r4, r4, r3 +; CHECK-FP16-NEXT: vmov.i64 q8, #0xffffffff 
+; CHECK-FP16-NEXT: vmov.32 d10[1], r5 +; CHECK-FP16-NEXT: sbcs r5, r5, #0 +; CHECK-FP16-NEXT: mov r5, #0 ; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: vmov.32 d13[1], r5 -; CHECK-FP16-NEXT: movwlt r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: mvnne r2, #0 -; CHECK-FP16-NEXT: subs r0, r0, r10 +; CHECK-FP16-NEXT: movwlt r5, #1 +; CHECK-FP16-NEXT: vmov.32 d12[0], r0 +; CHECK-FP16-NEXT: movlt r5, r3 +; CHECK-FP16-NEXT: subs r0, r0, r3 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 -; CHECK-FP16-NEXT: vdup.32 d17, r2 +; CHECK-FP16-NEXT: vmov.32 d13[1], r9 ; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: vmov.32 d12[1], r1 ; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: mvnne r0, #0 -; CHECK-FP16-NEXT: vmov r2, r3, d9 -; CHECK-FP16-NEXT: vdup.32 d16, r0 -; CHECK-FP16-NEXT: rsbs r7, r7, #0 -; CHECK-FP16-NEXT: vbsl q8, q6, q5 -; CHECK-FP16-NEXT: rscs r7, r8, #0 +; CHECK-FP16-NEXT: vmov.32 d12[1], r1 +; CHECK-FP16-NEXT: movlt r0, r3 +; CHECK-FP16-NEXT: subs r1, r8, r3 +; CHECK-FP16-NEXT: sbcs r1, r9, #0 +; CHECK-FP16-NEXT: mov r1, #0 +; CHECK-FP16-NEXT: movwlt r1, #1 +; CHECK-FP16-NEXT: movlt r1, r3 +; CHECK-FP16-NEXT: subs r6, r6, r3 +; CHECK-FP16-NEXT: sbcs r7, r7, #0 +; CHECK-FP16-NEXT: vdup.32 d19, r1 ; CHECK-FP16-NEXT: mov r7, #0 +; CHECK-FP16-NEXT: vdup.32 d18, r0 ; CHECK-FP16-NEXT: movwlt r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: vmov r0, r1, d17 -; CHECK-FP16-NEXT: mvnne r7, #0 -; CHECK-FP16-NEXT: vmov r5, r4, d16 +; CHECK-FP16-NEXT: vbsl q9, q6, q8 +; CHECK-FP16-NEXT: movlt r7, r3 +; CHECK-FP16-NEXT: vdup.32 d21, r7 +; CHECK-FP16-NEXT: vdup.32 d20, r5 +; CHECK-FP16-NEXT: vmov r0, r1, d18 +; CHECK-FP16-NEXT: vbit q8, q5, q10 +; CHECK-FP16-NEXT: vmov r5, r4, d19 +; CHECK-FP16-NEXT: vmov r7, r6, d17 ; CHECK-FP16-NEXT: rsbs r0, r0, #0 ; CHECK-FP16-NEXT: rscs r0, r1, #0 ; CHECK-FP16-NEXT: mov r0, #0 ; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: mvnne r0, #0 -; 
CHECK-FP16-NEXT: rsbs r1, r2, #0 -; CHECK-FP16-NEXT: rscs r1, r3, #0 -; CHECK-FP16-NEXT: vmov.32 d19[0], r0 +; CHECK-FP16-NEXT: movlt r0, r3 +; CHECK-FP16-NEXT: rsbs r1, r7, #0 +; CHECK-FP16-NEXT: rscs r1, r6, #0 +; CHECK-FP16-NEXT: vmov r7, r6, d16 ; CHECK-FP16-NEXT: mov r1, #0 ; CHECK-FP16-NEXT: movwlt r1, #1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mvnne r1, #0 -; CHECK-FP16-NEXT: rsbs r0, r5, #0 -; CHECK-FP16-NEXT: rscs r0, r4, #0 +; CHECK-FP16-NEXT: movlt r1, r3 +; CHECK-FP16-NEXT: rsbs r5, r5, #0 +; CHECK-FP16-NEXT: rscs r5, r4, #0 ; CHECK-FP16-NEXT: vmov.32 d21[0], r1 -; CHECK-FP16-NEXT: movwlt r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: vmov.32 d20[0], r7 -; CHECK-FP16-NEXT: mvnne r6, #0 -; CHECK-FP16-NEXT: vmov.32 d18[0], r6 -; CHECK-FP16-NEXT: vand q10, q10, q4 -; CHECK-FP16-NEXT: vand q8, q9, q8 -; CHECK-FP16-NEXT: vmovn.i64 d1, q10 +; CHECK-FP16-NEXT: mov r5, #0 +; CHECK-FP16-NEXT: movwlt r5, #1 +; CHECK-FP16-NEXT: movlt r5, r3 +; CHECK-FP16-NEXT: vmov.32 d23[0], r5 +; CHECK-FP16-NEXT: vmov.32 d22[0], r0 +; CHECK-FP16-NEXT: vand q9, q11, q9 +; CHECK-FP16-NEXT: vmovn.i64 d1, q9 +; CHECK-FP16-NEXT: rsbs r1, r7, #0 +; CHECK-FP16-NEXT: rscs r1, r6, #0 +; CHECK-FP16-NEXT: movwlt r2, #1 +; CHECK-FP16-NEXT: movlt r2, r3 +; CHECK-FP16-NEXT: vmov.32 d20[0], r2 +; CHECK-FP16-NEXT: vand q8, q10, q8 ; CHECK-FP16-NEXT: vmovn.i64 d0, q8 -; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11, d12, d13} -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-FP16-NEXT: vpop {d8} +; CHECK-FP16-NEXT: vpop {d10, d11, d12, d13} +; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} entry: %conv = fptosi <4 x half> %x to <4 x i64> %0 = icmp slt <4 x i64> %conv, @@ -1617,8 +1563,8 @@ entry: define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-LABEL: stest_f64i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, 
r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 @@ -1627,55 +1573,50 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: mvn r9, #0 ; CHECK-NEXT: subs r1, r0, r9 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r4, r5 +; CHECK-NEXT: mvn r8, #-2147483648 +; CHECK-NEXT: sbcs r1, r4, r8 ; CHECK-NEXT: vorr d0, d8, d8 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov r8, #-2147483648 +; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: mvnge r4, #-2147483648 ; CHECK-NEXT: movwlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r3, r1 -; CHECK-NEXT: movne r1, r2 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: moveq r0, r9 +; CHECK-NEXT: mvnge r0, #0 +; CHECK-NEXT: movge r3, r1 +; CHECK-NEXT: movlt r1, r2 ; CHECK-NEXT: rsbs r2, r0, #0 ; CHECK-NEXT: rscs r2, r4, #-2147483648 ; CHECK-NEXT: sbcs r1, r9, r1 ; CHECK-NEXT: sbcs r1, r9, r3 ; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r0 -; CHECK-NEXT: moveq r4, r8 +; CHECK-NEXT: movge r4, #-2147483648 +; CHECK-NEXT: movlt r7, r0 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: subs r6, r0, r9 ; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: sbcs r6, r1, r5 +; CHECK-NEXT: sbcs r6, r1, r8 ; CHECK-NEXT: sbcs r6, r2, #0 ; CHECK-NEXT: sbcs r6, r3, #0 ; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: mvnge r1, #-2147483648 ; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r3, r6 -; CHECK-NEXT: movne r6, r2 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: rscs r1, r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r9, r6 -; CHECK-NEXT: sbcs r1, r9, r3 -; CHECK-NEXT: movwlt r10, #1 -; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: movne r10, r0 -; CHECK-NEXT: moveq r5, r8 -; CHECK-NEXT: vmov.32 d0[0], r10 +; CHECK-NEXT: 
mvnge r0, #0 +; CHECK-NEXT: movge r3, r6 +; CHECK-NEXT: movlt r6, r2 +; CHECK-NEXT: rsbs r2, r0, #0 +; CHECK-NEXT: rscs r2, r1, #-2147483648 +; CHECK-NEXT: sbcs r2, r9, r6 +; CHECK-NEXT: sbcs r2, r9, r3 +; CHECK-NEXT: movwlt r5, #1 +; CHECK-NEXT: movge r1, #-2147483648 +; CHECK-NEXT: movlt r5, r0 +; CHECK-NEXT: vmov.32 d0[0], r5 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 +; CHECK-NEXT: vmov.32 d0[1], r1 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -1703,17 +1644,15 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r4, r6 -; CHECK-NEXT: movne r6, r0 +; CHECK-NEXT: movhs r4, r6 +; CHECK-NEXT: movlo r6, r0 ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: subs r2, r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: movwlo r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r0, r5 -; CHECK-NEXT: movne r5, r1 +; CHECK-NEXT: movhs r0, r5 +; CHECK-NEXT: movlo r5, r1 ; CHECK-NEXT: vmov.32 d0[0], r0 ; CHECK-NEXT: vmov.32 d1[1], r4 ; CHECK-NEXT: vmov.32 d0[1], r5 @@ -1744,41 +1683,37 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: movge r2, r8 ; CHECK-NEXT: movwlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r3, r1 -; CHECK-NEXT: moveq r4, r1 -; CHECK-NEXT: movne r1, r0 +; CHECK-NEXT: vorr d0, d8, d8 +; CHECK-NEXT: movge r3, r1 +; CHECK-NEXT: movge r4, r1 +; CHECK-NEXT: movlt r1, r0 ; CHECK-NEXT: rsbs r0, r1, #0 ; CHECK-NEXT: rscs r0, r4, #0 -; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: rscs r0, r2, #0 ; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: rscs r0, r3, #0 +; CHECK-NEXT: rscs r0, r2, #0 ; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: rscs r0, r3, #0 ; CHECK-NEXT: movwlt r7, #1 -; 
CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r4, r7 -; CHECK-NEXT: movne r7, r1 +; CHECK-NEXT: movge r4, r7 +; CHECK-NEXT: movlt r7, r1 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: subs r6, r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r7 ; CHECK-NEXT: sbcs r6, r3, #0 +; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: movlt r8, r2 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r3, r2 -; CHECK-NEXT: moveq r1, r2 -; CHECK-NEXT: movne r2, r0 -; CHECK-NEXT: rsbs r0, r2, #0 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: movge r3, r6 +; CHECK-NEXT: movge r1, r6 +; CHECK-NEXT: movlt r6, r0 +; CHECK-NEXT: rsbs r0, r6, #0 ; CHECK-NEXT: rscs r0, r1, #0 ; CHECK-NEXT: rscs r0, r8, #0 ; CHECK-NEXT: rscs r0, r3, #0 ; CHECK-NEXT: movwlt r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r2, r5 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: vmov.32 d0[0], r2 +; CHECK-NEXT: movge r6, r5 +; CHECK-NEXT: movlt r5, r1 +; CHECK-NEXT: vmov.32 d0[0], r6 ; CHECK-NEXT: vmov.32 d1[1], r4 ; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8, d9} @@ -1796,8 +1731,8 @@ entry: define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-LABEL: stest_f32i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} ; CHECK-NEXT: vmov.f64 d8, d0 @@ -1806,55 +1741,50 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: mvn r9, #0 ; CHECK-NEXT: subs r1, r0, r9 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r4, r5 +; CHECK-NEXT: mvn r8, #-2147483648 +; CHECK-NEXT: sbcs r1, r4, r8 ; CHECK-NEXT: vmov.f32 s0, s16 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov r8, #-2147483648 +; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: 
mov r10, #0 +; CHECK-NEXT: mvnge r4, #-2147483648 ; CHECK-NEXT: movwlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r3, r1 -; CHECK-NEXT: movne r1, r2 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: moveq r0, r9 +; CHECK-NEXT: mvnge r0, #0 +; CHECK-NEXT: movge r3, r1 +; CHECK-NEXT: movlt r1, r2 ; CHECK-NEXT: rsbs r2, r0, #0 ; CHECK-NEXT: rscs r2, r4, #-2147483648 ; CHECK-NEXT: sbcs r1, r9, r1 ; CHECK-NEXT: sbcs r1, r9, r3 ; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r0 -; CHECK-NEXT: moveq r4, r8 +; CHECK-NEXT: movge r4, #-2147483648 +; CHECK-NEXT: movlt r7, r0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: subs r6, r0, r9 ; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: sbcs r6, r1, r5 +; CHECK-NEXT: sbcs r6, r1, r8 ; CHECK-NEXT: sbcs r6, r2, #0 ; CHECK-NEXT: sbcs r6, r3, #0 ; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: mvnge r1, #-2147483648 ; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r3, r6 -; CHECK-NEXT: movne r6, r2 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: rscs r1, r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r9, r6 -; CHECK-NEXT: sbcs r1, r9, r3 -; CHECK-NEXT: movwlt r10, #1 -; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: movne r10, r0 -; CHECK-NEXT: moveq r5, r8 -; CHECK-NEXT: vmov.32 d0[0], r10 +; CHECK-NEXT: mvnge r0, #0 +; CHECK-NEXT: movge r3, r6 +; CHECK-NEXT: movlt r6, r2 +; CHECK-NEXT: rsbs r2, r0, #0 +; CHECK-NEXT: rscs r2, r1, #-2147483648 +; CHECK-NEXT: sbcs r2, r9, r6 +; CHECK-NEXT: sbcs r2, r9, r3 +; CHECK-NEXT: movwlt r5, #1 +; CHECK-NEXT: movge r1, #-2147483648 +; CHECK-NEXT: movlt r5, r0 +; CHECK-NEXT: vmov.32 d0[0], r5 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 +; CHECK-NEXT: vmov.32 d0[1], r1 ; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -1882,17 
+1812,15 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-NEXT: sbcs r1, r3, #0 ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r4, r6 -; CHECK-NEXT: movne r6, r0 +; CHECK-NEXT: movhs r4, r6 +; CHECK-NEXT: movlo r6, r0 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: subs r2, r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: movwlo r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r0, r5 -; CHECK-NEXT: movne r5, r1 +; CHECK-NEXT: movhs r0, r5 +; CHECK-NEXT: movlo r5, r1 ; CHECK-NEXT: vmov.32 d0[0], r0 ; CHECK-NEXT: vmov.32 d1[1], r4 ; CHECK-NEXT: vmov.32 d0[1], r5 @@ -1919,45 +1847,41 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: subs r1, r2, #1 ; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov r8, #1 -; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: movge r2, r8 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: mov r8, #1 ; CHECK-NEXT: movwlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: moveq r3, r1 -; CHECK-NEXT: moveq r4, r1 -; CHECK-NEXT: movne r1, r0 +; CHECK-NEXT: movge r2, r8 +; CHECK-NEXT: movge r3, r1 +; CHECK-NEXT: movge r4, r1 +; CHECK-NEXT: movlt r1, r0 ; CHECK-NEXT: rsbs r0, r1, #0 ; CHECK-NEXT: rscs r0, r4, #0 -; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: rscs r0, r2, #0 +; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: rscs r0, r3, #0 ; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r4, r7 -; CHECK-NEXT: movne r7, r1 +; CHECK-NEXT: movge r4, r7 +; CHECK-NEXT: movlt r7, r1 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: subs r6, r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r7 ; CHECK-NEXT: sbcs r6, r3, #0 +; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: movlt r8, r2 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r3, r2 -; CHECK-NEXT: moveq r1, r2 -; CHECK-NEXT: movne r2, r0 -; CHECK-NEXT: rsbs r0, r2, #0 +; 
CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: movge r3, r6 +; CHECK-NEXT: movge r1, r6 +; CHECK-NEXT: movlt r6, r0 +; CHECK-NEXT: rsbs r0, r6, #0 ; CHECK-NEXT: rscs r0, r1, #0 ; CHECK-NEXT: rscs r0, r8, #0 ; CHECK-NEXT: rscs r0, r3, #0 ; CHECK-NEXT: movwlt r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r2, r5 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: vmov.32 d0[0], r2 +; CHECK-NEXT: movge r6, r5 +; CHECK-NEXT: movlt r5, r1 +; CHECK-NEXT: vmov.32 d0[0], r6 ; CHECK-NEXT: vmov.32 d1[1], r4 ; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8} @@ -1975,14 +1899,14 @@ entry: define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEON-LABEL: stest_f16i64: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} ; CHECK-NEON-NEXT: vmov r0, s0 ; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: mov r8, r0 +; CHECK-NEON-NEXT: mov r5, r0 ; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 @@ -1990,60 +1914,55 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: mvn r9, #0 ; CHECK-NEON-NEXT: subs r1, r0, r9 -; CHECK-NEON-NEXT: mvn r6, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r4, r6 -; CHECK-NEON-NEXT: vmov s0, r8 +; CHECK-NEON-NEXT: mvn r8, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r4, r8 +; CHECK-NEON-NEXT: vmov s0, r5 ; CHECK-NEON-NEXT: sbcs r1, r2, #0 ; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: sbcs r1, r3, #0 -; CHECK-NEON-NEXT: mov r8, #-2147483648 +; CHECK-NEON-NEXT: mov r6, #0 ; CHECK-NEON-NEXT: mov r1, #0 -; CHECK-NEON-NEXT: mov r10, #0 +; CHECK-NEON-NEXT: mvnge r4, #-2147483648 ; CHECK-NEON-NEXT: movwlt r1, #1 -; CHECK-NEON-NEXT: cmp r1, #0 -; 
CHECK-NEON-NEXT: moveq r3, r1 -; CHECK-NEON-NEXT: movne r1, r2 -; CHECK-NEON-NEXT: moveq r4, r6 -; CHECK-NEON-NEXT: moveq r0, r9 +; CHECK-NEON-NEXT: mvnge r0, #0 +; CHECK-NEON-NEXT: movge r3, r1 +; CHECK-NEON-NEXT: movlt r1, r2 ; CHECK-NEON-NEXT: rsbs r2, r0, #0 ; CHECK-NEON-NEXT: rscs r2, r4, #-2147483648 ; CHECK-NEON-NEXT: sbcs r1, r9, r1 ; CHECK-NEON-NEXT: sbcs r1, r9, r3 ; CHECK-NEON-NEXT: movwlt r5, #1 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: movne r5, r0 -; CHECK-NEON-NEXT: moveq r4, r8 +; CHECK-NEON-NEXT: movge r4, #-2147483648 +; CHECK-NEON-NEXT: movlt r5, r0 ; CHECK-NEON-NEXT: bl __fixsfti ; CHECK-NEON-NEXT: subs r7, r0, r9 ; CHECK-NEON-NEXT: vmov.32 d1[0], r5 -; CHECK-NEON-NEXT: sbcs r7, r1, r6 +; CHECK-NEON-NEXT: sbcs r7, r1, r8 ; CHECK-NEON-NEXT: sbcs r7, r2, #0 ; CHECK-NEON-NEXT: sbcs r7, r3, #0 ; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: mvnge r1, #-2147483648 ; CHECK-NEON-NEXT: movwlt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r3, r7 -; CHECK-NEON-NEXT: movne r7, r2 -; CHECK-NEON-NEXT: movne r6, r1 -; CHECK-NEON-NEXT: moveq r0, r9 -; CHECK-NEON-NEXT: rsbs r1, r0, #0 -; CHECK-NEON-NEXT: rscs r1, r6, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r9, r7 -; CHECK-NEON-NEXT: sbcs r1, r9, r3 -; CHECK-NEON-NEXT: movwlt r10, #1 -; CHECK-NEON-NEXT: cmp r10, #0 -; CHECK-NEON-NEXT: movne r10, r0 -; CHECK-NEON-NEXT: moveq r6, r8 -; CHECK-NEON-NEXT: vmov.32 d0[0], r10 +; CHECK-NEON-NEXT: mvnge r0, #0 +; CHECK-NEON-NEXT: movge r3, r7 +; CHECK-NEON-NEXT: movlt r7, r2 +; CHECK-NEON-NEXT: rsbs r2, r0, #0 +; CHECK-NEON-NEXT: rscs r2, r1, #-2147483648 +; CHECK-NEON-NEXT: sbcs r2, r9, r7 +; CHECK-NEON-NEXT: sbcs r2, r9, r3 +; CHECK-NEON-NEXT: movwlt r6, #1 +; CHECK-NEON-NEXT: movge r1, #-2147483648 +; CHECK-NEON-NEXT: movlt r6, r0 +; CHECK-NEON-NEXT: vmov.32 d0[0], r6 ; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: vmov.32 d0[1], r6 +; CHECK-NEON-NEXT: vmov.32 d0[1], r1 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: pop {r4, 
r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; ; CHECK-FP16-LABEL: stest_f16i64: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] ; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 @@ -2051,54 +1970,49 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-FP16-NEXT: mov r4, r1 ; CHECK-FP16-NEXT: mvn r9, #0 ; CHECK-FP16-NEXT: subs r1, r0, r9 -; CHECK-FP16-NEXT: mvn r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r4, r5 +; CHECK-FP16-NEXT: mvn r8, #-2147483648 +; CHECK-FP16-NEXT: sbcs r1, r4, r8 ; CHECK-FP16-NEXT: vmov s0, r7 ; CHECK-FP16-NEXT: sbcs r1, r2, #0 ; CHECK-FP16-NEXT: mov r7, #0 ; CHECK-FP16-NEXT: sbcs r1, r3, #0 -; CHECK-FP16-NEXT: mov r8, #-2147483648 +; CHECK-FP16-NEXT: mov r5, #0 ; CHECK-FP16-NEXT: mov r1, #0 -; CHECK-FP16-NEXT: mov r10, #0 +; CHECK-FP16-NEXT: mvnge r4, #-2147483648 ; CHECK-FP16-NEXT: movwlt r1, #1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r3, r1 -; CHECK-FP16-NEXT: movne r1, r2 -; CHECK-FP16-NEXT: moveq r4, r5 -; CHECK-FP16-NEXT: moveq r0, r9 +; CHECK-FP16-NEXT: mvnge r0, #0 +; CHECK-FP16-NEXT: movge r3, r1 +; CHECK-FP16-NEXT: movlt r1, r2 ; CHECK-FP16-NEXT: rsbs r2, r0, #0 ; CHECK-FP16-NEXT: rscs r2, r4, #-2147483648 ; CHECK-FP16-NEXT: sbcs r1, r9, r1 ; CHECK-FP16-NEXT: sbcs r1, r9, r3 ; CHECK-FP16-NEXT: movwlt r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: movne r7, r0 -; CHECK-FP16-NEXT: moveq r4, r8 +; CHECK-FP16-NEXT: movge r4, #-2147483648 +; CHECK-FP16-NEXT: movlt r7, r0 ; CHECK-FP16-NEXT: bl __fixhfti ; CHECK-FP16-NEXT: subs r6, r0, r9 ; CHECK-FP16-NEXT: vmov.32 d1[0], r7 -; CHECK-FP16-NEXT: sbcs r6, r1, r5 +; CHECK-FP16-NEXT: sbcs r6, r1, r8 ; CHECK-FP16-NEXT: sbcs r6, r2, #0 ; 
CHECK-FP16-NEXT: sbcs r6, r3, #0 ; CHECK-FP16-NEXT: mov r6, #0 +; CHECK-FP16-NEXT: mvnge r1, #-2147483648 ; CHECK-FP16-NEXT: movwlt r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r3, r6 -; CHECK-FP16-NEXT: movne r6, r2 -; CHECK-FP16-NEXT: movne r5, r1 -; CHECK-FP16-NEXT: moveq r0, r9 -; CHECK-FP16-NEXT: rsbs r1, r0, #0 -; CHECK-FP16-NEXT: rscs r1, r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r9, r6 -; CHECK-FP16-NEXT: sbcs r1, r9, r3 -; CHECK-FP16-NEXT: movwlt r10, #1 -; CHECK-FP16-NEXT: cmp r10, #0 -; CHECK-FP16-NEXT: movne r10, r0 -; CHECK-FP16-NEXT: moveq r5, r8 -; CHECK-FP16-NEXT: vmov.32 d0[0], r10 +; CHECK-FP16-NEXT: mvnge r0, #0 +; CHECK-FP16-NEXT: movge r3, r6 +; CHECK-FP16-NEXT: movlt r6, r2 +; CHECK-FP16-NEXT: rsbs r2, r0, #0 +; CHECK-FP16-NEXT: rscs r2, r1, #-2147483648 +; CHECK-FP16-NEXT: sbcs r2, r9, r6 +; CHECK-FP16-NEXT: sbcs r2, r9, r3 +; CHECK-FP16-NEXT: movwlt r5, #1 +; CHECK-FP16-NEXT: movge r1, #-2147483648 +; CHECK-FP16-NEXT: movlt r5, r0 +; CHECK-FP16-NEXT: vmov.32 d0[0], r5 ; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: vmov.32 d0[1], r5 -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-FP16-NEXT: vmov.32 d0[1], r1 +; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -2131,17 +2045,15 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: mov r6, #0 ; CHECK-NEON-NEXT: movwlo r5, #1 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: moveq r4, r5 -; CHECK-NEON-NEXT: movne r5, r0 +; CHECK-NEON-NEXT: movhs r4, r5 +; CHECK-NEON-NEXT: movlo r5, r0 ; CHECK-NEON-NEXT: bl __fixunssfti ; CHECK-NEON-NEXT: subs r2, r2, #1 ; CHECK-NEON-NEXT: vmov.32 d1[0], r5 ; CHECK-NEON-NEXT: sbcs r2, r3, #0 ; CHECK-NEON-NEXT: movwlo r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: moveq r0, r6 -; CHECK-NEON-NEXT: movne r6, r1 +; CHECK-NEON-NEXT: movhs r0, r6 +; CHECK-NEON-NEXT: 
movlo r6, r1 ; CHECK-NEON-NEXT: vmov.32 d0[0], r0 ; CHECK-NEON-NEXT: vmov.32 d1[1], r4 ; CHECK-NEON-NEXT: vmov.32 d0[1], r6 @@ -2163,17 +2075,15 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-FP16-NEXT: mov r6, #0 ; CHECK-FP16-NEXT: mov r5, #0 ; CHECK-FP16-NEXT: movwlo r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r4, r6 -; CHECK-FP16-NEXT: movne r6, r0 +; CHECK-FP16-NEXT: movhs r4, r6 +; CHECK-FP16-NEXT: movlo r6, r0 ; CHECK-FP16-NEXT: bl __fixunshfti ; CHECK-FP16-NEXT: subs r2, r2, #1 ; CHECK-FP16-NEXT: vmov.32 d1[0], r6 ; CHECK-FP16-NEXT: sbcs r2, r3, #0 ; CHECK-FP16-NEXT: movwlo r5, #1 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: moveq r0, r5 -; CHECK-FP16-NEXT: movne r5, r1 +; CHECK-FP16-NEXT: movhs r0, r5 +; CHECK-FP16-NEXT: movlo r5, r1 ; CHECK-FP16-NEXT: vmov.32 d0[0], r0 ; CHECK-FP16-NEXT: vmov.32 d1[1], r4 ; CHECK-FP16-NEXT: vmov.32 d0[1], r5 @@ -2204,47 +2114,43 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: subs r1, r2, #1 ; CHECK-NEON-NEXT: sbcs r1, r3, #0 -; CHECK-NEON-NEXT: mov r8, #1 +; CHECK-NEON-NEXT: vmov s0, r5 ; CHECK-NEON-NEXT: mov r1, #0 -; CHECK-NEON-NEXT: movge r2, r8 +; CHECK-NEON-NEXT: mov r5, #1 ; CHECK-NEON-NEXT: movwlt r1, #1 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r3, r1 -; CHECK-NEON-NEXT: moveq r4, r1 -; CHECK-NEON-NEXT: movne r1, r0 +; CHECK-NEON-NEXT: movge r2, r5 +; CHECK-NEON-NEXT: movge r3, r1 +; CHECK-NEON-NEXT: movge r4, r1 +; CHECK-NEON-NEXT: movlt r1, r0 ; CHECK-NEON-NEXT: rsbs r0, r1, #0 ; CHECK-NEON-NEXT: rscs r0, r4, #0 -; CHECK-NEON-NEXT: vmov s0, r5 -; CHECK-NEON-NEXT: rscs r0, r2, #0 ; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: rscs r0, r2, #0 +; CHECK-NEON-NEXT: mov r8, #0 ; CHECK-NEON-NEXT: rscs r0, r3, #0 -; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: movwlt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r4, r7 -; CHECK-NEON-NEXT: movne r7, r1 +; CHECK-NEON-NEXT: movge r4, r7 +; 
CHECK-NEON-NEXT: movlt r7, r1 ; CHECK-NEON-NEXT: bl __fixsfti ; CHECK-NEON-NEXT: subs r6, r2, #1 ; CHECK-NEON-NEXT: vmov.32 d1[0], r7 ; CHECK-NEON-NEXT: sbcs r6, r3, #0 -; CHECK-NEON-NEXT: movlt r8, r2 -; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movwlt r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: moveq r3, r2 -; CHECK-NEON-NEXT: moveq r1, r2 -; CHECK-NEON-NEXT: movne r2, r0 -; CHECK-NEON-NEXT: rsbs r0, r2, #0 +; CHECK-NEON-NEXT: mov r6, #0 +; CHECK-NEON-NEXT: movlt r5, r2 +; CHECK-NEON-NEXT: movwlt r6, #1 +; CHECK-NEON-NEXT: movge r3, r6 +; CHECK-NEON-NEXT: movge r1, r6 +; CHECK-NEON-NEXT: movlt r6, r0 +; CHECK-NEON-NEXT: rsbs r0, r6, #0 ; CHECK-NEON-NEXT: rscs r0, r1, #0 -; CHECK-NEON-NEXT: rscs r0, r8, #0 +; CHECK-NEON-NEXT: rscs r0, r5, #0 ; CHECK-NEON-NEXT: rscs r0, r3, #0 -; CHECK-NEON-NEXT: movwlt r5, #1 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: moveq r2, r5 -; CHECK-NEON-NEXT: movne r5, r1 -; CHECK-NEON-NEXT: vmov.32 d0[0], r2 +; CHECK-NEON-NEXT: movwlt r8, #1 +; CHECK-NEON-NEXT: movge r6, r8 +; CHECK-NEON-NEXT: movlt r8, r1 +; CHECK-NEON-NEXT: vmov.32 d0[0], r6 ; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: vmov.32 d0[1], r5 +; CHECK-NEON-NEXT: vmov.32 d0[1], r8 ; CHECK-NEON-NEXT: vpop {d8} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} ; @@ -2253,53 +2159,49 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, lr} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] -; CHECK-FP16-NEXT: vmov.u16 r5, d0[0] +; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti ; CHECK-FP16-NEXT: mov r4, r1 ; CHECK-FP16-NEXT: subs r1, r2, #1 ; CHECK-FP16-NEXT: sbcs r1, r3, #0 -; CHECK-FP16-NEXT: mov r8, #1 +; CHECK-FP16-NEXT: vmov s0, r6 ; CHECK-FP16-NEXT: mov r1, #0 -; CHECK-FP16-NEXT: movge r2, r8 +; CHECK-FP16-NEXT: mov r6, #1 ; CHECK-FP16-NEXT: movwlt r1, #1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: 
moveq r3, r1 -; CHECK-FP16-NEXT: moveq r4, r1 -; CHECK-FP16-NEXT: movne r1, r0 +; CHECK-FP16-NEXT: movge r2, r6 +; CHECK-FP16-NEXT: movge r3, r1 +; CHECK-FP16-NEXT: movge r4, r1 +; CHECK-FP16-NEXT: movlt r1, r0 ; CHECK-FP16-NEXT: rsbs r0, r1, #0 ; CHECK-FP16-NEXT: rscs r0, r4, #0 -; CHECK-FP16-NEXT: vmov s0, r5 -; CHECK-FP16-NEXT: rscs r0, r2, #0 ; CHECK-FP16-NEXT: mov r7, #0 +; CHECK-FP16-NEXT: rscs r0, r2, #0 +; CHECK-FP16-NEXT: mov r8, #0 ; CHECK-FP16-NEXT: rscs r0, r3, #0 -; CHECK-FP16-NEXT: mov r5, #0 ; CHECK-FP16-NEXT: movwlt r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: moveq r4, r7 -; CHECK-FP16-NEXT: movne r7, r1 +; CHECK-FP16-NEXT: movge r4, r7 +; CHECK-FP16-NEXT: movlt r7, r1 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: subs r6, r2, #1 +; CHECK-FP16-NEXT: subs r5, r2, #1 ; CHECK-FP16-NEXT: vmov.32 d1[0], r7 -; CHECK-FP16-NEXT: sbcs r6, r3, #0 -; CHECK-FP16-NEXT: movlt r8, r2 -; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movwlt r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: moveq r3, r2 -; CHECK-FP16-NEXT: moveq r1, r2 -; CHECK-FP16-NEXT: movne r2, r0 -; CHECK-FP16-NEXT: rsbs r0, r2, #0 +; CHECK-FP16-NEXT: sbcs r5, r3, #0 +; CHECK-FP16-NEXT: mov r5, #0 +; CHECK-FP16-NEXT: movlt r6, r2 +; CHECK-FP16-NEXT: movwlt r5, #1 +; CHECK-FP16-NEXT: movge r3, r5 +; CHECK-FP16-NEXT: movge r1, r5 +; CHECK-FP16-NEXT: movlt r5, r0 +; CHECK-FP16-NEXT: rsbs r0, r5, #0 ; CHECK-FP16-NEXT: rscs r0, r1, #0 -; CHECK-FP16-NEXT: rscs r0, r8, #0 +; CHECK-FP16-NEXT: rscs r0, r6, #0 ; CHECK-FP16-NEXT: rscs r0, r3, #0 -; CHECK-FP16-NEXT: movwlt r5, #1 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: moveq r2, r5 -; CHECK-FP16-NEXT: movne r5, r1 -; CHECK-FP16-NEXT: vmov.32 d0[0], r2 +; CHECK-FP16-NEXT: movwlt r8, #1 +; CHECK-FP16-NEXT: movge r5, r8 +; CHECK-FP16-NEXT: movlt r8, r1 +; CHECK-FP16-NEXT: vmov.32 d0[0], r5 ; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: vmov.32 d0[1], r5 +; CHECK-FP16-NEXT: vmov.32 d0[1], r8 ; CHECK-FP16-NEXT: 
pop {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> @@ -2326,34 +2228,32 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: vmov r0, r1, d9 -; CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: subs r2, r4, r6 -; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: sbcs r2, r8, #0 -; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: movge r4, r6 -; CHECK-NEXT: movwlt r5, #1 +; CHECK-NEXT: vmov r0, r2, d9 +; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: subs r3, r4, r5 +; CHECK-NEXT: sbcs r3, r1, #0 +; CHECK-NEXT: mvn r7, #0 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: movge r4, r5 +; CHECK-NEXT: movwlt r3, #1 +; CHECK-NEXT: mov r8, #-2147483648 +; CHECK-NEXT: movlt r3, r1 +; CHECK-NEXT: rsbs r1, r4, #-2147483648 +; CHECK-NEXT: sbcs r1, r7, r3 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movge r4, r8 +; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: subs r2, r0, r6 -; CHECK-NEXT: sbcs r2, r1, #0 -; CHECK-NEXT: movlt r6, r0 -; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: movne r7, r1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: movne r5, r8 -; CHECK-NEXT: rsbs r2, r4, #-2147483648 -; CHECK-NEXT: mvn r1, #0 -; CHECK-NEXT: sbcs r2, r1, r5 -; CHECK-NEXT: movge r4, r0 -; CHECK-NEXT: rsbs r2, r6, #-2147483648 +; CHECK-NEXT: subs r2, r0, r5 ; CHECK-NEXT: vmov.32 d0[0], r4 -; CHECK-NEXT: sbcs r1, r1, r7 -; CHECK-NEXT: movge r6, r0 -; CHECK-NEXT: vmov.32 d0[1], r6 +; CHECK-NEXT: sbcs r2, r1, #0 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: movlt r5, r0 +; CHECK-NEXT: movlt r6, r1 +; CHECK-NEXT: rsbs r0, r5, #-2147483648 +; CHECK-NEXT: sbcs r0, r7, r6 +; CHECK-NEXT: movge r5, r8 +; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} entry: @@ -2406,38 +2306,34 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: vmov 
r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vmov r2, r12, d9 -; CHECK-NEXT: mvn r4, #0 -; CHECK-NEXT: subs r5, r0, r4 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: sbcs r5, r1, #0 +; CHECK-NEXT: mvn r5, #0 +; CHECK-NEXT: subs r3, r0, r5 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: movge r0, r4 +; CHECK-NEXT: sbcs r3, r1, #0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: movge r0, r5 ; CHECK-NEXT: movwlt r3, #1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: movne r3, r1 +; CHECK-NEXT: movlt r3, r1 ; CHECK-NEXT: rsbs r1, r0, #0 ; CHECK-NEXT: rscs r1, r3, #0 ; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: movne r6, r0 +; CHECK-NEXT: movlt r6, r0 ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: mov r1, r12 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: subs r2, r0, r4 +; CHECK-NEXT: subs r2, r0, r5 ; CHECK-NEXT: vmov.32 d0[0], r6 ; CHECK-NEXT: sbcs r2, r1, #0 -; CHECK-NEXT: movlt r4, r0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r0, r1 -; CHECK-NEXT: rsbs r1, r4, #0 -; CHECK-NEXT: rscs r0, r0, #0 -; CHECK-NEXT: movwlt r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: movne r5, r4 -; CHECK-NEXT: vmov.32 d0[1], r5 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r5, r0 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: movlt r2, r1 +; CHECK-NEXT: rsbs r0, r5, #0 +; CHECK-NEXT: rscs r0, r2, #0 +; CHECK-NEXT: movwlt r4, #1 +; CHECK-NEXT: movlt r4, r5 +; CHECK-NEXT: vmov.32 d0[1], r4 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, pc} entry: @@ -2451,81 +2347,68 @@ entry: define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: stest_f32i32_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; 
CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, sp, #8 ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vmov r2, s18 -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: mov r3, #-2147483648 -; CHECK-NEXT: mvn r10, #0 -; CHECK-NEXT: vmov r7, s16 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: subs r2, r11, r6 -; CHECK-NEXT: sbcs r2, r1, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movge r11, r6 -; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: movne r2, r1 -; CHECK-NEXT: rsbs r1, r11, #-2147483648 -; CHECK-NEXT: sbcs r1, r10, r2 -; CHECK-NEXT: movge r11, r3 +; CHECK-NEXT: mov r8, #-2147483648 +; CHECK-NEXT: mvn r9, #0 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: vmov r5, s16 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mvn r7, #-2147483648 +; CHECK-NEXT: subs r0, r0, r7 +; CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movge r4, r7 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: rsbs r1, r4, #-2147483648 +; CHECK-NEXT: sbcs r0, r9, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: movge r4, r8 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: subs r0, r0, r6 +; CHECK-NEXT: subs r0, r0, r7 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: mov r9, #0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: movge r5, r6 -; CHECK-NEXT: movwlt r9, #1 -; CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: movne r9, r1 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: movlt r2, r1 +; CHECK-NEXT: movge r5, r7 +; CHECK-NEXT: rsbs r1, r5, #-2147483648 +; CHECK-NEXT: sbcs r1, r9, r2 +; CHECK-NEXT: movge r5, r8 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: subs r0, r0, r6 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: 
subs r0, r0, r7 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: mov r8, #0 -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: movge r7, r6 -; CHECK-NEXT: movwlt r8, #1 -; CHECK-NEXT: cmp r8, #0 -; CHECK-NEXT: movne r8, r1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movge r6, r7 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: rsbs r1, r6, #-2147483648 +; CHECK-NEXT: sbcs r0, r9, r0 +; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: movge r6, r8 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r0, r6 +; CHECK-NEXT: subs r2, r0, r7 +; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r1, #0 -; CHECK-NEXT: movlt r6, r0 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movne r4, r1 -; CHECK-NEXT: rsbs r0, r6, #-2147483648 -; CHECK-NEXT: sbcs r0, r10, r4 -; CHECK-NEXT: mov r1, #-2147483648 -; CHECK-NEXT: movge r6, r1 +; CHECK-NEXT: movwlt r10, #1 +; CHECK-NEXT: movlt r7, r0 +; CHECK-NEXT: movlt r10, r1 ; CHECK-NEXT: rsbs r0, r7, #-2147483648 -; CHECK-NEXT: sbcs r0, r10, r8 -; CHECK-NEXT: vmov.32 d1[0], r6 -; CHECK-NEXT: movge r7, r1 -; CHECK-NEXT: rsbs r0, r5, #-2147483648 -; CHECK-NEXT: vmov.32 d0[0], r7 -; CHECK-NEXT: sbcs r0, r10, r9 -; CHECK-NEXT: movge r5, r1 -; CHECK-NEXT: vmov.32 d1[1], r11 -; CHECK-NEXT: vmov.32 d0[1], r5 -; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: vmov.32 d0[0], r5 +; CHECK-NEXT: sbcs r0, r9, r10 +; CHECK-NEXT: movge r7, r8 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: vmov.32 d0[1], r7 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <4 x float> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -2591,70 +2474,62 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vmov r2, s16 -; CHECK-NEXT: mvn r6, #0 -; CHECK-NEXT: subs r3, r0, 
r6 +; CHECK-NEXT: mvn r7, #0 +; CHECK-NEXT: subs r3, r0, r7 ; CHECK-NEXT: mov r4, #0 ; CHECK-NEXT: sbcs r3, r1, #0 -; CHECK-NEXT: vmov r8, s17 +; CHECK-NEXT: mov r10, #0 ; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: movge r0, r6 +; CHECK-NEXT: movge r0, r7 ; CHECK-NEXT: movwlt r3, #1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movne r3, r1 +; CHECK-NEXT: vmov r9, s18 +; CHECK-NEXT: movlt r3, r1 ; CHECK-NEXT: rsbs r1, r0, #0 ; CHECK-NEXT: rscs r1, r3, #0 -; CHECK-NEXT: vmov r9, s18 +; CHECK-NEXT: vmov r8, s17 ; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movne r4, r0 -; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: movlt r4, r0 ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r0, r6 +; CHECK-NEXT: subs r2, r0, r7 ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: sbcs r2, r1, #0 ; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movge r0, r6 +; CHECK-NEXT: movge r0, r7 ; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: movne r2, r1 +; CHECK-NEXT: movlt r2, r1 ; CHECK-NEXT: rsbs r1, r0, #0 ; CHECK-NEXT: rscs r1, r2, #0 ; CHECK-NEXT: movwlt r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: movne r5, r0 +; CHECK-NEXT: movlt r5, r0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r0, r6 -; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: subs r2, r0, r7 +; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: sbcs r2, r1, #0 ; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movge r0, r6 +; CHECK-NEXT: movge r0, r7 ; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: movne r2, r1 +; CHECK-NEXT: movlt r2, r1 ; CHECK-NEXT: rsbs r1, r0, #0 ; CHECK-NEXT: rscs r1, r2, #0 -; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r0 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: movlt r6, r0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r0, r6 -; CHECK-NEXT: vmov.32 d1[0], r7 +; CHECK-NEXT: subs r2, r0, r7 +; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r1, #0 -; CHECK-NEXT: movlt 
r6, r0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r0, r1 -; CHECK-NEXT: rsbs r1, r6, #0 -; CHECK-NEXT: rscs r0, r0, #0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r7, r0 +; CHECK-NEXT: movwlt r2, #1 ; CHECK-NEXT: vmov.32 d0[0], r5 -; CHECK-NEXT: movwlt r10, #1 -; CHECK-NEXT: cmp r10, #0 +; CHECK-NEXT: movlt r2, r1 +; CHECK-NEXT: rsbs r0, r7, #0 +; CHECK-NEXT: rscs r0, r2, #0 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: movne r10, r6 +; CHECK-NEXT: movwlt r10, #1 +; CHECK-NEXT: movlt r10, r7 ; CHECK-NEXT: vmov.32 d0[1], r10 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} @@ -2669,164 +2544,142 @@ entry: define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-LABEL: stest_f16i32_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEON-NEXT: .pad #4 -; CHECK-NEON-NEXT: sub sp, sp, #4 +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEON-NEXT: .vsave {d8, d9, d10} ; CHECK-NEON-NEXT: vpush {d8, d9, d10} -; CHECK-NEON-NEXT: .pad #8 -; CHECK-NEON-NEXT: sub sp, sp, #8 ; CHECK-NEON-NEXT: vmov r0, s3 -; CHECK-NEON-NEXT: vmov.f32 s16, s2 -; CHECK-NEON-NEXT: vmov.f32 s18, s1 +; CHECK-NEON-NEXT: vmov.f32 s18, s2 +; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: vmov.f32 s20, s0 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: vmov r2, s16 -; CHECK-NEON-NEXT: mov r11, r0 -; CHECK-NEON-NEXT: vmov r0, s18 -; CHECK-NEON-NEXT: mvn r6, #-2147483648 -; CHECK-NEON-NEXT: mov r3, #-2147483648 -; CHECK-NEON-NEXT: mvn r10, #0 -; CHECK-NEON-NEXT: vmov r7, s20 -; CHECK-NEON-NEXT: mov r4, #0 -; CHECK-NEON-NEXT: str r2, [sp, #4] @ 4-byte Spill -; CHECK-NEON-NEXT: subs r2, r11, r6 +; CHECK-NEON-NEXT: mov r4, r0 +; CHECK-NEON-NEXT: vmov r0, s20 +; 
CHECK-NEON-NEXT: mvn r7, #-2147483648 +; CHECK-NEON-NEXT: subs r2, r4, r7 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 +; CHECK-NEON-NEXT: mov r8, #-2147483648 ; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movge r11, r6 +; CHECK-NEON-NEXT: movge r4, r7 ; CHECK-NEON-NEXT: movwlt r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: movne r2, r1 -; CHECK-NEON-NEXT: rsbs r1, r11, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r10, r2 -; CHECK-NEON-NEXT: movge r11, r3 +; CHECK-NEON-NEXT: mvn r9, #0 +; CHECK-NEON-NEXT: movlt r2, r1 +; CHECK-NEON-NEXT: rsbs r1, r4, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r9, r2 +; CHECK-NEON-NEXT: mov r10, #0 +; CHECK-NEON-NEXT: movge r4, r8 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: mov r5, r0 -; CHECK-NEON-NEXT: subs r0, r0, r6 +; CHECK-NEON-NEXT: subs r0, r0, r7 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 -; CHECK-NEON-NEXT: mov r8, #0 -; CHECK-NEON-NEXT: mov r0, r7 -; CHECK-NEON-NEXT: movge r5, r6 -; CHECK-NEON-NEXT: movwlt r8, #1 -; CHECK-NEON-NEXT: cmp r8, #0 -; CHECK-NEON-NEXT: movne r8, r1 +; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: vmov r0, s18 +; CHECK-NEON-NEXT: movwlt r2, #1 +; CHECK-NEON-NEXT: movlt r2, r1 +; CHECK-NEON-NEXT: movge r5, r7 +; CHECK-NEON-NEXT: rsbs r1, r5, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r9, r2 +; CHECK-NEON-NEXT: movge r5, r8 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r7, r0 -; CHECK-NEON-NEXT: subs r0, r0, r6 +; CHECK-NEON-NEXT: mov r6, r0 +; CHECK-NEON-NEXT: subs r0, r0, r7 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 -; CHECK-NEON-NEXT: mov r9, #0 -; CHECK-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEON-NEXT: movge r7, r6 -; CHECK-NEON-NEXT: movwlt r9, #1 -; CHECK-NEON-NEXT: cmp r9, #0 -; CHECK-NEON-NEXT: movne r9, r1 +; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: movge r6, r7 +; CHECK-NEON-NEXT: movwlt r0, #1 +; CHECK-NEON-NEXT: movlt r0, r1 +; CHECK-NEON-NEXT: rsbs r1, r6, #-2147483648 +; 
CHECK-NEON-NEXT: sbcs r0, r9, r0 +; CHECK-NEON-NEXT: vmov r0, s16 +; CHECK-NEON-NEXT: movge r6, r8 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r0, r6 +; CHECK-NEON-NEXT: subs r2, r0, r7 +; CHECK-NEON-NEXT: vmov.32 d1[0], r6 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 -; CHECK-NEON-NEXT: movlt r6, r0 -; CHECK-NEON-NEXT: movwlt r4, #1 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: movne r4, r1 -; CHECK-NEON-NEXT: rsbs r0, r6, #-2147483648 -; CHECK-NEON-NEXT: sbcs r0, r10, r4 -; CHECK-NEON-NEXT: mov r1, #-2147483648 -; CHECK-NEON-NEXT: movge r6, r1 +; CHECK-NEON-NEXT: movwlt r10, #1 +; CHECK-NEON-NEXT: movlt r7, r0 +; CHECK-NEON-NEXT: movlt r10, r1 ; CHECK-NEON-NEXT: rsbs r0, r7, #-2147483648 -; CHECK-NEON-NEXT: sbcs r0, r10, r9 -; CHECK-NEON-NEXT: vmov.32 d1[0], r6 -; CHECK-NEON-NEXT: movge r7, r1 -; CHECK-NEON-NEXT: rsbs r0, r5, #-2147483648 -; CHECK-NEON-NEXT: vmov.32 d0[0], r7 -; CHECK-NEON-NEXT: sbcs r0, r10, r8 -; CHECK-NEON-NEXT: movge r5, r1 -; CHECK-NEON-NEXT: vmov.32 d1[1], r11 -; CHECK-NEON-NEXT: vmov.32 d0[1], r5 -; CHECK-NEON-NEXT: add sp, sp, #8 +; CHECK-NEON-NEXT: vmov.32 d0[0], r5 +; CHECK-NEON-NEXT: sbcs r0, r9, r10 +; CHECK-NEON-NEXT: movge r7, r8 +; CHECK-NEON-NEXT: vmov.32 d1[1], r4 +; CHECK-NEON-NEXT: vmov.32 d0[1], r7 ; CHECK-NEON-NEXT: vpop {d8, d9, d10} -; CHECK-NEON-NEXT: add sp, sp, #4 -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; ; CHECK-FP16-LABEL: stest_f16i32_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-FP16-NEXT: .pad #4 -; CHECK-FP16-NEXT: sub sp, sp, #4 +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-FP16-NEXT: .vsave {d8, d9} ; CHECK-FP16-NEXT: vpush {d8, d9} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[3] -; 
CHECK-FP16-NEXT: vmov.u16 r4, d0[2] -; CHECK-FP16-NEXT: vmov.u16 r5, d0[0] -; CHECK-FP16-NEXT: vmov.u16 r6, d0[1] +; CHECK-FP16-NEXT: vorr d8, d0, d0 +; CHECK-FP16-NEXT: vmov.u16 r5, d0[2] +; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: mov r10, r0 +; CHECK-FP16-NEXT: mov r4, r0 ; CHECK-FP16-NEXT: mvn r7, #-2147483648 ; CHECK-FP16-NEXT: subs r0, r0, r7 ; CHECK-FP16-NEXT: vmov s0, r6 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 -; CHECK-FP16-NEXT: mov r2, #-2147483648 +; CHECK-FP16-NEXT: mov r8, #-2147483648 ; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: movge r10, r7 +; CHECK-FP16-NEXT: movge r4, r7 ; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: movne r0, r1 -; CHECK-FP16-NEXT: rsbs r1, r10, #-2147483648 ; CHECK-FP16-NEXT: mvn r9, #0 +; CHECK-FP16-NEXT: movlt r0, r1 +; CHECK-FP16-NEXT: rsbs r1, r4, #-2147483648 ; CHECK-FP16-NEXT: sbcs r0, r9, r0 -; CHECK-FP16-NEXT: vmov s16, r4 -; CHECK-FP16-NEXT: mov r11, #0 +; CHECK-FP16-NEXT: mov r10, #0 ; CHECK-FP16-NEXT: vmov s18, r5 -; CHECK-FP16-NEXT: movge r10, r2 +; CHECK-FP16-NEXT: movge r4, r8 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: vmov.f32 s0, s18 ; CHECK-FP16-NEXT: mov r5, r0 ; CHECK-FP16-NEXT: subs r0, r0, r7 -; CHECK-FP16-NEXT: mov r4, #0 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 +; CHECK-FP16-NEXT: mov r0, #0 ; CHECK-FP16-NEXT: movge r5, r7 -; CHECK-FP16-NEXT: movwlt r4, #1 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: movne r4, r1 +; CHECK-FP16-NEXT: movwlt r0, #1 +; CHECK-FP16-NEXT: movlt r0, r1 +; CHECK-FP16-NEXT: rsbs r1, r5, #-2147483648 +; CHECK-FP16-NEXT: sbcs r0, r9, r0 +; CHECK-FP16-NEXT: movge r5, r8 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: vmov.f32 s0, s16 ; CHECK-FP16-NEXT: mov r6, r0 ; CHECK-FP16-NEXT: subs r0, r0, r7 -; CHECK-FP16-NEXT: mov r8, #0 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 +; CHECK-FP16-NEXT: mov r0, #0 ; CHECK-FP16-NEXT: movge r6, r7 -; CHECK-FP16-NEXT: movwlt r8, #1 -; 
CHECK-FP16-NEXT: cmp r8, #0 -; CHECK-FP16-NEXT: movne r8, r1 +; CHECK-FP16-NEXT: movwlt r0, #1 +; CHECK-FP16-NEXT: movlt r0, r1 +; CHECK-FP16-NEXT: vmov.u16 r1, d8[1] +; CHECK-FP16-NEXT: vmov s0, r1 +; CHECK-FP16-NEXT: rsbs r1, r6, #-2147483648 +; CHECK-FP16-NEXT: sbcs r0, r9, r0 +; CHECK-FP16-NEXT: movge r6, r8 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: subs r2, r0, r7 +; CHECK-FP16-NEXT: vmov.32 d1[0], r6 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 +; CHECK-FP16-NEXT: movwlt r10, #1 ; CHECK-FP16-NEXT: movlt r7, r0 -; CHECK-FP16-NEXT: movwlt r11, #1 -; CHECK-FP16-NEXT: cmp r11, #0 -; CHECK-FP16-NEXT: movne r11, r1 +; CHECK-FP16-NEXT: movlt r10, r1 ; CHECK-FP16-NEXT: rsbs r0, r7, #-2147483648 -; CHECK-FP16-NEXT: sbcs r0, r9, r11 -; CHECK-FP16-NEXT: mov r1, #-2147483648 -; CHECK-FP16-NEXT: movge r7, r1 -; CHECK-FP16-NEXT: rsbs r0, r6, #-2147483648 -; CHECK-FP16-NEXT: sbcs r0, r9, r8 -; CHECK-FP16-NEXT: vmov.32 d1[0], r7 -; CHECK-FP16-NEXT: movge r6, r1 -; CHECK-FP16-NEXT: rsbs r0, r5, #-2147483648 -; CHECK-FP16-NEXT: vmov.32 d0[0], r6 -; CHECK-FP16-NEXT: sbcs r0, r9, r4 -; CHECK-FP16-NEXT: movge r5, r1 -; CHECK-FP16-NEXT: vmov.32 d1[1], r10 -; CHECK-FP16-NEXT: vmov.32 d0[1], r5 +; CHECK-FP16-NEXT: vmov.32 d0[0], r5 +; CHECK-FP16-NEXT: sbcs r0, r9, r10 +; CHECK-FP16-NEXT: movge r7, r8 +; CHECK-FP16-NEXT: vmov.32 d1[1], r4 +; CHECK-FP16-NEXT: vmov.32 d0[1], r7 ; CHECK-FP16-NEXT: vpop {d8, d9} -; CHECK-FP16-NEXT: add sp, sp, #4 -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <4 x half> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -2944,73 +2797,65 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: vmov r2, s20 -; CHECK-NEON-NEXT: mvn r6, #0 -; CHECK-NEON-NEXT: subs r3, r0, r6 +; CHECK-NEON-NEXT: mvn r7, #0 +; CHECK-NEON-NEXT: subs 
r3, r0, r7 ; CHECK-NEON-NEXT: mov r4, #0 ; CHECK-NEON-NEXT: sbcs r3, r1, #0 -; CHECK-NEON-NEXT: vmov r8, s18 +; CHECK-NEON-NEXT: mov r10, #0 ; CHECK-NEON-NEXT: mov r3, #0 -; CHECK-NEON-NEXT: movge r0, r6 +; CHECK-NEON-NEXT: movge r0, r7 ; CHECK-NEON-NEXT: movwlt r3, #1 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: movne r3, r1 +; CHECK-NEON-NEXT: vmov r8, s18 +; CHECK-NEON-NEXT: movlt r3, r1 ; CHECK-NEON-NEXT: rsbs r1, r0, #0 ; CHECK-NEON-NEXT: rscs r1, r3, #0 ; CHECK-NEON-NEXT: vmov r9, s16 ; CHECK-NEON-NEXT: movwlt r4, #1 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: movne r4, r0 -; CHECK-NEON-NEXT: mov r10, #0 +; CHECK-NEON-NEXT: movlt r4, r0 ; CHECK-NEON-NEXT: mov r0, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r0, r6 +; CHECK-NEON-NEXT: subs r2, r0, r7 ; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 ; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movge r0, r6 +; CHECK-NEON-NEXT: movge r0, r7 ; CHECK-NEON-NEXT: movwlt r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: movne r2, r1 +; CHECK-NEON-NEXT: movlt r2, r1 ; CHECK-NEON-NEXT: rsbs r1, r0, #0 ; CHECK-NEON-NEXT: rscs r1, r2, #0 ; CHECK-NEON-NEXT: movwlt r5, #1 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: movne r5, r0 +; CHECK-NEON-NEXT: movlt r5, r0 ; CHECK-NEON-NEXT: mov r0, r9 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r0, r6 -; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: subs r2, r0, r7 +; CHECK-NEON-NEXT: mov r6, #0 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 ; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movge r0, r6 +; CHECK-NEON-NEXT: movge r0, r7 ; CHECK-NEON-NEXT: movwlt r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: movne r2, r1 +; CHECK-NEON-NEXT: movlt r2, r1 ; CHECK-NEON-NEXT: rsbs r1, r0, #0 ; CHECK-NEON-NEXT: rscs r1, r2, #0 -; CHECK-NEON-NEXT: movwlt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: movne r7, r0 +; 
CHECK-NEON-NEXT: movwlt r6, #1 +; CHECK-NEON-NEXT: movlt r6, r0 ; CHECK-NEON-NEXT: mov r0, r8 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r0, r6 -; CHECK-NEON-NEXT: vmov.32 d1[0], r7 +; CHECK-NEON-NEXT: subs r2, r0, r7 +; CHECK-NEON-NEXT: vmov.32 d1[0], r6 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 -; CHECK-NEON-NEXT: movlt r6, r0 -; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: movwlt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: movne r0, r1 -; CHECK-NEON-NEXT: rsbs r1, r6, #0 -; CHECK-NEON-NEXT: rscs r0, r0, #0 +; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: movlt r7, r0 +; CHECK-NEON-NEXT: movwlt r2, #1 ; CHECK-NEON-NEXT: vmov.32 d0[0], r5 -; CHECK-NEON-NEXT: movwlt r10, #1 -; CHECK-NEON-NEXT: cmp r10, #0 +; CHECK-NEON-NEXT: movlt r2, r1 +; CHECK-NEON-NEXT: rsbs r0, r7, #0 +; CHECK-NEON-NEXT: rscs r0, r2, #0 ; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: movne r10, r6 +; CHECK-NEON-NEXT: movwlt r10, #1 +; CHECK-NEON-NEXT: movlt r10, r7 ; CHECK-NEON-NEXT: vmov.32 d0[1], r10 ; CHECK-NEON-NEXT: vpop {d8, d9, d10} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} @@ -3023,75 +2868,67 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-FP16-NEXT: vpush {d8, d9} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[3] ; CHECK-FP16-NEXT: vorr d8, d0, d0 -; CHECK-FP16-NEXT: vmov.u16 r5, d0[0] +; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] +; CHECK-FP16-NEXT: vmov.u16 r7, d0[2] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: vmov.u16 r2, d8[1] -; CHECK-FP16-NEXT: mvn r4, #0 -; CHECK-FP16-NEXT: vmov.u16 r3, d8[2] -; CHECK-FP16-NEXT: vmov s0, r5 +; CHECK-FP16-NEXT: mvn r5, #0 +; CHECK-FP16-NEXT: vmov s0, r6 ; CHECK-FP16-NEXT: mov r6, #0 ; CHECK-FP16-NEXT: mov r8, #0 +; CHECK-FP16-NEXT: vmov s18, r7 ; CHECK-FP16-NEXT: vmov s16, r2 -; CHECK-FP16-NEXT: subs r2, r0, r4 +; CHECK-FP16-NEXT: subs r2, r0, r5 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 -; CHECK-FP16-NEXT: vmov s18, r3 ; 
CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movge r0, r4 +; CHECK-FP16-NEXT: movge r0, r5 ; CHECK-FP16-NEXT: movwlt r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: movne r2, r1 +; CHECK-FP16-NEXT: movlt r2, r1 ; CHECK-FP16-NEXT: rsbs r1, r0, #0 ; CHECK-FP16-NEXT: rscs r1, r2, #0 ; CHECK-FP16-NEXT: movwlt r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: movne r6, r0 +; CHECK-FP16-NEXT: movlt r6, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: subs r2, r0, r4 +; CHECK-FP16-NEXT: subs r2, r0, r5 ; CHECK-FP16-NEXT: vmov.f32 s0, s18 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 ; CHECK-FP16-NEXT: mov r7, #0 ; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movge r0, r4 +; CHECK-FP16-NEXT: movge r0, r5 ; CHECK-FP16-NEXT: movwlt r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: movne r2, r1 +; CHECK-FP16-NEXT: movlt r2, r1 ; CHECK-FP16-NEXT: rsbs r1, r0, #0 ; CHECK-FP16-NEXT: rscs r1, r2, #0 ; CHECK-FP16-NEXT: movwlt r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: movne r7, r0 +; CHECK-FP16-NEXT: movlt r7, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: subs r2, r0, r4 +; CHECK-FP16-NEXT: subs r2, r0, r5 ; CHECK-FP16-NEXT: vmov.f32 s0, s16 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 -; CHECK-FP16-NEXT: mov r5, #0 +; CHECK-FP16-NEXT: mov r4, #0 ; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movge r0, r4 +; CHECK-FP16-NEXT: movge r0, r5 ; CHECK-FP16-NEXT: movwlt r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: movne r2, r1 +; CHECK-FP16-NEXT: movlt r2, r1 ; CHECK-FP16-NEXT: rsbs r1, r0, #0 ; CHECK-FP16-NEXT: rscs r1, r2, #0 -; CHECK-FP16-NEXT: movwlt r5, #1 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: movne r5, r0 +; CHECK-FP16-NEXT: movwlt r4, #1 +; CHECK-FP16-NEXT: movlt r4, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: subs r2, r0, r4 -; CHECK-FP16-NEXT: vmov.32 d1[0], r5 +; CHECK-FP16-NEXT: subs r2, r0, r5 +; CHECK-FP16-NEXT: vmov.32 d1[0], r4 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 -; CHECK-FP16-NEXT: movlt r4, 
r0 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: movne r0, r1 -; CHECK-FP16-NEXT: rsbs r1, r4, #0 -; CHECK-FP16-NEXT: rscs r0, r0, #0 +; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: movlt r5, r0 +; CHECK-FP16-NEXT: movwlt r2, #1 ; CHECK-FP16-NEXT: vmov.32 d0[0], r7 -; CHECK-FP16-NEXT: movwlt r8, #1 -; CHECK-FP16-NEXT: cmp r8, #0 +; CHECK-FP16-NEXT: movlt r2, r1 +; CHECK-FP16-NEXT: rsbs r0, r5, #0 +; CHECK-FP16-NEXT: rscs r0, r2, #0 ; CHECK-FP16-NEXT: vmov.32 d1[1], r6 -; CHECK-FP16-NEXT: movne r8, r4 +; CHECK-FP16-NEXT: movwlt r8, #1 +; CHECK-FP16-NEXT: movlt r8, r5 ; CHECK-FP16-NEXT: vmov.32 d0[1], r8 ; CHECK-FP16-NEXT: vpop {d8, d9} ; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc} @@ -3592,8 +3429,8 @@ entry: define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: stest_f64i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 @@ -3602,55 +3439,50 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: mvn r9, #0 ; CHECK-NEXT: subs r1, r0, r9 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r4, r5 +; CHECK-NEXT: mvn r8, #-2147483648 +; CHECK-NEXT: sbcs r1, r4, r8 ; CHECK-NEXT: vorr d0, d8, d8 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov r8, #-2147483648 +; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: mvnge r4, #-2147483648 ; CHECK-NEXT: movwlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r3, r1 -; CHECK-NEXT: movne r1, r2 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: moveq r0, r9 +; CHECK-NEXT: mvnge r0, #0 +; CHECK-NEXT: movge r3, r1 +; 
CHECK-NEXT: movlt r1, r2 ; CHECK-NEXT: rsbs r2, r0, #0 ; CHECK-NEXT: rscs r2, r4, #-2147483648 ; CHECK-NEXT: sbcs r1, r9, r1 ; CHECK-NEXT: sbcs r1, r9, r3 ; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r0 -; CHECK-NEXT: moveq r4, r8 +; CHECK-NEXT: movge r4, #-2147483648 +; CHECK-NEXT: movlt r7, r0 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: subs r6, r0, r9 ; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: sbcs r6, r1, r5 +; CHECK-NEXT: sbcs r6, r1, r8 ; CHECK-NEXT: sbcs r6, r2, #0 ; CHECK-NEXT: sbcs r6, r3, #0 ; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: mvnge r1, #-2147483648 ; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r3, r6 -; CHECK-NEXT: movne r6, r2 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: rscs r1, r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r9, r6 -; CHECK-NEXT: sbcs r1, r9, r3 -; CHECK-NEXT: movwlt r10, #1 -; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: movne r10, r0 -; CHECK-NEXT: moveq r5, r8 -; CHECK-NEXT: vmov.32 d0[0], r10 +; CHECK-NEXT: mvnge r0, #0 +; CHECK-NEXT: movge r3, r6 +; CHECK-NEXT: movlt r6, r2 +; CHECK-NEXT: rsbs r2, r0, #0 +; CHECK-NEXT: rscs r2, r1, #-2147483648 +; CHECK-NEXT: sbcs r2, r9, r6 +; CHECK-NEXT: sbcs r2, r9, r3 +; CHECK-NEXT: movwlt r5, #1 +; CHECK-NEXT: movge r1, #-2147483648 +; CHECK-NEXT: movlt r5, r0 +; CHECK-NEXT: vmov.32 d0[0], r5 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 +; CHECK-NEXT: vmov.32 d0[1], r1 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -3676,17 +3508,15 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r4, r6 -; CHECK-NEXT: movne 
r6, r0 +; CHECK-NEXT: movhs r4, r6 +; CHECK-NEXT: movlo r6, r0 ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: subs r2, r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: movwlo r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r0, r5 -; CHECK-NEXT: movne r5, r1 +; CHECK-NEXT: movhs r0, r5 +; CHECK-NEXT: movlo r5, r1 ; CHECK-NEXT: vmov.32 d0[0], r0 ; CHECK-NEXT: vmov.32 d1[1], r4 ; CHECK-NEXT: vmov.32 d0[1], r5 @@ -3716,12 +3546,11 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r5, r0 -; CHECK-NEXT: moveq r4, r0 -; CHECK-NEXT: movne r0, r3 -; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movge r5, r0 +; CHECK-NEXT: movge r4, r0 +; CHECK-NEXT: movlt r0, r3 +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: movwmi r4, #0 ; CHECK-NEXT: movwmi r5, #0 ; CHECK-NEXT: bl __fixdfti @@ -3729,10 +3558,9 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: vmov.32 d1[0], r5 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r1, r6 -; CHECK-NEXT: moveq r0, r6 -; CHECK-NEXT: movne r6, r3 +; CHECK-NEXT: movge r1, r6 +; CHECK-NEXT: movge r0, r6 +; CHECK-NEXT: movlt r6, r3 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: movwmi r0, #0 ; CHECK-NEXT: movwmi r1, #0 @@ -3752,8 +3580,8 @@ entry: define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: stest_f32i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} ; CHECK-NEXT: vmov.f64 d8, d0 @@ -3762,55 +3590,50 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: mvn r9, #0 ; CHECK-NEXT: subs r1, r0, r9 -; 
CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r4, r5 +; CHECK-NEXT: mvn r8, #-2147483648 +; CHECK-NEXT: sbcs r1, r4, r8 ; CHECK-NEXT: vmov.f32 s0, s16 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov r8, #-2147483648 +; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: mvnge r4, #-2147483648 ; CHECK-NEXT: movwlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r3, r1 -; CHECK-NEXT: movne r1, r2 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: moveq r0, r9 +; CHECK-NEXT: mvnge r0, #0 +; CHECK-NEXT: movge r3, r1 +; CHECK-NEXT: movlt r1, r2 ; CHECK-NEXT: rsbs r2, r0, #0 ; CHECK-NEXT: rscs r2, r4, #-2147483648 ; CHECK-NEXT: sbcs r1, r9, r1 ; CHECK-NEXT: sbcs r1, r9, r3 ; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r0 -; CHECK-NEXT: moveq r4, r8 +; CHECK-NEXT: movge r4, #-2147483648 +; CHECK-NEXT: movlt r7, r0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: subs r6, r0, r9 ; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: sbcs r6, r1, r5 +; CHECK-NEXT: sbcs r6, r1, r8 ; CHECK-NEXT: sbcs r6, r2, #0 ; CHECK-NEXT: sbcs r6, r3, #0 ; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: mvnge r1, #-2147483648 ; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r3, r6 -; CHECK-NEXT: movne r6, r2 -; CHECK-NEXT: movne r5, r1 -; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: rscs r1, r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r9, r6 -; CHECK-NEXT: sbcs r1, r9, r3 -; CHECK-NEXT: movwlt r10, #1 -; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: movne r10, r0 -; CHECK-NEXT: moveq r5, r8 -; CHECK-NEXT: vmov.32 d0[0], r10 +; CHECK-NEXT: mvnge r0, #0 +; CHECK-NEXT: movge r3, r6 +; CHECK-NEXT: movlt r6, r2 +; CHECK-NEXT: rsbs r2, r0, #0 +; CHECK-NEXT: rscs r2, r1, #-2147483648 +; CHECK-NEXT: sbcs r2, r9, r6 +; CHECK-NEXT: sbcs r2, r9, r3 +; CHECK-NEXT: movwlt r5, #1 +; CHECK-NEXT: movge r1, #-2147483648 +; CHECK-NEXT: movlt r5, r0 +; 
CHECK-NEXT: vmov.32 d0[0], r5 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: vmov.32 d0[1], r5 +; CHECK-NEXT: vmov.32 d0[1], r1 ; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -3836,17 +3659,15 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: sbcs r1, r3, #0 ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r4, r6 -; CHECK-NEXT: movne r6, r0 +; CHECK-NEXT: movhs r4, r6 +; CHECK-NEXT: movlo r6, r0 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: subs r2, r2, #1 ; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: movwlo r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r0, r5 -; CHECK-NEXT: movne r5, r1 +; CHECK-NEXT: movhs r0, r5 +; CHECK-NEXT: movlo r5, r1 ; CHECK-NEXT: vmov.32 d0[0], r0 ; CHECK-NEXT: vmov.32 d1[1], r4 ; CHECK-NEXT: vmov.32 d0[1], r5 @@ -3877,10 +3698,9 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r5, r0 -; CHECK-NEXT: moveq r4, r0 -; CHECK-NEXT: movne r0, r3 +; CHECK-NEXT: movge r5, r0 +; CHECK-NEXT: movge r4, r0 +; CHECK-NEXT: movlt r0, r3 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: movwmi r4, #0 ; CHECK-NEXT: movwmi r5, #0 @@ -3889,10 +3709,9 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: vmov.32 d1[0], r5 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r1, r6 -; CHECK-NEXT: moveq r0, r6 -; CHECK-NEXT: movne r6, r3 +; CHECK-NEXT: movge r1, r6 +; CHECK-NEXT: movge r0, r6 +; CHECK-NEXT: movlt r6, r3 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: movwmi r0, #0 ; CHECK-NEXT: movwmi r1, #0 @@ -3912,14 +3731,14 @@ entry: define <2 x i64> 
@stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-LABEL: stest_f16i64_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} ; CHECK-NEON-NEXT: vmov r0, s0 ; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: mov r8, r0 +; CHECK-NEON-NEXT: mov r5, r0 ; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 @@ -3927,60 +3746,55 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: mvn r9, #0 ; CHECK-NEON-NEXT: subs r1, r0, r9 -; CHECK-NEON-NEXT: mvn r6, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r4, r6 -; CHECK-NEON-NEXT: vmov s0, r8 +; CHECK-NEON-NEXT: mvn r8, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r4, r8 +; CHECK-NEON-NEXT: vmov s0, r5 ; CHECK-NEON-NEXT: sbcs r1, r2, #0 ; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: sbcs r1, r3, #0 -; CHECK-NEON-NEXT: mov r8, #-2147483648 +; CHECK-NEON-NEXT: mov r6, #0 ; CHECK-NEON-NEXT: mov r1, #0 -; CHECK-NEON-NEXT: mov r10, #0 +; CHECK-NEON-NEXT: mvnge r4, #-2147483648 ; CHECK-NEON-NEXT: movwlt r1, #1 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r3, r1 -; CHECK-NEON-NEXT: movne r1, r2 -; CHECK-NEON-NEXT: moveq r4, r6 -; CHECK-NEON-NEXT: moveq r0, r9 +; CHECK-NEON-NEXT: mvnge r0, #0 +; CHECK-NEON-NEXT: movge r3, r1 +; CHECK-NEON-NEXT: movlt r1, r2 ; CHECK-NEON-NEXT: rsbs r2, r0, #0 ; CHECK-NEON-NEXT: rscs r2, r4, #-2147483648 ; CHECK-NEON-NEXT: sbcs r1, r9, r1 ; CHECK-NEON-NEXT: sbcs r1, r9, r3 ; CHECK-NEON-NEXT: movwlt r5, #1 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: movne r5, r0 -; CHECK-NEON-NEXT: moveq r4, r8 +; CHECK-NEON-NEXT: movge r4, #-2147483648 +; CHECK-NEON-NEXT: movlt r5, r0 ; CHECK-NEON-NEXT: bl 
__fixsfti ; CHECK-NEON-NEXT: subs r7, r0, r9 ; CHECK-NEON-NEXT: vmov.32 d1[0], r5 -; CHECK-NEON-NEXT: sbcs r7, r1, r6 +; CHECK-NEON-NEXT: sbcs r7, r1, r8 ; CHECK-NEON-NEXT: sbcs r7, r2, #0 ; CHECK-NEON-NEXT: sbcs r7, r3, #0 ; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: mvnge r1, #-2147483648 ; CHECK-NEON-NEXT: movwlt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r3, r7 -; CHECK-NEON-NEXT: movne r7, r2 -; CHECK-NEON-NEXT: movne r6, r1 -; CHECK-NEON-NEXT: moveq r0, r9 -; CHECK-NEON-NEXT: rsbs r1, r0, #0 -; CHECK-NEON-NEXT: rscs r1, r6, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r9, r7 -; CHECK-NEON-NEXT: sbcs r1, r9, r3 -; CHECK-NEON-NEXT: movwlt r10, #1 -; CHECK-NEON-NEXT: cmp r10, #0 -; CHECK-NEON-NEXT: movne r10, r0 -; CHECK-NEON-NEXT: moveq r6, r8 -; CHECK-NEON-NEXT: vmov.32 d0[0], r10 +; CHECK-NEON-NEXT: mvnge r0, #0 +; CHECK-NEON-NEXT: movge r3, r7 +; CHECK-NEON-NEXT: movlt r7, r2 +; CHECK-NEON-NEXT: rsbs r2, r0, #0 +; CHECK-NEON-NEXT: rscs r2, r1, #-2147483648 +; CHECK-NEON-NEXT: sbcs r2, r9, r7 +; CHECK-NEON-NEXT: sbcs r2, r9, r3 +; CHECK-NEON-NEXT: movwlt r6, #1 +; CHECK-NEON-NEXT: movge r1, #-2147483648 +; CHECK-NEON-NEXT: movlt r6, r0 +; CHECK-NEON-NEXT: vmov.32 d0[0], r6 ; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: vmov.32 d0[1], r6 +; CHECK-NEON-NEXT: vmov.32 d0[1], r1 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; ; CHECK-FP16-LABEL: stest_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] ; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 @@ -3988,54 +3802,49 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: mov r4, r1 
; CHECK-FP16-NEXT: mvn r9, #0 ; CHECK-FP16-NEXT: subs r1, r0, r9 -; CHECK-FP16-NEXT: mvn r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r4, r5 +; CHECK-FP16-NEXT: mvn r8, #-2147483648 +; CHECK-FP16-NEXT: sbcs r1, r4, r8 ; CHECK-FP16-NEXT: vmov s0, r7 ; CHECK-FP16-NEXT: sbcs r1, r2, #0 ; CHECK-FP16-NEXT: mov r7, #0 ; CHECK-FP16-NEXT: sbcs r1, r3, #0 -; CHECK-FP16-NEXT: mov r8, #-2147483648 +; CHECK-FP16-NEXT: mov r5, #0 ; CHECK-FP16-NEXT: mov r1, #0 -; CHECK-FP16-NEXT: mov r10, #0 +; CHECK-FP16-NEXT: mvnge r4, #-2147483648 ; CHECK-FP16-NEXT: movwlt r1, #1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r3, r1 -; CHECK-FP16-NEXT: movne r1, r2 -; CHECK-FP16-NEXT: moveq r4, r5 -; CHECK-FP16-NEXT: moveq r0, r9 +; CHECK-FP16-NEXT: mvnge r0, #0 +; CHECK-FP16-NEXT: movge r3, r1 +; CHECK-FP16-NEXT: movlt r1, r2 ; CHECK-FP16-NEXT: rsbs r2, r0, #0 ; CHECK-FP16-NEXT: rscs r2, r4, #-2147483648 ; CHECK-FP16-NEXT: sbcs r1, r9, r1 ; CHECK-FP16-NEXT: sbcs r1, r9, r3 ; CHECK-FP16-NEXT: movwlt r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: movne r7, r0 -; CHECK-FP16-NEXT: moveq r4, r8 +; CHECK-FP16-NEXT: movge r4, #-2147483648 +; CHECK-FP16-NEXT: movlt r7, r0 ; CHECK-FP16-NEXT: bl __fixhfti ; CHECK-FP16-NEXT: subs r6, r0, r9 ; CHECK-FP16-NEXT: vmov.32 d1[0], r7 -; CHECK-FP16-NEXT: sbcs r6, r1, r5 +; CHECK-FP16-NEXT: sbcs r6, r1, r8 ; CHECK-FP16-NEXT: sbcs r6, r2, #0 ; CHECK-FP16-NEXT: sbcs r6, r3, #0 ; CHECK-FP16-NEXT: mov r6, #0 +; CHECK-FP16-NEXT: mvnge r1, #-2147483648 ; CHECK-FP16-NEXT: movwlt r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r3, r6 -; CHECK-FP16-NEXT: movne r6, r2 -; CHECK-FP16-NEXT: movne r5, r1 -; CHECK-FP16-NEXT: moveq r0, r9 -; CHECK-FP16-NEXT: rsbs r1, r0, #0 -; CHECK-FP16-NEXT: rscs r1, r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r9, r6 -; CHECK-FP16-NEXT: sbcs r1, r9, r3 -; CHECK-FP16-NEXT: movwlt r10, #1 -; CHECK-FP16-NEXT: cmp r10, #0 -; CHECK-FP16-NEXT: movne r10, r0 -; CHECK-FP16-NEXT: moveq r5, r8 -; 
CHECK-FP16-NEXT: vmov.32 d0[0], r10 +; CHECK-FP16-NEXT: mvnge r0, #0 +; CHECK-FP16-NEXT: movge r3, r6 +; CHECK-FP16-NEXT: movlt r6, r2 +; CHECK-FP16-NEXT: rsbs r2, r0, #0 +; CHECK-FP16-NEXT: rscs r2, r1, #-2147483648 +; CHECK-FP16-NEXT: sbcs r2, r9, r6 +; CHECK-FP16-NEXT: sbcs r2, r9, r3 +; CHECK-FP16-NEXT: movwlt r5, #1 +; CHECK-FP16-NEXT: movge r1, #-2147483648 +; CHECK-FP16-NEXT: movlt r5, r0 +; CHECK-FP16-NEXT: vmov.32 d0[0], r5 ; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: vmov.32 d0[1], r5 -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-FP16-NEXT: vmov.32 d0[1], r1 +; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -4066,17 +3875,15 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: mov r6, #0 ; CHECK-NEON-NEXT: movwlo r5, #1 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: moveq r4, r5 -; CHECK-NEON-NEXT: movne r5, r0 +; CHECK-NEON-NEXT: movhs r4, r5 +; CHECK-NEON-NEXT: movlo r5, r0 ; CHECK-NEON-NEXT: bl __fixunssfti ; CHECK-NEON-NEXT: subs r2, r2, #1 ; CHECK-NEON-NEXT: vmov.32 d1[0], r5 ; CHECK-NEON-NEXT: sbcs r2, r3, #0 ; CHECK-NEON-NEXT: movwlo r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: moveq r0, r6 -; CHECK-NEON-NEXT: movne r6, r1 +; CHECK-NEON-NEXT: movhs r0, r6 +; CHECK-NEON-NEXT: movlo r6, r1 ; CHECK-NEON-NEXT: vmov.32 d0[0], r0 ; CHECK-NEON-NEXT: vmov.32 d1[1], r4 ; CHECK-NEON-NEXT: vmov.32 d0[1], r6 @@ -4098,17 +3905,15 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: mov r6, #0 ; CHECK-FP16-NEXT: mov r5, #0 ; CHECK-FP16-NEXT: movwlo r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r4, r6 -; CHECK-FP16-NEXT: movne r6, r0 +; CHECK-FP16-NEXT: movhs r4, r6 +; CHECK-FP16-NEXT: movlo r6, r0 ; CHECK-FP16-NEXT: bl __fixunshfti ; CHECK-FP16-NEXT: subs r2, r2, #1 ; 
CHECK-FP16-NEXT: vmov.32 d1[0], r6 ; CHECK-FP16-NEXT: sbcs r2, r3, #0 ; CHECK-FP16-NEXT: movwlo r5, #1 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: moveq r0, r5 -; CHECK-FP16-NEXT: movne r5, r1 +; CHECK-FP16-NEXT: movhs r0, r5 +; CHECK-FP16-NEXT: movlo r5, r1 ; CHECK-FP16-NEXT: vmov.32 d0[0], r0 ; CHECK-FP16-NEXT: vmov.32 d1[1], r4 ; CHECK-FP16-NEXT: vmov.32 d0[1], r5 @@ -4142,12 +3947,11 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: mov r0, #0 ; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: movwlt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: moveq r5, r0 -; CHECK-NEON-NEXT: moveq r4, r0 -; CHECK-NEON-NEXT: movne r0, r3 -; CHECK-NEON-NEXT: cmp r0, #0 ; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: movge r5, r0 +; CHECK-NEON-NEXT: movge r4, r0 +; CHECK-NEON-NEXT: movlt r0, r3 +; CHECK-NEON-NEXT: cmp r0, #0 ; CHECK-NEON-NEXT: movwmi r4, #0 ; CHECK-NEON-NEXT: movwmi r5, #0 ; CHECK-NEON-NEXT: bl __fixsfti @@ -4155,10 +3959,9 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: vmov.32 d1[0], r5 ; CHECK-NEON-NEXT: sbcs r2, r3, #0 ; CHECK-NEON-NEXT: movwlt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r1, r7 -; CHECK-NEON-NEXT: moveq r0, r7 -; CHECK-NEON-NEXT: movne r7, r3 +; CHECK-NEON-NEXT: movge r1, r7 +; CHECK-NEON-NEXT: movge r0, r7 +; CHECK-NEON-NEXT: movlt r7, r3 ; CHECK-NEON-NEXT: cmp r7, #0 ; CHECK-NEON-NEXT: movwmi r0, #0 ; CHECK-NEON-NEXT: movwmi r1, #0 @@ -4183,12 +3986,11 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: mov r0, #0 ; CHECK-FP16-NEXT: mov r4, r1 ; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: moveq r5, r0 -; CHECK-FP16-NEXT: moveq r4, r0 -; CHECK-FP16-NEXT: movne r0, r3 -; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: mov r6, #0 +; CHECK-FP16-NEXT: movge r5, r0 +; CHECK-FP16-NEXT: movge r4, r0 +; CHECK-FP16-NEXT: movlt r0, r3 +; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: movwmi r4, #0 ; 
CHECK-FP16-NEXT: movwmi r5, #0 ; CHECK-FP16-NEXT: bl __fixhfti @@ -4196,10 +3998,9 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: vmov.32 d1[0], r5 ; CHECK-FP16-NEXT: sbcs r2, r3, #0 ; CHECK-FP16-NEXT: movwlt r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r1, r6 -; CHECK-FP16-NEXT: moveq r0, r6 -; CHECK-FP16-NEXT: movne r6, r3 +; CHECK-FP16-NEXT: movge r1, r6 +; CHECK-FP16-NEXT: movge r0, r6 +; CHECK-FP16-NEXT: movlt r6, r3 ; CHECK-FP16-NEXT: cmp r6, #0 ; CHECK-FP16-NEXT: movwmi r0, #0 ; CHECK-FP16-NEXT: movwmi r1, #0 diff --git a/llvm/test/CodeGen/ARM/neon_vabd.ll b/llvm/test/CodeGen/ARM/neon_vabd.ll index ffc72b242f829..d8c038d081fd5 100644 --- a/llvm/test/CodeGen/ARM/neon_vabd.ll +++ b/llvm/test/CodeGen/ARM/neon_vabd.ll @@ -145,24 +145,23 @@ define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: vmov r0, r1, d1 -; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: mvn r6, #0 ; CHECK-NEXT: vmov r2, r3, d3 ; CHECK-NEXT: vmov r12, lr, d0 ; CHECK-NEXT: vmov r4, r5, d2 ; CHECK-NEXT: vsub.i64 q8, q0, q1 ; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: sbcs r0, r3, r1 +; CHECK-NEXT: sbcs r1, r3, r1 ; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: movlt r1, r6 +; CHECK-NEXT: subs r2, r4, r12 +; CHECK-NEXT: sbcs r2, r5, lr +; CHECK-NEXT: vdup.32 d19, r1 ; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: subs r1, r4, r12 -; CHECK-NEXT: sbcs r1, r5, lr -; CHECK-NEXT: vdup.32 d19, r0 -; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: mvnne r6, #0 -; CHECK-NEXT: vdup.32 d18, r6 +; CHECK-NEXT: movlt r0, r6 +; CHECK-NEXT: vdup.32 d18, r0 ; CHECK-NEXT: veor q8, q8, q9 ; CHECK-NEXT: vsub.i64 q0, q9, q8 ; CHECK-NEXT: pop {r4, r5, r6, pc} @@ -476,24 +475,23 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) { ; CHECK-NEXT: .save {r4, r5, r6, lr} ; 
CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: vmov r0, r1, d1 -; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: mvn r6, #0 ; CHECK-NEXT: vmov r2, r3, d3 ; CHECK-NEXT: vmov r12, lr, d0 ; CHECK-NEXT: vmov r4, r5, d2 ; CHECK-NEXT: vsub.i64 q8, q0, q1 ; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: sbcs r0, r3, r1 +; CHECK-NEXT: sbcs r1, r3, r1 ; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: movlt r1, r6 +; CHECK-NEXT: subs r2, r4, r12 +; CHECK-NEXT: sbcs r2, r5, lr +; CHECK-NEXT: vdup.32 d19, r1 ; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: subs r1, r4, r12 -; CHECK-NEXT: sbcs r1, r5, lr -; CHECK-NEXT: vdup.32 d19, r0 -; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: mvnne r6, #0 -; CHECK-NEXT: vdup.32 d18, r6 +; CHECK-NEXT: movlt r0, r6 +; CHECK-NEXT: vdup.32 d18, r0 ; CHECK-NEXT: veor q8, q8, q9 ; CHECK-NEXT: vsub.i64 q0, q9, q8 ; CHECK-NEXT: pop {r4, r5, r6, pc} diff --git a/llvm/test/CodeGen/ARM/smml.ll b/llvm/test/CodeGen/ARM/smml.ll index a09ec504d8b78..f1f25adcdd726 100644 --- a/llvm/test/CodeGen/ARM/smml.ll +++ b/llvm/test/CodeGen/ARM/smml.ll @@ -165,10 +165,10 @@ define void @test_used_flags(i32 %in1, i32 %in2) { ; CHECK-V6-NEXT: .save {r11, lr} ; CHECK-V6-NEXT: push {r11, lr} ; CHECK-V6-NEXT: smull r1, r2, r0, r1 -; CHECK-V6-NEXT: mov r0, #56 +; CHECK-V6-NEXT: mov r0, #42 ; CHECK-V6-NEXT: subs r1, r1, #1 ; CHECK-V6-NEXT: sbcs r1, r2, #0 -; CHECK-V6-NEXT: movlt r0, #42 +; CHECK-V6-NEXT: movge r0, #56 ; CHECK-V6-NEXT: bl opaque ; CHECK-V6-NEXT: pop {r11, pc} ; @@ -177,10 +177,10 @@ define void @test_used_flags(i32 %in1, i32 %in2) { ; CHECK-V7-NEXT: .save {r11, lr} ; CHECK-V7-NEXT: push {r11, lr} ; CHECK-V7-NEXT: smull r1, r2, r0, r1 -; CHECK-V7-NEXT: mov r0, #56 +; CHECK-V7-NEXT: mov r0, #42 ; CHECK-V7-NEXT: subs r1, r1, #1 ; CHECK-V7-NEXT: sbcs r1, r2, #0 -; CHECK-V7-NEXT: movwlt r0, #42 +; CHECK-V7-NEXT: movwge r0, #56 ; CHECK-V7-NEXT: bl opaque ; 
CHECK-V7-NEXT: pop {r11, pc} ; @@ -247,11 +247,11 @@ define void @test_used_flags(i32 %in1, i32 %in2) { ; CHECK-THUMBV7-NEXT: .save {r7, lr} ; CHECK-THUMBV7-NEXT: push {r7, lr} ; CHECK-THUMBV7-NEXT: smull r1, r2, r0, r1 -; CHECK-THUMBV7-NEXT: movs r0, #56 +; CHECK-THUMBV7-NEXT: movs r0, #42 ; CHECK-THUMBV7-NEXT: subs r1, #1 ; CHECK-THUMBV7-NEXT: sbcs r1, r2, #0 -; CHECK-THUMBV7-NEXT: it lt -; CHECK-THUMBV7-NEXT: movlt r0, #42 +; CHECK-THUMBV7-NEXT: it ge +; CHECK-THUMBV7-NEXT: movge r0, #56 ; CHECK-THUMBV7-NEXT: bl opaque ; CHECK-THUMBV7-NEXT: pop {r7, pc} ; @@ -260,11 +260,11 @@ define void @test_used_flags(i32 %in1, i32 %in2) { ; CHECK-THUMBV7M-NEXT: .save {r7, lr} ; CHECK-THUMBV7M-NEXT: push {r7, lr} ; CHECK-THUMBV7M-NEXT: smull r1, r2, r0, r1 -; CHECK-THUMBV7M-NEXT: movs r0, #56 +; CHECK-THUMBV7M-NEXT: movs r0, #42 ; CHECK-THUMBV7M-NEXT: subs r1, #1 ; CHECK-THUMBV7M-NEXT: sbcs r1, r2, #0 -; CHECK-THUMBV7M-NEXT: it lt -; CHECK-THUMBV7M-NEXT: movlt r0, #42 +; CHECK-THUMBV7M-NEXT: it ge +; CHECK-THUMBV7M-NEXT: movge r0, #56 ; CHECK-THUMBV7M-NEXT: bl opaque ; CHECK-THUMBV7M-NEXT: pop {r7, pc} ; @@ -273,11 +273,11 @@ define void @test_used_flags(i32 %in1, i32 %in2) { ; CHECK-THUMBV7EM-NEXT: .save {r7, lr} ; CHECK-THUMBV7EM-NEXT: push {r7, lr} ; CHECK-THUMBV7EM-NEXT: smull r1, r2, r0, r1 -; CHECK-THUMBV7EM-NEXT: movs r0, #56 +; CHECK-THUMBV7EM-NEXT: movs r0, #42 ; CHECK-THUMBV7EM-NEXT: subs r1, #1 ; CHECK-THUMBV7EM-NEXT: sbcs r1, r2, #0 -; CHECK-THUMBV7EM-NEXT: it lt -; CHECK-THUMBV7EM-NEXT: movlt r0, #42 +; CHECK-THUMBV7EM-NEXT: it ge +; CHECK-THUMBV7EM-NEXT: movge r0, #56 ; CHECK-THUMBV7EM-NEXT: bl opaque ; CHECK-THUMBV7EM-NEXT: pop {r7, pc} %in1.64 = sext i32 %in1 to i64 @@ -294,3 +294,5 @@ false: call void @opaque(i32 56) ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-CALLSITE: {{.*}} diff --git a/llvm/test/CodeGen/ARM/vector-trunc.ll b/llvm/test/CodeGen/ARM/vector-trunc.ll index 9acf463c2be93..17bc5059d736d 100644 --- a/llvm/test/CodeGen/ARM/vector-trunc.ll +++ b/llvm/test/CodeGen/ARM/vector-trunc.ll @@ -10,8 +10,7 @@ define i32 @test(i64 %arg1) { ; LE-NEXT: sbcs r0, r1, #0 ; LE-NEXT: vldr s0, .LCPI0_0 ; LE-NEXT: movwhs r2, #1 -; LE-NEXT: cmp r2, #0 -; LE-NEXT: mvnne r2, #0 +; LE-NEXT: mvnhs r2, #0 ; LE-NEXT: vmov s1, r2 ; LE-NEXT: vmovn.i32 d16, q0 ; LE-NEXT: vmovn.i16 d16, q8 @@ -30,8 +29,7 @@ define i32 @test(i64 %arg1) { ; BE-NEXT: sbcs r0, r0, #0 ; BE-NEXT: vldr s0, .LCPI0_0 ; BE-NEXT: movwhs r2, #1 -; BE-NEXT: cmp r2, #0 -; BE-NEXT: mvnne r2, #0 +; BE-NEXT: mvnhs r2, #0 ; BE-NEXT: vmov s1, r2 ; BE-NEXT: vmovn.i32 d16, q0 ; BE-NEXT: vmovn.i16 d16, q8 diff --git a/llvm/test/CodeGen/ARM/vselect_imax.ll b/llvm/test/CodeGen/ARM/vselect_imax.ll index bd5e3061f0d18..ec2ce65097430 100644 --- a/llvm/test/CodeGen/ARM/vselect_imax.ll +++ b/llvm/test/CodeGen/ARM/vselect_imax.ll @@ -111,53 +111,50 @@ define void @func_blend15(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) { ; CHECK-LABEL: func_blend18: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} ; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]! +; CHECK-NEXT: mvn r12, #0 ; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128]! 
-; CHECK-NEXT: vmov r4, r6, d16 ; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128] +; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128] -; CHECK-NEXT: vmov lr, r12, d18 +; CHECK-NEXT: vmov r2, lr, d18 +; CHECK-NEXT: vmov r0, r4, d20 +; CHECK-NEXT: subs r0, r0, r2 +; CHECK-NEXT: sbcs r2, r4, lr +; CHECK-NEXT: vmov r0, r6, d16 +; CHECK-NEXT: vmov r4, r5, d22 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: movlt r2, r12 +; CHECK-NEXT: subs r0, r4, r0 +; CHECK-NEXT: sbcs r0, r5, r6 +; CHECK-NEXT: vmov r6, lr, d17 +; CHECK-NEXT: vmov r4, r5, d23 ; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: vmov r2, r1, d20 -; CHECK-NEXT: subs r2, r2, lr -; CHECK-NEXT: vmov r2, r5, d22 -; CHECK-NEXT: sbcs r1, r1, r12 -; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: movlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mvnne r1, #0 -; CHECK-NEXT: subs r2, r2, r4 -; CHECK-NEXT: sbcs r6, r5, r6 -; CHECK-NEXT: vmov r2, r12, d17 -; CHECK-NEXT: vmov r5, r4, d23 +; CHECK-NEXT: movlt r0, #1 +; CHECK-NEXT: movlt r0, r12 +; CHECK-NEXT: subs r4, r4, r6 +; CHECK-NEXT: sbcs r6, r5, lr +; CHECK-NEXT: vmov r5, lr, d19 +; CHECK-NEXT: vmov r4, r7, d21 ; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: movlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: mvnne r6, #0 -; CHECK-NEXT: subs r2, r5, r2 -; CHECK-NEXT: sbcs r2, r4, r12 -; CHECK-NEXT: vmov lr, r12, d19 -; CHECK-NEXT: vmov r4, r5, d21 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: vdup.32 d25, r2 -; CHECK-NEXT: vdup.32 d24, r6 +; CHECK-NEXT: movlt r6, r12 +; CHECK-NEXT: vdup.32 d25, r6 +; CHECK-NEXT: vdup.32 d24, r0 ; CHECK-NEXT: vbit q8, q11, q12 -; CHECK-NEXT: subs r4, r4, lr -; CHECK-NEXT: sbcs r5, r5, r12 -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vdup.32 d27, r0 -; CHECK-NEXT: vdup.32 d26, r1 +; CHECK-NEXT: subs r4, r4, r5 +; CHECK-NEXT: sbcs r7, r7, lr +; CHECK-NEXT: movlt r1, #1 +; CHECK-NEXT: movlt r1, r12 
+; CHECK-NEXT: vdup.32 d27, r1 +; CHECK-NEXT: vdup.32 d26, r2 ; CHECK-NEXT: vbit q9, q10, q13 ; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128]! ; CHECK-NEXT: vst1.64 {d18, d19}, [r3:128] -; CHECK-NEXT: pop {r4, r5, r6, lr} +; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr} ; CHECK-NEXT: mov pc, lr ; COST: func_blend18 ; COST: cost of 0 {{.*}} icmp @@ -180,8 +177,9 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: vld1.64 {d28, d29}, [r1:128]! ; CHECK-NEXT: mov lr, #0 ; CHECK-NEXT: vld1.64 {d30, d31}, [r0:128]! -; CHECK-NEXT: vld1.64 {d20, d21}, [r1:128]! ; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]! +; CHECK-NEXT: vld1.64 {d20, d21}, [r1:128]! +; CHECK-NEXT: vmov r4, r5, d25 ; CHECK-NEXT: vld1.64 {d22, d23}, [r1:128]! ; CHECK-NEXT: vld1.64 {d26, d27}, [r0:128]! ; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128] @@ -189,78 +187,70 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: vmov r0, r12, d16 ; CHECK-NEXT: vmov r1, r2, d18 ; CHECK-NEXT: subs r0, r1, r0 -; CHECK-NEXT: vmov r1, r4, d25 +; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: sbcs r0, r2, r12 -; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: mvn r12, #0 ; CHECK-NEXT: vmov r2, r0, d21 -; CHECK-NEXT: movlt r12, #1 -; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: mvnne r12, #0 -; CHECK-NEXT: subs r1, r1, r2 -; CHECK-NEXT: sbcs r0, r4, r0 -; CHECK-NEXT: vmov r2, r4, d24 +; CHECK-NEXT: movlt lr, #1 +; CHECK-NEXT: movlt lr, r12 +; CHECK-NEXT: subs r2, r4, r2 +; CHECK-NEXT: sbcs r0, r5, r0 +; CHECK-NEXT: vmov r4, r5, d24 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: movlt r0, r12 ; CHECK-NEXT: vdup.32 d1, r0 -; CHECK-NEXT: vmov r0, r1, d20 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: sbcs r0, r4, r1 -; CHECK-NEXT: vmov r2, r4, d26 +; CHECK-NEXT: vmov r0, r2, d20 +; CHECK-NEXT: subs r0, r4, r0 +; CHECK-NEXT: sbcs r0, r5, r2 +; CHECK-NEXT: vmov r4, r5, d31 ; CHECK-NEXT: mov r0, #0 ; 
CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: movlt r0, r12 ; CHECK-NEXT: vdup.32 d0, r0 -; CHECK-NEXT: vmov r0, r1, d22 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: sbcs r0, r4, r1 -; CHECK-NEXT: vmov r4, r5, d31 -; CHECK-NEXT: vmov r0, r1, d29 -; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: vmov r0, r2, d29 ; CHECK-NEXT: subs r0, r4, r0 -; CHECK-NEXT: sbcs r0, r5, r1 +; CHECK-NEXT: sbcs r0, r5, r2 ; CHECK-NEXT: vmov r4, r5, d30 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: movlt r0, r12 ; CHECK-NEXT: vdup.32 d3, r0 -; CHECK-NEXT: vmov r0, r1, d28 +; CHECK-NEXT: vmov r0, r2, d28 ; CHECK-NEXT: subs r0, r4, r0 -; CHECK-NEXT: sbcs r0, r5, r1 +; CHECK-NEXT: sbcs r0, r5, r2 ; CHECK-NEXT: vmov r4, r5, d27 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: movlt r0, r12 ; CHECK-NEXT: vdup.32 d2, r0 -; CHECK-NEXT: vmov r0, r1, d23 +; CHECK-NEXT: vmov r0, r2, d23 ; CHECK-NEXT: vbit q14, q15, q1 ; CHECK-NEXT: vbit q10, q12, q0 ; CHECK-NEXT: subs r0, r4, r0 -; CHECK-NEXT: sbcs r0, r5, r1 -; CHECK-NEXT: vmov r1, r4, d17 -; CHECK-NEXT: vmov r5, r6, d19 +; CHECK-NEXT: sbcs r0, r5, r2 +; CHECK-NEXT: vmov r2, r4, d22 +; CHECK-NEXT: vmov r5, r6, d26 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: movlt r0, r12 ; CHECK-NEXT: vdup.32 d31, r0 -; CHECK-NEXT: vdup.32 d30, r2 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: subs r2, r5, r2 +; CHECK-NEXT: sbcs r2, r6, r4 +; CHECK-NEXT: vmov r5, r6, d19 +; CHECK-NEXT: vmov r2, r4, d17 +; CHECK-NEXT: movlt r0, #1 +; CHECK-NEXT: movlt r0, r12 +; CHECK-NEXT: vdup.32 d30, r0 ; CHECK-NEXT: vbit q11, q13, q15 ; CHECK-NEXT: vst1.64 {d28, d29}, [r3:128]! 
-; CHECK-NEXT: subs r1, r5, r1 -; CHECK-NEXT: sbcs r1, r6, r4 -; CHECK-NEXT: movlt lr, #1 -; CHECK-NEXT: cmp lr, #0 -; CHECK-NEXT: mvnne lr, #0 -; CHECK-NEXT: vdup.32 d3, lr -; CHECK-NEXT: vdup.32 d2, r12 +; CHECK-NEXT: subs r2, r5, r2 +; CHECK-NEXT: sbcs r2, r6, r4 +; CHECK-NEXT: movlt r1, #1 +; CHECK-NEXT: movlt r1, r12 +; CHECK-NEXT: vdup.32 d3, r1 +; CHECK-NEXT: vdup.32 d2, lr ; CHECK-NEXT: vbit q8, q9, q1 ; CHECK-NEXT: vst1.64 {d20, d21}, [r3:128]! ; CHECK-NEXT: vst1.64 {d22, d23}, [r3:128]! @@ -283,194 +273,180 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) { ; CHECK-LABEL: func_blend20: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add r8, r1, #64 -; CHECK-NEXT: add lr, r0, #64 -; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]! +; CHECK-NEXT: add r9, r0, #64 +; CHECK-NEXT: vld1.64 {d18, d19}, [r9:128]! +; CHECK-NEXT: mvn lr, #0 ; CHECK-NEXT: mov r12, #0 -; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]! -; CHECK-NEXT: vmov r4, r5, d17 -; CHECK-NEXT: vmov r6, r7, d25 -; CHECK-NEXT: vld1.64 {d18, d19}, [lr:128]! -; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128]! -; CHECK-NEXT: vld1.64 {d22, d23}, [r8:128]! -; CHECK-NEXT: vld1.64 {d0, d1}, [lr:128]! -; CHECK-NEXT: subs r4, r6, r4 -; CHECK-NEXT: sbcs r4, r7, r5 +; CHECK-NEXT: vld1.64 {d16, d17}, [r8:128]! +; CHECK-NEXT: vmov r2, r7, d19 +; CHECK-NEXT: vmov r5, r6, d17 +; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128]! +; CHECK-NEXT: vld1.64 {d24, d25}, [r1:128]! +; CHECK-NEXT: vld1.64 {d30, d31}, [r1:128]! +; CHECK-NEXT: vld1.64 {d0, d1}, [r0:128]! 
+; CHECK-NEXT: vld1.64 {d22, d23}, [r9:128]! +; CHECK-NEXT: subs r2, r2, r5 +; CHECK-NEXT: sbcs r2, r7, r6 ; CHECK-NEXT: vmov r5, r6, d16 -; CHECK-NEXT: vmov r7, r2, d24 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mvnne r4, #0 -; CHECK-NEXT: vdup.32 d27, r4 +; CHECK-NEXT: vmov r7, r4, d18 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: movlt r2, lr +; CHECK-NEXT: vdup.32 d29, r2 ; CHECK-NEXT: subs r5, r7, r5 -; CHECK-NEXT: sbcs r2, r2, r6 +; CHECK-NEXT: sbcs r4, r4, r6 ; CHECK-NEXT: vmov r5, r6, d1 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: movlt r4, lr +; CHECK-NEXT: vdup.32 d28, r4 +; CHECK-NEXT: vmov r2, r4, d31 +; CHECK-NEXT: subs r2, r5, r2 +; CHECK-NEXT: sbcs r2, r6, r4 +; CHECK-NEXT: vmov r5, r6, d0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: movlt r2, lr +; CHECK-NEXT: vdup.32 d3, r2 +; CHECK-NEXT: vmov r2, r4, d30 +; CHECK-NEXT: subs r2, r5, r2 +; CHECK-NEXT: sbcs r2, r6, r4 +; CHECK-NEXT: vmov r5, r6, d21 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: movlt r2, lr +; CHECK-NEXT: vdup.32 d2, r2 +; CHECK-NEXT: vmov r2, r4, d25 +; CHECK-NEXT: subs r2, r5, r2 +; CHECK-NEXT: sbcs r2, r6, r4 +; CHECK-NEXT: vmov r5, r6, d20 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: movlt r2, lr +; CHECK-NEXT: vdup.32 d27, r2 +; CHECK-NEXT: vmov r2, r4, d24 +; CHECK-NEXT: subs r2, r5, r2 +; CHECK-NEXT: sbcs r2, r6, r4 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: movlt r2, lr ; CHECK-NEXT: vdup.32 d26, r2 -; CHECK-NEXT: vmov r2, r4, d23 -; CHECK-NEXT: vbit q8, q12, q13 -; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]! -; CHECK-NEXT: vld1.64 {d26, d27}, [r1:128]! -; CHECK-NEXT: vld1.64 {d28, d29}, [lr:128]! +; CHECK-NEXT: vbif q10, q12, q13 +; CHECK-NEXT: vld1.64 {d26, d27}, [r8:128]! +; CHECK-NEXT: vld1.64 {d24, d25}, [r8:128]! 
+; CHECK-NEXT: vbit q8, q9, q14 +; CHECK-NEXT: vorr q9, q1, q1 +; CHECK-NEXT: vmov r2, r4, d25 +; CHECK-NEXT: vbsl q9, q0, q15 +; CHECK-NEXT: vld1.64 {d28, d29}, [r9:128]! +; CHECK-NEXT: vmov r5, r6, d29 +; CHECK-NEXT: vld1.64 {d8, d9}, [r0:128]! +; CHECK-NEXT: vld1.64 {d10, d11}, [r1:128]! +; CHECK-NEXT: vld1.64 {d2, d3}, [r1:128] +; CHECK-NEXT: vld1.64 {d4, d5}, [r0:128] +; CHECK-NEXT: vld1.64 {d0, d1}, [r9:128] ; CHECK-NEXT: subs r2, r5, r2 ; CHECK-NEXT: sbcs r2, r6, r4 -; CHECK-NEXT: vmov r4, r5, d22 -; CHECK-NEXT: vmov r6, r7, d0 +; CHECK-NEXT: vmov r5, r6, d28 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: vdup.32 d3, r2 -; CHECK-NEXT: subs r4, r6, r4 -; CHECK-NEXT: sbcs r4, r7, r5 -; CHECK-NEXT: vmov r2, r5, d27 -; CHECK-NEXT: vmov r6, r7, d25 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mvnne r4, #0 -; CHECK-NEXT: vdup.32 d2, r4 +; CHECK-NEXT: movlt r2, lr +; CHECK-NEXT: vdup.32 d31, r2 +; CHECK-NEXT: vmov r2, r4, d24 +; CHECK-NEXT: subs r2, r5, r2 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: sbcs r2, r6, r4 +; CHECK-NEXT: vmov r6, r7, d23 +; CHECK-NEXT: vmov r2, r4, d27 +; CHECK-NEXT: movlt r5, #1 +; CHECK-NEXT: movlt r5, lr +; CHECK-NEXT: vdup.32 d30, r5 ; CHECK-NEXT: subs r2, r6, r2 -; CHECK-NEXT: sbcs r2, r7, r5 -; CHECK-NEXT: vmov r6, r7, d24 +; CHECK-NEXT: sbcs r2, r7, r4 +; CHECK-NEXT: vmov r6, r7, d22 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: vdup.32 d5, r2 -; CHECK-NEXT: vmov r2, r5, d26 +; CHECK-NEXT: movlt r2, lr +; CHECK-NEXT: vdup.32 d7, r2 +; CHECK-NEXT: vmov r2, r4, d26 ; CHECK-NEXT: subs r2, r6, r2 -; CHECK-NEXT: sbcs r2, r7, r5 -; CHECK-NEXT: vmov r6, r7, d19 +; CHECK-NEXT: sbcs r2, r7, r4 +; CHECK-NEXT: vmov r6, r7, d9 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: 
vdup.32 d4, r2 -; CHECK-NEXT: vmov r2, r5, d21 +; CHECK-NEXT: movlt r2, lr +; CHECK-NEXT: vdup.32 d6, r2 +; CHECK-NEXT: vmov r2, r4, d11 +; CHECK-NEXT: vbif q11, q13, q3 ; CHECK-NEXT: subs r2, r6, r2 -; CHECK-NEXT: sbcs r2, r7, r5 -; CHECK-NEXT: vmov r6, r7, d18 +; CHECK-NEXT: sbcs r2, r7, r4 +; CHECK-NEXT: vmov r6, r7, d8 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: vdup.32 d31, r2 -; CHECK-NEXT: vmov r2, r5, d20 +; CHECK-NEXT: movlt r2, lr +; CHECK-NEXT: vdup.32 d13, r2 +; CHECK-NEXT: vmov r2, r4, d10 ; CHECK-NEXT: subs r2, r6, r2 -; CHECK-NEXT: sbcs r2, r7, r5 +; CHECK-NEXT: vmov r5, r6, d2 +; CHECK-NEXT: sbcs r2, r7, r4 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: vdup.32 d30, r2 -; CHECK-NEXT: vbif q9, q10, q15 -; CHECK-NEXT: vld1.64 {d30, d31}, [r8:128]! -; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128] -; CHECK-NEXT: vbit q13, q12, q2 -; CHECK-NEXT: vld1.64 {d24, d25}, [lr:128] -; CHECK-NEXT: vmov r2, r7, d21 -; CHECK-NEXT: vbit q11, q0, q1 -; CHECK-NEXT: mov lr, #0 -; CHECK-NEXT: vmov r6, r5, d25 -; CHECK-NEXT: vld1.64 {d4, d5}, [r1:128]! -; CHECK-NEXT: vld1.64 {d6, d7}, [r0:128]! 
-; CHECK-NEXT: vld1.64 {d0, d1}, [r1:128] -; CHECK-NEXT: vld1.64 {d2, d3}, [r0:128] -; CHECK-NEXT: subs r1, r6, r2 -; CHECK-NEXT: vmov r0, r6, d2 -; CHECK-NEXT: sbcs r1, r5, r7 -; CHECK-NEXT: vmov r2, r7, d0 -; CHECK-NEXT: movlt lr, #1 -; CHECK-NEXT: cmp lr, #0 -; CHECK-NEXT: mvnne lr, #0 -; CHECK-NEXT: subs r0, r0, r2 -; CHECK-NEXT: sbcs r0, r6, r7 -; CHECK-NEXT: vmov r2, r7, d30 -; CHECK-NEXT: vmov r6, r5, d28 +; CHECK-NEXT: movlt r2, lr +; CHECK-NEXT: vdup.32 d12, r2 +; CHECK-NEXT: vmov r2, r4, d1 +; CHECK-NEXT: vorr q13, q6, q6 +; CHECK-NEXT: vbsl q13, q4, q5 +; CHECK-NEXT: vbit q12, q14, q15 +; CHECK-NEXT: vld1.64 {d28, d29}, [r8:128] +; CHECK-NEXT: vmov r0, r1, d29 +; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: vmov r2, r7, d4 +; CHECK-NEXT: sbcs r0, r4, r1 +; CHECK-NEXT: vmov r1, r4, d3 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: subs r2, r6, r2 -; CHECK-NEXT: sbcs r2, r5, r7 -; CHECK-NEXT: vmov r7, r6, d31 -; CHECK-NEXT: vmov r5, r4, d29 +; CHECK-NEXT: movlt r0, lr +; CHECK-NEXT: subs r2, r2, r5 +; CHECK-NEXT: sbcs r2, r7, r6 +; CHECK-NEXT: vmov r5, r6, d5 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: subs r7, r5, r7 -; CHECK-NEXT: vmov r5, r1, d7 -; CHECK-NEXT: sbcs r7, r4, r6 +; CHECK-NEXT: movlt r2, lr +; CHECK-NEXT: subs r1, r5, r1 +; CHECK-NEXT: sbcs r4, r6, r4 +; CHECK-NEXT: vmov r1, r5, d28 ; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: vmov r7, r6, d5 ; CHECK-NEXT: movlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mvnne r4, #0 -; CHECK-NEXT: subs r5, r5, r7 -; CHECK-NEXT: sbcs r1, r1, r6 -; CHECK-NEXT: vmov r6, r7, d6 -; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: movlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mvnne r1, #0 -; CHECK-NEXT: vdup.32 d9, r1 -; CHECK-NEXT: vmov r1, r5, d4 -; CHECK-NEXT: subs r1, r6, r1 -; CHECK-NEXT: sbcs r1, r7, r5 -; CHECK-NEXT: vmov r6, r7, d3 -; CHECK-NEXT: mov r1, #0 -; 
CHECK-NEXT: movlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mvnne r1, #0 -; CHECK-NEXT: vdup.32 d8, r1 -; CHECK-NEXT: vmov r1, r5, d1 -; CHECK-NEXT: vbit q2, q3, q4 -; CHECK-NEXT: vdup.32 d9, r4 -; CHECK-NEXT: vdup.32 d8, r2 -; CHECK-NEXT: subs r1, r6, r1 -; CHECK-NEXT: sbcs r1, r7, r5 -; CHECK-NEXT: vmov r5, r6, d24 -; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: movlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mvnne r1, #0 -; CHECK-NEXT: vdup.32 d7, r1 -; CHECK-NEXT: vmov r1, r4, d20 -; CHECK-NEXT: vdup.32 d6, r0 -; CHECK-NEXT: subs r1, r5, r1 -; CHECK-NEXT: mov r1, r3 -; CHECK-NEXT: sbcs r0, r6, r4 -; CHECK-NEXT: vst1.64 {d16, d17}, [r1:128]! -; CHECK-NEXT: vorr q8, q4, q4 -; CHECK-NEXT: movlt r12, #1 -; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: vbsl q8, q14, q15 -; CHECK-NEXT: vdup.32 d29, lr -; CHECK-NEXT: vorr q15, q3, q3 -; CHECK-NEXT: mvnne r12, #0 -; CHECK-NEXT: vdup.32 d28, r12 +; CHECK-NEXT: movlt r4, lr +; CHECK-NEXT: vdup.32 d31, r4 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: vdup.32 d30, r2 +; CHECK-NEXT: vmov r2, r6, d0 +; CHECK-NEXT: vst1.64 {d20, d21}, [r4:128]! +; CHECK-NEXT: vbsl q15, q2, q1 +; CHECK-NEXT: vdup.32 d21, r0 ; CHECK-NEXT: add r0, r3, #64 -; CHECK-NEXT: vbsl q15, q1, q0 -; CHECK-NEXT: vst1.64 {d26, d27}, [r1:128]! -; CHECK-NEXT: vbit q10, q12, q14 -; CHECK-NEXT: vst1.64 {d18, d19}, [r0:128]! -; CHECK-NEXT: vst1.64 {d22, d23}, [r0:128]! -; CHECK-NEXT: vst1.64 {d4, d5}, [r1:128]! +; CHECK-NEXT: vst1.64 {d18, d19}, [r4:128]! +; CHECK-NEXT: subs r1, r2, r1 +; CHECK-NEXT: sbcs r1, r6, r5 +; CHECK-NEXT: movlt r12, #1 +; CHECK-NEXT: movlt r12, lr +; CHECK-NEXT: vdup.32 d20, r12 +; CHECK-NEXT: vorr q9, q10, q10 +; CHECK-NEXT: vbsl q9, q0, q14 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]! -; CHECK-NEXT: vst1.64 {d30, d31}, [r1:128] -; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128] -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: vst1.64 {d22, d23}, [r0:128]! +; CHECK-NEXT: vst1.64 {d26, d27}, [r4:128]! 
+; CHECK-NEXT: vst1.64 {d24, d25}, [r0:128]! +; CHECK-NEXT: vst1.64 {d30, d31}, [r4:128] +; CHECK-NEXT: vst1.64 {d18, d19}, [r0:128] +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEXT: mov pc, lr ; COST: func_blend20 ; COST: cost of 0 {{.*}} icmp diff --git a/llvm/test/CodeGen/ARM/wide-compares.ll b/llvm/test/CodeGen/ARM/wide-compares.ll index 9acf8d249ddf1..38f31682cc2a7 100644 --- a/llvm/test/CodeGen/ARM/wide-compares.ll +++ b/llvm/test/CodeGen/ARM/wide-compares.ll @@ -8,9 +8,9 @@ define i32 @test_slt1(i64 %a, i64 %b) { ; CHECK-ARM-LABEL: test_slt1: ; CHECK-ARM: @ %bb.0: @ %entry ; CHECK-ARM-NEXT: subs r0, r0, r2 -; CHECK-ARM-NEXT: mov r12, #2 +; CHECK-ARM-NEXT: mov r12, #1 ; CHECK-ARM-NEXT: sbcs r0, r1, r3 -; CHECK-ARM-NEXT: movwlt r12, #1 +; CHECK-ARM-NEXT: movwge r12, #2 ; CHECK-ARM-NEXT: mov r0, r12 ; CHECK-ARM-NEXT: bx lr ; @@ -41,10 +41,10 @@ define i32 @test_slt1(i64 %a, i64 %b) { ; CHECK-THUMB2-LABEL: test_slt1: ; CHECK-THUMB2: @ %bb.0: @ %entry ; CHECK-THUMB2-NEXT: subs r0, r0, r2 -; CHECK-THUMB2-NEXT: mov.w r12, #2 +; CHECK-THUMB2-NEXT: mov.w r12, #1 ; CHECK-THUMB2-NEXT: sbcs.w r0, r1, r3 -; CHECK-THUMB2-NEXT: it lt -; CHECK-THUMB2-NEXT: movlt.w r12, #1 +; CHECK-THUMB2-NEXT: it ge +; CHECK-THUMB2-NEXT: movge.w r12, #2 ; CHECK-THUMB2-NEXT: mov r0, r12 ; CHECK-THUMB2-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll index 742f2a75a1aa8..ebdcd7c43cab0 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -675,53 +675,53 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: vmov r12, lr, d9 +; CHECK-NEXT: vmov r12, lr, d8 ; CHECK-NEXT: subs.w r5, 
r0, #-1 ; CHECK-NEXT: mvn r4, #-2147483648 ; CHECK-NEXT: sbcs.w r5, r1, r4 ; CHECK-NEXT: sbcs r5, r2, #0 -; CHECK-NEXT: mov.w r7, #-2147483648 +; CHECK-NEXT: mov.w r6, #-1 ; CHECK-NEXT: sbcs r5, r3, #0 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge.w r0, #-1 ; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: csel r3, r5, r3, ge +; CHECK-NEXT: csel r2, r5, r2, ge ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r3, r3, r5, ne -; CHECK-NEXT: csel r2, r2, r5, ne -; CHECK-NEXT: mov.w r5, #-1 +; CHECK-NEXT: mov.w r5, #-2147483648 ; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r7, r1 -; CHECK-NEXT: sbcs.w r2, r5, r2 -; CHECK-NEXT: sbcs.w r2, r5, r3 -; CHECK-NEXT: csel r8, r1, r7, lt -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r9, r0, r1, ne +; CHECK-NEXT: rsbs r7, r0, #0 +; CHECK-NEXT: sbcs.w r7, r5, r1 +; CHECK-NEXT: sbcs.w r2, r6, r2 +; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: csel r8, r1, r5, lt +; CHECK-NEXT: csel r9, r2, r0, ge ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs.w r6, r0, #-1 -; CHECK-NEXT: sbcs.w r6, r1, r4 -; CHECK-NEXT: sbcs r6, r2, #0 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: csel r3, r3, r6, ne -; CHECK-NEXT: csel r2, r2, r6, ne +; CHECK-NEXT: subs.w r7, r0, #-1 +; CHECK-NEXT: sbcs.w r7, r1, r4 +; CHECK-NEXT: sbcs r7, r2, #0 +; CHECK-NEXT: sbcs r7, r3, #0 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge.w r0, #-1 +; CHECK-NEXT: cset r7, lt +; CHECK-NEXT: csel r3, r7, r3, ge +; CHECK-NEXT: csel r2, r7, r2, ge +; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r7, r1 -; CHECK-NEXT: sbcs.w r2, r5, r2 -; CHECK-NEXT: sbcs.w r2, r5, r3 +; CHECK-NEXT: rsbs r7, r0, #0 +; CHECK-NEXT: sbcs.w r7, r5, r1 +; CHECK-NEXT: sbcs.w r2, r6, r2 +; 
CHECK-NEXT: sbcs.w r2, r6, r3 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: csel r1, r1, r7, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r9, r0 -; CHECK-NEXT: vmov q0[3], q0[1], r8, r1 +; CHECK-NEXT: csel r1, r1, r5, lt +; CHECK-NEXT: csel r0, r2, r0, ge +; CHECK-NEXT: vmov q0[2], q0[0], r0, r9 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} @@ -749,18 +749,16 @@ define arm_aapcs_vfpcc <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r4, r1, r2, ne -; CHECK-NEXT: csel r5, r0, r2, ne +; CHECK-NEXT: csel r4, r2, r1, hs +; CHECK-NEXT: csel r5, r2, r0, hs ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne +; CHECK-NEXT: csel r0, r2, r0, hs +; CHECK-NEXT: csel r1, r2, r1, hs ; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9} @@ -788,41 +786,39 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NEXT: sbcs r4, r3, #0 ; CHECK-NEXT: mov.w r8, #1 ; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: csel r3, r4, r3, ge +; CHECK-NEXT: csel r1, r4, r1, ge +; CHECK-NEXT: csel r0, r4, r0, ge ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r4, ne -; CHECK-NEXT: csel r3, r3, r4, ne -; CHECK-NEXT: csel r1, r1, r4, ne ; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: rsbs r5, r0, #0 +; CHECK-NEXT: rsbs r6, r0, #0 ; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: sbcs.w r5, r4, r1 +; CHECK-NEXT: sbcs.w r6, r4, r1 ; CHECK-NEXT: sbcs.w r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r4, r3 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r5, 
r1, r2, ne -; CHECK-NEXT: csel r7, r0, r2, ne +; CHECK-NEXT: csel r6, r2, r1, ge +; CHECK-NEXT: csel r7, r2, r0, ge ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r6, r2, #1 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: csel r3, r3, r6, ne -; CHECK-NEXT: csel r1, r1, r6, ne +; CHECK-NEXT: subs r5, r2, #1 +; CHECK-NEXT: sbcs r5, r3, #0 +; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: csel r3, r5, r3, ge +; CHECK-NEXT: csel r1, r5, r1, ge +; CHECK-NEXT: csel r0, r5, r0, ge +; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r4, r1 +; CHECK-NEXT: rsbs r5, r0, #0 +; CHECK-NEXT: sbcs.w r5, r4, r1 ; CHECK-NEXT: sbcs.w r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r4, r3 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne +; CHECK-NEXT: csel r0, r2, r0, ge +; CHECK-NEXT: csel r1, r2, r1, ge ; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r6 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: @@ -840,51 +836,51 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: vmov r0, r9, d0 +; CHECK-NEXT: vmov r8, r0, d0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: subs.w r7, r0, #-1 ; CHECK-NEXT: mvn r5, #-2147483648 ; CHECK-NEXT: sbcs.w r7, r1, r5 -; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: mov.w r6, #-2147483648 ; CHECK-NEXT: sbcs r7, r2, #0 ; CHECK-NEXT: sbcs r7, r3, #0 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge.w r0, #-1 ; CHECK-NEXT: cset r7, lt +; CHECK-NEXT: csel r3, r7, r3, ge +; CHECK-NEXT: csel r2, r7, r2, ge ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r6, 
ne -; CHECK-NEXT: csel r3, r3, r7, ne -; CHECK-NEXT: csel r2, r2, r7, ne +; CHECK-NEXT: mov.w r7, #-1 ; CHECK-NEXT: csel r1, r1, r5, ne ; CHECK-NEXT: rsbs r4, r0, #0 -; CHECK-NEXT: mov.w r7, #-2147483648 -; CHECK-NEXT: sbcs.w r4, r7, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 -; CHECK-NEXT: csel r8, r1, r7, lt -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r10, r0, r1, ne -; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: sbcs.w r4, r6, r1 +; CHECK-NEXT: sbcs.w r2, r7, r2 +; CHECK-NEXT: sbcs.w r2, r7, r3 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: csel r9, r1, r6, lt +; CHECK-NEXT: csel r10, r2, r0, ge +; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: subs.w r4, r0, #-1 ; CHECK-NEXT: sbcs.w r4, r1, r5 ; CHECK-NEXT: sbcs r4, r2, #0 ; CHECK-NEXT: sbcs r4, r3, #0 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge.w r0, #-1 ; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: csel r3, r4, r3, ge +; CHECK-NEXT: csel r2, r4, r2, ge ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: csel r3, r3, r4, ne -; CHECK-NEXT: csel r2, r2, r4, ne ; CHECK-NEXT: csel r1, r1, r5, ne ; CHECK-NEXT: rsbs r5, r0, #0 -; CHECK-NEXT: sbcs.w r5, r7, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: sbcs.w r5, r6, r1 +; CHECK-NEXT: sbcs.w r2, r7, r2 +; CHECK-NEXT: sbcs.w r2, r7, r3 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: csel r1, r1, r7, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r10, r0 -; CHECK-NEXT: vmov q0[3], q0[1], r8, r1 +; CHECK-NEXT: csel r1, r1, r6, lt +; CHECK-NEXT: csel r0, r2, r0, ge +; CHECK-NEXT: vmov q0[2], q0[0], r0, r10 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r9 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> @@ -906,17 +902,15 @@ define arm_aapcs_vfpcc <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo 
-; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r6, r0, r2, ne +; CHECK-NEXT: csel r6, r2, r0, hs ; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: csel r5, r1, r2, ne +; CHECK-NEXT: csel r5, r2, r1, hs ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne +; CHECK-NEXT: csel r0, r2, r0, hs +; CHECK-NEXT: csel r1, r2, r1, hs ; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 ; CHECK-NEXT: pop {r4, r5, r6, pc} @@ -938,41 +932,39 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-NEXT: subs r4, r2, #1 ; CHECK-NEXT: mov.w r8, #1 ; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov.w r6, #0 ; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: csel r3, r4, r3, ge +; CHECK-NEXT: csel r1, r4, r1, ge +; CHECK-NEXT: csel r0, r4, r0, ge ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r4, ne -; CHECK-NEXT: csel r3, r3, r4, ne -; CHECK-NEXT: csel r1, r1, r4, ne ; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: rsbs r4, r0, #0 -; CHECK-NEXT: sbcs.w r4, r6, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: rsbs r6, r0, #0 +; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: sbcs.w r6, r4, r1 +; CHECK-NEXT: sbcs.w r2, r4, r2 +; CHECK-NEXT: sbcs.w r2, r4, r3 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r7, r0, r2, ne +; CHECK-NEXT: csel r7, r2, r0, ge ; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: csel r4, r1, r2, ne +; CHECK-NEXT: csel r6, r2, r1, ge ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: subs r5, r2, #1 ; CHECK-NEXT: sbcs r5, r3, #0 ; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: csel r3, r5, r3, ge +; CHECK-NEXT: csel r1, r5, r1, ge +; CHECK-NEXT: csel r0, r5, r0, ge ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: csel r3, r3, r5, ne -; CHECK-NEXT: csel r1, r1, r5, ne ; CHECK-NEXT: csel r2, r2, r8, ne ; CHECK-NEXT: rsbs r5, r0, #0 
-; CHECK-NEXT: sbcs.w r5, r6, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: sbcs.w r5, r4, r1 +; CHECK-NEXT: sbcs.w r2, r4, r2 +; CHECK-NEXT: sbcs.w r2, r4, r3 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne +; CHECK-NEXT: csel r0, r2, r0, ge +; CHECK-NEXT: csel r1, r2, r1, ge ; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r6 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> @@ -1056,19 +1048,17 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NEXT: sbcs.w r2, r5, r2 ; CHECK-NEXT: sbcs.w r2, r5, r3 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r6, r0, r2, ne +; CHECK-NEXT: csel r6, r2, r0, ge ; CHECK-NEXT: vmov.u16 r0, q4[0] -; CHECK-NEXT: csel r7, r1, r2, ne +; CHECK-NEXT: csel r7, r2, r1, ge ; CHECK-NEXT: bl __fixhfti ; CHECK-NEXT: rsbs r4, r0, #0 ; CHECK-NEXT: sbcs.w r4, r5, r1 ; CHECK-NEXT: sbcs.w r2, r5, r2 ; CHECK-NEXT: sbcs.w r2, r5, r3 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne +; CHECK-NEXT: csel r0, r2, r0, ge +; CHECK-NEXT: csel r1, r2, r1, ge ; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 ; CHECK-NEXT: vpop {d8, d9} @@ -1732,51 +1722,51 @@ define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: vmov r12, lr, d8 -; CHECK-NEXT: subs.w r5, r0, #-1 -; CHECK-NEXT: mvn r4, #-2147483648 -; CHECK-NEXT: sbcs.w r5, r1, r4 -; CHECK-NEXT: sbcs r5, r2, #0 -; CHECK-NEXT: mov.w r6, #-1 -; CHECK-NEXT: sbcs r5, r3, #0 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: csel r3, r3, r5, ne -; CHECK-NEXT: csel r2, r2, r5, ne 
-; CHECK-NEXT: csel r1, r1, r4, ne -; CHECK-NEXT: rsbs r7, r0, #0 -; CHECK-NEXT: mov.w r5, #-2147483648 -; CHECK-NEXT: sbcs.w r7, r5, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r9, r0, r2, ne -; CHECK-NEXT: csel r8, r1, r5, ne -; CHECK-NEXT: mov r0, r12 -; CHECK-NEXT: mov r1, lr -; CHECK-NEXT: bl __fixdfti +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: vmov r12, r1, d8 ; CHECK-NEXT: subs.w r7, r0, #-1 -; CHECK-NEXT: sbcs.w r7, r1, r4 +; CHECK-NEXT: mvn r8, #-2147483648 +; CHECK-NEXT: sbcs.w r7, r4, r8 +; CHECK-NEXT: mov.w r9, #-2147483648 ; CHECK-NEXT: sbcs r7, r2, #0 +; CHECK-NEXT: mov.w r5, #-1 ; CHECK-NEXT: sbcs r7, r3, #0 +; CHECK-NEXT: itt ge +; CHECK-NEXT: mvnge r4, #-2147483648 +; CHECK-NEXT: movge.w r0, #-1 ; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: csel r3, r3, r7, ne -; CHECK-NEXT: csel r2, r2, r7, ne -; CHECK-NEXT: csel r1, r1, r4, ne +; CHECK-NEXT: csel r3, r7, r3, ge +; CHECK-NEXT: csel r2, r7, r2, ge ; CHECK-NEXT: rsbs r7, r0, #0 -; CHECK-NEXT: sbcs.w r7, r5, r1 -; CHECK-NEXT: sbcs.w r2, r6, r2 -; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: sbcs.w r7, r9, r4 +; CHECK-NEXT: sbcs.w r2, r5, r2 +; CHECK-NEXT: sbcs.w r2, r5, r3 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge.w r4, #-2147483648 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r9 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r8 +; CHECK-NEXT: csel r7, r2, r0, ge +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: bl __fixdfti +; CHECK-NEXT: subs.w r6, r0, #-1 +; CHECK-NEXT: sbcs.w r6, r1, r8 +; CHECK-NEXT: sbcs r6, r2, #0 +; CHECK-NEXT: sbcs r6, r3, #0 +; CHECK-NEXT: itt ge +; CHECK-NEXT: mvnge r1, #-2147483648 +; CHECK-NEXT: movge.w r0, #-1 +; CHECK-NEXT: cset r6, lt +; CHECK-NEXT: csel r3, r6, r3, ge +; CHECK-NEXT: csel r2, r6, r2, ge +; CHECK-NEXT: rsbs 
r6, r0, #0 +; CHECK-NEXT: sbcs.w r6, r9, r1 +; CHECK-NEXT: sbcs.w r2, r5, r2 +; CHECK-NEXT: sbcs.w r2, r5, r3 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge.w r1, #-2147483648 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: csel r0, r2, r0, ge +; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} @@ -1802,18 +1792,16 @@ define arm_aapcs_vfpcc <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r4, r1, r2, ne -; CHECK-NEXT: csel r5, r0, r2, ne +; CHECK-NEXT: csel r4, r2, r1, hs +; CHECK-NEXT: csel r5, r2, r0, hs ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne +; CHECK-NEXT: csel r0, r2, r0, hs +; CHECK-NEXT: csel r1, r2, r1, hs ; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9} @@ -1839,10 +1827,9 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r5, r0, r2, ne -; CHECK-NEXT: csel r0, r3, r2, ne -; CHECK-NEXT: csel r4, r1, r2, ne +; CHECK-NEXT: csel r5, r2, r0, ge +; CHECK-NEXT: csel r0, r2, r3, ge +; CHECK-NEXT: csel r4, r2, r1, ge ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: itt mi ; CHECK-NEXT: movmi r4, #0 @@ -1853,10 +1840,9 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r2, r3, r2, ne +; CHECK-NEXT: 
csel r1, r2, r1, ge +; CHECK-NEXT: csel r0, r2, r0, ge +; CHECK-NEXT: csel r2, r2, r3, ge ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: itt mi ; CHECK-NEXT: movmi r0, #0 @@ -1876,54 +1862,58 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: stest_f32i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: vmov r8, r0, d0 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vmov r5, r0, d0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs.w r7, r0, #-1 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: sbcs.w r7, r1, r5 -; CHECK-NEXT: mov.w r6, #-2147483648 -; CHECK-NEXT: sbcs r7, r2, #0 -; CHECK-NEXT: sbcs r7, r3, #0 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r3, r3, r7, ne -; CHECK-NEXT: csel r2, r2, r7, ne -; CHECK-NEXT: mov.w r7, #-1 -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: rsbs r4, r0, #0 -; CHECK-NEXT: sbcs.w r4, r6, r1 -; CHECK-NEXT: sbcs.w r2, r7, r2 -; CHECK-NEXT: sbcs.w r2, r7, r3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r10, r0, r2, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: csel r9, r1, r6, ne +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: subs.w r1, r0, #-1 +; CHECK-NEXT: mvn r8, #-2147483648 +; CHECK-NEXT: sbcs.w r1, r4, r8 +; CHECK-NEXT: sbcs r1, r2, #0 +; CHECK-NEXT: mov.w r9, #-2147483648 +; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: itt ge +; CHECK-NEXT: mvnge r4, #-2147483648 +; CHECK-NEXT: movge.w r0, #-1 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: csel r3, r1, r3, ge +; CHECK-NEXT: csel r1, r1, r2, ge +; CHECK-NEXT: rsbs r2, r0, #0 +; CHECK-NEXT: sbcs.w r2, r9, r4 +; CHECK-NEXT: sbcs.w r1, r6, r1 +; CHECK-NEXT: sbcs.w r1, r6, r3 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge.w r4, 
#-2147483648 +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: csel r7, r1, r0, ge +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs.w r4, r0, #-1 -; CHECK-NEXT: sbcs.w r4, r1, r5 -; CHECK-NEXT: sbcs r4, r2, #0 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: csel r3, r3, r4, ne -; CHECK-NEXT: csel r2, r2, r4, ne -; CHECK-NEXT: csel r1, r1, r5, ne +; CHECK-NEXT: subs.w r5, r0, #-1 +; CHECK-NEXT: sbcs.w r5, r1, r8 +; CHECK-NEXT: sbcs r5, r2, #0 +; CHECK-NEXT: sbcs r5, r3, #0 +; CHECK-NEXT: itt ge +; CHECK-NEXT: mvnge r1, #-2147483648 +; CHECK-NEXT: movge.w r0, #-1 +; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: csel r3, r5, r3, ge +; CHECK-NEXT: csel r2, r5, r2, ge ; CHECK-NEXT: rsbs r5, r0, #0 -; CHECK-NEXT: sbcs.w r5, r6, r1 -; CHECK-NEXT: sbcs.w r2, r7, r2 -; CHECK-NEXT: sbcs.w r2, r7, r3 +; CHECK-NEXT: sbcs.w r5, r9, r1 +; CHECK-NEXT: sbcs.w r2, r6, r2 +; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge.w r1, #-2147483648 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r6, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r10 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r9 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: csel r0, r2, r0, ge +; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -1942,17 +1932,15 @@ define arm_aapcs_vfpcc <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r6, r0, r2, ne +; CHECK-NEXT: csel r6, r2, r0, hs ; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: csel r5, r1, r2, ne +; CHECK-NEXT: csel r5, 
r2, r1, hs ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r2, ne +; CHECK-NEXT: csel r0, r2, r0, hs +; CHECK-NEXT: csel r1, r2, r1, hs ; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 ; CHECK-NEXT: pop {r4, r5, r6, pc} @@ -1973,29 +1961,27 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r6, r0, r2, ne -; CHECK-NEXT: csel r0, r3, r2, ne -; CHECK-NEXT: csel r5, r1, r2, ne +; CHECK-NEXT: csel r5, r2, r0, ge +; CHECK-NEXT: csel r0, r2, r3, ge +; CHECK-NEXT: csel r6, r2, r1, ge ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: itt mi -; CHECK-NEXT: movmi r5, #0 ; CHECK-NEXT: movmi r6, #0 +; CHECK-NEXT: movmi r5, #0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r2, r3, r2, ne +; CHECK-NEXT: csel r1, r2, r1, ge +; CHECK-NEXT: csel r0, r2, r0, ge +; CHECK-NEXT: csel r2, r2, r3, ge ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: itt mi ; CHECK-NEXT: movmi r0, #0 ; CHECK-NEXT: movmi r1, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r6 ; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index 29b56639bd769..e94c692e1e2a9 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -94,7 +94,8 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(ptr nocapture readonly 
%pSrcA, ptr no ; CHECK-NEXT: csel r4, r5, r0, lt ; CHECK-NEXT: subs r5, r2, r3 ; CHECK-NEXT: sbcs r4, r4, #0 -; CHECK-NEXT: csel r2, r2, r3, lt +; CHECK-NEXT: it ge +; CHECK-NEXT: mvnge r2, #-2147483648 ; CHECK-NEXT: str r2, [r10], #4 ; CHECK-NEXT: le lr, .LBB0_7 ; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup @@ -323,8 +324,9 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: csel r1, r1, r0, lt ; CHECK-NEXT: subs r5, r4, r2 ; CHECK-NEXT: sbcs r1, r1, #0 -; CHECK-NEXT: csel r1, r4, r2, lt -; CHECK-NEXT: str r1, [r11], #4 +; CHECK-NEXT: it ge +; CHECK-NEXT: mvnge r4, #-2147483648 +; CHECK-NEXT: str r4, [r11], #4 ; CHECK-NEXT: le lr, .LBB1_7 ; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #16 diff --git a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll index bf0d92b5e0303..08f6d67481567 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll @@ -499,38 +499,36 @@ define <2 x i64> @large_i128(<2 x double> %x) { ; CHECK-NEXT: sbcs r7, r3, #0 ; CHECK-NEXT: mov.w r4, #0 ; CHECK-NEXT: cset r7, lt +; CHECK-NEXT: csel r3, r7, r3, ge +; CHECK-NEXT: csel r1, r7, r1, ge +; CHECK-NEXT: csel r0, r7, r0, ge ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: csel r3, r3, r7, ne -; CHECK-NEXT: csel r1, r1, r7, ne ; CHECK-NEXT: csel r2, r2, r9, ne ; CHECK-NEXT: rsbs r7, r0, #0 ; CHECK-NEXT: sbcs.w r7, r4, r1 ; CHECK-NEXT: sbcs.w r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r4, r3 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r6, r0, r2, ne -; CHECK-NEXT: csel r7, r1, r2, ne +; CHECK-NEXT: csel r6, r2, r0, ge +; CHECK-NEXT: csel r7, r2, r1, ge ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: subs r5, r2, #1 ; CHECK-NEXT: sbcs r5, r3, #0 ; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: csel r3, r5, r3, ge +; CHECK-NEXT: csel r1, r5, r1, ge +; CHECK-NEXT: csel r0, r5, r0, ge ; 
CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: csel r3, r3, r5, ne -; CHECK-NEXT: csel r1, r1, r5, ne ; CHECK-NEXT: csel r2, r2, r9, ne ; CHECK-NEXT: rsbs r5, r0, #0 ; CHECK-NEXT: sbcs.w r5, r4, r1 ; CHECK-NEXT: sbcs.w r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r4, r3 ; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r2, r0, r3, ne -; CHECK-NEXT: csel r3, r1, r3, ne +; CHECK-NEXT: csel r2, r3, r0, ge +; CHECK-NEXT: csel r3, r3, r1, ge ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: add sp, #4